1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 struct Lisp_Char_Byte_Table* cpt
299 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
300 Lisp_Object ret = cpt->property [ch >> 24];
302 if (CHAR_BYTE_TABLE_P (ret))
303 cpt = XCHAR_BYTE_TABLE (ret);
307 ret = cpt->property [(unsigned char) (ch >> 16)];
308 if (CHAR_BYTE_TABLE_P (ret))
309 cpt = XCHAR_BYTE_TABLE (ret);
313 ret = cpt->property [(unsigned char) (ch >> 8)];
314 if (CHAR_BYTE_TABLE_P (ret))
315 cpt = XCHAR_BYTE_TABLE (ret);
319 return cpt->property [(unsigned char) ch];
323 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
325 struct Lisp_Char_Byte_Table* cpt1
326 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
327 Lisp_Object ret = cpt1->property[ch >> 24];
329 if (CHAR_BYTE_TABLE_P (ret))
331 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
333 ret = cpt2->property[(unsigned char)(ch >> 16)];
334 if (CHAR_BYTE_TABLE_P (ret))
336 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
338 ret = cpt3->property[(unsigned char)(ch >> 8)];
339 if (CHAR_BYTE_TABLE_P (ret))
341 struct Lisp_Char_Byte_Table* cpt4
342 = XCHAR_BYTE_TABLE (ret);
344 cpt4->property[(unsigned char)ch] = value;
346 else if (!EQ (ret, value))
348 Lisp_Object cpt4 = make_char_byte_table (ret);
350 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
351 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
354 else if (!EQ (ret, value))
356 Lisp_Object cpt3 = make_char_byte_table (ret);
357 Lisp_Object cpt4 = make_char_byte_table (ret);
359 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
360 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
362 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
365 else if (!EQ (ret, value))
367 Lisp_Object cpt2 = make_char_byte_table (ret);
368 Lisp_Object cpt3 = make_char_byte_table (ret);
369 Lisp_Object cpt4 = make_char_byte_table (ret);
371 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
372 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
373 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
374 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
379 Lisp_Object Vcharacter_attribute_table;
381 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
382 Return the alist of attributes of CHARACTER.
386 CHECK_CHAR (character);
387 return Fcopy_alist (get_char_code_table (XCHAR (character),
388 Vcharacter_attribute_table));
391 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
392 Return the value of CHARACTER's ATTRIBUTE.
394 (character, attribute))
397 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
403 if (!NILP (ccs = Ffind_charset (attribute)))
406 return Fcdr (Fassq (attribute, ret));
410 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
413 Emchar char_code = XCHAR (character);
415 = get_char_code_table (char_code, Vcharacter_attribute_table);
418 cell = Fassq (attribute, ret);
422 ret = Fcons (Fcons (attribute, value), ret);
424 else if (!EQ (Fcdr (cell), value))
426 Fsetcdr (cell, value);
428 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
432 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
433 Store CHARACTER's ATTRIBUTE with VALUE.
435 (character, attribute, value))
439 ccs = Ffind_charset (attribute);
443 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
448 /* ad-hoc method for `ascii' */
449 if ((XCHARSET_CHARS (ccs) == 94) &&
450 (XCHARSET_BYTE_OFFSET (ccs) != 33))
451 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
453 ccs_len = XCHARSET_CHARS (ccs);
456 signal_simple_error ("Invalid value for coded-charset",
460 rest = Fget_char_attribute (character, attribute);
467 Lisp_Object ei = Fcar (rest);
469 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
470 nv = XVECTOR_DATA(v)[i];
477 XVECTOR_DATA(v)[i] = Qnil;
478 v = XCHARSET_DECODING_TABLE (ccs);
483 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
490 Lisp_Object ei = Fcar (rest);
493 signal_simple_error ("Invalid value for coded-charset",
495 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
496 nv = XVECTOR_DATA(v)[i];
502 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
509 XVECTOR_DATA(v)[i] = character;
511 return put_char_attribute (character, attribute, value);
516 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
517 Store character's ATTRIBUTES.
521 Lisp_Object rest = attributes;
522 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
523 Lisp_Object character;
529 Lisp_Object cell = Fcar (rest);
533 signal_simple_error ("Invalid argument", attributes);
534 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
535 && XCHARSET_FINAL (ccs))
539 if (XCHARSET_DIMENSION (ccs) == 1)
541 Lisp_Object eb1 = Fcar (Fcdr (cell));
545 signal_simple_error ("Invalid argument", attributes);
547 switch (XCHARSET_CHARS (ccs))
551 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
555 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
561 else if (XCHARSET_DIMENSION (ccs) == 2)
563 Lisp_Object eb1 = Fcar (Fcdr (cell));
564 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
568 signal_simple_error ("Invalid argument", attributes);
571 signal_simple_error ("Invalid argument", attributes);
573 switch (XCHARSET_CHARS (ccs))
576 code = MIN_CHAR_94x94
577 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
578 + (b1 - 33) * 94 + (b2 - 33);
581 code = MIN_CHAR_96x96
582 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
583 + (b1 - 32) * 96 + (b2 - 32);
594 character = make_char (code);
595 goto setup_attributes;
601 else if (!INTP (code))
602 signal_simple_error ("Invalid argument", attributes);
604 character = make_char (XINT (code));
610 Lisp_Object cell = Fcar (rest);
613 signal_simple_error ("Invalid argument", attributes);
614 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
618 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
621 Lisp_Object Vutf_2000_version;
625 int leading_code_private_11;
628 Lisp_Object Qcharsetp;
630 /* Qdoc_string, Qdimension, Qchars defined in general.c */
631 Lisp_Object Qregistry, Qfinal, Qgraphic;
632 Lisp_Object Qdirection;
633 Lisp_Object Qreverse_direction_charset;
634 Lisp_Object Qleading_byte;
635 Lisp_Object Qshort_name, Qlong_name;
651 Qjapanese_jisx0208_1978,
663 Qvietnamese_viscii_lower,
664 Qvietnamese_viscii_upper,
672 Lisp_Object Ql2r, Qr2l;
674 Lisp_Object Vcharset_hash_table;
677 static Charset_ID next_allocated_leading_byte;
679 static Charset_ID next_allocated_1_byte_leading_byte;
680 static Charset_ID next_allocated_2_byte_leading_byte;
683 /* Composite characters are characters constructed by overstriking two
684 or more regular characters.
686 1) The old Mule implementation involves storing composite characters
687 in a buffer as a tag followed by all of the actual characters
688 used to make up the composite character. I think this is a bad
689 idea; it greatly complicates code that wants to handle strings
690 one character at a time because it has to deal with the possibility
691 of great big ungainly characters. It's much more reasonable to
692 simply store an index into a table of composite characters.
694 2) The current implementation only allows for 16,384 separate
695 composite characters over the lifetime of the XEmacs process.
696 This could become a potential problem if the user
697 edited lots of different files that use composite characters.
698 Due to FSF bogosity, increasing the number of allowable
699 composite characters under Mule would decrease the number
700 of possible faces that can exist. Mule already has shrunk
701 this to 2048, and further shrinkage would become uncomfortable.
702 No such problems exist in XEmacs.
704 Composite characters could be represented as 0x80 C1 C2 C3,
705 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
706 for slightly under 2^20 (one million) composite characters
707 over the XEmacs process lifetime, and you only need to
708 increase the size of a Mule character from 19 to 21 bits.
709 Or you could use 0x80 C1 C2 C3 C4, allowing for about
710 85 million (slightly over 2^26) composite characters. */
713 /************************************************************************/
714 /* Basic Emchar functions */
715 /************************************************************************/
717 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
718 string in STR. Returns the number of bytes stored.
719 Do not call this directly. Use the macro set_charptr_emchar() instead.
723 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
738 else if ( c <= 0x7ff )
740 *p++ = (c >> 6) | 0xc0;
741 *p++ = (c & 0x3f) | 0x80;
743 else if ( c <= 0xffff )
745 *p++ = (c >> 12) | 0xe0;
746 *p++ = ((c >> 6) & 0x3f) | 0x80;
747 *p++ = (c & 0x3f) | 0x80;
749 else if ( c <= 0x1fffff )
751 *p++ = (c >> 18) | 0xf0;
752 *p++ = ((c >> 12) & 0x3f) | 0x80;
753 *p++ = ((c >> 6) & 0x3f) | 0x80;
754 *p++ = (c & 0x3f) | 0x80;
756 else if ( c <= 0x3ffffff )
758 *p++ = (c >> 24) | 0xf8;
759 *p++ = ((c >> 18) & 0x3f) | 0x80;
760 *p++ = ((c >> 12) & 0x3f) | 0x80;
761 *p++ = ((c >> 6) & 0x3f) | 0x80;
762 *p++ = (c & 0x3f) | 0x80;
766 *p++ = (c >> 30) | 0xfc;
767 *p++ = ((c >> 24) & 0x3f) | 0x80;
768 *p++ = ((c >> 18) & 0x3f) | 0x80;
769 *p++ = ((c >> 12) & 0x3f) | 0x80;
770 *p++ = ((c >> 6) & 0x3f) | 0x80;
771 *p++ = (c & 0x3f) | 0x80;
774 BREAKUP_CHAR (c, charset, c1, c2);
775 lb = CHAR_LEADING_BYTE (c);
776 if (LEADING_BYTE_PRIVATE_P (lb))
777 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
779 if (EQ (charset, Vcharset_control_1))
788 /* Return the first character from a Mule-encoded string in STR,
789 assuming it's non-ASCII. Do not call this directly.
790 Use the macro charptr_emchar() instead. */
793 non_ascii_charptr_emchar (CONST Bufbyte *str)
806 else if ( b >= 0xf8 )
811 else if ( b >= 0xf0 )
816 else if ( b >= 0xe0 )
821 else if ( b >= 0xc0 )
831 for( ; len > 0; len-- )
834 ch = ( ch << 6 ) | ( b & 0x3f );
838 Bufbyte i0 = *str, i1, i2 = 0;
841 if (i0 == LEADING_BYTE_CONTROL_1)
842 return (Emchar) (*++str - 0x20);
844 if (LEADING_BYTE_PREFIX_P (i0))
849 charset = CHARSET_BY_LEADING_BYTE (i0);
850 if (XCHARSET_DIMENSION (charset) == 2)
853 return MAKE_CHAR (charset, i1, i2);
857 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
858 Do not call this directly. Use the macro valid_char_p() instead. */
862 non_ascii_valid_char_p (Emchar ch)
866 /* Must have only lowest 19 bits set */
870 f1 = CHAR_FIELD1 (ch);
871 f2 = CHAR_FIELD2 (ch);
872 f3 = CHAR_FIELD3 (ch);
878 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
879 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
880 f2 > MAX_CHAR_FIELD2_PRIVATE)
885 if (f3 != 0x20 && f3 != 0x7F)
889 NOTE: This takes advantage of the fact that
890 FIELD2_TO_OFFICIAL_LEADING_BYTE and
891 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
893 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
894 return (XCHARSET_CHARS (charset) == 96);
900 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
901 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
902 f1 > MAX_CHAR_FIELD1_PRIVATE)
904 if (f2 < 0x20 || f3 < 0x20)
907 #ifdef ENABLE_COMPOSITE_CHARS
908 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
910 if (UNBOUNDP (Fgethash (make_int (ch),
911 Vcomposite_char_char2string_hash_table,
916 #endif /* ENABLE_COMPOSITE_CHARS */
918 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
921 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
923 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
926 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
928 return (XCHARSET_CHARS (charset) == 96);
934 /************************************************************************/
935 /* Basic string functions */
936 /************************************************************************/
938 /* Copy the character pointed to by PTR into STR, assuming it's
939 non-ASCII. Do not call this directly. Use the macro
940 charptr_copy_char() instead. */
943 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
945 Bufbyte *strptr = str;
947 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
949 /* Notice fallthrough. */
951 case 6: *++strptr = *ptr++;
952 case 5: *++strptr = *ptr++;
954 case 4: *++strptr = *ptr++;
955 case 3: *++strptr = *ptr++;
956 case 2: *++strptr = *ptr;
961 return strptr + 1 - str;
965 /************************************************************************/
966 /* streams of Emchars */
967 /************************************************************************/
969 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
970 The functions below are not meant to be called directly; use
971 the macros in insdel.h. */
974 Lstream_get_emchar_1 (Lstream *stream, int ch)
976 Bufbyte str[MAX_EMCHAR_LEN];
977 Bufbyte *strptr = str;
979 str[0] = (Bufbyte) ch;
980 switch (REP_BYTES_BY_FIRST_BYTE (ch))
982 /* Notice fallthrough. */
985 ch = Lstream_getc (stream);
987 *++strptr = (Bufbyte) ch;
989 ch = Lstream_getc (stream);
991 *++strptr = (Bufbyte) ch;
994 ch = Lstream_getc (stream);
996 *++strptr = (Bufbyte) ch;
998 ch = Lstream_getc (stream);
1000 *++strptr = (Bufbyte) ch;
1002 ch = Lstream_getc (stream);
1004 *++strptr = (Bufbyte) ch;
1009 return charptr_emchar (str);
1013 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1015 Bufbyte str[MAX_EMCHAR_LEN];
1016 Bytecount len = set_charptr_emchar (str, ch);
1017 return Lstream_write (stream, str, len);
1021 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1023 Bufbyte str[MAX_EMCHAR_LEN];
1024 Bytecount len = set_charptr_emchar (str, ch);
1025 Lstream_unread (stream, str, len);
1029 /************************************************************************/
1030 /* charset object */
1031 /************************************************************************/
1034 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1036 struct Lisp_Charset *cs = XCHARSET (obj);
1038 markobj (cs->short_name);
1039 markobj (cs->long_name);
1040 markobj (cs->doc_string);
1041 markobj (cs->registry);
1042 markobj (cs->ccl_program);
1044 markobj (cs->decoding_table);
1050 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1052 struct Lisp_Charset *cs = XCHARSET (obj);
1056 error ("printing unreadable object #<charset %s 0x%x>",
1057 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1060 write_c_string ("#<charset ", printcharfun);
1061 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1062 write_c_string (" ", printcharfun);
1063 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1064 write_c_string (" ", printcharfun);
1065 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1066 write_c_string (" ", printcharfun);
1067 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1068 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1069 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1070 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1071 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1073 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1074 CHARSET_COLUMNS (cs),
1075 CHARSET_GRAPHIC (cs),
1076 CHARSET_FINAL (cs));
1077 write_c_string (buf, printcharfun);
1078 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1079 sprintf (buf, " 0x%x>", cs->header.uid);
1080 write_c_string (buf, printcharfun);
1083 static const struct lrecord_description charset_description[] = {
1084 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1086 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1091 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1092 mark_charset, print_charset, 0, 0, 0,
1093 charset_description,
1094 struct Lisp_Charset);
1096 /* Make a new charset. */
1099 make_charset (Charset_ID id, Lisp_Object name,
1100 unsigned char type, unsigned char columns, unsigned char graphic,
1101 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1102 Lisp_Object long_name, Lisp_Object doc,
1104 Lisp_Object decoding_table,
1105 Emchar ucs_min, Emchar ucs_max,
1106 Emchar code_offset, unsigned char byte_offset)
1109 struct Lisp_Charset *cs =
1110 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1111 XSETCHARSET (obj, cs);
1113 CHARSET_ID (cs) = id;
1114 CHARSET_NAME (cs) = name;
1115 CHARSET_SHORT_NAME (cs) = short_name;
1116 CHARSET_LONG_NAME (cs) = long_name;
1117 CHARSET_DIRECTION (cs) = direction;
1118 CHARSET_TYPE (cs) = type;
1119 CHARSET_COLUMNS (cs) = columns;
1120 CHARSET_GRAPHIC (cs) = graphic;
1121 CHARSET_FINAL (cs) = final;
1122 CHARSET_DOC_STRING (cs) = doc;
1123 CHARSET_REGISTRY (cs) = reg;
1124 CHARSET_CCL_PROGRAM (cs) = Qnil;
1125 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1127 CHARSET_DECODING_TABLE(cs) = Qnil;
1128 CHARSET_UCS_MIN(cs) = ucs_min;
1129 CHARSET_UCS_MAX(cs) = ucs_max;
1130 CHARSET_CODE_OFFSET(cs) = code_offset;
1131 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1134 switch (CHARSET_TYPE (cs))
1136 case CHARSET_TYPE_94:
1137 CHARSET_DIMENSION (cs) = 1;
1138 CHARSET_CHARS (cs) = 94;
1140 case CHARSET_TYPE_96:
1141 CHARSET_DIMENSION (cs) = 1;
1142 CHARSET_CHARS (cs) = 96;
1144 case CHARSET_TYPE_94X94:
1145 CHARSET_DIMENSION (cs) = 2;
1146 CHARSET_CHARS (cs) = 94;
1148 case CHARSET_TYPE_96X96:
1149 CHARSET_DIMENSION (cs) = 2;
1150 CHARSET_CHARS (cs) = 96;
1153 case CHARSET_TYPE_128:
1154 CHARSET_DIMENSION (cs) = 1;
1155 CHARSET_CHARS (cs) = 128;
1157 case CHARSET_TYPE_128X128:
1158 CHARSET_DIMENSION (cs) = 2;
1159 CHARSET_CHARS (cs) = 128;
1161 case CHARSET_TYPE_256:
1162 CHARSET_DIMENSION (cs) = 1;
1163 CHARSET_CHARS (cs) = 256;
1165 case CHARSET_TYPE_256X256:
1166 CHARSET_DIMENSION (cs) = 2;
1167 CHARSET_CHARS (cs) = 256;
1173 if (id == LEADING_BYTE_ASCII)
1174 CHARSET_REP_BYTES (cs) = 1;
1176 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1178 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1183 /* some charsets do not have final characters. This includes
1184 ASCII, Control-1, Composite, and the two faux private
1187 if (code_offset == 0)
1189 assert (NILP (charset_by_attributes[type][final]));
1190 charset_by_attributes[type][final] = obj;
1193 assert (NILP (charset_by_attributes[type][final][direction]));
1194 charset_by_attributes[type][final][direction] = obj;
1198 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1199 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1202 /* official leading byte */
1203 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1206 /* Some charsets are "faux" and don't have names or really exist at
1207 all except in the leading-byte table. */
1209 Fputhash (name, obj, Vcharset_hash_table);
1214 get_unallocated_leading_byte (int dimension)
1219 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1222 lb = next_allocated_leading_byte++;
1226 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1229 lb = next_allocated_1_byte_leading_byte++;
1233 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1236 lb = next_allocated_2_byte_leading_byte++;
1242 ("No more character sets free for this dimension",
1243 make_int (dimension));
1250 range_charset_code_point (Lisp_Object charset, Emchar ch)
1254 if ((XCHARSET_UCS_MIN (charset) <= ch)
1255 && (ch <= XCHARSET_UCS_MAX (charset)))
1257 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1259 if (XCHARSET_DIMENSION (charset) == 1)
1260 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1261 else if (XCHARSET_DIMENSION (charset) == 2)
1262 return list2 (make_int (d / XCHARSET_CHARS (charset)
1263 + XCHARSET_BYTE_OFFSET (charset)),
1264 make_int (d % XCHARSET_CHARS (charset)
1265 + XCHARSET_BYTE_OFFSET (charset)));
1266 else if (XCHARSET_DIMENSION (charset) == 3)
1267 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1268 * XCHARSET_CHARS (charset))
1269 + XCHARSET_BYTE_OFFSET (charset)),
1270 make_int (d / XCHARSET_CHARS (charset)
1271 % XCHARSET_CHARS (charset)
1272 + XCHARSET_BYTE_OFFSET (charset)),
1273 make_int (d % XCHARSET_CHARS (charset)
1274 + XCHARSET_BYTE_OFFSET (charset)));
1275 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1276 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1277 * XCHARSET_CHARS (charset)
1278 * XCHARSET_CHARS (charset))
1279 + XCHARSET_BYTE_OFFSET (charset)),
1280 make_int (d / (XCHARSET_CHARS (charset)
1281 * XCHARSET_CHARS (charset))
1282 % XCHARSET_CHARS (charset)
1283 + XCHARSET_BYTE_OFFSET (charset)),
1284 make_int (d / XCHARSET_CHARS (charset)
1285 % XCHARSET_CHARS (charset)
1286 + XCHARSET_BYTE_OFFSET (charset)),
1287 make_int (d % XCHARSET_CHARS (charset)
1288 + XCHARSET_BYTE_OFFSET (charset)));
1290 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1292 if (XCHARSET_DIMENSION (charset) == 1)
1294 if (XCHARSET_CHARS (charset) == 94)
1296 if (((d = ch - (MIN_CHAR_94
1297 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1299 return list1 (make_int (d + 33));
1301 else if (XCHARSET_CHARS (charset) == 96)
1303 if (((d = ch - (MIN_CHAR_96
1304 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1306 return list1 (make_int (d + 32));
1311 else if (XCHARSET_DIMENSION (charset) == 2)
1313 if (XCHARSET_CHARS (charset) == 94)
1315 if (((d = ch - (MIN_CHAR_94x94
1316 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1319 return list2 (make_int ((d / 94) + 33),
1320 make_int (d % 94 + 33));
1322 else if (XCHARSET_CHARS (charset) == 96)
1324 if (((d = ch - (MIN_CHAR_96x96
1325 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1328 return list2 (make_int ((d / 96) + 32),
1329 make_int (d % 96 + 32));
1337 split_builtin_char (Emchar c)
1339 if (c < MIN_CHAR_OBS_94x94)
1341 if (c <= MAX_CHAR_BASIC_LATIN)
1343 return list2 (Vcharset_ascii, make_int (c));
1347 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1351 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1353 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1355 return list2 (Vcharset_greek_iso8859_7,
1356 make_int (c - MIN_CHAR_GREEK + 0x20));
1358 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1360 return list2 (Vcharset_cyrillic_iso8859_5,
1361 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1363 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1365 return list2 (Vcharset_hebrew_iso8859_8,
1366 make_int (c - MIN_CHAR_HEBREW + 0x20));
1368 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1370 return list2 (Vcharset_thai_tis620,
1371 make_int (c - MIN_CHAR_THAI + 0x20));
1373 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1374 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1376 return list2 (Vcharset_katakana_jisx0201,
1377 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1381 return list3 (Vcharset_ucs_bmp,
1382 make_int (c >> 8), make_int (c & 0xff));
1385 else if (c <= MAX_CHAR_OBS_94x94)
1387 return list3 (CHARSET_BY_ATTRIBUTES
1388 (CHARSET_TYPE_94X94,
1389 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1390 CHARSET_LEFT_TO_RIGHT),
1391 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1392 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1394 else if (c <= MAX_CHAR_94)
1396 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1397 ((c - MIN_CHAR_94) / 94) + '0',
1398 CHARSET_LEFT_TO_RIGHT),
1399 make_int (((c - MIN_CHAR_94) % 94) + 33));
1401 else if (c <= MAX_CHAR_96)
1403 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1404 ((c - MIN_CHAR_96) / 96) + '0',
1405 CHARSET_LEFT_TO_RIGHT),
1406 make_int (((c - MIN_CHAR_96) % 96) + 32));
1408 else if (c <= MAX_CHAR_94x94)
1410 return list3 (CHARSET_BY_ATTRIBUTES
1411 (CHARSET_TYPE_94X94,
1412 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1413 CHARSET_LEFT_TO_RIGHT),
1414 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1415 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1417 else if (c <= MAX_CHAR_96x96)
1419 return list3 (CHARSET_BY_ATTRIBUTES
1420 (CHARSET_TYPE_96X96,
1421 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1422 CHARSET_LEFT_TO_RIGHT),
1423 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1424 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1433 charset_code_point (Lisp_Object charset, Emchar ch)
1435 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1437 if (!EQ (cdef, Qnil))
1439 Lisp_Object field = Fassq (charset, cdef);
1441 if (!EQ (field, Qnil))
1442 return Fcdr (field);
1444 return range_charset_code_point (charset, ch);
1447 Lisp_Object Vdefault_coded_charset_priority_list;
1451 /************************************************************************/
1452 /* Basic charset Lisp functions */
1453 /************************************************************************/
1455 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1456 Return non-nil if OBJECT is a charset.
1460 return CHARSETP (object) ? Qt : Qnil;
1463 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1464 Retrieve the charset of the given name.
1465 If CHARSET-OR-NAME is a charset object, it is simply returned.
1466 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1467 nil is returned. Otherwise the associated charset object is returned.
1471 if (CHARSETP (charset_or_name))
1472 return charset_or_name;
1474 CHECK_SYMBOL (charset_or_name);
1475 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1478 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1479 Retrieve the charset of the given name.
1480 Same as `find-charset' except an error is signalled if there is no such
1481 charset instead of returning nil.
1485 Lisp_Object charset = Ffind_charset (name);
1488 signal_simple_error ("No such charset", name);
1492 /* We store the charsets in hash tables with the names as the key and the
1493 actual charset object as the value. Occasionally we need to use them
1494 in a list format. These routines provide us with that. */
1495 struct charset_list_closure
1497 Lisp_Object *charset_list;
1501 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1502 void *charset_list_closure)
1504 /* This function can GC */
1505 struct charset_list_closure *chcl =
1506 (struct charset_list_closure*) charset_list_closure;
1507 Lisp_Object *charset_list = chcl->charset_list;
1509 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1513 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1514 Return a list of the names of all defined charsets.
1518 Lisp_Object charset_list = Qnil;
1519 struct gcpro gcpro1;
1520 struct charset_list_closure charset_list_closure;
1522 GCPRO1 (charset_list);
1523 charset_list_closure.charset_list = &charset_list;
1524 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1525 &charset_list_closure);
1528 return charset_list;
1531 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1532 Return the name of the given charset.
1536 return XCHARSET_NAME (Fget_charset (charset));
1539 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1540 Define a new character set.
1541 This function is for use with Mule support.
1542 NAME is a symbol, the name by which the character set is normally referred.
1543 DOC-STRING is a string describing the character set.
1544 PROPS is a property list, describing the specific nature of the
1545 character set. Recognized properties are:
1547 'short-name Short version of the charset name (ex: Latin-1)
1548 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1549 'registry A regular expression matching the font registry field for
1551 'dimension Number of octets used to index a character in this charset.
1552 Either 1 or 2. Defaults to 1.
1553 'columns Number of columns used to display a character in this charset.
1554 Only used in TTY mode. (Under X, the actual width of a
1555 character can be derived from the font used to display the
1556 characters.) If unspecified, defaults to the dimension
1557 (this is almost always the correct value).
1558 'chars Number of characters in each dimension (94 or 96).
1559 Defaults to 94. Note that if the dimension is 2, the
1560 character set thus described is 94x94 or 96x96.
1561 'final Final byte of ISO 2022 escape sequence. Must be
1562 supplied. Each combination of (DIMENSION, CHARS) defines a
1563 separate namespace for final bytes. Note that ISO
1564 2022 restricts the final byte to the range
1565 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1566 dimension == 2. Note also that final bytes in the range
1567 0x30 - 0x3F are reserved for user-defined (not official)
1569 'graphic 0 (use left half of font on output) or 1 (use right half
1570 of font on output). Defaults to 0. For example, for
1571 a font whose registry is ISO8859-1, the left half
1572 (octets 0x20 - 0x7F) is the `ascii' character set, while
1573 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1574 character set. With 'graphic set to 0, the octets
1575 will have their high bit cleared; with it set to 1,
1576 the octets will have their high bit set.
1577 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1579 'ccl-program A compiled CCL program used to convert a character in
1580 this charset into an index into the font. This is in
1581 addition to the 'graphic property. The CCL program
1582 is passed the octets of the character, with the high
1583 bit cleared and set depending upon whether the value
1584 of the 'graphic property is 0 or 1.
1586 (name, doc_string, props))
1588 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1589 int direction = CHARSET_LEFT_TO_RIGHT;
1591 Lisp_Object registry = Qnil;
1592 Lisp_Object charset;
1593 Lisp_Object rest, keyword, value;
1594 Lisp_Object ccl_program = Qnil;
1595 Lisp_Object short_name = Qnil, long_name = Qnil;
1596 int byte_offset = -1;
1598 CHECK_SYMBOL (name);
1599 if (!NILP (doc_string))
1600 CHECK_STRING (doc_string);
1602 charset = Ffind_charset (name);
1603 if (!NILP (charset))
1604 signal_simple_error ("Cannot redefine existing charset", name);
1606 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1608 if (EQ (keyword, Qshort_name))
1610 CHECK_STRING (value);
1614 if (EQ (keyword, Qlong_name))
1616 CHECK_STRING (value);
1620 else if (EQ (keyword, Qdimension))
1623 dimension = XINT (value);
1624 if (dimension < 1 || dimension > 2)
1625 signal_simple_error ("Invalid value for 'dimension", value);
1628 else if (EQ (keyword, Qchars))
1631 chars = XINT (value);
1632 if (chars != 94 && chars != 96)
1633 signal_simple_error ("Invalid value for 'chars", value);
1636 else if (EQ (keyword, Qcolumns))
1639 columns = XINT (value);
1640 if (columns != 1 && columns != 2)
1641 signal_simple_error ("Invalid value for 'columns", value);
1644 else if (EQ (keyword, Qgraphic))
1647 graphic = XINT (value);
1649 if (graphic < 0 || graphic > 2)
1651 if (graphic < 0 || graphic > 1)
1653 signal_simple_error ("Invalid value for 'graphic", value);
1656 else if (EQ (keyword, Qregistry))
1658 CHECK_STRING (value);
1662 else if (EQ (keyword, Qdirection))
1664 if (EQ (value, Ql2r))
1665 direction = CHARSET_LEFT_TO_RIGHT;
1666 else if (EQ (value, Qr2l))
1667 direction = CHARSET_RIGHT_TO_LEFT;
1669 signal_simple_error ("Invalid value for 'direction", value);
1672 else if (EQ (keyword, Qfinal))
1674 CHECK_CHAR_COERCE_INT (value);
1675 final = XCHAR (value);
1676 if (final < '0' || final > '~')
1677 signal_simple_error ("Invalid value for 'final", value);
1680 else if (EQ (keyword, Qccl_program))
1682 CHECK_VECTOR (value);
1683 ccl_program = value;
1687 signal_simple_error ("Unrecognized property", keyword);
1691 error ("'final must be specified");
1692 if (dimension == 2 && final > 0x5F)
1694 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1698 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1700 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1702 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1703 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1705 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1707 id = get_unallocated_leading_byte (dimension);
1709 if (NILP (doc_string))
1710 doc_string = build_string ("");
1712 if (NILP (registry))
1713 registry = build_string ("");
1715 if (NILP (short_name))
1716 XSETSTRING (short_name, XSYMBOL (name)->name);
1718 if (NILP (long_name))
1719 long_name = doc_string;
1722 columns = dimension;
1724 if (byte_offset < 0)
1728 else if (chars == 96)
1734 charset = make_charset (id, name, type, columns, graphic,
1735 final, direction, short_name, long_name,
1736 doc_string, registry,
1737 Qnil, 0, 0, 0, byte_offset);
1738 if (!NILP (ccl_program))
1739 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1743 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1745 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1746 NEW-NAME is the name of the new charset. Return the new charset.
1748 (charset, new_name))
1750 Lisp_Object new_charset = Qnil;
1751 int id, dimension, columns, graphic, final;
1752 int direction, type;
1753 Lisp_Object registry, doc_string, short_name, long_name;
1754 struct Lisp_Charset *cs;
1756 charset = Fget_charset (charset);
1757 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1758 signal_simple_error ("Charset already has reverse-direction charset",
1761 CHECK_SYMBOL (new_name);
1762 if (!NILP (Ffind_charset (new_name)))
1763 signal_simple_error ("Cannot redefine existing charset", new_name);
1765 cs = XCHARSET (charset);
1767 type = CHARSET_TYPE (cs);
1768 columns = CHARSET_COLUMNS (cs);
1769 dimension = CHARSET_DIMENSION (cs);
1770 id = get_unallocated_leading_byte (dimension);
1772 graphic = CHARSET_GRAPHIC (cs);
1773 final = CHARSET_FINAL (cs);
1774 direction = CHARSET_RIGHT_TO_LEFT;
1775 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1776 direction = CHARSET_LEFT_TO_RIGHT;
1777 doc_string = CHARSET_DOC_STRING (cs);
1778 short_name = CHARSET_SHORT_NAME (cs);
1779 long_name = CHARSET_LONG_NAME (cs);
1780 registry = CHARSET_REGISTRY (cs);
1782 new_charset = make_charset (id, new_name, type, columns,
1783 graphic, final, direction, short_name, long_name,
1784 doc_string, registry,
1786 CHARSET_DECODING_TABLE(cs),
1787 CHARSET_UCS_MIN(cs),
1788 CHARSET_UCS_MAX(cs),
1789 CHARSET_CODE_OFFSET(cs),
1790 CHARSET_BYTE_OFFSET(cs)
1796 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1797 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1802 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1803 Define symbol ALIAS as an alias for CHARSET.
1807 CHECK_SYMBOL (alias);
1808 charset = Fget_charset (charset);
1809 return Fputhash (alias, charset, Vcharset_hash_table);
1812 /* #### Reverse direction charsets not yet implemented. */
1814 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1816 Return the reverse-direction charset parallel to CHARSET, if any.
1817 This is the charset with the same properties (in particular, the same
1818 dimension, number of characters per dimension, and final byte) as
1819 CHARSET but whose characters are displayed in the opposite direction.
1823 charset = Fget_charset (charset);
1824 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1828 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1829 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1830 If DIRECTION is omitted, both directions will be checked (left-to-right
1831 will be returned if character sets exist for both directions).
1833 (dimension, chars, final, direction))
1835 int dm, ch, fi, di = -1;
1837 Lisp_Object obj = Qnil;
1839 CHECK_INT (dimension);
1840 dm = XINT (dimension);
1841 if (dm < 1 || dm > 2)
1842 signal_simple_error ("Invalid value for DIMENSION", dimension);
1846 if (ch != 94 && ch != 96)
1847 signal_simple_error ("Invalid value for CHARS", chars);
1849 CHECK_CHAR_COERCE_INT (final);
1851 if (fi < '0' || fi > '~')
1852 signal_simple_error ("Invalid value for FINAL", final);
1854 if (EQ (direction, Ql2r))
1855 di = CHARSET_LEFT_TO_RIGHT;
1856 else if (EQ (direction, Qr2l))
1857 di = CHARSET_RIGHT_TO_LEFT;
1858 else if (!NILP (direction))
1859 signal_simple_error ("Invalid value for DIRECTION", direction);
1861 if (dm == 2 && fi > 0x5F)
1863 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1866 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1868 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1872 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1874 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1877 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1880 return XCHARSET_NAME (obj);
1884 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1885 Return short name of CHARSET.
1889 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1892 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1893 Return long name of CHARSET.
1897 return XCHARSET_LONG_NAME (Fget_charset (charset));
1900 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1901 Return description of CHARSET.
1905 return XCHARSET_DOC_STRING (Fget_charset (charset));
1908 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1909 Return dimension of CHARSET.
1913 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1916 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1917 Return property PROP of CHARSET.
1918 Recognized properties are those listed in `make-charset', as well as
1919 'name and 'doc-string.
1923 struct Lisp_Charset *cs;
1925 charset = Fget_charset (charset);
1926 cs = XCHARSET (charset);
1928 CHECK_SYMBOL (prop);
1929 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1930 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1931 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1932 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1933 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1934 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1935 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1936 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1937 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1938 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1939 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1940 if (EQ (prop, Qdirection))
1941 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1942 if (EQ (prop, Qreverse_direction_charset))
1944 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1948 return XCHARSET_NAME (obj);
1950 signal_simple_error ("Unrecognized charset property name", prop);
1951 return Qnil; /* not reached */
1954 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1955 Return charset identification number of CHARSET.
1959 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1962 /* #### We need to figure out which properties we really want to
1965 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1966 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1968 (charset, ccl_program))
1970 charset = Fget_charset (charset);
1971 CHECK_VECTOR (ccl_program);
1972 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1977 invalidate_charset_font_caches (Lisp_Object charset)
1979 /* Invalidate font cache entries for charset on all devices. */
1980 Lisp_Object devcons, concons, hash_table;
1981 DEVICE_LOOP_NO_BREAK (devcons, concons)
1983 struct device *d = XDEVICE (XCAR (devcons));
1984 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1985 if (!UNBOUNDP (hash_table))
1986 Fclrhash (hash_table);
1990 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1991 Set the 'registry property of CHARSET to REGISTRY.
1993 (charset, registry))
1995 charset = Fget_charset (charset);
1996 CHECK_STRING (registry);
1997 XCHARSET_REGISTRY (charset) = registry;
1998 invalidate_charset_font_caches (charset);
1999 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2004 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2005 Return mapping-table of CHARSET.
2009 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2012 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2013 Set mapping-table of CHARSET to TABLE.
2017 struct Lisp_Charset *cs;
2018 Lisp_Object old_table;
2021 charset = Fget_charset (charset);
2022 cs = XCHARSET (charset);
2024 if (EQ (table, Qnil))
2026 CHARSET_DECODING_TABLE(cs) = table;
2029 else if (VECTORP (table))
2031 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
2032 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2033 old_table = CHARSET_DECODING_TABLE(cs);
2034 CHARSET_DECODING_TABLE(cs) = table;
2037 signal_error (Qwrong_type_argument,
2038 list2 (build_translated_string ("vector-or-nil-p"),
2040 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2042 switch (CHARSET_DIMENSION (cs))
2045 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2047 Lisp_Object c = XVECTOR_DATA(table)[i];
2052 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2056 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2058 Lisp_Object v = XVECTOR_DATA(table)[i];
2064 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2066 CHARSET_DECODING_TABLE(cs) = old_table;
2067 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2069 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2071 Lisp_Object c = XVECTOR_DATA(v)[j];
2074 put_char_attribute (c, charset,
2077 (i + CHARSET_BYTE_OFFSET (cs)),
2079 (j + CHARSET_BYTE_OFFSET (cs))));
2083 put_char_attribute (v, charset,
2085 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2094 /************************************************************************/
2095 /* Lisp primitives for working with characters */
2096 /************************************************************************/
2098 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2099 Make a character from CHARSET and octets ARG1 and ARG2.
2100 ARG2 is required only for characters from two-dimensional charsets.
2101 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2102 character s with caron.
2104 (charset, arg1, arg2))
2106 struct Lisp_Charset *cs;
2108 int lowlim, highlim;
2110 charset = Fget_charset (charset);
2111 cs = XCHARSET (charset);
2113 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2114 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2116 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2118 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2119 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2122 /* It is useful (and safe, according to Olivier Galibert) to strip
2123 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2124 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2125 Latin 2 code of the character. */
2133 if (a1 < lowlim || a1 > highlim)
2134 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2136 if (CHARSET_DIMENSION (cs) == 1)
2140 ("Charset is of dimension one; second octet must be nil", arg2);
2141 return make_char (MAKE_CHAR (charset, a1, 0));
2150 a2 = XINT (arg2) & 0x7f;
2152 if (a2 < lowlim || a2 > highlim)
2153 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2155 return make_char (MAKE_CHAR (charset, a1, a2));
2158 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2159 Return the character set of char CH.
2163 CHECK_CHAR_COERCE_INT (ch);
2165 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2168 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2169 Return list of charset and one or two position-codes of CHAR.
2173 /* This function can GC */
2174 struct gcpro gcpro1, gcpro2;
2175 Lisp_Object charset = Qnil;
2176 Lisp_Object rc = Qnil;
2179 GCPRO2 (charset, rc);
2180 CHECK_CHAR_COERCE_INT (character);
2182 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2184 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2186 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2190 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2198 #ifdef ENABLE_COMPOSITE_CHARS
2199 /************************************************************************/
2200 /* composite character functions */
2201 /************************************************************************/
2204 lookup_composite_char (Bufbyte *str, int len)
2206 Lisp_Object lispstr = make_string (str, len);
2207 Lisp_Object ch = Fgethash (lispstr,
2208 Vcomposite_char_string2char_hash_table,
2214 if (composite_char_row_next >= 128)
2215 signal_simple_error ("No more composite chars available", lispstr);
2216 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2217 composite_char_col_next);
2218 Fputhash (make_char (emch), lispstr,
2219 Vcomposite_char_char2string_hash_table);
2220 Fputhash (lispstr, make_char (emch),
2221 Vcomposite_char_string2char_hash_table);
2222 composite_char_col_next++;
2223 if (composite_char_col_next >= 128)
2225 composite_char_col_next = 32;
2226 composite_char_row_next++;
2235 composite_char_string (Emchar ch)
2237 Lisp_Object str = Fgethash (make_char (ch),
2238 Vcomposite_char_char2string_hash_table,
2240 assert (!UNBOUNDP (str));
2244 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2245 Convert a string into a single composite character.
2246 The character is the result of overstriking all the characters in
2251 CHECK_STRING (string);
2252 return make_char (lookup_composite_char (XSTRING_DATA (string),
2253 XSTRING_LENGTH (string)));
2256 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2257 Return a string of the characters comprising a composite character.
2265 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2266 signal_simple_error ("Must be composite char", ch);
2267 return composite_char_string (emch);
2269 #endif /* ENABLE_COMPOSITE_CHARS */
2272 /************************************************************************/
2273 /* initialization */
2274 /************************************************************************/
2277 syms_of_mule_charset (void)
2279 DEFSUBR (Fcharsetp);
2280 DEFSUBR (Ffind_charset);
2281 DEFSUBR (Fget_charset);
2282 DEFSUBR (Fcharset_list);
2283 DEFSUBR (Fcharset_name);
2284 DEFSUBR (Fmake_charset);
2285 DEFSUBR (Fmake_reverse_direction_charset);
2286 /* DEFSUBR (Freverse_direction_charset); */
2287 DEFSUBR (Fdefine_charset_alias);
2288 DEFSUBR (Fcharset_from_attributes);
2289 DEFSUBR (Fcharset_short_name);
2290 DEFSUBR (Fcharset_long_name);
2291 DEFSUBR (Fcharset_description);
2292 DEFSUBR (Fcharset_dimension);
2293 DEFSUBR (Fcharset_property);
2294 DEFSUBR (Fcharset_id);
2295 DEFSUBR (Fset_charset_ccl_program);
2296 DEFSUBR (Fset_charset_registry);
2298 DEFSUBR (Fchar_attribute_alist);
2299 DEFSUBR (Fget_char_attribute);
2300 DEFSUBR (Fput_char_attribute);
2301 DEFSUBR (Fdefine_char);
2302 DEFSUBR (Fcharset_mapping_table);
2303 DEFSUBR (Fset_charset_mapping_table);
2306 DEFSUBR (Fmake_char);
2307 DEFSUBR (Fchar_charset);
2308 DEFSUBR (Fsplit_char);
2310 #ifdef ENABLE_COMPOSITE_CHARS
2311 DEFSUBR (Fmake_composite_char);
2312 DEFSUBR (Fcomposite_char_string);
2315 defsymbol (&Qcharsetp, "charsetp");
2316 defsymbol (&Qregistry, "registry");
2317 defsymbol (&Qfinal, "final");
2318 defsymbol (&Qgraphic, "graphic");
2319 defsymbol (&Qdirection, "direction");
2320 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2321 defsymbol (&Qshort_name, "short-name");
2322 defsymbol (&Qlong_name, "long-name");
2324 defsymbol (&Ql2r, "l2r");
2325 defsymbol (&Qr2l, "r2l");
2327 /* Charsets, compatible with FSF 20.3
2328 Naming convention is Script-Charset[-Edition] */
2329 defsymbol (&Qascii, "ascii");
2330 defsymbol (&Qcontrol_1, "control-1");
2331 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2332 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2333 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2334 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2335 defsymbol (&Qthai_tis620, "thai-tis620");
2336 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2337 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2338 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2339 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2340 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2341 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2342 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2343 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2344 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2345 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2346 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2347 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2348 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2349 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2351 defsymbol (&Qucs, "ucs");
2352 defsymbol (&Qucs_bmp, "ucs-bmp");
2353 defsymbol (&Qlatin_viscii, "latin-viscii");
2354 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2355 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2356 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2357 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2358 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2359 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2361 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2362 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2364 defsymbol (&Qcomposite, "composite");
2368 vars_of_mule_charset (void)
2375 /* Table of charsets indexed by leading byte. */
2376 for (i = 0; i < countof (charset_by_leading_byte); i++)
2377 charset_by_leading_byte[i] = Qnil;
2380 /* Table of charsets indexed by type/final-byte. */
2381 for (i = 0; i < countof (charset_by_attributes); i++)
2382 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2383 charset_by_attributes[i][j] = Qnil;
2385 /* Table of charsets indexed by type/final-byte/direction. */
2386 for (i = 0; i < countof (charset_by_attributes); i++)
2387 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2388 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2389 charset_by_attributes[i][j][k] = Qnil;
2393 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2395 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2396 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2400 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2401 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2402 Leading-code of private TYPE9N charset of column-width 1.
2404 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2408 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2409 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2410 Version number of UTF-2000.
2413 staticpro (&Vcharacter_attribute_table);
2414 Vcharacter_attribute_table = make_char_code_table (Qnil);
2416 Vdefault_coded_charset_priority_list = Qnil;
2417 DEFVAR_LISP ("default-coded-charset-priority-list",
2418 &Vdefault_coded_charset_priority_list /*
2419 Default order of preferred coded-character-sets.
2425 complex_vars_of_mule_charset (void)
2427 staticpro (&Vcharset_hash_table);
2428 Vcharset_hash_table =
2429 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2431 /* Predefined character sets. We store them into variables for
2436 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2437 CHARSET_TYPE_256X256, 1, 2, 0,
2438 CHARSET_LEFT_TO_RIGHT,
2439 build_string ("BMP"),
2440 build_string ("BMP"),
2441 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2442 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2443 Qnil, 0, 0xFFFF, 0, 0);
2445 # define MIN_CHAR_THAI 0
2446 # define MAX_CHAR_THAI 0
2447 # define MIN_CHAR_GREEK 0
2448 # define MAX_CHAR_GREEK 0
2449 # define MIN_CHAR_HEBREW 0
2450 # define MAX_CHAR_HEBREW 0
2451 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2452 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2453 # define MIN_CHAR_CYRILLIC 0
2454 # define MAX_CHAR_CYRILLIC 0
2457 make_charset (LEADING_BYTE_ASCII, Qascii,
2458 CHARSET_TYPE_94, 1, 0, 'B',
2459 CHARSET_LEFT_TO_RIGHT,
2460 build_string ("ASCII"),
2461 build_string ("ASCII)"),
2462 build_string ("ASCII (ISO646 IRV)"),
2463 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2464 Qnil, 0, 0x7F, 0, 0);
2465 Vcharset_control_1 =
2466 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2467 CHARSET_TYPE_94, 1, 1, 0,
2468 CHARSET_LEFT_TO_RIGHT,
2469 build_string ("C1"),
2470 build_string ("Control characters"),
2471 build_string ("Control characters 128-191"),
2473 Qnil, 0x80, 0x9F, 0, 0);
2474 Vcharset_latin_iso8859_1 =
2475 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2476 CHARSET_TYPE_96, 1, 1, 'A',
2477 CHARSET_LEFT_TO_RIGHT,
2478 build_string ("Latin-1"),
2479 build_string ("ISO8859-1 (Latin-1)"),
2480 build_string ("ISO8859-1 (Latin-1)"),
2481 build_string ("iso8859-1"),
2482 Qnil, 0xA0, 0xFF, 0, 32);
2483 Vcharset_latin_iso8859_2 =
2484 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2485 CHARSET_TYPE_96, 1, 1, 'B',
2486 CHARSET_LEFT_TO_RIGHT,
2487 build_string ("Latin-2"),
2488 build_string ("ISO8859-2 (Latin-2)"),
2489 build_string ("ISO8859-2 (Latin-2)"),
2490 build_string ("iso8859-2"),
2492 Vcharset_latin_iso8859_3 =
2493 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2494 CHARSET_TYPE_96, 1, 1, 'C',
2495 CHARSET_LEFT_TO_RIGHT,
2496 build_string ("Latin-3"),
2497 build_string ("ISO8859-3 (Latin-3)"),
2498 build_string ("ISO8859-3 (Latin-3)"),
2499 build_string ("iso8859-3"),
2501 Vcharset_latin_iso8859_4 =
2502 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2503 CHARSET_TYPE_96, 1, 1, 'D',
2504 CHARSET_LEFT_TO_RIGHT,
2505 build_string ("Latin-4"),
2506 build_string ("ISO8859-4 (Latin-4)"),
2507 build_string ("ISO8859-4 (Latin-4)"),
2508 build_string ("iso8859-4"),
2510 Vcharset_thai_tis620 =
2511 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2512 CHARSET_TYPE_96, 1, 1, 'T',
2513 CHARSET_LEFT_TO_RIGHT,
2514 build_string ("TIS620"),
2515 build_string ("TIS620 (Thai)"),
2516 build_string ("TIS620.2529 (Thai)"),
2517 build_string ("tis620"),
2518 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2519 Vcharset_greek_iso8859_7 =
2520 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2521 CHARSET_TYPE_96, 1, 1, 'F',
2522 CHARSET_LEFT_TO_RIGHT,
2523 build_string ("ISO8859-7"),
2524 build_string ("ISO8859-7 (Greek)"),
2525 build_string ("ISO8859-7 (Greek)"),
2526 build_string ("iso8859-7"),
2527 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2528 Vcharset_arabic_iso8859_6 =
2529 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2530 CHARSET_TYPE_96, 1, 1, 'G',
2531 CHARSET_RIGHT_TO_LEFT,
2532 build_string ("ISO8859-6"),
2533 build_string ("ISO8859-6 (Arabic)"),
2534 build_string ("ISO8859-6 (Arabic)"),
2535 build_string ("iso8859-6"),
2537 Vcharset_hebrew_iso8859_8 =
2538 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2539 CHARSET_TYPE_96, 1, 1, 'H',
2540 CHARSET_RIGHT_TO_LEFT,
2541 build_string ("ISO8859-8"),
2542 build_string ("ISO8859-8 (Hebrew)"),
2543 build_string ("ISO8859-8 (Hebrew)"),
2544 build_string ("iso8859-8"),
2545 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2546 Vcharset_katakana_jisx0201 =
2547 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2548 CHARSET_TYPE_94, 1, 1, 'I',
2549 CHARSET_LEFT_TO_RIGHT,
2550 build_string ("JISX0201 Kana"),
2551 build_string ("JISX0201.1976 (Japanese Kana)"),
2552 build_string ("JISX0201.1976 Japanese Kana"),
2553 build_string ("jisx0201\\.1976"),
2555 MIN_CHAR_HALFWIDTH_KATAKANA,
2556 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2557 Vcharset_latin_jisx0201 =
2558 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2559 CHARSET_TYPE_94, 1, 0, 'J',
2560 CHARSET_LEFT_TO_RIGHT,
2561 build_string ("JISX0201 Roman"),
2562 build_string ("JISX0201.1976 (Japanese Roman)"),
2563 build_string ("JISX0201.1976 Japanese Roman"),
2564 build_string ("jisx0201\\.1976"),
2566 Vcharset_cyrillic_iso8859_5 =
2567 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2568 CHARSET_TYPE_96, 1, 1, 'L',
2569 CHARSET_LEFT_TO_RIGHT,
2570 build_string ("ISO8859-5"),
2571 build_string ("ISO8859-5 (Cyrillic)"),
2572 build_string ("ISO8859-5 (Cyrillic)"),
2573 build_string ("iso8859-5"),
2574 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2575 Vcharset_latin_iso8859_9 =
2576 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2577 CHARSET_TYPE_96, 1, 1, 'M',
2578 CHARSET_LEFT_TO_RIGHT,
2579 build_string ("Latin-5"),
2580 build_string ("ISO8859-9 (Latin-5)"),
2581 build_string ("ISO8859-9 (Latin-5)"),
2582 build_string ("iso8859-9"),
2584 Vcharset_japanese_jisx0208_1978 =
2585 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2586 CHARSET_TYPE_94X94, 2, 0, '@',
2587 CHARSET_LEFT_TO_RIGHT,
2588 build_string ("JIS X0208:1978"),
2589 build_string ("JIS X0208:1978 (Japanese)"),
2591 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2592 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2594 Vcharset_chinese_gb2312 =
2595 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2596 CHARSET_TYPE_94X94, 2, 0, 'A',
2597 CHARSET_LEFT_TO_RIGHT,
2598 build_string ("GB2312"),
2599 build_string ("GB2312)"),
2600 build_string ("GB2312 Chinese simplified"),
2601 build_string ("gb2312"),
2603 Vcharset_japanese_jisx0208 =
2604 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2605 CHARSET_TYPE_94X94, 2, 0, 'B',
2606 CHARSET_LEFT_TO_RIGHT,
2607 build_string ("JISX0208"),
2608 build_string ("JIS X0208:1983 (Japanese)"),
2609 build_string ("JIS X0208:1983 Japanese Kanji"),
2610 build_string ("jisx0208\\.1983"),
2612 Vcharset_korean_ksc5601 =
2613 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2614 CHARSET_TYPE_94X94, 2, 0, 'C',
2615 CHARSET_LEFT_TO_RIGHT,
2616 build_string ("KSC5601"),
2617 build_string ("KSC5601 (Korean"),
2618 build_string ("KSC5601 Korean Hangul and Hanja"),
2619 build_string ("ksc5601"),
2621 Vcharset_japanese_jisx0212 =
2622 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2623 CHARSET_TYPE_94X94, 2, 0, 'D',
2624 CHARSET_LEFT_TO_RIGHT,
2625 build_string ("JISX0212"),
2626 build_string ("JISX0212 (Japanese)"),
2627 build_string ("JISX0212 Japanese Supplement"),
2628 build_string ("jisx0212"),
2631 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2632 Vcharset_chinese_cns11643_1 =
2633 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2634 CHARSET_TYPE_94X94, 2, 0, 'G',
2635 CHARSET_LEFT_TO_RIGHT,
2636 build_string ("CNS11643-1"),
2637 build_string ("CNS11643-1 (Chinese traditional)"),
2639 ("CNS 11643 Plane 1 Chinese traditional"),
2640 build_string (CHINESE_CNS_PLANE_RE("1")),
2642 Vcharset_chinese_cns11643_2 =
2643 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2644 CHARSET_TYPE_94X94, 2, 0, 'H',
2645 CHARSET_LEFT_TO_RIGHT,
2646 build_string ("CNS11643-2"),
2647 build_string ("CNS11643-2 (Chinese traditional)"),
2649 ("CNS 11643 Plane 2 Chinese traditional"),
2650 build_string (CHINESE_CNS_PLANE_RE("2")),
2653 Vcharset_latin_viscii_lower =
2654 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2655 CHARSET_TYPE_96, 1, 1, '1',
2656 CHARSET_LEFT_TO_RIGHT,
2657 build_string ("VISCII lower"),
2658 build_string ("VISCII lower (Vietnamese)"),
2659 build_string ("VISCII lower (Vietnamese)"),
2660 build_string ("MULEVISCII-LOWER"),
2662 Vcharset_latin_viscii_upper =
2663 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2664 CHARSET_TYPE_96, 1, 1, '2',
2665 CHARSET_LEFT_TO_RIGHT,
2666 build_string ("VISCII upper"),
2667 build_string ("VISCII upper (Vietnamese)"),
2668 build_string ("VISCII upper (Vietnamese)"),
2669 build_string ("MULEVISCII-UPPER"),
2671 Vcharset_latin_viscii =
2672 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2673 CHARSET_TYPE_256, 1, 2, 0,
2674 CHARSET_LEFT_TO_RIGHT,
2675 build_string ("VISCII"),
2676 build_string ("VISCII 1.1 (Vietnamese)"),
2677 build_string ("VISCII 1.1 (Vietnamese)"),
2678 build_string ("VISCII1\\.1"),
2680 Vcharset_hiragana_jisx0208 =
2681 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2682 CHARSET_TYPE_94X94, 2, 0, 'B',
2683 CHARSET_LEFT_TO_RIGHT,
2684 build_string ("Hiragana"),
2685 build_string ("Hiragana of JIS X0208"),
2686 build_string ("Japanese Hiragana of JIS X0208"),
2687 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2688 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2689 (0x24 - 33) * 94 + (0x21 - 33), 33);
2690 Vcharset_katakana_jisx0208 =
2691 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2692 CHARSET_TYPE_94X94, 2, 0, 'B',
2693 CHARSET_LEFT_TO_RIGHT,
2694 build_string ("Katakana"),
2695 build_string ("Katakana of JIS X0208"),
2696 build_string ("Japanese Katakana of JIS X0208"),
2697 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2698 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2699 (0x25 - 33) * 94 + (0x21 - 33), 33);
2701 Vcharset_chinese_big5_1 =
2702 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2703 CHARSET_TYPE_94X94, 2, 0, '0',
2704 CHARSET_LEFT_TO_RIGHT,
2705 build_string ("Big5"),
2706 build_string ("Big5 (Level-1)"),
2708 ("Big5 Level-1 Chinese traditional"),
2709 build_string ("big5"),
2711 Vcharset_chinese_big5_2 =
2712 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2713 CHARSET_TYPE_94X94, 2, 0, '1',
2714 CHARSET_LEFT_TO_RIGHT,
2715 build_string ("Big5"),
2716 build_string ("Big5 (Level-2)"),
2718 ("Big5 Level-2 Chinese traditional"),
2719 build_string ("big5"),
2722 #ifdef ENABLE_COMPOSITE_CHARS
2723 /* #### For simplicity, we put composite chars into a 96x96 charset.
2724 This is going to lead to problems because you can run out of
2725 room, esp. as we don't yet recycle numbers. */
2726 Vcharset_composite =
2727 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2728 CHARSET_TYPE_96X96, 2, 0, 0,
2729 CHARSET_LEFT_TO_RIGHT,
2730 build_string ("Composite"),
2731 build_string ("Composite characters"),
2732 build_string ("Composite characters"),
2735 composite_char_row_next = 32;
2736 composite_char_col_next = 32;
2738 Vcomposite_char_string2char_hash_table =
2739 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2740 Vcomposite_char_char2string_hash_table =
2741 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2742 staticpro (&Vcomposite_char_string2char_hash_table);
2743 staticpro (&Vcomposite_char_char2string_hash_table);
2744 #endif /* ENABLE_COMPOSITE_CHARS */