1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 unsigned int code = ch;
299 struct Lisp_Char_Byte_Table* cpt
300 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
301 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
303 if (CHAR_BYTE_TABLE_P (ret))
304 cpt = XCHAR_BYTE_TABLE (ret);
308 ret = cpt->property [(unsigned char) (code >> 16)];
309 if (CHAR_BYTE_TABLE_P (ret))
310 cpt = XCHAR_BYTE_TABLE (ret);
314 ret = cpt->property [(unsigned char) (code >> 8)];
315 if (CHAR_BYTE_TABLE_P (ret))
316 cpt = XCHAR_BYTE_TABLE (ret);
320 return cpt->property [(unsigned char) code];
324 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
326 unsigned int code = ch;
327 struct Lisp_Char_Byte_Table* cpt1
328 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
329 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
331 if (CHAR_BYTE_TABLE_P (ret))
333 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
335 ret = cpt2->property[(unsigned char)(code >> 16)];
336 if (CHAR_BYTE_TABLE_P (ret))
338 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
340 ret = cpt3->property[(unsigned char)(code >> 8)];
341 if (CHAR_BYTE_TABLE_P (ret))
343 struct Lisp_Char_Byte_Table* cpt4
344 = XCHAR_BYTE_TABLE (ret);
346 cpt4->property[(unsigned char)code] = value;
348 else if (!EQ (ret, value))
350 Lisp_Object cpt4 = make_char_byte_table (ret);
352 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
353 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
356 else if (!EQ (ret, value))
358 Lisp_Object cpt3 = make_char_byte_table (ret);
359 Lisp_Object cpt4 = make_char_byte_table (ret);
361 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
362 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
364 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
367 else if (!EQ (ret, value))
369 Lisp_Object cpt2 = make_char_byte_table (ret);
370 Lisp_Object cpt3 = make_char_byte_table (ret);
371 Lisp_Object cpt4 = make_char_byte_table (ret);
373 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
374 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
375 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
376 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
381 Lisp_Object Vcharacter_attribute_table;
382 Lisp_Object Vcharacter_composition_table;
383 Lisp_Object Vcharacter_variant_table;
385 Lisp_Object Q_decomposition;
390 Lisp_Object QnoBreak;
392 Lisp_Object Qfraction;
395 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
401 else if (EQ (v, Qwide))
403 else if (EQ (v, Qnarrow))
405 else if (EQ (v, Qcompat))
407 else if (EQ (v, QnoBreak))
409 else if (EQ (v, Qsuper))
411 else if (EQ (v, Qfraction))
414 signal_simple_error (err_msg, err_arg);
417 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
418 Return character corresponding with list.
422 Lisp_Object table = Vcharacter_composition_table;
423 Lisp_Object rest = list;
427 Lisp_Object v = Fcar (rest);
429 Emchar c = to_char_code (v, "Invalid value for composition", list);
431 ret = get_char_code_table (c, table);
436 if (!CHAR_CODE_TABLE_P (ret))
441 else if (!CONSP (rest))
443 else if (CHAR_CODE_TABLE_P (ret))
446 signal_simple_error ("Invalid table is found with", list);
448 signal_simple_error ("Invalid value for composition", list);
451 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
452 Return variants of CHARACTER.
456 CHECK_CHAR (character);
457 return Fcopy_list (get_char_code_table (XCHAR (character),
458 Vcharacter_variant_table));
461 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
462 Return the alist of attributes of CHARACTER.
466 CHECK_CHAR (character);
467 return Fcopy_alist (get_char_code_table (XCHAR (character),
468 Vcharacter_attribute_table));
471 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
472 Return the value of CHARACTER's ATTRIBUTE.
474 (character, attribute))
477 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
483 if (!NILP (ccs = Ffind_charset (attribute)))
486 return Fcdr (Fassq (attribute, ret));
490 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
493 Emchar char_code = XCHAR (character);
495 = get_char_code_table (char_code, Vcharacter_attribute_table);
498 cell = Fassq (attribute, ret);
502 ret = Fcons (Fcons (attribute, value), ret);
504 else if (!EQ (Fcdr (cell), value))
506 Fsetcdr (cell, value);
508 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
512 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
513 Store CHARACTER's ATTRIBUTE with VALUE.
515 (character, attribute, value))
519 ccs = Ffind_charset (attribute);
523 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
528 /* ad-hoc method for `ascii' */
529 if ((XCHARSET_CHARS (ccs) == 94) &&
530 (XCHARSET_BYTE_OFFSET (ccs) != 33))
531 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
533 ccs_len = XCHARSET_CHARS (ccs);
536 signal_simple_error ("Invalid value for coded-charset",
540 rest = Fget_char_attribute (character, attribute);
547 Lisp_Object ei = Fcar (rest);
549 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
550 nv = XVECTOR_DATA(v)[i];
557 XVECTOR_DATA(v)[i] = Qnil;
558 v = XCHARSET_DECODING_TABLE (ccs);
563 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
570 Lisp_Object ei = Fcar (rest);
573 signal_simple_error ("Invalid value for coded-charset",
575 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
576 nv = XVECTOR_DATA(v)[i];
582 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
589 XVECTOR_DATA(v)[i] = character;
591 else if (EQ (attribute, Q_decomposition))
593 Lisp_Object rest = value;
594 Lisp_Object table = Vcharacter_composition_table;
597 signal_simple_error ("Invalid value for ->decomposition",
602 Lisp_Object v = Fcar (rest);
605 = to_char_code (v, "Invalid value for ->decomposition", value);
610 put_char_code_table (c, character, table);
615 ntable = get_char_code_table (c, table);
616 if (!CHAR_CODE_TABLE_P (ntable))
618 ntable = make_char_code_table (Qnil);
619 put_char_code_table (c, ntable, table);
625 else if (EQ (attribute, Q_ucs))
631 signal_simple_error ("Invalid value for ->ucs", value);
635 ret = get_char_code_table (c, Vcharacter_variant_table);
636 if (NILP (Fmemq (character, ret)))
638 put_char_code_table (c, Fcons (character, ret),
639 Vcharacter_variant_table);
642 return put_char_attribute (character, attribute, value);
647 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
648 Store character's ATTRIBUTES.
652 Lisp_Object rest = attributes;
653 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
654 Lisp_Object character;
660 Lisp_Object cell = Fcar (rest);
664 signal_simple_error ("Invalid argument", attributes);
665 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
666 && XCHARSET_FINAL (ccs))
670 if (XCHARSET_DIMENSION (ccs) == 1)
672 Lisp_Object eb1 = Fcar (Fcdr (cell));
676 signal_simple_error ("Invalid argument", attributes);
678 switch (XCHARSET_CHARS (ccs))
682 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
686 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
692 else if (XCHARSET_DIMENSION (ccs) == 2)
694 Lisp_Object eb1 = Fcar (Fcdr (cell));
695 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
699 signal_simple_error ("Invalid argument", attributes);
702 signal_simple_error ("Invalid argument", attributes);
704 switch (XCHARSET_CHARS (ccs))
707 code = MIN_CHAR_94x94
708 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
709 + (b1 - 33) * 94 + (b2 - 33);
712 code = MIN_CHAR_96x96
713 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
714 + (b1 - 32) * 96 + (b2 - 32);
725 character = make_char (code);
726 goto setup_attributes;
732 else if (!INTP (code))
733 signal_simple_error ("Invalid argument", attributes);
735 character = make_char (XINT (code));
741 Lisp_Object cell = Fcar (rest);
744 signal_simple_error ("Invalid argument", attributes);
745 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
749 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
752 Lisp_Object Vutf_2000_version;
756 int leading_code_private_11;
759 Lisp_Object Qcharsetp;
761 /* Qdoc_string, Qdimension, Qchars defined in general.c */
762 Lisp_Object Qregistry, Qfinal, Qgraphic;
763 Lisp_Object Qdirection;
764 Lisp_Object Qreverse_direction_charset;
765 Lisp_Object Qleading_byte;
766 Lisp_Object Qshort_name, Qlong_name;
782 Qjapanese_jisx0208_1978,
794 Qvietnamese_viscii_lower,
795 Qvietnamese_viscii_upper,
803 Lisp_Object Ql2r, Qr2l;
805 Lisp_Object Vcharset_hash_table;
808 static Charset_ID next_allocated_leading_byte;
810 static Charset_ID next_allocated_1_byte_leading_byte;
811 static Charset_ID next_allocated_2_byte_leading_byte;
814 /* Composite characters are characters constructed by overstriking two
815 or more regular characters.
817 1) The old Mule implementation involves storing composite characters
818 in a buffer as a tag followed by all of the actual characters
819 used to make up the composite character. I think this is a bad
820 idea; it greatly complicates code that wants to handle strings
821 one character at a time because it has to deal with the possibility
822 of great big ungainly characters. It's much more reasonable to
823 simply store an index into a table of composite characters.
825 2) The current implementation only allows for 16,384 separate
826 composite characters over the lifetime of the XEmacs process.
827 This could become a potential problem if the user
828 edited lots of different files that use composite characters.
829 Due to FSF bogosity, increasing the number of allowable
830 composite characters under Mule would decrease the number
831 of possible faces that can exist. Mule already has shrunk
832 this to 2048, and further shrinkage would become uncomfortable.
833 No such problems exist in XEmacs.
835 Composite characters could be represented as 0x80 C1 C2 C3,
836 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
837 for slightly under 2^20 (one million) composite characters
838 over the XEmacs process lifetime, and you only need to
839 increase the size of a Mule character from 19 to 21 bits.
840 Or you could use 0x80 C1 C2 C3 C4, allowing for about
841 85 million (slightly over 2^26) composite characters. */
844 /************************************************************************/
845 /* Basic Emchar functions */
846 /************************************************************************/
848 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
849 string in STR. Returns the number of bytes stored.
850 Do not call this directly. Use the macro set_charptr_emchar() instead.
854 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
869 else if ( c <= 0x7ff )
871 *p++ = (c >> 6) | 0xc0;
872 *p++ = (c & 0x3f) | 0x80;
874 else if ( c <= 0xffff )
876 *p++ = (c >> 12) | 0xe0;
877 *p++ = ((c >> 6) & 0x3f) | 0x80;
878 *p++ = (c & 0x3f) | 0x80;
880 else if ( c <= 0x1fffff )
882 *p++ = (c >> 18) | 0xf0;
883 *p++ = ((c >> 12) & 0x3f) | 0x80;
884 *p++ = ((c >> 6) & 0x3f) | 0x80;
885 *p++ = (c & 0x3f) | 0x80;
887 else if ( c <= 0x3ffffff )
889 *p++ = (c >> 24) | 0xf8;
890 *p++ = ((c >> 18) & 0x3f) | 0x80;
891 *p++ = ((c >> 12) & 0x3f) | 0x80;
892 *p++ = ((c >> 6) & 0x3f) | 0x80;
893 *p++ = (c & 0x3f) | 0x80;
897 *p++ = (c >> 30) | 0xfc;
898 *p++ = ((c >> 24) & 0x3f) | 0x80;
899 *p++ = ((c >> 18) & 0x3f) | 0x80;
900 *p++ = ((c >> 12) & 0x3f) | 0x80;
901 *p++ = ((c >> 6) & 0x3f) | 0x80;
902 *p++ = (c & 0x3f) | 0x80;
905 BREAKUP_CHAR (c, charset, c1, c2);
906 lb = CHAR_LEADING_BYTE (c);
907 if (LEADING_BYTE_PRIVATE_P (lb))
908 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
910 if (EQ (charset, Vcharset_control_1))
919 /* Return the first character from a Mule-encoded string in STR,
920 assuming it's non-ASCII. Do not call this directly.
921 Use the macro charptr_emchar() instead. */
924 non_ascii_charptr_emchar (CONST Bufbyte *str)
937 else if ( b >= 0xf8 )
942 else if ( b >= 0xf0 )
947 else if ( b >= 0xe0 )
952 else if ( b >= 0xc0 )
962 for( ; len > 0; len-- )
965 ch = ( ch << 6 ) | ( b & 0x3f );
969 Bufbyte i0 = *str, i1, i2 = 0;
972 if (i0 == LEADING_BYTE_CONTROL_1)
973 return (Emchar) (*++str - 0x20);
975 if (LEADING_BYTE_PREFIX_P (i0))
980 charset = CHARSET_BY_LEADING_BYTE (i0);
981 if (XCHARSET_DIMENSION (charset) == 2)
984 return MAKE_CHAR (charset, i1, i2);
988 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
989 Do not call this directly. Use the macro valid_char_p() instead. */
993 non_ascii_valid_char_p (Emchar ch)
997 /* Must have only lowest 19 bits set */
1001 f1 = CHAR_FIELD1 (ch);
1002 f2 = CHAR_FIELD2 (ch);
1003 f3 = CHAR_FIELD3 (ch);
1007 Lisp_Object charset;
1009 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1010 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1011 f2 > MAX_CHAR_FIELD2_PRIVATE)
1016 if (f3 != 0x20 && f3 != 0x7F)
1020 NOTE: This takes advantage of the fact that
1021 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1022 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1024 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1025 return (XCHARSET_CHARS (charset) == 96);
1029 Lisp_Object charset;
1031 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1032 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1033 f1 > MAX_CHAR_FIELD1_PRIVATE)
1035 if (f2 < 0x20 || f3 < 0x20)
1038 #ifdef ENABLE_COMPOSITE_CHARS
1039 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1041 if (UNBOUNDP (Fgethash (make_int (ch),
1042 Vcomposite_char_char2string_hash_table,
1047 #endif /* ENABLE_COMPOSITE_CHARS */
1049 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1052 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1054 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1057 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1059 return (XCHARSET_CHARS (charset) == 96);
1065 /************************************************************************/
1066 /* Basic string functions */
1067 /************************************************************************/
1069 /* Copy the character pointed to by PTR into STR, assuming it's
1070 non-ASCII. Do not call this directly. Use the macro
1071 charptr_copy_char() instead. */
1074 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1076 Bufbyte *strptr = str;
1078 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1080 /* Notice fallthrough. */
1082 case 6: *++strptr = *ptr++;
1083 case 5: *++strptr = *ptr++;
1085 case 4: *++strptr = *ptr++;
1086 case 3: *++strptr = *ptr++;
1087 case 2: *++strptr = *ptr;
1092 return strptr + 1 - str;
1096 /************************************************************************/
1097 /* streams of Emchars */
1098 /************************************************************************/
1100 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1101 The functions below are not meant to be called directly; use
1102 the macros in insdel.h. */
1105 Lstream_get_emchar_1 (Lstream *stream, int ch)
1107 Bufbyte str[MAX_EMCHAR_LEN];
1108 Bufbyte *strptr = str;
1110 str[0] = (Bufbyte) ch;
1111 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1113 /* Notice fallthrough. */
1116 ch = Lstream_getc (stream);
1118 *++strptr = (Bufbyte) ch;
1120 ch = Lstream_getc (stream);
1122 *++strptr = (Bufbyte) ch;
1125 ch = Lstream_getc (stream);
1127 *++strptr = (Bufbyte) ch;
1129 ch = Lstream_getc (stream);
1131 *++strptr = (Bufbyte) ch;
1133 ch = Lstream_getc (stream);
1135 *++strptr = (Bufbyte) ch;
1140 return charptr_emchar (str);
1144 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1146 Bufbyte str[MAX_EMCHAR_LEN];
1147 Bytecount len = set_charptr_emchar (str, ch);
1148 return Lstream_write (stream, str, len);
1152 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1154 Bufbyte str[MAX_EMCHAR_LEN];
1155 Bytecount len = set_charptr_emchar (str, ch);
1156 Lstream_unread (stream, str, len);
1160 /************************************************************************/
1161 /* charset object */
1162 /************************************************************************/
1165 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1167 struct Lisp_Charset *cs = XCHARSET (obj);
1169 markobj (cs->short_name);
1170 markobj (cs->long_name);
1171 markobj (cs->doc_string);
1172 markobj (cs->registry);
1173 markobj (cs->ccl_program);
1175 markobj (cs->decoding_table);
1181 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1183 struct Lisp_Charset *cs = XCHARSET (obj);
1187 error ("printing unreadable object #<charset %s 0x%x>",
1188 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1191 write_c_string ("#<charset ", printcharfun);
1192 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1193 write_c_string (" ", printcharfun);
1194 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1195 write_c_string (" ", printcharfun);
1196 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1197 write_c_string (" ", printcharfun);
1198 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1199 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1200 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1201 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1202 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1204 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1205 CHARSET_COLUMNS (cs),
1206 CHARSET_GRAPHIC (cs),
1207 CHARSET_FINAL (cs));
1208 write_c_string (buf, printcharfun);
1209 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1210 sprintf (buf, " 0x%x>", cs->header.uid);
1211 write_c_string (buf, printcharfun);
1214 static const struct lrecord_description charset_description[] = {
1215 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1217 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1222 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1223 mark_charset, print_charset, 0, 0, 0,
1224 charset_description,
1225 struct Lisp_Charset);
1227 /* Make a new charset. */
1230 make_charset (Charset_ID id, Lisp_Object name,
1231 unsigned char type, unsigned char columns, unsigned char graphic,
1232 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1233 Lisp_Object long_name, Lisp_Object doc,
1235 Lisp_Object decoding_table,
1236 Emchar ucs_min, Emchar ucs_max,
1237 Emchar code_offset, unsigned char byte_offset)
1240 struct Lisp_Charset *cs =
1241 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1242 XSETCHARSET (obj, cs);
1244 CHARSET_ID (cs) = id;
1245 CHARSET_NAME (cs) = name;
1246 CHARSET_SHORT_NAME (cs) = short_name;
1247 CHARSET_LONG_NAME (cs) = long_name;
1248 CHARSET_DIRECTION (cs) = direction;
1249 CHARSET_TYPE (cs) = type;
1250 CHARSET_COLUMNS (cs) = columns;
1251 CHARSET_GRAPHIC (cs) = graphic;
1252 CHARSET_FINAL (cs) = final;
1253 CHARSET_DOC_STRING (cs) = doc;
1254 CHARSET_REGISTRY (cs) = reg;
1255 CHARSET_CCL_PROGRAM (cs) = Qnil;
1256 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1258 CHARSET_DECODING_TABLE(cs) = Qnil;
1259 CHARSET_UCS_MIN(cs) = ucs_min;
1260 CHARSET_UCS_MAX(cs) = ucs_max;
1261 CHARSET_CODE_OFFSET(cs) = code_offset;
1262 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1265 switch (CHARSET_TYPE (cs))
1267 case CHARSET_TYPE_94:
1268 CHARSET_DIMENSION (cs) = 1;
1269 CHARSET_CHARS (cs) = 94;
1271 case CHARSET_TYPE_96:
1272 CHARSET_DIMENSION (cs) = 1;
1273 CHARSET_CHARS (cs) = 96;
1275 case CHARSET_TYPE_94X94:
1276 CHARSET_DIMENSION (cs) = 2;
1277 CHARSET_CHARS (cs) = 94;
1279 case CHARSET_TYPE_96X96:
1280 CHARSET_DIMENSION (cs) = 2;
1281 CHARSET_CHARS (cs) = 96;
1284 case CHARSET_TYPE_128:
1285 CHARSET_DIMENSION (cs) = 1;
1286 CHARSET_CHARS (cs) = 128;
1288 case CHARSET_TYPE_128X128:
1289 CHARSET_DIMENSION (cs) = 2;
1290 CHARSET_CHARS (cs) = 128;
1292 case CHARSET_TYPE_256:
1293 CHARSET_DIMENSION (cs) = 1;
1294 CHARSET_CHARS (cs) = 256;
1296 case CHARSET_TYPE_256X256:
1297 CHARSET_DIMENSION (cs) = 2;
1298 CHARSET_CHARS (cs) = 256;
1304 if (id == LEADING_BYTE_ASCII)
1305 CHARSET_REP_BYTES (cs) = 1;
1307 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1309 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1314 /* some charsets do not have final characters. This includes
1315 ASCII, Control-1, Composite, and the two faux private
1318 if (code_offset == 0)
1320 assert (NILP (charset_by_attributes[type][final]));
1321 charset_by_attributes[type][final] = obj;
1324 assert (NILP (charset_by_attributes[type][final][direction]));
1325 charset_by_attributes[type][final][direction] = obj;
1329 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1330 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1333 /* official leading byte */
1334 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1337 /* Some charsets are "faux" and don't have names or really exist at
1338 all except in the leading-byte table. */
1340 Fputhash (name, obj, Vcharset_hash_table);
1345 get_unallocated_leading_byte (int dimension)
1350 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1353 lb = next_allocated_leading_byte++;
1357 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1360 lb = next_allocated_1_byte_leading_byte++;
1364 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1367 lb = next_allocated_2_byte_leading_byte++;
1373 ("No more character sets free for this dimension",
1374 make_int (dimension));
1381 range_charset_code_point (Lisp_Object charset, Emchar ch)
1385 if ((XCHARSET_UCS_MIN (charset) <= ch)
1386 && (ch <= XCHARSET_UCS_MAX (charset)))
1388 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1390 if (XCHARSET_DIMENSION (charset) == 1)
1391 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1392 else if (XCHARSET_DIMENSION (charset) == 2)
1393 return list2 (make_int (d / XCHARSET_CHARS (charset)
1394 + XCHARSET_BYTE_OFFSET (charset)),
1395 make_int (d % XCHARSET_CHARS (charset)
1396 + XCHARSET_BYTE_OFFSET (charset)));
1397 else if (XCHARSET_DIMENSION (charset) == 3)
1398 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1399 * XCHARSET_CHARS (charset))
1400 + XCHARSET_BYTE_OFFSET (charset)),
1401 make_int (d / XCHARSET_CHARS (charset)
1402 % XCHARSET_CHARS (charset)
1403 + XCHARSET_BYTE_OFFSET (charset)),
1404 make_int (d % XCHARSET_CHARS (charset)
1405 + XCHARSET_BYTE_OFFSET (charset)));
1406 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1407 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1408 * XCHARSET_CHARS (charset)
1409 * XCHARSET_CHARS (charset))
1410 + XCHARSET_BYTE_OFFSET (charset)),
1411 make_int (d / (XCHARSET_CHARS (charset)
1412 * XCHARSET_CHARS (charset))
1413 % XCHARSET_CHARS (charset)
1414 + XCHARSET_BYTE_OFFSET (charset)),
1415 make_int (d / XCHARSET_CHARS (charset)
1416 % XCHARSET_CHARS (charset)
1417 + XCHARSET_BYTE_OFFSET (charset)),
1418 make_int (d % XCHARSET_CHARS (charset)
1419 + XCHARSET_BYTE_OFFSET (charset)));
1421 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1423 if (XCHARSET_DIMENSION (charset) == 1)
1425 if (XCHARSET_CHARS (charset) == 94)
1427 if (((d = ch - (MIN_CHAR_94
1428 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1430 return list1 (make_int (d + 33));
1432 else if (XCHARSET_CHARS (charset) == 96)
1434 if (((d = ch - (MIN_CHAR_96
1435 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1437 return list1 (make_int (d + 32));
1442 else if (XCHARSET_DIMENSION (charset) == 2)
1444 if (XCHARSET_CHARS (charset) == 94)
1446 if (((d = ch - (MIN_CHAR_94x94
1447 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1450 return list2 (make_int ((d / 94) + 33),
1451 make_int (d % 94 + 33));
1453 else if (XCHARSET_CHARS (charset) == 96)
1455 if (((d = ch - (MIN_CHAR_96x96
1456 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1459 return list2 (make_int ((d / 96) + 32),
1460 make_int (d % 96 + 32));
1468 split_builtin_char (Emchar c)
1470 if (c < MIN_CHAR_OBS_94x94)
1472 if (c <= MAX_CHAR_BASIC_LATIN)
1474 return list2 (Vcharset_ascii, make_int (c));
1478 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1482 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1484 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1486 return list2 (Vcharset_greek_iso8859_7,
1487 make_int (c - MIN_CHAR_GREEK + 0x20));
1489 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1491 return list2 (Vcharset_cyrillic_iso8859_5,
1492 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1494 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1496 return list2 (Vcharset_hebrew_iso8859_8,
1497 make_int (c - MIN_CHAR_HEBREW + 0x20));
1499 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1501 return list2 (Vcharset_thai_tis620,
1502 make_int (c - MIN_CHAR_THAI + 0x20));
1504 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1505 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1507 return list2 (Vcharset_katakana_jisx0201,
1508 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1512 return list3 (Vcharset_ucs_bmp,
1513 make_int (c >> 8), make_int (c & 0xff));
1516 else if (c <= MAX_CHAR_OBS_94x94)
1518 return list3 (CHARSET_BY_ATTRIBUTES
1519 (CHARSET_TYPE_94X94,
1520 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1521 CHARSET_LEFT_TO_RIGHT),
1522 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1523 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1525 else if (c <= MAX_CHAR_94)
1527 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1528 ((c - MIN_CHAR_94) / 94) + '0',
1529 CHARSET_LEFT_TO_RIGHT),
1530 make_int (((c - MIN_CHAR_94) % 94) + 33));
1532 else if (c <= MAX_CHAR_96)
1534 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1535 ((c - MIN_CHAR_96) / 96) + '0',
1536 CHARSET_LEFT_TO_RIGHT),
1537 make_int (((c - MIN_CHAR_96) % 96) + 32));
1539 else if (c <= MAX_CHAR_94x94)
1541 return list3 (CHARSET_BY_ATTRIBUTES
1542 (CHARSET_TYPE_94X94,
1543 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1544 CHARSET_LEFT_TO_RIGHT),
1545 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1546 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1548 else if (c <= MAX_CHAR_96x96)
1550 return list3 (CHARSET_BY_ATTRIBUTES
1551 (CHARSET_TYPE_96X96,
1552 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1553 CHARSET_LEFT_TO_RIGHT),
1554 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1555 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1564 charset_code_point (Lisp_Object charset, Emchar ch)
1566 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1568 if (!EQ (cdef, Qnil))
1570 Lisp_Object field = Fassq (charset, cdef);
1572 if (!EQ (field, Qnil))
1573 return Fcdr (field);
1575 return range_charset_code_point (charset, ch);
1578 Lisp_Object Vdefault_coded_charset_priority_list;
1582 /************************************************************************/
1583 /* Basic charset Lisp functions */
1584 /************************************************************************/
1586 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1587 Return non-nil if OBJECT is a charset.
1591 return CHARSETP (object) ? Qt : Qnil;
1594 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1595 Retrieve the charset of the given name.
1596 If CHARSET-OR-NAME is a charset object, it is simply returned.
1597 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1598 nil is returned. Otherwise the associated charset object is returned.
1602 if (CHARSETP (charset_or_name))
1603 return charset_or_name;
1605 CHECK_SYMBOL (charset_or_name);
1606 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1609 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1610 Retrieve the charset of the given name.
1611 Same as `find-charset' except an error is signalled if there is no such
1612 charset instead of returning nil.
1616 Lisp_Object charset = Ffind_charset (name);
1619 signal_simple_error ("No such charset", name);
1623 /* We store the charsets in hash tables with the names as the key and the
1624 actual charset object as the value. Occasionally we need to use them
1625 in a list format. These routines provide us with that. */
1626 struct charset_list_closure
1628 Lisp_Object *charset_list;
1632 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1633 void *charset_list_closure)
1635 /* This function can GC */
1636 struct charset_list_closure *chcl =
1637 (struct charset_list_closure*) charset_list_closure;
1638 Lisp_Object *charset_list = chcl->charset_list;
1640 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1644 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1645 Return a list of the names of all defined charsets.
1649 Lisp_Object charset_list = Qnil;
1650 struct gcpro gcpro1;
1651 struct charset_list_closure charset_list_closure;
1653 GCPRO1 (charset_list);
1654 charset_list_closure.charset_list = &charset_list;
1655 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1656 &charset_list_closure);
1659 return charset_list;
1662 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1663 Return the name of the given charset.
1667 return XCHARSET_NAME (Fget_charset (charset));
1670 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1671 Define a new character set.
1672 This function is for use with Mule support.
1673 NAME is a symbol, the name by which the character set is normally referred.
1674 DOC-STRING is a string describing the character set.
1675 PROPS is a property list, describing the specific nature of the
1676 character set. Recognized properties are:
1678 'short-name Short version of the charset name (ex: Latin-1)
1679 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1680 'registry A regular expression matching the font registry field for
1682 'dimension Number of octets used to index a character in this charset.
1683 Either 1 or 2. Defaults to 1.
1684 'columns Number of columns used to display a character in this charset.
1685 Only used in TTY mode. (Under X, the actual width of a
1686 character can be derived from the font used to display the
1687 characters.) If unspecified, defaults to the dimension
1688 (this is almost always the correct value).
1689 'chars Number of characters in each dimension (94 or 96).
1690 Defaults to 94. Note that if the dimension is 2, the
1691 character set thus described is 94x94 or 96x96.
1692 'final Final byte of ISO 2022 escape sequence. Must be
1693 supplied. Each combination of (DIMENSION, CHARS) defines a
1694 separate namespace for final bytes. Note that ISO
1695 2022 restricts the final byte to the range
1696 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1697 dimension == 2. Note also that final bytes in the range
1698 0x30 - 0x3F are reserved for user-defined (not official)
1700 'graphic 0 (use left half of font on output) or 1 (use right half
1701 of font on output). Defaults to 0. For example, for
1702 a font whose registry is ISO8859-1, the left half
1703 (octets 0x20 - 0x7F) is the `ascii' character set, while
1704 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1705 character set. With 'graphic set to 0, the octets
1706 will have their high bit cleared; with it set to 1,
1707 the octets will have their high bit set.
1708 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1710 'ccl-program A compiled CCL program used to convert a character in
1711 this charset into an index into the font. This is in
1712 addition to the 'graphic property. The CCL program
1713 is passed the octets of the character, with the high
1714 bit cleared and set depending upon whether the value
1715 of the 'graphic property is 0 or 1.
1717 (name, doc_string, props))
1719 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1720 int direction = CHARSET_LEFT_TO_RIGHT;
1722 Lisp_Object registry = Qnil;
1723 Lisp_Object charset;
1724 Lisp_Object rest, keyword, value;
1725 Lisp_Object ccl_program = Qnil;
1726 Lisp_Object short_name = Qnil, long_name = Qnil;
1727 int byte_offset = -1;
1729 CHECK_SYMBOL (name);
1730 if (!NILP (doc_string))
1731 CHECK_STRING (doc_string);
1733 charset = Ffind_charset (name);
1734 if (!NILP (charset))
1735 signal_simple_error ("Cannot redefine existing charset", name);
1737 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1739 if (EQ (keyword, Qshort_name))
1741 CHECK_STRING (value);
1745 if (EQ (keyword, Qlong_name))
1747 CHECK_STRING (value);
1751 else if (EQ (keyword, Qdimension))
1754 dimension = XINT (value);
1755 if (dimension < 1 || dimension > 2)
1756 signal_simple_error ("Invalid value for 'dimension", value);
1759 else if (EQ (keyword, Qchars))
1762 chars = XINT (value);
1763 if (chars != 94 && chars != 96)
1764 signal_simple_error ("Invalid value for 'chars", value);
1767 else if (EQ (keyword, Qcolumns))
1770 columns = XINT (value);
1771 if (columns != 1 && columns != 2)
1772 signal_simple_error ("Invalid value for 'columns", value);
1775 else if (EQ (keyword, Qgraphic))
1778 graphic = XINT (value);
1780 if (graphic < 0 || graphic > 2)
1782 if (graphic < 0 || graphic > 1)
1784 signal_simple_error ("Invalid value for 'graphic", value);
1787 else if (EQ (keyword, Qregistry))
1789 CHECK_STRING (value);
1793 else if (EQ (keyword, Qdirection))
1795 if (EQ (value, Ql2r))
1796 direction = CHARSET_LEFT_TO_RIGHT;
1797 else if (EQ (value, Qr2l))
1798 direction = CHARSET_RIGHT_TO_LEFT;
1800 signal_simple_error ("Invalid value for 'direction", value);
1803 else if (EQ (keyword, Qfinal))
1805 CHECK_CHAR_COERCE_INT (value);
1806 final = XCHAR (value);
1807 if (final < '0' || final > '~')
1808 signal_simple_error ("Invalid value for 'final", value);
1811 else if (EQ (keyword, Qccl_program))
1813 CHECK_VECTOR (value);
1814 ccl_program = value;
1818 signal_simple_error ("Unrecognized property", keyword);
1822 error ("'final must be specified");
1823 if (dimension == 2 && final > 0x5F)
1825 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1829 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1831 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1833 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1834 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1836 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1838 id = get_unallocated_leading_byte (dimension);
1840 if (NILP (doc_string))
1841 doc_string = build_string ("");
1843 if (NILP (registry))
1844 registry = build_string ("");
1846 if (NILP (short_name))
1847 XSETSTRING (short_name, XSYMBOL (name)->name);
1849 if (NILP (long_name))
1850 long_name = doc_string;
1853 columns = dimension;
1855 if (byte_offset < 0)
1859 else if (chars == 96)
1865 charset = make_charset (id, name, type, columns, graphic,
1866 final, direction, short_name, long_name,
1867 doc_string, registry,
1868 Qnil, 0, 0, 0, byte_offset);
1869 if (!NILP (ccl_program))
1870 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1874 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1876 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1877 NEW-NAME is the name of the new charset. Return the new charset.
1879 (charset, new_name))
1881 Lisp_Object new_charset = Qnil;
1882 int id, dimension, columns, graphic, final;
1883 int direction, type;
1884 Lisp_Object registry, doc_string, short_name, long_name;
1885 struct Lisp_Charset *cs;
1887 charset = Fget_charset (charset);
1888 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1889 signal_simple_error ("Charset already has reverse-direction charset",
1892 CHECK_SYMBOL (new_name);
1893 if (!NILP (Ffind_charset (new_name)))
1894 signal_simple_error ("Cannot redefine existing charset", new_name);
1896 cs = XCHARSET (charset);
1898 type = CHARSET_TYPE (cs);
1899 columns = CHARSET_COLUMNS (cs);
1900 dimension = CHARSET_DIMENSION (cs);
1901 id = get_unallocated_leading_byte (dimension);
1903 graphic = CHARSET_GRAPHIC (cs);
1904 final = CHARSET_FINAL (cs);
1905 direction = CHARSET_RIGHT_TO_LEFT;
1906 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1907 direction = CHARSET_LEFT_TO_RIGHT;
1908 doc_string = CHARSET_DOC_STRING (cs);
1909 short_name = CHARSET_SHORT_NAME (cs);
1910 long_name = CHARSET_LONG_NAME (cs);
1911 registry = CHARSET_REGISTRY (cs);
1913 new_charset = make_charset (id, new_name, type, columns,
1914 graphic, final, direction, short_name, long_name,
1915 doc_string, registry,
1917 CHARSET_DECODING_TABLE(cs),
1918 CHARSET_UCS_MIN(cs),
1919 CHARSET_UCS_MAX(cs),
1920 CHARSET_CODE_OFFSET(cs),
1921 CHARSET_BYTE_OFFSET(cs)
1927 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1928 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1933 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1934 Define symbol ALIAS as an alias for CHARSET.
1938 CHECK_SYMBOL (alias);
1939 charset = Fget_charset (charset);
1940 return Fputhash (alias, charset, Vcharset_hash_table);
1943 /* #### Reverse direction charsets not yet implemented. */
1945 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1947 Return the reverse-direction charset parallel to CHARSET, if any.
1948 This is the charset with the same properties (in particular, the same
1949 dimension, number of characters per dimension, and final byte) as
1950 CHARSET but whose characters are displayed in the opposite direction.
1954 charset = Fget_charset (charset);
1955 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1959 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1960 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1961 If DIRECTION is omitted, both directions will be checked (left-to-right
1962 will be returned if character sets exist for both directions).
1964 (dimension, chars, final, direction))
1966 int dm, ch, fi, di = -1;
1968 Lisp_Object obj = Qnil;
1970 CHECK_INT (dimension);
1971 dm = XINT (dimension);
1972 if (dm < 1 || dm > 2)
1973 signal_simple_error ("Invalid value for DIMENSION", dimension);
1977 if (ch != 94 && ch != 96)
1978 signal_simple_error ("Invalid value for CHARS", chars);
1980 CHECK_CHAR_COERCE_INT (final);
1982 if (fi < '0' || fi > '~')
1983 signal_simple_error ("Invalid value for FINAL", final);
1985 if (EQ (direction, Ql2r))
1986 di = CHARSET_LEFT_TO_RIGHT;
1987 else if (EQ (direction, Qr2l))
1988 di = CHARSET_RIGHT_TO_LEFT;
1989 else if (!NILP (direction))
1990 signal_simple_error ("Invalid value for DIRECTION", direction);
1992 if (dm == 2 && fi > 0x5F)
1994 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1997 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1999 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2003 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2005 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2008 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2011 return XCHARSET_NAME (obj);
2015 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2016 Return short name of CHARSET.
2020 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2023 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2024 Return long name of CHARSET.
2028 return XCHARSET_LONG_NAME (Fget_charset (charset));
2031 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2032 Return description of CHARSET.
2036 return XCHARSET_DOC_STRING (Fget_charset (charset));
2039 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2040 Return dimension of CHARSET.
2044 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2047 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2048 Return property PROP of CHARSET.
2049 Recognized properties are those listed in `make-charset', as well as
2050 'name and 'doc-string.
2054 struct Lisp_Charset *cs;
2056 charset = Fget_charset (charset);
2057 cs = XCHARSET (charset);
2059 CHECK_SYMBOL (prop);
2060 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2061 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2062 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2063 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2064 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2065 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2066 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2067 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2068 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2069 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2070 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2071 if (EQ (prop, Qdirection))
2072 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2073 if (EQ (prop, Qreverse_direction_charset))
2075 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2079 return XCHARSET_NAME (obj);
2081 signal_simple_error ("Unrecognized charset property name", prop);
2082 return Qnil; /* not reached */
2085 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2086 Return charset identification number of CHARSET.
2090 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2093 /* #### We need to figure out which properties we really want to
2096 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2097 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2099 (charset, ccl_program))
2101 charset = Fget_charset (charset);
2102 CHECK_VECTOR (ccl_program);
2103 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2108 invalidate_charset_font_caches (Lisp_Object charset)
2110 /* Invalidate font cache entries for charset on all devices. */
2111 Lisp_Object devcons, concons, hash_table;
2112 DEVICE_LOOP_NO_BREAK (devcons, concons)
2114 struct device *d = XDEVICE (XCAR (devcons));
2115 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2116 if (!UNBOUNDP (hash_table))
2117 Fclrhash (hash_table);
2121 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2122 Set the 'registry property of CHARSET to REGISTRY.
2124 (charset, registry))
2126 charset = Fget_charset (charset);
2127 CHECK_STRING (registry);
2128 XCHARSET_REGISTRY (charset) = registry;
2129 invalidate_charset_font_caches (charset);
2130 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2135 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2136 Return mapping-table of CHARSET.
2140 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2143 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2144 Set mapping-table of CHARSET to TABLE.
2148 struct Lisp_Charset *cs;
2149 Lisp_Object old_table;
2152 charset = Fget_charset (charset);
2153 cs = XCHARSET (charset);
2155 if (EQ (table, Qnil))
2157 CHARSET_DECODING_TABLE(cs) = table;
2160 else if (VECTORP (table))
2162 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
2163 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2164 old_table = CHARSET_DECODING_TABLE(cs);
2165 CHARSET_DECODING_TABLE(cs) = table;
2168 signal_error (Qwrong_type_argument,
2169 list2 (build_translated_string ("vector-or-nil-p"),
2171 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2173 switch (CHARSET_DIMENSION (cs))
2176 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2178 Lisp_Object c = XVECTOR_DATA(table)[i];
2183 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2187 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2189 Lisp_Object v = XVECTOR_DATA(table)[i];
2195 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2197 CHARSET_DECODING_TABLE(cs) = old_table;
2198 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2200 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2202 Lisp_Object c = XVECTOR_DATA(v)[j];
2205 put_char_attribute (c, charset,
2208 (i + CHARSET_BYTE_OFFSET (cs)),
2210 (j + CHARSET_BYTE_OFFSET (cs))));
2214 put_char_attribute (v, charset,
2216 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2225 /************************************************************************/
2226 /* Lisp primitives for working with characters */
2227 /************************************************************************/
2229 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2230 Make a character from CHARSET and octets ARG1 and ARG2.
2231 ARG2 is required only for characters from two-dimensional charsets.
2232 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2233 character s with caron.
2235 (charset, arg1, arg2))
2237 struct Lisp_Charset *cs;
2239 int lowlim, highlim;
2241 charset = Fget_charset (charset);
2242 cs = XCHARSET (charset);
2244 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2245 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2247 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2249 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2250 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2253 /* It is useful (and safe, according to Olivier Galibert) to strip
2254 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2255 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2256 Latin 2 code of the character. */
2264 if (a1 < lowlim || a1 > highlim)
2265 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2267 if (CHARSET_DIMENSION (cs) == 1)
2271 ("Charset is of dimension one; second octet must be nil", arg2);
2272 return make_char (MAKE_CHAR (charset, a1, 0));
2281 a2 = XINT (arg2) & 0x7f;
2283 if (a2 < lowlim || a2 > highlim)
2284 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2286 return make_char (MAKE_CHAR (charset, a1, a2));
2289 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2290 Return the character set of char CH.
2294 CHECK_CHAR_COERCE_INT (ch);
2296 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2299 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2300 Return list of charset and one or two position-codes of CHAR.
2304 /* This function can GC */
2305 struct gcpro gcpro1, gcpro2;
2306 Lisp_Object charset = Qnil;
2307 Lisp_Object rc = Qnil;
2310 GCPRO2 (charset, rc);
2311 CHECK_CHAR_COERCE_INT (character);
2313 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2315 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2317 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2321 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2329 #ifdef ENABLE_COMPOSITE_CHARS
2330 /************************************************************************/
2331 /* composite character functions */
2332 /************************************************************************/
2335 lookup_composite_char (Bufbyte *str, int len)
2337 Lisp_Object lispstr = make_string (str, len);
2338 Lisp_Object ch = Fgethash (lispstr,
2339 Vcomposite_char_string2char_hash_table,
2345 if (composite_char_row_next >= 128)
2346 signal_simple_error ("No more composite chars available", lispstr);
2347 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2348 composite_char_col_next);
2349 Fputhash (make_char (emch), lispstr,
2350 Vcomposite_char_char2string_hash_table);
2351 Fputhash (lispstr, make_char (emch),
2352 Vcomposite_char_string2char_hash_table);
2353 composite_char_col_next++;
2354 if (composite_char_col_next >= 128)
2356 composite_char_col_next = 32;
2357 composite_char_row_next++;
2366 composite_char_string (Emchar ch)
2368 Lisp_Object str = Fgethash (make_char (ch),
2369 Vcomposite_char_char2string_hash_table,
2371 assert (!UNBOUNDP (str));
2375 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2376 Convert a string into a single composite character.
2377 The character is the result of overstriking all the characters in
2382 CHECK_STRING (string);
2383 return make_char (lookup_composite_char (XSTRING_DATA (string),
2384 XSTRING_LENGTH (string)));
2387 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2388 Return a string of the characters comprising a composite character.
2396 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2397 signal_simple_error ("Must be composite char", ch);
2398 return composite_char_string (emch);
2400 #endif /* ENABLE_COMPOSITE_CHARS */
2403 /************************************************************************/
2404 /* initialization */
2405 /************************************************************************/
2408 syms_of_mule_charset (void)
2410 DEFSUBR (Fcharsetp);
2411 DEFSUBR (Ffind_charset);
2412 DEFSUBR (Fget_charset);
2413 DEFSUBR (Fcharset_list);
2414 DEFSUBR (Fcharset_name);
2415 DEFSUBR (Fmake_charset);
2416 DEFSUBR (Fmake_reverse_direction_charset);
2417 /* DEFSUBR (Freverse_direction_charset); */
2418 DEFSUBR (Fdefine_charset_alias);
2419 DEFSUBR (Fcharset_from_attributes);
2420 DEFSUBR (Fcharset_short_name);
2421 DEFSUBR (Fcharset_long_name);
2422 DEFSUBR (Fcharset_description);
2423 DEFSUBR (Fcharset_dimension);
2424 DEFSUBR (Fcharset_property);
2425 DEFSUBR (Fcharset_id);
2426 DEFSUBR (Fset_charset_ccl_program);
2427 DEFSUBR (Fset_charset_registry);
2429 DEFSUBR (Fchar_attribute_alist);
2430 DEFSUBR (Fget_char_attribute);
2431 DEFSUBR (Fput_char_attribute);
2432 DEFSUBR (Fdefine_char);
2433 DEFSUBR (Fchar_variants);
2434 DEFSUBR (Fget_composite_char);
2435 DEFSUBR (Fcharset_mapping_table);
2436 DEFSUBR (Fset_charset_mapping_table);
2439 DEFSUBR (Fmake_char);
2440 DEFSUBR (Fchar_charset);
2441 DEFSUBR (Fsplit_char);
2443 #ifdef ENABLE_COMPOSITE_CHARS
2444 DEFSUBR (Fmake_composite_char);
2445 DEFSUBR (Fcomposite_char_string);
2448 defsymbol (&Qcharsetp, "charsetp");
2449 defsymbol (&Qregistry, "registry");
2450 defsymbol (&Qfinal, "final");
2451 defsymbol (&Qgraphic, "graphic");
2452 defsymbol (&Qdirection, "direction");
2453 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2454 defsymbol (&Qshort_name, "short-name");
2455 defsymbol (&Qlong_name, "long-name");
2457 defsymbol (&Ql2r, "l2r");
2458 defsymbol (&Qr2l, "r2l");
2460 /* Charsets, compatible with FSF 20.3
2461 Naming convention is Script-Charset[-Edition] */
2462 defsymbol (&Qascii, "ascii");
2463 defsymbol (&Qcontrol_1, "control-1");
2464 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2465 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2466 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2467 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2468 defsymbol (&Qthai_tis620, "thai-tis620");
2469 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2470 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2471 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2472 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2473 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2474 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2475 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2476 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2477 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2478 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2479 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2480 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2481 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2482 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2484 defsymbol (&Q_ucs, "->ucs");
2485 defsymbol (&Q_decomposition, "->decomposition");
2486 defsymbol (&Qwide, "wide");
2487 defsymbol (&Qnarrow, "narrow");
2488 defsymbol (&Qcompat, "compat");
2489 defsymbol (&QnoBreak, "noBreak");
2490 defsymbol (&Qsuper, "super");
2491 defsymbol (&Qfraction, "fraction");
2492 defsymbol (&Qucs, "ucs");
2493 defsymbol (&Qucs_bmp, "ucs-bmp");
2494 defsymbol (&Qlatin_viscii, "latin-viscii");
2495 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2496 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2497 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2498 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2499 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2500 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2502 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2503 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2505 defsymbol (&Qcomposite, "composite");
2509 vars_of_mule_charset (void)
2516 /* Table of charsets indexed by leading byte. */
2517 for (i = 0; i < countof (charset_by_leading_byte); i++)
2518 charset_by_leading_byte[i] = Qnil;
2521 /* Table of charsets indexed by type/final-byte. */
2522 for (i = 0; i < countof (charset_by_attributes); i++)
2523 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2524 charset_by_attributes[i][j] = Qnil;
2526 /* Table of charsets indexed by type/final-byte/direction. */
2527 for (i = 0; i < countof (charset_by_attributes); i++)
2528 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2529 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2530 charset_by_attributes[i][j][k] = Qnil;
2534 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2536 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2537 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2541 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2542 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2543 Leading-code of private TYPE9N charset of column-width 1.
2545 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2549 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2550 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2551 Version number of UTF-2000.
2554 staticpro (&Vcharacter_attribute_table);
2555 Vcharacter_attribute_table = make_char_code_table (Qnil);
2557 staticpro (&Vcharacter_composition_table);
2558 Vcharacter_composition_table = make_char_code_table (Qnil);
2560 staticpro (&Vcharacter_variant_table);
2561 Vcharacter_variant_table = make_char_code_table (Qnil);
2563 Vdefault_coded_charset_priority_list = Qnil;
2564 DEFVAR_LISP ("default-coded-charset-priority-list",
2565 &Vdefault_coded_charset_priority_list /*
2566 Default order of preferred coded-character-sets.
2572 complex_vars_of_mule_charset (void)
2574 staticpro (&Vcharset_hash_table);
2575 Vcharset_hash_table =
2576 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2578 /* Predefined character sets. We store them into variables for
2583 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2584 CHARSET_TYPE_256X256, 1, 2, 0,
2585 CHARSET_LEFT_TO_RIGHT,
2586 build_string ("BMP"),
2587 build_string ("BMP"),
2588 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2589 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2590 Qnil, 0, 0xFFFF, 0, 0);
2592 # define MIN_CHAR_THAI 0
2593 # define MAX_CHAR_THAI 0
2594 # define MIN_CHAR_GREEK 0
2595 # define MAX_CHAR_GREEK 0
2596 # define MIN_CHAR_HEBREW 0
2597 # define MAX_CHAR_HEBREW 0
2598 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2599 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2600 # define MIN_CHAR_CYRILLIC 0
2601 # define MAX_CHAR_CYRILLIC 0
2604 make_charset (LEADING_BYTE_ASCII, Qascii,
2605 CHARSET_TYPE_94, 1, 0, 'B',
2606 CHARSET_LEFT_TO_RIGHT,
2607 build_string ("ASCII"),
2608 build_string ("ASCII)"),
2609 build_string ("ASCII (ISO646 IRV)"),
2610 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2611 Qnil, 0, 0x7F, 0, 0);
2612 Vcharset_control_1 =
2613 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2614 CHARSET_TYPE_94, 1, 1, 0,
2615 CHARSET_LEFT_TO_RIGHT,
2616 build_string ("C1"),
2617 build_string ("Control characters"),
2618 build_string ("Control characters 128-191"),
2620 Qnil, 0x80, 0x9F, 0, 0);
2621 Vcharset_latin_iso8859_1 =
2622 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2623 CHARSET_TYPE_96, 1, 1, 'A',
2624 CHARSET_LEFT_TO_RIGHT,
2625 build_string ("Latin-1"),
2626 build_string ("ISO8859-1 (Latin-1)"),
2627 build_string ("ISO8859-1 (Latin-1)"),
2628 build_string ("iso8859-1"),
2629 Qnil, 0xA0, 0xFF, 0, 32);
2630 Vcharset_latin_iso8859_2 =
2631 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2632 CHARSET_TYPE_96, 1, 1, 'B',
2633 CHARSET_LEFT_TO_RIGHT,
2634 build_string ("Latin-2"),
2635 build_string ("ISO8859-2 (Latin-2)"),
2636 build_string ("ISO8859-2 (Latin-2)"),
2637 build_string ("iso8859-2"),
2639 Vcharset_latin_iso8859_3 =
2640 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2641 CHARSET_TYPE_96, 1, 1, 'C',
2642 CHARSET_LEFT_TO_RIGHT,
2643 build_string ("Latin-3"),
2644 build_string ("ISO8859-3 (Latin-3)"),
2645 build_string ("ISO8859-3 (Latin-3)"),
2646 build_string ("iso8859-3"),
2648 Vcharset_latin_iso8859_4 =
2649 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2650 CHARSET_TYPE_96, 1, 1, 'D',
2651 CHARSET_LEFT_TO_RIGHT,
2652 build_string ("Latin-4"),
2653 build_string ("ISO8859-4 (Latin-4)"),
2654 build_string ("ISO8859-4 (Latin-4)"),
2655 build_string ("iso8859-4"),
2657 Vcharset_thai_tis620 =
2658 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2659 CHARSET_TYPE_96, 1, 1, 'T',
2660 CHARSET_LEFT_TO_RIGHT,
2661 build_string ("TIS620"),
2662 build_string ("TIS620 (Thai)"),
2663 build_string ("TIS620.2529 (Thai)"),
2664 build_string ("tis620"),
2665 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2666 Vcharset_greek_iso8859_7 =
2667 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2668 CHARSET_TYPE_96, 1, 1, 'F',
2669 CHARSET_LEFT_TO_RIGHT,
2670 build_string ("ISO8859-7"),
2671 build_string ("ISO8859-7 (Greek)"),
2672 build_string ("ISO8859-7 (Greek)"),
2673 build_string ("iso8859-7"),
2674 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2675 Vcharset_arabic_iso8859_6 =
2676 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2677 CHARSET_TYPE_96, 1, 1, 'G',
2678 CHARSET_RIGHT_TO_LEFT,
2679 build_string ("ISO8859-6"),
2680 build_string ("ISO8859-6 (Arabic)"),
2681 build_string ("ISO8859-6 (Arabic)"),
2682 build_string ("iso8859-6"),
2684 Vcharset_hebrew_iso8859_8 =
2685 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2686 CHARSET_TYPE_96, 1, 1, 'H',
2687 CHARSET_RIGHT_TO_LEFT,
2688 build_string ("ISO8859-8"),
2689 build_string ("ISO8859-8 (Hebrew)"),
2690 build_string ("ISO8859-8 (Hebrew)"),
2691 build_string ("iso8859-8"),
2692 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2693 Vcharset_katakana_jisx0201 =
2694 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2695 CHARSET_TYPE_94, 1, 1, 'I',
2696 CHARSET_LEFT_TO_RIGHT,
2697 build_string ("JISX0201 Kana"),
2698 build_string ("JISX0201.1976 (Japanese Kana)"),
2699 build_string ("JISX0201.1976 Japanese Kana"),
2700 build_string ("jisx0201\\.1976"),
2702 MIN_CHAR_HALFWIDTH_KATAKANA,
2703 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2704 Vcharset_latin_jisx0201 =
2705 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2706 CHARSET_TYPE_94, 1, 0, 'J',
2707 CHARSET_LEFT_TO_RIGHT,
2708 build_string ("JISX0201 Roman"),
2709 build_string ("JISX0201.1976 (Japanese Roman)"),
2710 build_string ("JISX0201.1976 Japanese Roman"),
2711 build_string ("jisx0201\\.1976"),
2713 Vcharset_cyrillic_iso8859_5 =
2714 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2715 CHARSET_TYPE_96, 1, 1, 'L',
2716 CHARSET_LEFT_TO_RIGHT,
2717 build_string ("ISO8859-5"),
2718 build_string ("ISO8859-5 (Cyrillic)"),
2719 build_string ("ISO8859-5 (Cyrillic)"),
2720 build_string ("iso8859-5"),
2721 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2722 Vcharset_latin_iso8859_9 =
2723 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2724 CHARSET_TYPE_96, 1, 1, 'M',
2725 CHARSET_LEFT_TO_RIGHT,
2726 build_string ("Latin-5"),
2727 build_string ("ISO8859-9 (Latin-5)"),
2728 build_string ("ISO8859-9 (Latin-5)"),
2729 build_string ("iso8859-9"),
2731 Vcharset_japanese_jisx0208_1978 =
2732 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2733 CHARSET_TYPE_94X94, 2, 0, '@',
2734 CHARSET_LEFT_TO_RIGHT,
2735 build_string ("JIS X0208:1978"),
2736 build_string ("JIS X0208:1978 (Japanese)"),
2738 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2739 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2741 Vcharset_chinese_gb2312 =
2742 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2743 CHARSET_TYPE_94X94, 2, 0, 'A',
2744 CHARSET_LEFT_TO_RIGHT,
2745 build_string ("GB2312"),
2746 build_string ("GB2312)"),
2747 build_string ("GB2312 Chinese simplified"),
2748 build_string ("gb2312"),
2750 Vcharset_japanese_jisx0208 =
2751 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2752 CHARSET_TYPE_94X94, 2, 0, 'B',
2753 CHARSET_LEFT_TO_RIGHT,
2754 build_string ("JISX0208"),
2755 build_string ("JIS X0208:1983 (Japanese)"),
2756 build_string ("JIS X0208:1983 Japanese Kanji"),
2757 build_string ("jisx0208\\.1983"),
2759 Vcharset_korean_ksc5601 =
2760 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2761 CHARSET_TYPE_94X94, 2, 0, 'C',
2762 CHARSET_LEFT_TO_RIGHT,
2763 build_string ("KSC5601"),
2764 build_string ("KSC5601 (Korean"),
2765 build_string ("KSC5601 Korean Hangul and Hanja"),
2766 build_string ("ksc5601"),
2768 Vcharset_japanese_jisx0212 =
2769 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2770 CHARSET_TYPE_94X94, 2, 0, 'D',
2771 CHARSET_LEFT_TO_RIGHT,
2772 build_string ("JISX0212"),
2773 build_string ("JISX0212 (Japanese)"),
2774 build_string ("JISX0212 Japanese Supplement"),
2775 build_string ("jisx0212"),
2778 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2779 Vcharset_chinese_cns11643_1 =
2780 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2781 CHARSET_TYPE_94X94, 2, 0, 'G',
2782 CHARSET_LEFT_TO_RIGHT,
2783 build_string ("CNS11643-1"),
2784 build_string ("CNS11643-1 (Chinese traditional)"),
2786 ("CNS 11643 Plane 1 Chinese traditional"),
2787 build_string (CHINESE_CNS_PLANE_RE("1")),
2789 Vcharset_chinese_cns11643_2 =
2790 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2791 CHARSET_TYPE_94X94, 2, 0, 'H',
2792 CHARSET_LEFT_TO_RIGHT,
2793 build_string ("CNS11643-2"),
2794 build_string ("CNS11643-2 (Chinese traditional)"),
2796 ("CNS 11643 Plane 2 Chinese traditional"),
2797 build_string (CHINESE_CNS_PLANE_RE("2")),
2800 Vcharset_latin_viscii_lower =
2801 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2802 CHARSET_TYPE_96, 1, 1, '1',
2803 CHARSET_LEFT_TO_RIGHT,
2804 build_string ("VISCII lower"),
2805 build_string ("VISCII lower (Vietnamese)"),
2806 build_string ("VISCII lower (Vietnamese)"),
2807 build_string ("MULEVISCII-LOWER"),
2809 Vcharset_latin_viscii_upper =
2810 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2811 CHARSET_TYPE_96, 1, 1, '2',
2812 CHARSET_LEFT_TO_RIGHT,
2813 build_string ("VISCII upper"),
2814 build_string ("VISCII upper (Vietnamese)"),
2815 build_string ("VISCII upper (Vietnamese)"),
2816 build_string ("MULEVISCII-UPPER"),
2818 Vcharset_latin_viscii =
2819 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2820 CHARSET_TYPE_256, 1, 2, 0,
2821 CHARSET_LEFT_TO_RIGHT,
2822 build_string ("VISCII"),
2823 build_string ("VISCII 1.1 (Vietnamese)"),
2824 build_string ("VISCII 1.1 (Vietnamese)"),
2825 build_string ("VISCII1\\.1"),
2827 Vcharset_hiragana_jisx0208 =
2828 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2829 CHARSET_TYPE_94X94, 2, 0, 'B',
2830 CHARSET_LEFT_TO_RIGHT,
2831 build_string ("Hiragana"),
2832 build_string ("Hiragana of JIS X0208"),
2833 build_string ("Japanese Hiragana of JIS X0208"),
2834 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2835 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2836 (0x24 - 33) * 94 + (0x21 - 33), 33);
2837 Vcharset_katakana_jisx0208 =
2838 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2839 CHARSET_TYPE_94X94, 2, 0, 'B',
2840 CHARSET_LEFT_TO_RIGHT,
2841 build_string ("Katakana"),
2842 build_string ("Katakana of JIS X0208"),
2843 build_string ("Japanese Katakana of JIS X0208"),
2844 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2845 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2846 (0x25 - 33) * 94 + (0x21 - 33), 33);
2848 Vcharset_chinese_big5_1 =
2849 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2850 CHARSET_TYPE_94X94, 2, 0, '0',
2851 CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Big5"),
2853 build_string ("Big5 (Level-1)"),
2855 ("Big5 Level-1 Chinese traditional"),
2856 build_string ("big5"),
2858 Vcharset_chinese_big5_2 =
2859 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2860 CHARSET_TYPE_94X94, 2, 0, '1',
2861 CHARSET_LEFT_TO_RIGHT,
2862 build_string ("Big5"),
2863 build_string ("Big5 (Level-2)"),
2865 ("Big5 Level-2 Chinese traditional"),
2866 build_string ("big5"),
2869 #ifdef ENABLE_COMPOSITE_CHARS
2870 /* #### For simplicity, we put composite chars into a 96x96 charset.
2871 This is going to lead to problems because you can run out of
2872 room, esp. as we don't yet recycle numbers. */
2873 Vcharset_composite =
2874 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2875 CHARSET_TYPE_96X96, 2, 0, 0,
2876 CHARSET_LEFT_TO_RIGHT,
2877 build_string ("Composite"),
2878 build_string ("Composite characters"),
2879 build_string ("Composite characters"),
2882 composite_char_row_next = 32;
2883 composite_char_col_next = 32;
2885 Vcomposite_char_string2char_hash_table =
2886 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2887 Vcomposite_char_char2string_hash_table =
2888 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2889 staticpro (&Vcomposite_char_string2char_hash_table);
2890 staticpro (&Vcomposite_char_char2string_hash_table);
2891 #endif /* ENABLE_COMPOSITE_CHARS */