1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
391 Lisp_Object QnoBreak;
393 Lisp_Object Qfraction;
396 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
402 else if (EQ (v, Qwide))
404 else if (EQ (v, Qnarrow))
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qsuper))
412 else if (EQ (v, Qfraction))
415 signal_simple_error (err_msg, err_arg);
418 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
419 Return character corresponding with list.
423 Lisp_Object table = Vcharacter_composition_table;
424 Lisp_Object rest = list;
428 Lisp_Object v = Fcar (rest);
430 Emchar c = to_char_code (v, "Invalid value for composition", list);
432 ret = get_char_code_table (c, table);
437 if (!CHAR_CODE_TABLE_P (ret))
442 else if (!CONSP (rest))
444 else if (CHAR_CODE_TABLE_P (ret))
447 signal_simple_error ("Invalid table is found with", list);
449 signal_simple_error ("Invalid value for composition", list);
452 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
453 Return variants of CHARACTER.
457 CHECK_CHAR (character);
458 return Fcopy_list (get_char_code_table (XCHAR (character),
459 Vcharacter_variant_table));
462 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
463 Return the alist of attributes of CHARACTER.
467 CHECK_CHAR (character);
468 return Fcopy_alist (get_char_code_table (XCHAR (character),
469 Vcharacter_attribute_table));
472 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
473 Return the value of CHARACTER's ATTRIBUTE.
475 (character, attribute))
478 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
484 if (!NILP (ccs = Ffind_charset (attribute)))
487 return Fcdr (Fassq (attribute, ret));
491 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
494 Emchar char_code = XCHAR (character);
496 = get_char_code_table (char_code, Vcharacter_attribute_table);
499 cell = Fassq (attribute, ret);
503 ret = Fcons (Fcons (attribute, value), ret);
505 else if (!EQ (Fcdr (cell), value))
507 Fsetcdr (cell, value);
509 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
513 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
514 Store CHARACTER's ATTRIBUTE with VALUE.
516 (character, attribute, value))
520 ccs = Ffind_charset (attribute);
524 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
529 /* ad-hoc method for `ascii' */
530 if ((XCHARSET_CHARS (ccs) == 94) &&
531 (XCHARSET_BYTE_OFFSET (ccs) != 33))
532 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
534 ccs_len = XCHARSET_CHARS (ccs);
537 signal_simple_error ("Invalid value for coded-charset",
541 rest = Fget_char_attribute (character, attribute);
548 Lisp_Object ei = Fcar (rest);
550 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
551 nv = XVECTOR_DATA(v)[i];
558 XVECTOR_DATA(v)[i] = Qnil;
559 v = XCHARSET_DECODING_TABLE (ccs);
564 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
571 Lisp_Object ei = Fcar (rest);
574 signal_simple_error ("Invalid value for coded-charset", value);
576 if ((i < 0) || (255 < i))
577 signal_simple_error ("Invalid value for coded-charset", value);
578 i -= XCHARSET_BYTE_OFFSET (ccs);
579 nv = XVECTOR_DATA(v)[i];
585 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
592 XVECTOR_DATA(v)[i] = character;
594 else if (EQ (attribute, Q_decomposition))
596 Lisp_Object rest = value;
597 Lisp_Object table = Vcharacter_composition_table;
600 signal_simple_error ("Invalid value for ->decomposition",
605 Lisp_Object v = Fcar (rest);
608 = to_char_code (v, "Invalid value for ->decomposition", value);
613 put_char_code_table (c, character, table);
618 ntable = get_char_code_table (c, table);
619 if (!CHAR_CODE_TABLE_P (ntable))
621 ntable = make_char_code_table (Qnil);
622 put_char_code_table (c, ntable, table);
628 else if (EQ (attribute, Q_ucs))
634 signal_simple_error ("Invalid value for ->ucs", value);
638 ret = get_char_code_table (c, Vcharacter_variant_table);
639 if (NILP (Fmemq (character, ret)))
641 put_char_code_table (c, Fcons (character, ret),
642 Vcharacter_variant_table);
645 return put_char_attribute (character, attribute, value);
650 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
651 Store character's ATTRIBUTES.
655 Lisp_Object rest = attributes;
656 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
657 Lisp_Object character;
663 Lisp_Object cell = Fcar (rest);
667 signal_simple_error ("Invalid argument", attributes);
668 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
669 && XCHARSET_FINAL (ccs))
673 if (XCHARSET_DIMENSION (ccs) == 1)
675 Lisp_Object eb1 = Fcar (Fcdr (cell));
679 signal_simple_error ("Invalid argument", attributes);
681 switch (XCHARSET_CHARS (ccs))
685 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
689 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
695 else if (XCHARSET_DIMENSION (ccs) == 2)
697 Lisp_Object eb1 = Fcar (Fcdr (cell));
698 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
702 signal_simple_error ("Invalid argument", attributes);
705 signal_simple_error ("Invalid argument", attributes);
707 switch (XCHARSET_CHARS (ccs))
710 code = MIN_CHAR_94x94
711 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
712 + (b1 - 33) * 94 + (b2 - 33);
715 code = MIN_CHAR_96x96
716 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
717 + (b1 - 32) * 96 + (b2 - 32);
728 character = make_char (code);
729 goto setup_attributes;
735 else if (!INTP (code))
736 signal_simple_error ("Invalid argument", attributes);
738 character = make_char (XINT (code));
744 Lisp_Object cell = Fcar (rest);
747 signal_simple_error ("Invalid argument", attributes);
748 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
752 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
755 Lisp_Object Vutf_2000_version;
759 int leading_code_private_11;
762 Lisp_Object Qcharsetp;
764 /* Qdoc_string, Qdimension, Qchars defined in general.c */
765 Lisp_Object Qregistry, Qfinal, Qgraphic;
766 Lisp_Object Qdirection;
767 Lisp_Object Qreverse_direction_charset;
768 Lisp_Object Qleading_byte;
769 Lisp_Object Qshort_name, Qlong_name;
785 Qjapanese_jisx0208_1978,
797 Qvietnamese_viscii_lower,
798 Qvietnamese_viscii_upper,
807 Lisp_Object Ql2r, Qr2l;
809 Lisp_Object Vcharset_hash_table;
812 static Charset_ID next_allocated_leading_byte;
814 static Charset_ID next_allocated_1_byte_leading_byte;
815 static Charset_ID next_allocated_2_byte_leading_byte;
818 /* Composite characters are characters constructed by overstriking two
819 or more regular characters.
821 1) The old Mule implementation involves storing composite characters
822 in a buffer as a tag followed by all of the actual characters
823 used to make up the composite character. I think this is a bad
824 idea; it greatly complicates code that wants to handle strings
825 one character at a time because it has to deal with the possibility
826 of great big ungainly characters. It's much more reasonable to
827 simply store an index into a table of composite characters.
829 2) The current implementation only allows for 16,384 separate
830 composite characters over the lifetime of the XEmacs process.
831 This could become a potential problem if the user
832 edited lots of different files that use composite characters.
833 Due to FSF bogosity, increasing the number of allowable
834 composite characters under Mule would decrease the number
835 of possible faces that can exist. Mule already has shrunk
836 this to 2048, and further shrinkage would become uncomfortable.
837 No such problems exist in XEmacs.
839 Composite characters could be represented as 0x80 C1 C2 C3,
840 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
841 for slightly under 2^20 (one million) composite characters
842 over the XEmacs process lifetime, and you only need to
843 increase the size of a Mule character from 19 to 21 bits.
844 Or you could use 0x80 C1 C2 C3 C4, allowing for about
845 85 million (slightly over 2^26) composite characters. */
848 /************************************************************************/
849 /* Basic Emchar functions */
850 /************************************************************************/
852 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
853 string in STR. Returns the number of bytes stored.
854 Do not call this directly. Use the macro set_charptr_emchar() instead.
858 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
873 else if ( c <= 0x7ff )
875 *p++ = (c >> 6) | 0xc0;
876 *p++ = (c & 0x3f) | 0x80;
878 else if ( c <= 0xffff )
880 *p++ = (c >> 12) | 0xe0;
881 *p++ = ((c >> 6) & 0x3f) | 0x80;
882 *p++ = (c & 0x3f) | 0x80;
884 else if ( c <= 0x1fffff )
886 *p++ = (c >> 18) | 0xf0;
887 *p++ = ((c >> 12) & 0x3f) | 0x80;
888 *p++ = ((c >> 6) & 0x3f) | 0x80;
889 *p++ = (c & 0x3f) | 0x80;
891 else if ( c <= 0x3ffffff )
893 *p++ = (c >> 24) | 0xf8;
894 *p++ = ((c >> 18) & 0x3f) | 0x80;
895 *p++ = ((c >> 12) & 0x3f) | 0x80;
896 *p++ = ((c >> 6) & 0x3f) | 0x80;
897 *p++ = (c & 0x3f) | 0x80;
901 *p++ = (c >> 30) | 0xfc;
902 *p++ = ((c >> 24) & 0x3f) | 0x80;
903 *p++ = ((c >> 18) & 0x3f) | 0x80;
904 *p++ = ((c >> 12) & 0x3f) | 0x80;
905 *p++ = ((c >> 6) & 0x3f) | 0x80;
906 *p++ = (c & 0x3f) | 0x80;
909 BREAKUP_CHAR (c, charset, c1, c2);
910 lb = CHAR_LEADING_BYTE (c);
911 if (LEADING_BYTE_PRIVATE_P (lb))
912 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
914 if (EQ (charset, Vcharset_control_1))
923 /* Return the first character from a Mule-encoded string in STR,
924 assuming it's non-ASCII. Do not call this directly.
925 Use the macro charptr_emchar() instead. */
928 non_ascii_charptr_emchar (CONST Bufbyte *str)
941 else if ( b >= 0xf8 )
946 else if ( b >= 0xf0 )
951 else if ( b >= 0xe0 )
956 else if ( b >= 0xc0 )
966 for( ; len > 0; len-- )
969 ch = ( ch << 6 ) | ( b & 0x3f );
973 Bufbyte i0 = *str, i1, i2 = 0;
976 if (i0 == LEADING_BYTE_CONTROL_1)
977 return (Emchar) (*++str - 0x20);
979 if (LEADING_BYTE_PREFIX_P (i0))
984 charset = CHARSET_BY_LEADING_BYTE (i0);
985 if (XCHARSET_DIMENSION (charset) == 2)
988 return MAKE_CHAR (charset, i1, i2);
992 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
993 Do not call this directly. Use the macro valid_char_p() instead. */
997 non_ascii_valid_char_p (Emchar ch)
1001 /* Must have only lowest 19 bits set */
1005 f1 = CHAR_FIELD1 (ch);
1006 f2 = CHAR_FIELD2 (ch);
1007 f3 = CHAR_FIELD3 (ch);
1011 Lisp_Object charset;
1013 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1014 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1015 f2 > MAX_CHAR_FIELD2_PRIVATE)
1020 if (f3 != 0x20 && f3 != 0x7F)
1024 NOTE: This takes advantage of the fact that
1025 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1026 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1028 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1029 return (XCHARSET_CHARS (charset) == 96);
1033 Lisp_Object charset;
1035 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1036 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1037 f1 > MAX_CHAR_FIELD1_PRIVATE)
1039 if (f2 < 0x20 || f3 < 0x20)
1042 #ifdef ENABLE_COMPOSITE_CHARS
1043 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1045 if (UNBOUNDP (Fgethash (make_int (ch),
1046 Vcomposite_char_char2string_hash_table,
1051 #endif /* ENABLE_COMPOSITE_CHARS */
1053 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1056 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1058 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1061 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1063 return (XCHARSET_CHARS (charset) == 96);
1069 /************************************************************************/
1070 /* Basic string functions */
1071 /************************************************************************/
1073 /* Copy the character pointed to by PTR into STR, assuming it's
1074 non-ASCII. Do not call this directly. Use the macro
1075 charptr_copy_char() instead. */
1078 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1080 Bufbyte *strptr = str;
1082 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1084 /* Notice fallthrough. */
1086 case 6: *++strptr = *ptr++;
1087 case 5: *++strptr = *ptr++;
1089 case 4: *++strptr = *ptr++;
1090 case 3: *++strptr = *ptr++;
1091 case 2: *++strptr = *ptr;
1096 return strptr + 1 - str;
1100 /************************************************************************/
1101 /* streams of Emchars */
1102 /************************************************************************/
1104 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1105 The functions below are not meant to be called directly; use
1106 the macros in insdel.h. */
1109 Lstream_get_emchar_1 (Lstream *stream, int ch)
1111 Bufbyte str[MAX_EMCHAR_LEN];
1112 Bufbyte *strptr = str;
1114 str[0] = (Bufbyte) ch;
1115 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1117 /* Notice fallthrough. */
1120 ch = Lstream_getc (stream);
1122 *++strptr = (Bufbyte) ch;
1124 ch = Lstream_getc (stream);
1126 *++strptr = (Bufbyte) ch;
1129 ch = Lstream_getc (stream);
1131 *++strptr = (Bufbyte) ch;
1133 ch = Lstream_getc (stream);
1135 *++strptr = (Bufbyte) ch;
1137 ch = Lstream_getc (stream);
1139 *++strptr = (Bufbyte) ch;
1144 return charptr_emchar (str);
1148 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1150 Bufbyte str[MAX_EMCHAR_LEN];
1151 Bytecount len = set_charptr_emchar (str, ch);
1152 return Lstream_write (stream, str, len);
1156 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1158 Bufbyte str[MAX_EMCHAR_LEN];
1159 Bytecount len = set_charptr_emchar (str, ch);
1160 Lstream_unread (stream, str, len);
1164 /************************************************************************/
1165 /* charset object */
1166 /************************************************************************/
1169 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1171 struct Lisp_Charset *cs = XCHARSET (obj);
1173 markobj (cs->short_name);
1174 markobj (cs->long_name);
1175 markobj (cs->doc_string);
1176 markobj (cs->registry);
1177 markobj (cs->ccl_program);
1179 markobj (cs->decoding_table);
1185 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1187 struct Lisp_Charset *cs = XCHARSET (obj);
1191 error ("printing unreadable object #<charset %s 0x%x>",
1192 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1195 write_c_string ("#<charset ", printcharfun);
1196 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1197 write_c_string (" ", printcharfun);
1198 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1199 write_c_string (" ", printcharfun);
1200 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1201 write_c_string (" ", printcharfun);
1202 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1203 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1204 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1205 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1206 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1208 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1209 CHARSET_COLUMNS (cs),
1210 CHARSET_GRAPHIC (cs),
1211 CHARSET_FINAL (cs));
1212 write_c_string (buf, printcharfun);
1213 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1214 sprintf (buf, " 0x%x>", cs->header.uid);
1215 write_c_string (buf, printcharfun);
1218 static const struct lrecord_description charset_description[] = {
1219 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1221 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1226 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1227 mark_charset, print_charset, 0, 0, 0,
1228 charset_description,
1229 struct Lisp_Charset);
1231 /* Make a new charset. */
1234 make_charset (Charset_ID id, Lisp_Object name,
1235 unsigned char type, unsigned char columns, unsigned char graphic,
1236 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1237 Lisp_Object long_name, Lisp_Object doc,
1239 Lisp_Object decoding_table,
1240 Emchar ucs_min, Emchar ucs_max,
1241 Emchar code_offset, unsigned char byte_offset)
1244 struct Lisp_Charset *cs =
1245 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1246 XSETCHARSET (obj, cs);
1248 CHARSET_ID (cs) = id;
1249 CHARSET_NAME (cs) = name;
1250 CHARSET_SHORT_NAME (cs) = short_name;
1251 CHARSET_LONG_NAME (cs) = long_name;
1252 CHARSET_DIRECTION (cs) = direction;
1253 CHARSET_TYPE (cs) = type;
1254 CHARSET_COLUMNS (cs) = columns;
1255 CHARSET_GRAPHIC (cs) = graphic;
1256 CHARSET_FINAL (cs) = final;
1257 CHARSET_DOC_STRING (cs) = doc;
1258 CHARSET_REGISTRY (cs) = reg;
1259 CHARSET_CCL_PROGRAM (cs) = Qnil;
1260 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1262 CHARSET_DECODING_TABLE(cs) = Qnil;
1263 CHARSET_UCS_MIN(cs) = ucs_min;
1264 CHARSET_UCS_MAX(cs) = ucs_max;
1265 CHARSET_CODE_OFFSET(cs) = code_offset;
1266 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1269 switch (CHARSET_TYPE (cs))
1271 case CHARSET_TYPE_94:
1272 CHARSET_DIMENSION (cs) = 1;
1273 CHARSET_CHARS (cs) = 94;
1275 case CHARSET_TYPE_96:
1276 CHARSET_DIMENSION (cs) = 1;
1277 CHARSET_CHARS (cs) = 96;
1279 case CHARSET_TYPE_94X94:
1280 CHARSET_DIMENSION (cs) = 2;
1281 CHARSET_CHARS (cs) = 94;
1283 case CHARSET_TYPE_96X96:
1284 CHARSET_DIMENSION (cs) = 2;
1285 CHARSET_CHARS (cs) = 96;
1288 case CHARSET_TYPE_128:
1289 CHARSET_DIMENSION (cs) = 1;
1290 CHARSET_CHARS (cs) = 128;
1292 case CHARSET_TYPE_128X128:
1293 CHARSET_DIMENSION (cs) = 2;
1294 CHARSET_CHARS (cs) = 128;
1296 case CHARSET_TYPE_256:
1297 CHARSET_DIMENSION (cs) = 1;
1298 CHARSET_CHARS (cs) = 256;
1300 case CHARSET_TYPE_256X256:
1301 CHARSET_DIMENSION (cs) = 2;
1302 CHARSET_CHARS (cs) = 256;
1308 if (id == LEADING_BYTE_ASCII)
1309 CHARSET_REP_BYTES (cs) = 1;
1311 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1313 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1318 /* some charsets do not have final characters. This includes
1319 ASCII, Control-1, Composite, and the two faux private
1322 if (code_offset == 0)
1324 assert (NILP (charset_by_attributes[type][final]));
1325 charset_by_attributes[type][final] = obj;
1328 assert (NILP (charset_by_attributes[type][final][direction]));
1329 charset_by_attributes[type][final][direction] = obj;
1333 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1334 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1337 /* official leading byte */
1338 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1341 /* Some charsets are "faux" and don't have names or really exist at
1342 all except in the leading-byte table. */
1344 Fputhash (name, obj, Vcharset_hash_table);
1349 get_unallocated_leading_byte (int dimension)
1354 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1357 lb = next_allocated_leading_byte++;
1361 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1364 lb = next_allocated_1_byte_leading_byte++;
1368 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1371 lb = next_allocated_2_byte_leading_byte++;
1377 ("No more character sets free for this dimension",
1378 make_int (dimension));
1385 range_charset_code_point (Lisp_Object charset, Emchar ch)
1389 if ((XCHARSET_UCS_MIN (charset) <= ch)
1390 && (ch <= XCHARSET_UCS_MAX (charset)))
1392 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1394 if (XCHARSET_DIMENSION (charset) == 1)
1395 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1396 else if (XCHARSET_DIMENSION (charset) == 2)
1397 return list2 (make_int (d / XCHARSET_CHARS (charset)
1398 + XCHARSET_BYTE_OFFSET (charset)),
1399 make_int (d % XCHARSET_CHARS (charset)
1400 + XCHARSET_BYTE_OFFSET (charset)));
1401 else if (XCHARSET_DIMENSION (charset) == 3)
1402 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1403 * XCHARSET_CHARS (charset))
1404 + XCHARSET_BYTE_OFFSET (charset)),
1405 make_int (d / XCHARSET_CHARS (charset)
1406 % XCHARSET_CHARS (charset)
1407 + XCHARSET_BYTE_OFFSET (charset)),
1408 make_int (d % XCHARSET_CHARS (charset)
1409 + XCHARSET_BYTE_OFFSET (charset)));
1410 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1411 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1412 * XCHARSET_CHARS (charset)
1413 * XCHARSET_CHARS (charset))
1414 + XCHARSET_BYTE_OFFSET (charset)),
1415 make_int (d / (XCHARSET_CHARS (charset)
1416 * XCHARSET_CHARS (charset))
1417 % XCHARSET_CHARS (charset)
1418 + XCHARSET_BYTE_OFFSET (charset)),
1419 make_int (d / XCHARSET_CHARS (charset)
1420 % XCHARSET_CHARS (charset)
1421 + XCHARSET_BYTE_OFFSET (charset)),
1422 make_int (d % XCHARSET_CHARS (charset)
1423 + XCHARSET_BYTE_OFFSET (charset)));
1425 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1427 if (XCHARSET_DIMENSION (charset) == 1)
1429 if (XCHARSET_CHARS (charset) == 94)
1431 if (((d = ch - (MIN_CHAR_94
1432 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1434 return list1 (make_int (d + 33));
1436 else if (XCHARSET_CHARS (charset) == 96)
1438 if (((d = ch - (MIN_CHAR_96
1439 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1441 return list1 (make_int (d + 32));
1446 else if (XCHARSET_DIMENSION (charset) == 2)
1448 if (XCHARSET_CHARS (charset) == 94)
1450 if (((d = ch - (MIN_CHAR_94x94
1451 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1454 return list2 (make_int ((d / 94) + 33),
1455 make_int (d % 94 + 33));
1457 else if (XCHARSET_CHARS (charset) == 96)
1459 if (((d = ch - (MIN_CHAR_96x96
1460 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1463 return list2 (make_int ((d / 96) + 32),
1464 make_int (d % 96 + 32));
1472 split_builtin_char (Emchar c)
1474 if (c < MIN_CHAR_OBS_94x94)
1476 if (c <= MAX_CHAR_BASIC_LATIN)
1478 return list2 (Vcharset_ascii, make_int (c));
1482 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1486 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1488 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1490 return list2 (Vcharset_greek_iso8859_7,
1491 make_int (c - MIN_CHAR_GREEK + 0x20));
1493 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1495 return list2 (Vcharset_cyrillic_iso8859_5,
1496 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1498 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1500 return list2 (Vcharset_hebrew_iso8859_8,
1501 make_int (c - MIN_CHAR_HEBREW + 0x20));
1503 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1505 return list2 (Vcharset_thai_tis620,
1506 make_int (c - MIN_CHAR_THAI + 0x20));
1508 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1509 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1511 return list2 (Vcharset_katakana_jisx0201,
1512 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1516 return list3 (Vcharset_ucs_bmp,
1517 make_int (c >> 8), make_int (c & 0xff));
1520 else if (c <= MAX_CHAR_OBS_94x94)
1522 return list3 (CHARSET_BY_ATTRIBUTES
1523 (CHARSET_TYPE_94X94,
1524 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1525 CHARSET_LEFT_TO_RIGHT),
1526 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1527 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1529 else if (c <= MAX_CHAR_94)
1531 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1532 ((c - MIN_CHAR_94) / 94) + '0',
1533 CHARSET_LEFT_TO_RIGHT),
1534 make_int (((c - MIN_CHAR_94) % 94) + 33));
1536 else if (c <= MAX_CHAR_96)
1538 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1539 ((c - MIN_CHAR_96) / 96) + '0',
1540 CHARSET_LEFT_TO_RIGHT),
1541 make_int (((c - MIN_CHAR_96) % 96) + 32));
1543 else if (c <= MAX_CHAR_94x94)
1545 return list3 (CHARSET_BY_ATTRIBUTES
1546 (CHARSET_TYPE_94X94,
1547 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1548 CHARSET_LEFT_TO_RIGHT),
1549 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1550 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1552 else if (c <= MAX_CHAR_96x96)
1554 return list3 (CHARSET_BY_ATTRIBUTES
1555 (CHARSET_TYPE_96X96,
1556 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1557 CHARSET_LEFT_TO_RIGHT),
1558 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1559 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1568 charset_code_point (Lisp_Object charset, Emchar ch)
1570 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1572 if (!EQ (cdef, Qnil))
1574 Lisp_Object field = Fassq (charset, cdef);
1576 if (!EQ (field, Qnil))
1577 return Fcdr (field);
1579 return range_charset_code_point (charset, ch);
1582 Lisp_Object Vdefault_coded_charset_priority_list;
1586 /************************************************************************/
1587 /* Basic charset Lisp functions */
1588 /************************************************************************/
1590 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1591 Return non-nil if OBJECT is a charset.
1595 return CHARSETP (object) ? Qt : Qnil;
1598 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1599 Retrieve the charset of the given name.
1600 If CHARSET-OR-NAME is a charset object, it is simply returned.
1601 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1602 nil is returned. Otherwise the associated charset object is returned.
1606 if (CHARSETP (charset_or_name))
1607 return charset_or_name;
1609 CHECK_SYMBOL (charset_or_name);
1610 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1613 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1614 Retrieve the charset of the given name.
1615 Same as `find-charset' except an error is signalled if there is no such
1616 charset instead of returning nil.
1620 Lisp_Object charset = Ffind_charset (name);
1623 signal_simple_error ("No such charset", name);
1627 /* We store the charsets in hash tables with the names as the key and the
1628 actual charset object as the value. Occasionally we need to use them
1629 in a list format. These routines provide us with that. */
1630 struct charset_list_closure
1632 Lisp_Object *charset_list;
1636 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1637 void *charset_list_closure)
1639 /* This function can GC */
1640 struct charset_list_closure *chcl =
1641 (struct charset_list_closure*) charset_list_closure;
1642 Lisp_Object *charset_list = chcl->charset_list;
1644 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1648 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1649 Return a list of the names of all defined charsets.
1653 Lisp_Object charset_list = Qnil;
1654 struct gcpro gcpro1;
1655 struct charset_list_closure charset_list_closure;
1657 GCPRO1 (charset_list);
1658 charset_list_closure.charset_list = &charset_list;
1659 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1660 &charset_list_closure);
1663 return charset_list;
1666 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1667 Return the name of the given charset.
1671 return XCHARSET_NAME (Fget_charset (charset));
1674 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1675 Define a new character set.
1676 This function is for use with Mule support.
1677 NAME is a symbol, the name by which the character set is normally referred.
1678 DOC-STRING is a string describing the character set.
1679 PROPS is a property list, describing the specific nature of the
1680 character set. Recognized properties are:
1682 'short-name Short version of the charset name (ex: Latin-1)
1683 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1684 'registry A regular expression matching the font registry field for
1686 'dimension Number of octets used to index a character in this charset.
1687 Either 1 or 2. Defaults to 1.
1688 'columns Number of columns used to display a character in this charset.
1689 Only used in TTY mode. (Under X, the actual width of a
1690 character can be derived from the font used to display the
1691 characters.) If unspecified, defaults to the dimension
1692 (this is almost always the correct value).
1693 'chars Number of characters in each dimension (94 or 96).
1694 Defaults to 94. Note that if the dimension is 2, the
1695 character set thus described is 94x94 or 96x96.
1696 'final Final byte of ISO 2022 escape sequence. Must be
1697 supplied. Each combination of (DIMENSION, CHARS) defines a
1698 separate namespace for final bytes. Note that ISO
1699 2022 restricts the final byte to the range
1700 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1701 dimension == 2. Note also that final bytes in the range
1702 0x30 - 0x3F are reserved for user-defined (not official)
1704 'graphic 0 (use left half of font on output) or 1 (use right half
1705 of font on output). Defaults to 0. For example, for
1706 a font whose registry is ISO8859-1, the left half
1707 (octets 0x20 - 0x7F) is the `ascii' character set, while
1708 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1709 character set. With 'graphic set to 0, the octets
1710 will have their high bit cleared; with it set to 1,
1711 the octets will have their high bit set.
1712 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1714 'ccl-program A compiled CCL program used to convert a character in
1715 this charset into an index into the font. This is in
1716 addition to the 'graphic property. The CCL program
1717 is passed the octets of the character, with the high
1718 bit cleared and set depending upon whether the value
1719 of the 'graphic property is 0 or 1.
1721 (name, doc_string, props))
1723 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1724 int direction = CHARSET_LEFT_TO_RIGHT;
1726 Lisp_Object registry = Qnil;
1727 Lisp_Object charset;
1728 Lisp_Object rest, keyword, value;
1729 Lisp_Object ccl_program = Qnil;
1730 Lisp_Object short_name = Qnil, long_name = Qnil;
1731 int byte_offset = -1;
1733 CHECK_SYMBOL (name);
1734 if (!NILP (doc_string))
1735 CHECK_STRING (doc_string);
1737 charset = Ffind_charset (name);
1738 if (!NILP (charset))
1739 signal_simple_error ("Cannot redefine existing charset", name);
1741 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1743 if (EQ (keyword, Qshort_name))
1745 CHECK_STRING (value);
1749 if (EQ (keyword, Qlong_name))
1751 CHECK_STRING (value);
1755 else if (EQ (keyword, Qdimension))
1758 dimension = XINT (value);
1759 if (dimension < 1 || dimension > 2)
1760 signal_simple_error ("Invalid value for 'dimension", value);
1763 else if (EQ (keyword, Qchars))
1766 chars = XINT (value);
1767 if (chars != 94 && chars != 96)
1768 signal_simple_error ("Invalid value for 'chars", value);
1771 else if (EQ (keyword, Qcolumns))
1774 columns = XINT (value);
1775 if (columns != 1 && columns != 2)
1776 signal_simple_error ("Invalid value for 'columns", value);
1779 else if (EQ (keyword, Qgraphic))
1782 graphic = XINT (value);
1784 if (graphic < 0 || graphic > 2)
1786 if (graphic < 0 || graphic > 1)
1788 signal_simple_error ("Invalid value for 'graphic", value);
1791 else if (EQ (keyword, Qregistry))
1793 CHECK_STRING (value);
1797 else if (EQ (keyword, Qdirection))
1799 if (EQ (value, Ql2r))
1800 direction = CHARSET_LEFT_TO_RIGHT;
1801 else if (EQ (value, Qr2l))
1802 direction = CHARSET_RIGHT_TO_LEFT;
1804 signal_simple_error ("Invalid value for 'direction", value);
1807 else if (EQ (keyword, Qfinal))
1809 CHECK_CHAR_COERCE_INT (value);
1810 final = XCHAR (value);
1811 if (final < '0' || final > '~')
1812 signal_simple_error ("Invalid value for 'final", value);
1815 else if (EQ (keyword, Qccl_program))
1817 CHECK_VECTOR (value);
1818 ccl_program = value;
1822 signal_simple_error ("Unrecognized property", keyword);
1826 error ("'final must be specified");
1827 if (dimension == 2 && final > 0x5F)
1829 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1833 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1835 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1837 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1838 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1840 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1842 id = get_unallocated_leading_byte (dimension);
1844 if (NILP (doc_string))
1845 doc_string = build_string ("");
1847 if (NILP (registry))
1848 registry = build_string ("");
1850 if (NILP (short_name))
1851 XSETSTRING (short_name, XSYMBOL (name)->name);
1853 if (NILP (long_name))
1854 long_name = doc_string;
1857 columns = dimension;
1859 if (byte_offset < 0)
1863 else if (chars == 96)
1869 charset = make_charset (id, name, type, columns, graphic,
1870 final, direction, short_name, long_name,
1871 doc_string, registry,
1872 Qnil, 0, 0, 0, byte_offset);
1873 if (!NILP (ccl_program))
1874 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1878 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1880 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1881 NEW-NAME is the name of the new charset. Return the new charset.
1883 (charset, new_name))
1885 Lisp_Object new_charset = Qnil;
1886 int id, dimension, columns, graphic, final;
1887 int direction, type;
1888 Lisp_Object registry, doc_string, short_name, long_name;
1889 struct Lisp_Charset *cs;
1891 charset = Fget_charset (charset);
1892 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1893 signal_simple_error ("Charset already has reverse-direction charset",
1896 CHECK_SYMBOL (new_name);
1897 if (!NILP (Ffind_charset (new_name)))
1898 signal_simple_error ("Cannot redefine existing charset", new_name);
1900 cs = XCHARSET (charset);
1902 type = CHARSET_TYPE (cs);
1903 columns = CHARSET_COLUMNS (cs);
1904 dimension = CHARSET_DIMENSION (cs);
1905 id = get_unallocated_leading_byte (dimension);
1907 graphic = CHARSET_GRAPHIC (cs);
1908 final = CHARSET_FINAL (cs);
1909 direction = CHARSET_RIGHT_TO_LEFT;
1910 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1911 direction = CHARSET_LEFT_TO_RIGHT;
1912 doc_string = CHARSET_DOC_STRING (cs);
1913 short_name = CHARSET_SHORT_NAME (cs);
1914 long_name = CHARSET_LONG_NAME (cs);
1915 registry = CHARSET_REGISTRY (cs);
1917 new_charset = make_charset (id, new_name, type, columns,
1918 graphic, final, direction, short_name, long_name,
1919 doc_string, registry,
1921 CHARSET_DECODING_TABLE(cs),
1922 CHARSET_UCS_MIN(cs),
1923 CHARSET_UCS_MAX(cs),
1924 CHARSET_CODE_OFFSET(cs),
1925 CHARSET_BYTE_OFFSET(cs)
1931 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1932 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1937 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1938 Define symbol ALIAS as an alias for CHARSET.
1942 CHECK_SYMBOL (alias);
1943 charset = Fget_charset (charset);
1944 return Fputhash (alias, charset, Vcharset_hash_table);
1947 /* #### Reverse direction charsets not yet implemented. */
1949 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1951 Return the reverse-direction charset parallel to CHARSET, if any.
1952 This is the charset with the same properties (in particular, the same
1953 dimension, number of characters per dimension, and final byte) as
1954 CHARSET but whose characters are displayed in the opposite direction.
1958 charset = Fget_charset (charset);
1959 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1963 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1964 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1965 If DIRECTION is omitted, both directions will be checked (left-to-right
1966 will be returned if character sets exist for both directions).
1968 (dimension, chars, final, direction))
1970 int dm, ch, fi, di = -1;
1972 Lisp_Object obj = Qnil;
1974 CHECK_INT (dimension);
1975 dm = XINT (dimension);
1976 if (dm < 1 || dm > 2)
1977 signal_simple_error ("Invalid value for DIMENSION", dimension);
1981 if (ch != 94 && ch != 96)
1982 signal_simple_error ("Invalid value for CHARS", chars);
1984 CHECK_CHAR_COERCE_INT (final);
1986 if (fi < '0' || fi > '~')
1987 signal_simple_error ("Invalid value for FINAL", final);
1989 if (EQ (direction, Ql2r))
1990 di = CHARSET_LEFT_TO_RIGHT;
1991 else if (EQ (direction, Qr2l))
1992 di = CHARSET_RIGHT_TO_LEFT;
1993 else if (!NILP (direction))
1994 signal_simple_error ("Invalid value for DIRECTION", direction);
1996 if (dm == 2 && fi > 0x5F)
1998 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2001 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2003 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2007 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2009 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2012 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2015 return XCHARSET_NAME (obj);
2019 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2020 Return short name of CHARSET.
2024 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2027 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2028 Return long name of CHARSET.
2032 return XCHARSET_LONG_NAME (Fget_charset (charset));
2035 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2036 Return description of CHARSET.
2040 return XCHARSET_DOC_STRING (Fget_charset (charset));
2043 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2044 Return dimension of CHARSET.
2048 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2051 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2052 Return property PROP of CHARSET.
2053 Recognized properties are those listed in `make-charset', as well as
2054 'name and 'doc-string.
2058 struct Lisp_Charset *cs;
2060 charset = Fget_charset (charset);
2061 cs = XCHARSET (charset);
2063 CHECK_SYMBOL (prop);
2064 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2065 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2066 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2067 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2068 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2069 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2070 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2071 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2072 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2073 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2074 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2075 if (EQ (prop, Qdirection))
2076 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2077 if (EQ (prop, Qreverse_direction_charset))
2079 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2083 return XCHARSET_NAME (obj);
2085 signal_simple_error ("Unrecognized charset property name", prop);
2086 return Qnil; /* not reached */
2089 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2090 Return charset identification number of CHARSET.
2094 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2097 /* #### We need to figure out which properties we really want to
2100 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2101 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2103 (charset, ccl_program))
2105 charset = Fget_charset (charset);
2106 CHECK_VECTOR (ccl_program);
2107 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2112 invalidate_charset_font_caches (Lisp_Object charset)
2114 /* Invalidate font cache entries for charset on all devices. */
2115 Lisp_Object devcons, concons, hash_table;
2116 DEVICE_LOOP_NO_BREAK (devcons, concons)
2118 struct device *d = XDEVICE (XCAR (devcons));
2119 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2120 if (!UNBOUNDP (hash_table))
2121 Fclrhash (hash_table);
2125 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2126 Set the 'registry property of CHARSET to REGISTRY.
2128 (charset, registry))
2130 charset = Fget_charset (charset);
2131 CHECK_STRING (registry);
2132 XCHARSET_REGISTRY (charset) = registry;
2133 invalidate_charset_font_caches (charset);
2134 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2139 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2140 Return mapping-table of CHARSET.
2144 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2147 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2148 Set mapping-table of CHARSET to TABLE.
2152 struct Lisp_Charset *cs;
2153 Lisp_Object old_table;
2156 charset = Fget_charset (charset);
2157 cs = XCHARSET (charset);
2159 if (EQ (table, Qnil))
2161 CHARSET_DECODING_TABLE(cs) = table;
2164 else if (VECTORP (table))
2168 /* ad-hoc method for `ascii' */
2169 if ((CHARSET_CHARS (cs) == 94) &&
2170 (CHARSET_BYTE_OFFSET (cs) != 33))
2171 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2173 ccs_len = CHARSET_CHARS (cs);
2175 if (XVECTOR_LENGTH (table) > ccs_len)
2176 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2177 old_table = CHARSET_DECODING_TABLE(cs);
2178 CHARSET_DECODING_TABLE(cs) = table;
2181 signal_error (Qwrong_type_argument,
2182 list2 (build_translated_string ("vector-or-nil-p"),
2184 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2186 switch (CHARSET_DIMENSION (cs))
2189 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2191 Lisp_Object c = XVECTOR_DATA(table)[i];
2196 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2200 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2202 Lisp_Object v = XVECTOR_DATA(table)[i];
2208 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2210 CHARSET_DECODING_TABLE(cs) = old_table;
2211 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2213 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2215 Lisp_Object c = XVECTOR_DATA(v)[j];
2218 put_char_attribute (c, charset,
2221 (i + CHARSET_BYTE_OFFSET (cs)),
2223 (j + CHARSET_BYTE_OFFSET (cs))));
2227 put_char_attribute (v, charset,
2229 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2238 /************************************************************************/
2239 /* Lisp primitives for working with characters */
2240 /************************************************************************/
2242 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2243 Make a character from CHARSET and octets ARG1 and ARG2.
2244 ARG2 is required only for characters from two-dimensional charsets.
2245 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2246 character s with caron.
2248 (charset, arg1, arg2))
2250 struct Lisp_Charset *cs;
2252 int lowlim, highlim;
2254 charset = Fget_charset (charset);
2255 cs = XCHARSET (charset);
2257 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2258 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2260 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2262 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2263 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2266 /* It is useful (and safe, according to Olivier Galibert) to strip
2267 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2268 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2269 Latin 2 code of the character. */
2277 if (a1 < lowlim || a1 > highlim)
2278 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2280 if (CHARSET_DIMENSION (cs) == 1)
2284 ("Charset is of dimension one; second octet must be nil", arg2);
2285 return make_char (MAKE_CHAR (charset, a1, 0));
2294 a2 = XINT (arg2) & 0x7f;
2296 if (a2 < lowlim || a2 > highlim)
2297 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2299 return make_char (MAKE_CHAR (charset, a1, a2));
2302 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2303 Return the character set of char CH.
2307 CHECK_CHAR_COERCE_INT (ch);
2309 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2312 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2313 Return list of charset and one or two position-codes of CHAR.
2317 /* This function can GC */
2318 struct gcpro gcpro1, gcpro2;
2319 Lisp_Object charset = Qnil;
2320 Lisp_Object rc = Qnil;
2323 GCPRO2 (charset, rc);
2324 CHECK_CHAR_COERCE_INT (character);
2326 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2328 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2330 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2334 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2342 #ifdef ENABLE_COMPOSITE_CHARS
2343 /************************************************************************/
2344 /* composite character functions */
2345 /************************************************************************/
2348 lookup_composite_char (Bufbyte *str, int len)
2350 Lisp_Object lispstr = make_string (str, len);
2351 Lisp_Object ch = Fgethash (lispstr,
2352 Vcomposite_char_string2char_hash_table,
2358 if (composite_char_row_next >= 128)
2359 signal_simple_error ("No more composite chars available", lispstr);
2360 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2361 composite_char_col_next);
2362 Fputhash (make_char (emch), lispstr,
2363 Vcomposite_char_char2string_hash_table);
2364 Fputhash (lispstr, make_char (emch),
2365 Vcomposite_char_string2char_hash_table);
2366 composite_char_col_next++;
2367 if (composite_char_col_next >= 128)
2369 composite_char_col_next = 32;
2370 composite_char_row_next++;
2379 composite_char_string (Emchar ch)
2381 Lisp_Object str = Fgethash (make_char (ch),
2382 Vcomposite_char_char2string_hash_table,
2384 assert (!UNBOUNDP (str));
2388 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2389 Convert a string into a single composite character.
2390 The character is the result of overstriking all the characters in
2395 CHECK_STRING (string);
2396 return make_char (lookup_composite_char (XSTRING_DATA (string),
2397 XSTRING_LENGTH (string)));
2400 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2401 Return a string of the characters comprising a composite character.
2409 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2410 signal_simple_error ("Must be composite char", ch);
2411 return composite_char_string (emch);
2413 #endif /* ENABLE_COMPOSITE_CHARS */
2416 /************************************************************************/
2417 /* initialization */
2418 /************************************************************************/
2421 syms_of_mule_charset (void)
2423 DEFSUBR (Fcharsetp);
2424 DEFSUBR (Ffind_charset);
2425 DEFSUBR (Fget_charset);
2426 DEFSUBR (Fcharset_list);
2427 DEFSUBR (Fcharset_name);
2428 DEFSUBR (Fmake_charset);
2429 DEFSUBR (Fmake_reverse_direction_charset);
2430 /* DEFSUBR (Freverse_direction_charset); */
2431 DEFSUBR (Fdefine_charset_alias);
2432 DEFSUBR (Fcharset_from_attributes);
2433 DEFSUBR (Fcharset_short_name);
2434 DEFSUBR (Fcharset_long_name);
2435 DEFSUBR (Fcharset_description);
2436 DEFSUBR (Fcharset_dimension);
2437 DEFSUBR (Fcharset_property);
2438 DEFSUBR (Fcharset_id);
2439 DEFSUBR (Fset_charset_ccl_program);
2440 DEFSUBR (Fset_charset_registry);
2442 DEFSUBR (Fchar_attribute_alist);
2443 DEFSUBR (Fget_char_attribute);
2444 DEFSUBR (Fput_char_attribute);
2445 DEFSUBR (Fdefine_char);
2446 DEFSUBR (Fchar_variants);
2447 DEFSUBR (Fget_composite_char);
2448 DEFSUBR (Fcharset_mapping_table);
2449 DEFSUBR (Fset_charset_mapping_table);
2452 DEFSUBR (Fmake_char);
2453 DEFSUBR (Fchar_charset);
2454 DEFSUBR (Fsplit_char);
2456 #ifdef ENABLE_COMPOSITE_CHARS
2457 DEFSUBR (Fmake_composite_char);
2458 DEFSUBR (Fcomposite_char_string);
2461 defsymbol (&Qcharsetp, "charsetp");
2462 defsymbol (&Qregistry, "registry");
2463 defsymbol (&Qfinal, "final");
2464 defsymbol (&Qgraphic, "graphic");
2465 defsymbol (&Qdirection, "direction");
2466 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2467 defsymbol (&Qshort_name, "short-name");
2468 defsymbol (&Qlong_name, "long-name");
2470 defsymbol (&Ql2r, "l2r");
2471 defsymbol (&Qr2l, "r2l");
2473 /* Charsets, compatible with FSF 20.3
2474 Naming convention is Script-Charset[-Edition] */
2475 defsymbol (&Qascii, "ascii");
2476 defsymbol (&Qcontrol_1, "control-1");
2477 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2478 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2479 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2480 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2481 defsymbol (&Qthai_tis620, "thai-tis620");
2482 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2483 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2484 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2485 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2486 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2487 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2488 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2489 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2490 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2491 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2492 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2493 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2494 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2495 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2497 defsymbol (&Q_ucs, "->ucs");
2498 defsymbol (&Q_decomposition, "->decomposition");
2499 defsymbol (&Qwide, "wide");
2500 defsymbol (&Qnarrow, "narrow");
2501 defsymbol (&Qcompat, "compat");
2502 defsymbol (&QnoBreak, "noBreak");
2503 defsymbol (&Qsuper, "super");
2504 defsymbol (&Qfraction, "fraction");
2505 defsymbol (&Qucs, "ucs");
2506 defsymbol (&Qucs_bmp, "ucs-bmp");
2507 defsymbol (&Qlatin_viscii, "latin-viscii");
2508 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2509 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2510 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2511 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2512 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2513 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2514 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2516 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2517 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2519 defsymbol (&Qcomposite, "composite");
2523 vars_of_mule_charset (void)
2530 /* Table of charsets indexed by leading byte. */
2531 for (i = 0; i < countof (charset_by_leading_byte); i++)
2532 charset_by_leading_byte[i] = Qnil;
2535 /* Table of charsets indexed by type/final-byte. */
2536 for (i = 0; i < countof (charset_by_attributes); i++)
2537 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2538 charset_by_attributes[i][j] = Qnil;
2540 /* Table of charsets indexed by type/final-byte/direction. */
2541 for (i = 0; i < countof (charset_by_attributes); i++)
2542 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2543 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2544 charset_by_attributes[i][j][k] = Qnil;
2548 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2550 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2551 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2555 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2556 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2557 Leading-code of private TYPE9N charset of column-width 1.
2559 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2563 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2564 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2565 Version number of UTF-2000.
2568 staticpro (&Vcharacter_attribute_table);
2569 Vcharacter_attribute_table = make_char_code_table (Qnil);
2571 staticpro (&Vcharacter_composition_table);
2572 Vcharacter_composition_table = make_char_code_table (Qnil);
2574 staticpro (&Vcharacter_variant_table);
2575 Vcharacter_variant_table = make_char_code_table (Qnil);
2577 Vdefault_coded_charset_priority_list = Qnil;
2578 DEFVAR_LISP ("default-coded-charset-priority-list",
2579 &Vdefault_coded_charset_priority_list /*
2580 Default order of preferred coded-character-sets.
2586 complex_vars_of_mule_charset (void)
2588 staticpro (&Vcharset_hash_table);
2589 Vcharset_hash_table =
2590 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2592 /* Predefined character sets. We store them into variables for
2597 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2598 CHARSET_TYPE_256X256, 1, 2, 0,
2599 CHARSET_LEFT_TO_RIGHT,
2600 build_string ("BMP"),
2601 build_string ("BMP"),
2602 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2603 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2604 Qnil, 0, 0xFFFF, 0, 0);
2606 # define MIN_CHAR_THAI 0
2607 # define MAX_CHAR_THAI 0
2608 # define MIN_CHAR_GREEK 0
2609 # define MAX_CHAR_GREEK 0
2610 # define MIN_CHAR_HEBREW 0
2611 # define MAX_CHAR_HEBREW 0
2612 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2613 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2614 # define MIN_CHAR_CYRILLIC 0
2615 # define MAX_CHAR_CYRILLIC 0
2618 make_charset (LEADING_BYTE_ASCII, Qascii,
2619 CHARSET_TYPE_94, 1, 0, 'B',
2620 CHARSET_LEFT_TO_RIGHT,
2621 build_string ("ASCII"),
2622 build_string ("ASCII)"),
2623 build_string ("ASCII (ISO646 IRV)"),
2624 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2625 Qnil, 0, 0x7F, 0, 0);
2626 Vcharset_control_1 =
2627 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2628 CHARSET_TYPE_94, 1, 1, 0,
2629 CHARSET_LEFT_TO_RIGHT,
2630 build_string ("C1"),
2631 build_string ("Control characters"),
2632 build_string ("Control characters 128-191"),
2634 Qnil, 0x80, 0x9F, 0, 0);
2635 Vcharset_latin_iso8859_1 =
2636 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2637 CHARSET_TYPE_96, 1, 1, 'A',
2638 CHARSET_LEFT_TO_RIGHT,
2639 build_string ("Latin-1"),
2640 build_string ("ISO8859-1 (Latin-1)"),
2641 build_string ("ISO8859-1 (Latin-1)"),
2642 build_string ("iso8859-1"),
2643 Qnil, 0xA0, 0xFF, 0, 32);
2644 Vcharset_latin_iso8859_2 =
2645 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2646 CHARSET_TYPE_96, 1, 1, 'B',
2647 CHARSET_LEFT_TO_RIGHT,
2648 build_string ("Latin-2"),
2649 build_string ("ISO8859-2 (Latin-2)"),
2650 build_string ("ISO8859-2 (Latin-2)"),
2651 build_string ("iso8859-2"),
2653 Vcharset_latin_iso8859_3 =
2654 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2655 CHARSET_TYPE_96, 1, 1, 'C',
2656 CHARSET_LEFT_TO_RIGHT,
2657 build_string ("Latin-3"),
2658 build_string ("ISO8859-3 (Latin-3)"),
2659 build_string ("ISO8859-3 (Latin-3)"),
2660 build_string ("iso8859-3"),
2662 Vcharset_latin_iso8859_4 =
2663 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2664 CHARSET_TYPE_96, 1, 1, 'D',
2665 CHARSET_LEFT_TO_RIGHT,
2666 build_string ("Latin-4"),
2667 build_string ("ISO8859-4 (Latin-4)"),
2668 build_string ("ISO8859-4 (Latin-4)"),
2669 build_string ("iso8859-4"),
2671 Vcharset_thai_tis620 =
2672 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2673 CHARSET_TYPE_96, 1, 1, 'T',
2674 CHARSET_LEFT_TO_RIGHT,
2675 build_string ("TIS620"),
2676 build_string ("TIS620 (Thai)"),
2677 build_string ("TIS620.2529 (Thai)"),
2678 build_string ("tis620"),
2679 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2680 Vcharset_greek_iso8859_7 =
2681 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2682 CHARSET_TYPE_96, 1, 1, 'F',
2683 CHARSET_LEFT_TO_RIGHT,
2684 build_string ("ISO8859-7"),
2685 build_string ("ISO8859-7 (Greek)"),
2686 build_string ("ISO8859-7 (Greek)"),
2687 build_string ("iso8859-7"),
2688 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2689 Vcharset_arabic_iso8859_6 =
2690 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2691 CHARSET_TYPE_96, 1, 1, 'G',
2692 CHARSET_RIGHT_TO_LEFT,
2693 build_string ("ISO8859-6"),
2694 build_string ("ISO8859-6 (Arabic)"),
2695 build_string ("ISO8859-6 (Arabic)"),
2696 build_string ("iso8859-6"),
2698 Vcharset_hebrew_iso8859_8 =
2699 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2700 CHARSET_TYPE_96, 1, 1, 'H',
2701 CHARSET_RIGHT_TO_LEFT,
2702 build_string ("ISO8859-8"),
2703 build_string ("ISO8859-8 (Hebrew)"),
2704 build_string ("ISO8859-8 (Hebrew)"),
2705 build_string ("iso8859-8"),
2706 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2707 Vcharset_katakana_jisx0201 =
2708 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2709 CHARSET_TYPE_94, 1, 1, 'I',
2710 CHARSET_LEFT_TO_RIGHT,
2711 build_string ("JISX0201 Kana"),
2712 build_string ("JISX0201.1976 (Japanese Kana)"),
2713 build_string ("JISX0201.1976 Japanese Kana"),
2714 build_string ("jisx0201\\.1976"),
2716 MIN_CHAR_HALFWIDTH_KATAKANA,
2717 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2718 Vcharset_latin_jisx0201 =
2719 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2720 CHARSET_TYPE_94, 1, 0, 'J',
2721 CHARSET_LEFT_TO_RIGHT,
2722 build_string ("JISX0201 Roman"),
2723 build_string ("JISX0201.1976 (Japanese Roman)"),
2724 build_string ("JISX0201.1976 Japanese Roman"),
2725 build_string ("jisx0201\\.1976"),
2727 Vcharset_cyrillic_iso8859_5 =
2728 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2729 CHARSET_TYPE_96, 1, 1, 'L',
2730 CHARSET_LEFT_TO_RIGHT,
2731 build_string ("ISO8859-5"),
2732 build_string ("ISO8859-5 (Cyrillic)"),
2733 build_string ("ISO8859-5 (Cyrillic)"),
2734 build_string ("iso8859-5"),
2735 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2736 Vcharset_latin_iso8859_9 =
2737 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2738 CHARSET_TYPE_96, 1, 1, 'M',
2739 CHARSET_LEFT_TO_RIGHT,
2740 build_string ("Latin-5"),
2741 build_string ("ISO8859-9 (Latin-5)"),
2742 build_string ("ISO8859-9 (Latin-5)"),
2743 build_string ("iso8859-9"),
2745 Vcharset_japanese_jisx0208_1978 =
2746 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2747 CHARSET_TYPE_94X94, 2, 0, '@',
2748 CHARSET_LEFT_TO_RIGHT,
2749 build_string ("JIS X0208:1978"),
2750 build_string ("JIS X0208:1978 (Japanese)"),
2752 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2753 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2755 Vcharset_chinese_gb2312 =
2756 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2757 CHARSET_TYPE_94X94, 2, 0, 'A',
2758 CHARSET_LEFT_TO_RIGHT,
2759 build_string ("GB2312"),
2760 build_string ("GB2312)"),
2761 build_string ("GB2312 Chinese simplified"),
2762 build_string ("gb2312"),
2764 Vcharset_japanese_jisx0208 =
2765 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2766 CHARSET_TYPE_94X94, 2, 0, 'B',
2767 CHARSET_LEFT_TO_RIGHT,
2768 build_string ("JISX0208"),
2769 build_string ("JIS X0208:1983 (Japanese)"),
2770 build_string ("JIS X0208:1983 Japanese Kanji"),
2771 build_string ("jisx0208\\.1983"),
2773 Vcharset_korean_ksc5601 =
2774 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2775 CHARSET_TYPE_94X94, 2, 0, 'C',
2776 CHARSET_LEFT_TO_RIGHT,
2777 build_string ("KSC5601"),
2778 build_string ("KSC5601 (Korean"),
2779 build_string ("KSC5601 Korean Hangul and Hanja"),
2780 build_string ("ksc5601"),
2782 Vcharset_japanese_jisx0212 =
2783 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2784 CHARSET_TYPE_94X94, 2, 0, 'D',
2785 CHARSET_LEFT_TO_RIGHT,
2786 build_string ("JISX0212"),
2787 build_string ("JISX0212 (Japanese)"),
2788 build_string ("JISX0212 Japanese Supplement"),
2789 build_string ("jisx0212"),
2792 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2793 Vcharset_chinese_cns11643_1 =
2794 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2795 CHARSET_TYPE_94X94, 2, 0, 'G',
2796 CHARSET_LEFT_TO_RIGHT,
2797 build_string ("CNS11643-1"),
2798 build_string ("CNS11643-1 (Chinese traditional)"),
2800 ("CNS 11643 Plane 1 Chinese traditional"),
2801 build_string (CHINESE_CNS_PLANE_RE("1")),
2803 Vcharset_chinese_cns11643_2 =
2804 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2805 CHARSET_TYPE_94X94, 2, 0, 'H',
2806 CHARSET_LEFT_TO_RIGHT,
2807 build_string ("CNS11643-2"),
2808 build_string ("CNS11643-2 (Chinese traditional)"),
2810 ("CNS 11643 Plane 2 Chinese traditional"),
2811 build_string (CHINESE_CNS_PLANE_RE("2")),
2814 Vcharset_latin_viscii_lower =
2815 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2816 CHARSET_TYPE_96, 1, 1, '1',
2817 CHARSET_LEFT_TO_RIGHT,
2818 build_string ("VISCII lower"),
2819 build_string ("VISCII lower (Vietnamese)"),
2820 build_string ("VISCII lower (Vietnamese)"),
2821 build_string ("MULEVISCII-LOWER"),
2823 Vcharset_latin_viscii_upper =
2824 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2825 CHARSET_TYPE_96, 1, 1, '2',
2826 CHARSET_LEFT_TO_RIGHT,
2827 build_string ("VISCII upper"),
2828 build_string ("VISCII upper (Vietnamese)"),
2829 build_string ("VISCII upper (Vietnamese)"),
2830 build_string ("MULEVISCII-UPPER"),
2832 Vcharset_latin_viscii =
2833 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2834 CHARSET_TYPE_256, 1, 2, 0,
2835 CHARSET_LEFT_TO_RIGHT,
2836 build_string ("VISCII"),
2837 build_string ("VISCII 1.1 (Vietnamese)"),
2838 build_string ("VISCII 1.1 (Vietnamese)"),
2839 build_string ("VISCII1\\.1"),
2841 Vcharset_ethiopic_ucs =
2842 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2843 CHARSET_TYPE_256X256, 2, 2, 0,
2844 CHARSET_LEFT_TO_RIGHT,
2845 build_string ("Ethiopic (UCS)"),
2846 build_string ("Ethiopic (UCS)"),
2847 build_string ("Ethiopic of UCS"),
2848 build_string ("Ethiopic-Unicode"),
2849 Qnil, 0x1200, 0x137F, 0x1200, 0);
2850 Vcharset_hiragana_jisx0208 =
2851 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2852 CHARSET_TYPE_94X94, 2, 0, 'B',
2853 CHARSET_LEFT_TO_RIGHT,
2854 build_string ("Hiragana"),
2855 build_string ("Hiragana of JIS X0208"),
2856 build_string ("Japanese Hiragana of JIS X0208"),
2857 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2858 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2859 (0x24 - 33) * 94 + (0x21 - 33), 33);
2860 Vcharset_katakana_jisx0208 =
2861 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2862 CHARSET_TYPE_94X94, 2, 0, 'B',
2863 CHARSET_LEFT_TO_RIGHT,
2864 build_string ("Katakana"),
2865 build_string ("Katakana of JIS X0208"),
2866 build_string ("Japanese Katakana of JIS X0208"),
2867 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2868 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2869 (0x25 - 33) * 94 + (0x21 - 33), 33);
2871 Vcharset_chinese_big5_1 =
2872 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2873 CHARSET_TYPE_94X94, 2, 0, '0',
2874 CHARSET_LEFT_TO_RIGHT,
2875 build_string ("Big5"),
2876 build_string ("Big5 (Level-1)"),
2878 ("Big5 Level-1 Chinese traditional"),
2879 build_string ("big5"),
2881 Vcharset_chinese_big5_2 =
2882 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2883 CHARSET_TYPE_94X94, 2, 0, '1',
2884 CHARSET_LEFT_TO_RIGHT,
2885 build_string ("Big5"),
2886 build_string ("Big5 (Level-2)"),
2888 ("Big5 Level-2 Chinese traditional"),
2889 build_string ("big5"),
2892 #ifdef ENABLE_COMPOSITE_CHARS
2893 /* #### For simplicity, we put composite chars into a 96x96 charset.
2894 This is going to lead to problems because you can run out of
2895 room, esp. as we don't yet recycle numbers. */
2896 Vcharset_composite =
2897 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2898 CHARSET_TYPE_96X96, 2, 0, 0,
2899 CHARSET_LEFT_TO_RIGHT,
2900 build_string ("Composite"),
2901 build_string ("Composite characters"),
2902 build_string ("Composite characters"),
2905 composite_char_row_next = 32;
2906 composite_char_col_next = 32;
2908 Vcomposite_char_string2char_hash_table =
2909 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2910 Vcomposite_char_char2string_hash_table =
2911 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2912 staticpro (&Vcomposite_char_string2char_hash_table);
2913 staticpro (&Vcomposite_char_char2string_hash_table);
2914 #endif /* ENABLE_COMPOSITE_CHARS */