1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
391 Lisp_Object QnoBreak;
394 Lisp_Object Qfraction;
397 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
403 else if (EQ (v, Qwide))
405 else if (EQ (v, Qnarrow))
407 else if (EQ (v, Qcompat))
409 else if (EQ (v, QnoBreak))
411 else if (EQ (v, Qsuper))
413 else if (EQ (v, Qsub))
415 else if (EQ (v, Qfraction))
418 signal_simple_error (err_msg, err_arg);
421 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
422 Return character corresponding with list.
426 Lisp_Object table = Vcharacter_composition_table;
427 Lisp_Object rest = list;
431 Lisp_Object v = Fcar (rest);
433 Emchar c = to_char_code (v, "Invalid value for composition", list);
435 ret = get_char_code_table (c, table);
440 if (!CHAR_CODE_TABLE_P (ret))
445 else if (!CONSP (rest))
447 else if (CHAR_CODE_TABLE_P (ret))
450 signal_simple_error ("Invalid table is found with", list);
452 signal_simple_error ("Invalid value for composition", list);
455 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
456 Return variants of CHARACTER.
460 CHECK_CHAR (character);
461 return Fcopy_list (get_char_code_table (XCHAR (character),
462 Vcharacter_variant_table));
465 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
466 Return the alist of attributes of CHARACTER.
470 CHECK_CHAR (character);
471 return Fcopy_alist (get_char_code_table (XCHAR (character),
472 Vcharacter_attribute_table));
475 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
476 Return the value of CHARACTER's ATTRIBUTE.
478 (character, attribute))
481 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
487 if (!NILP (ccs = Ffind_charset (attribute)))
490 return Fcdr (Fassq (attribute, ret));
494 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
497 Emchar char_code = XCHAR (character);
499 = get_char_code_table (char_code, Vcharacter_attribute_table);
502 cell = Fassq (attribute, ret);
506 ret = Fcons (Fcons (attribute, value), ret);
508 else if (!EQ (Fcdr (cell), value))
510 Fsetcdr (cell, value);
512 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
516 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
517 Store CHARACTER's ATTRIBUTE with VALUE.
519 (character, attribute, value))
523 ccs = Ffind_charset (attribute);
527 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
532 /* ad-hoc method for `ascii' */
533 if ((XCHARSET_CHARS (ccs) == 94) &&
534 (XCHARSET_BYTE_OFFSET (ccs) != 33))
535 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
537 ccs_len = XCHARSET_CHARS (ccs);
540 signal_simple_error ("Invalid value for coded-charset",
544 rest = Fget_char_attribute (character, attribute);
551 Lisp_Object ei = Fcar (rest);
553 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
554 nv = XVECTOR_DATA(v)[i];
561 XVECTOR_DATA(v)[i] = Qnil;
562 v = XCHARSET_DECODING_TABLE (ccs);
567 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
570 if (XCHARSET_GRAPHIC (ccs) == 1)
571 value = Fcopy_list (value);
576 Lisp_Object ei = Fcar (rest);
579 signal_simple_error ("Invalid value for coded-charset", value);
581 if ((i < 0) || (255 < i))
582 signal_simple_error ("Invalid value for coded-charset", value);
583 if (XCHARSET_GRAPHIC (ccs) == 1)
586 Fsetcar (rest, make_int (i));
588 i -= XCHARSET_BYTE_OFFSET (ccs);
589 nv = XVECTOR_DATA(v)[i];
595 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
602 XVECTOR_DATA(v)[i] = character;
604 else if (EQ (attribute, Q_decomposition))
606 Lisp_Object rest = value;
607 Lisp_Object table = Vcharacter_composition_table;
610 signal_simple_error ("Invalid value for ->decomposition",
615 Lisp_Object v = Fcar (rest);
618 = to_char_code (v, "Invalid value for ->decomposition", value);
623 put_char_code_table (c, character, table);
628 ntable = get_char_code_table (c, table);
629 if (!CHAR_CODE_TABLE_P (ntable))
631 ntable = make_char_code_table (Qnil);
632 put_char_code_table (c, ntable, table);
638 else if (EQ (attribute, Q_ucs))
644 signal_simple_error ("Invalid value for ->ucs", value);
648 ret = get_char_code_table (c, Vcharacter_variant_table);
649 if (NILP (Fmemq (character, ret)))
651 put_char_code_table (c, Fcons (character, ret),
652 Vcharacter_variant_table);
655 return put_char_attribute (character, attribute, value);
660 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
661 Store character's ATTRIBUTES.
665 Lisp_Object rest = attributes;
666 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
667 Lisp_Object character;
673 Lisp_Object cell = Fcar (rest);
677 signal_simple_error ("Invalid argument", attributes);
678 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
679 && XCHARSET_FINAL (ccs))
683 if (XCHARSET_DIMENSION (ccs) == 1)
685 Lisp_Object eb1 = Fcar (Fcdr (cell));
689 signal_simple_error ("Invalid argument", attributes);
691 switch (XCHARSET_CHARS (ccs))
695 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
699 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
705 else if (XCHARSET_DIMENSION (ccs) == 2)
707 Lisp_Object eb1 = Fcar (Fcdr (cell));
708 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
712 signal_simple_error ("Invalid argument", attributes);
715 signal_simple_error ("Invalid argument", attributes);
717 switch (XCHARSET_CHARS (ccs))
720 code = MIN_CHAR_94x94
721 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
722 + (b1 - 33) * 94 + (b2 - 33);
725 code = MIN_CHAR_96x96
726 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
727 + (b1 - 32) * 96 + (b2 - 32);
738 character = make_char (code);
739 goto setup_attributes;
745 else if (!INTP (code))
746 signal_simple_error ("Invalid argument", attributes);
748 character = make_char (XINT (code));
754 Lisp_Object cell = Fcar (rest);
757 signal_simple_error ("Invalid argument", attributes);
758 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
762 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
765 Lisp_Object Vutf_2000_version;
769 int leading_code_private_11;
772 Lisp_Object Qcharsetp;
774 /* Qdoc_string, Qdimension, Qchars defined in general.c */
775 Lisp_Object Qregistry, Qfinal, Qgraphic;
776 Lisp_Object Qdirection;
777 Lisp_Object Qreverse_direction_charset;
778 Lisp_Object Qleading_byte;
779 Lisp_Object Qshort_name, Qlong_name;
795 Qjapanese_jisx0208_1978,
807 Qvietnamese_viscii_lower,
808 Qvietnamese_viscii_upper,
817 Lisp_Object Ql2r, Qr2l;
819 Lisp_Object Vcharset_hash_table;
822 static Charset_ID next_allocated_leading_byte;
824 static Charset_ID next_allocated_1_byte_leading_byte;
825 static Charset_ID next_allocated_2_byte_leading_byte;
828 /* Composite characters are characters constructed by overstriking two
829 or more regular characters.
831 1) The old Mule implementation involves storing composite characters
832 in a buffer as a tag followed by all of the actual characters
833 used to make up the composite character. I think this is a bad
834 idea; it greatly complicates code that wants to handle strings
835 one character at a time because it has to deal with the possibility
836 of great big ungainly characters. It's much more reasonable to
837 simply store an index into a table of composite characters.
839 2) The current implementation only allows for 16,384 separate
840 composite characters over the lifetime of the XEmacs process.
841 This could become a potential problem if the user
842 edited lots of different files that use composite characters.
843 Due to FSF bogosity, increasing the number of allowable
844 composite characters under Mule would decrease the number
845 of possible faces that can exist. Mule already has shrunk
846 this to 2048, and further shrinkage would become uncomfortable.
847 No such problems exist in XEmacs.
849 Composite characters could be represented as 0x80 C1 C2 C3,
850 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
851 for slightly under 2^20 (one million) composite characters
852 over the XEmacs process lifetime, and you only need to
853 increase the size of a Mule character from 19 to 21 bits.
854 Or you could use 0x80 C1 C2 C3 C4, allowing for about
855 85 million (slightly over 2^26) composite characters. */
858 /************************************************************************/
859 /* Basic Emchar functions */
860 /************************************************************************/
862 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
863 string in STR. Returns the number of bytes stored.
864 Do not call this directly. Use the macro set_charptr_emchar() instead.
868 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
883 else if ( c <= 0x7ff )
885 *p++ = (c >> 6) | 0xc0;
886 *p++ = (c & 0x3f) | 0x80;
888 else if ( c <= 0xffff )
890 *p++ = (c >> 12) | 0xe0;
891 *p++ = ((c >> 6) & 0x3f) | 0x80;
892 *p++ = (c & 0x3f) | 0x80;
894 else if ( c <= 0x1fffff )
896 *p++ = (c >> 18) | 0xf0;
897 *p++ = ((c >> 12) & 0x3f) | 0x80;
898 *p++ = ((c >> 6) & 0x3f) | 0x80;
899 *p++ = (c & 0x3f) | 0x80;
901 else if ( c <= 0x3ffffff )
903 *p++ = (c >> 24) | 0xf8;
904 *p++ = ((c >> 18) & 0x3f) | 0x80;
905 *p++ = ((c >> 12) & 0x3f) | 0x80;
906 *p++ = ((c >> 6) & 0x3f) | 0x80;
907 *p++ = (c & 0x3f) | 0x80;
911 *p++ = (c >> 30) | 0xfc;
912 *p++ = ((c >> 24) & 0x3f) | 0x80;
913 *p++ = ((c >> 18) & 0x3f) | 0x80;
914 *p++ = ((c >> 12) & 0x3f) | 0x80;
915 *p++ = ((c >> 6) & 0x3f) | 0x80;
916 *p++ = (c & 0x3f) | 0x80;
919 BREAKUP_CHAR (c, charset, c1, c2);
920 lb = CHAR_LEADING_BYTE (c);
921 if (LEADING_BYTE_PRIVATE_P (lb))
922 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
924 if (EQ (charset, Vcharset_control_1))
933 /* Return the first character from a Mule-encoded string in STR,
934 assuming it's non-ASCII. Do not call this directly.
935 Use the macro charptr_emchar() instead. */
938 non_ascii_charptr_emchar (CONST Bufbyte *str)
951 else if ( b >= 0xf8 )
956 else if ( b >= 0xf0 )
961 else if ( b >= 0xe0 )
966 else if ( b >= 0xc0 )
976 for( ; len > 0; len-- )
979 ch = ( ch << 6 ) | ( b & 0x3f );
983 Bufbyte i0 = *str, i1, i2 = 0;
986 if (i0 == LEADING_BYTE_CONTROL_1)
987 return (Emchar) (*++str - 0x20);
989 if (LEADING_BYTE_PREFIX_P (i0))
994 charset = CHARSET_BY_LEADING_BYTE (i0);
995 if (XCHARSET_DIMENSION (charset) == 2)
998 return MAKE_CHAR (charset, i1, i2);
1002 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1003 Do not call this directly. Use the macro valid_char_p() instead. */
1007 non_ascii_valid_char_p (Emchar ch)
1011 /* Must have only lowest 19 bits set */
1015 f1 = CHAR_FIELD1 (ch);
1016 f2 = CHAR_FIELD2 (ch);
1017 f3 = CHAR_FIELD3 (ch);
1021 Lisp_Object charset;
1023 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1024 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1025 f2 > MAX_CHAR_FIELD2_PRIVATE)
1030 if (f3 != 0x20 && f3 != 0x7F)
1034 NOTE: This takes advantage of the fact that
1035 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1036 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1038 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1039 return (XCHARSET_CHARS (charset) == 96);
1043 Lisp_Object charset;
1045 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1046 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1047 f1 > MAX_CHAR_FIELD1_PRIVATE)
1049 if (f2 < 0x20 || f3 < 0x20)
1052 #ifdef ENABLE_COMPOSITE_CHARS
1053 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1055 if (UNBOUNDP (Fgethash (make_int (ch),
1056 Vcomposite_char_char2string_hash_table,
1061 #endif /* ENABLE_COMPOSITE_CHARS */
1063 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1066 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1068 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1071 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1073 return (XCHARSET_CHARS (charset) == 96);
1079 /************************************************************************/
1080 /* Basic string functions */
1081 /************************************************************************/
1083 /* Copy the character pointed to by PTR into STR, assuming it's
1084 non-ASCII. Do not call this directly. Use the macro
1085 charptr_copy_char() instead. */
1088 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1090 Bufbyte *strptr = str;
1092 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1094 /* Notice fallthrough. */
1096 case 6: *++strptr = *ptr++;
1097 case 5: *++strptr = *ptr++;
1099 case 4: *++strptr = *ptr++;
1100 case 3: *++strptr = *ptr++;
1101 case 2: *++strptr = *ptr;
1106 return strptr + 1 - str;
1110 /************************************************************************/
1111 /* streams of Emchars */
1112 /************************************************************************/
1114 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1115 The functions below are not meant to be called directly; use
1116 the macros in insdel.h. */
1119 Lstream_get_emchar_1 (Lstream *stream, int ch)
1121 Bufbyte str[MAX_EMCHAR_LEN];
1122 Bufbyte *strptr = str;
1124 str[0] = (Bufbyte) ch;
1125 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1127 /* Notice fallthrough. */
1130 ch = Lstream_getc (stream);
1132 *++strptr = (Bufbyte) ch;
1134 ch = Lstream_getc (stream);
1136 *++strptr = (Bufbyte) ch;
1139 ch = Lstream_getc (stream);
1141 *++strptr = (Bufbyte) ch;
1143 ch = Lstream_getc (stream);
1145 *++strptr = (Bufbyte) ch;
1147 ch = Lstream_getc (stream);
1149 *++strptr = (Bufbyte) ch;
1154 return charptr_emchar (str);
1158 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1160 Bufbyte str[MAX_EMCHAR_LEN];
1161 Bytecount len = set_charptr_emchar (str, ch);
1162 return Lstream_write (stream, str, len);
1166 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1168 Bufbyte str[MAX_EMCHAR_LEN];
1169 Bytecount len = set_charptr_emchar (str, ch);
1170 Lstream_unread (stream, str, len);
1174 /************************************************************************/
1175 /* charset object */
1176 /************************************************************************/
1179 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1181 struct Lisp_Charset *cs = XCHARSET (obj);
1183 markobj (cs->short_name);
1184 markobj (cs->long_name);
1185 markobj (cs->doc_string);
1186 markobj (cs->registry);
1187 markobj (cs->ccl_program);
1189 markobj (cs->decoding_table);
1195 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1197 struct Lisp_Charset *cs = XCHARSET (obj);
1201 error ("printing unreadable object #<charset %s 0x%x>",
1202 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1205 write_c_string ("#<charset ", printcharfun);
1206 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1207 write_c_string (" ", printcharfun);
1208 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1209 write_c_string (" ", printcharfun);
1210 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1211 write_c_string (" ", printcharfun);
1212 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1213 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1214 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1215 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1216 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1218 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1219 CHARSET_COLUMNS (cs),
1220 CHARSET_GRAPHIC (cs),
1221 CHARSET_FINAL (cs));
1222 write_c_string (buf, printcharfun);
1223 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1224 sprintf (buf, " 0x%x>", cs->header.uid);
1225 write_c_string (buf, printcharfun);
1228 static const struct lrecord_description charset_description[] = {
1229 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1231 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1236 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1237 mark_charset, print_charset, 0, 0, 0,
1238 charset_description,
1239 struct Lisp_Charset);
1241 /* Make a new charset. */
1244 make_charset (Charset_ID id, Lisp_Object name,
1245 unsigned char type, unsigned char columns, unsigned char graphic,
1246 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1247 Lisp_Object long_name, Lisp_Object doc,
1249 Lisp_Object decoding_table,
1250 Emchar ucs_min, Emchar ucs_max,
1251 Emchar code_offset, unsigned char byte_offset)
1254 struct Lisp_Charset *cs =
1255 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1256 XSETCHARSET (obj, cs);
1258 CHARSET_ID (cs) = id;
1259 CHARSET_NAME (cs) = name;
1260 CHARSET_SHORT_NAME (cs) = short_name;
1261 CHARSET_LONG_NAME (cs) = long_name;
1262 CHARSET_DIRECTION (cs) = direction;
1263 CHARSET_TYPE (cs) = type;
1264 CHARSET_COLUMNS (cs) = columns;
1265 CHARSET_GRAPHIC (cs) = graphic;
1266 CHARSET_FINAL (cs) = final;
1267 CHARSET_DOC_STRING (cs) = doc;
1268 CHARSET_REGISTRY (cs) = reg;
1269 CHARSET_CCL_PROGRAM (cs) = Qnil;
1270 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1272 CHARSET_DECODING_TABLE(cs) = Qnil;
1273 CHARSET_UCS_MIN(cs) = ucs_min;
1274 CHARSET_UCS_MAX(cs) = ucs_max;
1275 CHARSET_CODE_OFFSET(cs) = code_offset;
1276 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1279 switch (CHARSET_TYPE (cs))
1281 case CHARSET_TYPE_94:
1282 CHARSET_DIMENSION (cs) = 1;
1283 CHARSET_CHARS (cs) = 94;
1285 case CHARSET_TYPE_96:
1286 CHARSET_DIMENSION (cs) = 1;
1287 CHARSET_CHARS (cs) = 96;
1289 case CHARSET_TYPE_94X94:
1290 CHARSET_DIMENSION (cs) = 2;
1291 CHARSET_CHARS (cs) = 94;
1293 case CHARSET_TYPE_96X96:
1294 CHARSET_DIMENSION (cs) = 2;
1295 CHARSET_CHARS (cs) = 96;
1298 case CHARSET_TYPE_128:
1299 CHARSET_DIMENSION (cs) = 1;
1300 CHARSET_CHARS (cs) = 128;
1302 case CHARSET_TYPE_128X128:
1303 CHARSET_DIMENSION (cs) = 2;
1304 CHARSET_CHARS (cs) = 128;
1306 case CHARSET_TYPE_256:
1307 CHARSET_DIMENSION (cs) = 1;
1308 CHARSET_CHARS (cs) = 256;
1310 case CHARSET_TYPE_256X256:
1311 CHARSET_DIMENSION (cs) = 2;
1312 CHARSET_CHARS (cs) = 256;
1318 if (id == LEADING_BYTE_ASCII)
1319 CHARSET_REP_BYTES (cs) = 1;
1321 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1323 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1328 /* some charsets do not have final characters. This includes
1329 ASCII, Control-1, Composite, and the two faux private
1332 if (code_offset == 0)
1334 assert (NILP (charset_by_attributes[type][final]));
1335 charset_by_attributes[type][final] = obj;
1338 assert (NILP (charset_by_attributes[type][final][direction]));
1339 charset_by_attributes[type][final][direction] = obj;
1343 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1344 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1347 /* official leading byte */
1348 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1351 /* Some charsets are "faux" and don't have names or really exist at
1352 all except in the leading-byte table. */
1354 Fputhash (name, obj, Vcharset_hash_table);
1359 get_unallocated_leading_byte (int dimension)
1364 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1367 lb = next_allocated_leading_byte++;
1371 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1374 lb = next_allocated_1_byte_leading_byte++;
1378 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1381 lb = next_allocated_2_byte_leading_byte++;
1387 ("No more character sets free for this dimension",
1388 make_int (dimension));
1395 range_charset_code_point (Lisp_Object charset, Emchar ch)
1399 if ((XCHARSET_UCS_MIN (charset) <= ch)
1400 && (ch <= XCHARSET_UCS_MAX (charset)))
1402 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1404 if (XCHARSET_DIMENSION (charset) == 1)
1405 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1406 else if (XCHARSET_DIMENSION (charset) == 2)
1407 return list2 (make_int (d / XCHARSET_CHARS (charset)
1408 + XCHARSET_BYTE_OFFSET (charset)),
1409 make_int (d % XCHARSET_CHARS (charset)
1410 + XCHARSET_BYTE_OFFSET (charset)));
1411 else if (XCHARSET_DIMENSION (charset) == 3)
1412 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1413 * XCHARSET_CHARS (charset))
1414 + XCHARSET_BYTE_OFFSET (charset)),
1415 make_int (d / XCHARSET_CHARS (charset)
1416 % XCHARSET_CHARS (charset)
1417 + XCHARSET_BYTE_OFFSET (charset)),
1418 make_int (d % XCHARSET_CHARS (charset)
1419 + XCHARSET_BYTE_OFFSET (charset)));
1420 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1421 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1422 * XCHARSET_CHARS (charset)
1423 * XCHARSET_CHARS (charset))
1424 + XCHARSET_BYTE_OFFSET (charset)),
1425 make_int (d / (XCHARSET_CHARS (charset)
1426 * XCHARSET_CHARS (charset))
1427 % XCHARSET_CHARS (charset)
1428 + XCHARSET_BYTE_OFFSET (charset)),
1429 make_int (d / XCHARSET_CHARS (charset)
1430 % XCHARSET_CHARS (charset)
1431 + XCHARSET_BYTE_OFFSET (charset)),
1432 make_int (d % XCHARSET_CHARS (charset)
1433 + XCHARSET_BYTE_OFFSET (charset)));
1435 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1437 if (XCHARSET_DIMENSION (charset) == 1)
1439 if (XCHARSET_CHARS (charset) == 94)
1441 if (((d = ch - (MIN_CHAR_94
1442 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1444 return list1 (make_int (d + 33));
1446 else if (XCHARSET_CHARS (charset) == 96)
1448 if (((d = ch - (MIN_CHAR_96
1449 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1451 return list1 (make_int (d + 32));
1456 else if (XCHARSET_DIMENSION (charset) == 2)
1458 if (XCHARSET_CHARS (charset) == 94)
1460 if (((d = ch - (MIN_CHAR_94x94
1461 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1464 return list2 (make_int ((d / 94) + 33),
1465 make_int (d % 94 + 33));
1467 else if (XCHARSET_CHARS (charset) == 96)
1469 if (((d = ch - (MIN_CHAR_96x96
1470 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1473 return list2 (make_int ((d / 96) + 32),
1474 make_int (d % 96 + 32));
1482 split_builtin_char (Emchar c)
1484 if (c < MIN_CHAR_OBS_94x94)
1486 if (c <= MAX_CHAR_BASIC_LATIN)
1488 return list2 (Vcharset_ascii, make_int (c));
1492 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1496 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1498 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1500 return list2 (Vcharset_greek_iso8859_7,
1501 make_int (c - MIN_CHAR_GREEK + 0x20));
1503 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1505 return list2 (Vcharset_cyrillic_iso8859_5,
1506 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1508 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1510 return list2 (Vcharset_hebrew_iso8859_8,
1511 make_int (c - MIN_CHAR_HEBREW + 0x20));
1513 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1515 return list2 (Vcharset_thai_tis620,
1516 make_int (c - MIN_CHAR_THAI + 0x20));
1518 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1519 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1521 return list2 (Vcharset_katakana_jisx0201,
1522 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1526 return list3 (Vcharset_ucs_bmp,
1527 make_int (c >> 8), make_int (c & 0xff));
1530 else if (c <= MAX_CHAR_OBS_94x94)
1532 return list3 (CHARSET_BY_ATTRIBUTES
1533 (CHARSET_TYPE_94X94,
1534 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1535 CHARSET_LEFT_TO_RIGHT),
1536 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1537 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1539 else if (c <= MAX_CHAR_94)
1541 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1542 ((c - MIN_CHAR_94) / 94) + '0',
1543 CHARSET_LEFT_TO_RIGHT),
1544 make_int (((c - MIN_CHAR_94) % 94) + 33));
1546 else if (c <= MAX_CHAR_96)
1548 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1549 ((c - MIN_CHAR_96) / 96) + '0',
1550 CHARSET_LEFT_TO_RIGHT),
1551 make_int (((c - MIN_CHAR_96) % 96) + 32));
1553 else if (c <= MAX_CHAR_94x94)
1555 return list3 (CHARSET_BY_ATTRIBUTES
1556 (CHARSET_TYPE_94X94,
1557 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1558 CHARSET_LEFT_TO_RIGHT),
1559 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1560 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1562 else if (c <= MAX_CHAR_96x96)
1564 return list3 (CHARSET_BY_ATTRIBUTES
1565 (CHARSET_TYPE_96X96,
1566 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1567 CHARSET_LEFT_TO_RIGHT),
1568 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1569 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1578 charset_code_point (Lisp_Object charset, Emchar ch)
1580 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1582 if (!EQ (cdef, Qnil))
1584 Lisp_Object field = Fassq (charset, cdef);
1586 if (!EQ (field, Qnil))
1587 return Fcdr (field);
1589 return range_charset_code_point (charset, ch);
1592 Lisp_Object Vdefault_coded_charset_priority_list;
1596 /************************************************************************/
1597 /* Basic charset Lisp functions */
1598 /************************************************************************/
1600 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1601 Return non-nil if OBJECT is a charset.
1605 return CHARSETP (object) ? Qt : Qnil;
1608 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1609 Retrieve the charset of the given name.
1610 If CHARSET-OR-NAME is a charset object, it is simply returned.
1611 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1612 nil is returned. Otherwise the associated charset object is returned.
1616 if (CHARSETP (charset_or_name))
1617 return charset_or_name;
1619 CHECK_SYMBOL (charset_or_name);
1620 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1623 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1624 Retrieve the charset of the given name.
1625 Same as `find-charset' except an error is signalled if there is no such
1626 charset instead of returning nil.
1630 Lisp_Object charset = Ffind_charset (name);
1633 signal_simple_error ("No such charset", name);
1637 /* We store the charsets in hash tables with the names as the key and the
1638 actual charset object as the value. Occasionally we need to use them
1639 in a list format. These routines provide us with that. */
1640 struct charset_list_closure
1642 Lisp_Object *charset_list;
1646 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1647 void *charset_list_closure)
1649 /* This function can GC */
1650 struct charset_list_closure *chcl =
1651 (struct charset_list_closure*) charset_list_closure;
1652 Lisp_Object *charset_list = chcl->charset_list;
1654 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1658 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1659 Return a list of the names of all defined charsets.
1663 Lisp_Object charset_list = Qnil;
1664 struct gcpro gcpro1;
1665 struct charset_list_closure charset_list_closure;
1667 GCPRO1 (charset_list);
1668 charset_list_closure.charset_list = &charset_list;
1669 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1670 &charset_list_closure);
1673 return charset_list;
1676 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1677 Return the name of the given charset.
1681 return XCHARSET_NAME (Fget_charset (charset));
1684 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1685 Define a new character set.
1686 This function is for use with Mule support.
1687 NAME is a symbol, the name by which the character set is normally referred.
1688 DOC-STRING is a string describing the character set.
1689 PROPS is a property list, describing the specific nature of the
1690 character set. Recognized properties are:
1692 'short-name Short version of the charset name (ex: Latin-1)
1693 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1694 'registry A regular expression matching the font registry field for
1696 'dimension Number of octets used to index a character in this charset.
1697 Either 1 or 2. Defaults to 1.
1698 'columns Number of columns used to display a character in this charset.
1699 Only used in TTY mode. (Under X, the actual width of a
1700 character can be derived from the font used to display the
1701 characters.) If unspecified, defaults to the dimension
1702 (this is almost always the correct value).
1703 'chars Number of characters in each dimension (94 or 96).
1704 Defaults to 94. Note that if the dimension is 2, the
1705 character set thus described is 94x94 or 96x96.
1706 'final Final byte of ISO 2022 escape sequence. Must be
1707 supplied. Each combination of (DIMENSION, CHARS) defines a
1708 separate namespace for final bytes. Note that ISO
1709 2022 restricts the final byte to the range
1710 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1711 dimension == 2. Note also that final bytes in the range
1712 0x30 - 0x3F are reserved for user-defined (not official)
1714 'graphic 0 (use left half of font on output) or 1 (use right half
1715 of font on output). Defaults to 0. For example, for
1716 a font whose registry is ISO8859-1, the left half
1717 (octets 0x20 - 0x7F) is the `ascii' character set, while
1718 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1719 character set. With 'graphic set to 0, the octets
1720 will have their high bit cleared; with it set to 1,
1721 the octets will have their high bit set.
1722 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1724 'ccl-program A compiled CCL program used to convert a character in
1725 this charset into an index into the font. This is in
1726 addition to the 'graphic property. The CCL program
1727 is passed the octets of the character, with the high
1728 bit cleared and set depending upon whether the value
1729 of the 'graphic property is 0 or 1.
1731 (name, doc_string, props))
1733 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1734 int direction = CHARSET_LEFT_TO_RIGHT;
1736 Lisp_Object registry = Qnil;
1737 Lisp_Object charset;
1738 Lisp_Object rest, keyword, value;
1739 Lisp_Object ccl_program = Qnil;
1740 Lisp_Object short_name = Qnil, long_name = Qnil;
1741 int byte_offset = -1;
1743 CHECK_SYMBOL (name);
1744 if (!NILP (doc_string))
1745 CHECK_STRING (doc_string);
1747 charset = Ffind_charset (name);
1748 if (!NILP (charset))
1749 signal_simple_error ("Cannot redefine existing charset", name);
1751 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1753 if (EQ (keyword, Qshort_name))
1755 CHECK_STRING (value);
1759 if (EQ (keyword, Qlong_name))
1761 CHECK_STRING (value);
1765 else if (EQ (keyword, Qdimension))
1768 dimension = XINT (value);
1769 if (dimension < 1 || dimension > 2)
1770 signal_simple_error ("Invalid value for 'dimension", value);
1773 else if (EQ (keyword, Qchars))
1776 chars = XINT (value);
1777 if (chars != 94 && chars != 96)
1778 signal_simple_error ("Invalid value for 'chars", value);
1781 else if (EQ (keyword, Qcolumns))
1784 columns = XINT (value);
1785 if (columns != 1 && columns != 2)
1786 signal_simple_error ("Invalid value for 'columns", value);
1789 else if (EQ (keyword, Qgraphic))
1792 graphic = XINT (value);
1794 if (graphic < 0 || graphic > 2)
1796 if (graphic < 0 || graphic > 1)
1798 signal_simple_error ("Invalid value for 'graphic", value);
1801 else if (EQ (keyword, Qregistry))
1803 CHECK_STRING (value);
1807 else if (EQ (keyword, Qdirection))
1809 if (EQ (value, Ql2r))
1810 direction = CHARSET_LEFT_TO_RIGHT;
1811 else if (EQ (value, Qr2l))
1812 direction = CHARSET_RIGHT_TO_LEFT;
1814 signal_simple_error ("Invalid value for 'direction", value);
1817 else if (EQ (keyword, Qfinal))
1819 CHECK_CHAR_COERCE_INT (value);
1820 final = XCHAR (value);
1821 if (final < '0' || final > '~')
1822 signal_simple_error ("Invalid value for 'final", value);
1825 else if (EQ (keyword, Qccl_program))
1827 CHECK_VECTOR (value);
1828 ccl_program = value;
1832 signal_simple_error ("Unrecognized property", keyword);
1836 error ("'final must be specified");
1837 if (dimension == 2 && final > 0x5F)
1839 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1843 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1845 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1847 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1848 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1850 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1852 id = get_unallocated_leading_byte (dimension);
1854 if (NILP (doc_string))
1855 doc_string = build_string ("");
1857 if (NILP (registry))
1858 registry = build_string ("");
1860 if (NILP (short_name))
1861 XSETSTRING (short_name, XSYMBOL (name)->name);
1863 if (NILP (long_name))
1864 long_name = doc_string;
1867 columns = dimension;
1869 if (byte_offset < 0)
1873 else if (chars == 96)
1879 charset = make_charset (id, name, type, columns, graphic,
1880 final, direction, short_name, long_name,
1881 doc_string, registry,
1882 Qnil, 0, 0, 0, byte_offset);
1883 if (!NILP (ccl_program))
1884 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1888 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1890 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1891 NEW-NAME is the name of the new charset. Return the new charset.
1893 (charset, new_name))
1895 Lisp_Object new_charset = Qnil;
1896 int id, dimension, columns, graphic, final;
1897 int direction, type;
1898 Lisp_Object registry, doc_string, short_name, long_name;
1899 struct Lisp_Charset *cs;
1901 charset = Fget_charset (charset);
1902 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1903 signal_simple_error ("Charset already has reverse-direction charset",
1906 CHECK_SYMBOL (new_name);
1907 if (!NILP (Ffind_charset (new_name)))
1908 signal_simple_error ("Cannot redefine existing charset", new_name);
1910 cs = XCHARSET (charset);
1912 type = CHARSET_TYPE (cs);
1913 columns = CHARSET_COLUMNS (cs);
1914 dimension = CHARSET_DIMENSION (cs);
1915 id = get_unallocated_leading_byte (dimension);
1917 graphic = CHARSET_GRAPHIC (cs);
1918 final = CHARSET_FINAL (cs);
1919 direction = CHARSET_RIGHT_TO_LEFT;
1920 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1921 direction = CHARSET_LEFT_TO_RIGHT;
1922 doc_string = CHARSET_DOC_STRING (cs);
1923 short_name = CHARSET_SHORT_NAME (cs);
1924 long_name = CHARSET_LONG_NAME (cs);
1925 registry = CHARSET_REGISTRY (cs);
1927 new_charset = make_charset (id, new_name, type, columns,
1928 graphic, final, direction, short_name, long_name,
1929 doc_string, registry,
1931 CHARSET_DECODING_TABLE(cs),
1932 CHARSET_UCS_MIN(cs),
1933 CHARSET_UCS_MAX(cs),
1934 CHARSET_CODE_OFFSET(cs),
1935 CHARSET_BYTE_OFFSET(cs)
1941 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1942 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1947 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1948 Define symbol ALIAS as an alias for CHARSET.
1952 CHECK_SYMBOL (alias);
1953 charset = Fget_charset (charset);
1954 return Fputhash (alias, charset, Vcharset_hash_table);
1957 /* #### Reverse direction charsets not yet implemented. */
1959 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1961 Return the reverse-direction charset parallel to CHARSET, if any.
1962 This is the charset with the same properties (in particular, the same
1963 dimension, number of characters per dimension, and final byte) as
1964 CHARSET but whose characters are displayed in the opposite direction.
1968 charset = Fget_charset (charset);
1969 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1973 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1974 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1975 If DIRECTION is omitted, both directions will be checked (left-to-right
1976 will be returned if character sets exist for both directions).
1978 (dimension, chars, final, direction))
1980 int dm, ch, fi, di = -1;
1982 Lisp_Object obj = Qnil;
1984 CHECK_INT (dimension);
1985 dm = XINT (dimension);
1986 if (dm < 1 || dm > 2)
1987 signal_simple_error ("Invalid value for DIMENSION", dimension);
1991 if (ch != 94 && ch != 96)
1992 signal_simple_error ("Invalid value for CHARS", chars);
1994 CHECK_CHAR_COERCE_INT (final);
1996 if (fi < '0' || fi > '~')
1997 signal_simple_error ("Invalid value for FINAL", final);
1999 if (EQ (direction, Ql2r))
2000 di = CHARSET_LEFT_TO_RIGHT;
2001 else if (EQ (direction, Qr2l))
2002 di = CHARSET_RIGHT_TO_LEFT;
2003 else if (!NILP (direction))
2004 signal_simple_error ("Invalid value for DIRECTION", direction);
2006 if (dm == 2 && fi > 0x5F)
2008 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2011 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2013 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2017 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2019 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2022 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2025 return XCHARSET_NAME (obj);
2029 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2030 Return short name of CHARSET.
2034 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2037 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2038 Return long name of CHARSET.
2042 return XCHARSET_LONG_NAME (Fget_charset (charset));
2045 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2046 Return description of CHARSET.
2050 return XCHARSET_DOC_STRING (Fget_charset (charset));
2053 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2054 Return dimension of CHARSET.
2058 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2061 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2062 Return property PROP of CHARSET.
2063 Recognized properties are those listed in `make-charset', as well as
2064 'name and 'doc-string.
2068 struct Lisp_Charset *cs;
2070 charset = Fget_charset (charset);
2071 cs = XCHARSET (charset);
2073 CHECK_SYMBOL (prop);
2074 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2075 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2076 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2077 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2078 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2079 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2080 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2081 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2082 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2083 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2084 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2085 if (EQ (prop, Qdirection))
2086 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2087 if (EQ (prop, Qreverse_direction_charset))
2089 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2093 return XCHARSET_NAME (obj);
2095 signal_simple_error ("Unrecognized charset property name", prop);
2096 return Qnil; /* not reached */
2099 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2100 Return charset identification number of CHARSET.
2104 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2107 /* #### We need to figure out which properties we really want to
2110 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2111 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2113 (charset, ccl_program))
2115 charset = Fget_charset (charset);
2116 CHECK_VECTOR (ccl_program);
2117 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2122 invalidate_charset_font_caches (Lisp_Object charset)
2124 /* Invalidate font cache entries for charset on all devices. */
2125 Lisp_Object devcons, concons, hash_table;
2126 DEVICE_LOOP_NO_BREAK (devcons, concons)
2128 struct device *d = XDEVICE (XCAR (devcons));
2129 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2130 if (!UNBOUNDP (hash_table))
2131 Fclrhash (hash_table);
2135 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2136 Set the 'registry property of CHARSET to REGISTRY.
2138 (charset, registry))
2140 charset = Fget_charset (charset);
2141 CHECK_STRING (registry);
2142 XCHARSET_REGISTRY (charset) = registry;
2143 invalidate_charset_font_caches (charset);
2144 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2149 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2150 Return mapping-table of CHARSET.
2154 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2157 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2158 Set mapping-table of CHARSET to TABLE.
2162 struct Lisp_Charset *cs;
2163 Lisp_Object old_table;
2166 charset = Fget_charset (charset);
2167 cs = XCHARSET (charset);
2169 if (EQ (table, Qnil))
2171 CHARSET_DECODING_TABLE(cs) = table;
2174 else if (VECTORP (table))
2178 /* ad-hoc method for `ascii' */
2179 if ((CHARSET_CHARS (cs) == 94) &&
2180 (CHARSET_BYTE_OFFSET (cs) != 33))
2181 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2183 ccs_len = CHARSET_CHARS (cs);
2185 if (XVECTOR_LENGTH (table) > ccs_len)
2186 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2187 old_table = CHARSET_DECODING_TABLE(cs);
2188 CHARSET_DECODING_TABLE(cs) = table;
2191 signal_error (Qwrong_type_argument,
2192 list2 (build_translated_string ("vector-or-nil-p"),
2194 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2196 switch (CHARSET_DIMENSION (cs))
2199 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2201 Lisp_Object c = XVECTOR_DATA(table)[i];
2206 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2210 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2212 Lisp_Object v = XVECTOR_DATA(table)[i];
2218 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2220 CHARSET_DECODING_TABLE(cs) = old_table;
2221 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2223 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2225 Lisp_Object c = XVECTOR_DATA(v)[j];
2228 put_char_attribute (c, charset,
2231 (i + CHARSET_BYTE_OFFSET (cs)),
2233 (j + CHARSET_BYTE_OFFSET (cs))));
2237 put_char_attribute (v, charset,
2239 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2248 /************************************************************************/
2249 /* Lisp primitives for working with characters */
2250 /************************************************************************/
2252 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2253 Make a character from CHARSET and octets ARG1 and ARG2.
2254 ARG2 is required only for characters from two-dimensional charsets.
2255 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2256 character s with caron.
2258 (charset, arg1, arg2))
2260 struct Lisp_Charset *cs;
2262 int lowlim, highlim;
2264 charset = Fget_charset (charset);
2265 cs = XCHARSET (charset);
2267 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2268 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2270 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2272 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2273 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2276 /* It is useful (and safe, according to Olivier Galibert) to strip
2277 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2278 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2279 Latin 2 code of the character. */
2287 if (a1 < lowlim || a1 > highlim)
2288 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2290 if (CHARSET_DIMENSION (cs) == 1)
2294 ("Charset is of dimension one; second octet must be nil", arg2);
2295 return make_char (MAKE_CHAR (charset, a1, 0));
2304 a2 = XINT (arg2) & 0x7f;
2306 if (a2 < lowlim || a2 > highlim)
2307 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2309 return make_char (MAKE_CHAR (charset, a1, a2));
2312 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2313 Return the character set of char CH.
2317 CHECK_CHAR_COERCE_INT (ch);
2319 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2322 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2323 Return list of charset and one or two position-codes of CHAR.
2327 /* This function can GC */
2328 struct gcpro gcpro1, gcpro2;
2329 Lisp_Object charset = Qnil;
2330 Lisp_Object rc = Qnil;
2333 GCPRO2 (charset, rc);
2334 CHECK_CHAR_COERCE_INT (character);
2336 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2338 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2340 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2344 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2352 #ifdef ENABLE_COMPOSITE_CHARS
2353 /************************************************************************/
2354 /* composite character functions */
2355 /************************************************************************/
2358 lookup_composite_char (Bufbyte *str, int len)
2360 Lisp_Object lispstr = make_string (str, len);
2361 Lisp_Object ch = Fgethash (lispstr,
2362 Vcomposite_char_string2char_hash_table,
2368 if (composite_char_row_next >= 128)
2369 signal_simple_error ("No more composite chars available", lispstr);
2370 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2371 composite_char_col_next);
2372 Fputhash (make_char (emch), lispstr,
2373 Vcomposite_char_char2string_hash_table);
2374 Fputhash (lispstr, make_char (emch),
2375 Vcomposite_char_string2char_hash_table);
2376 composite_char_col_next++;
2377 if (composite_char_col_next >= 128)
2379 composite_char_col_next = 32;
2380 composite_char_row_next++;
2389 composite_char_string (Emchar ch)
2391 Lisp_Object str = Fgethash (make_char (ch),
2392 Vcomposite_char_char2string_hash_table,
2394 assert (!UNBOUNDP (str));
2398 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2399 Convert a string into a single composite character.
2400 The character is the result of overstriking all the characters in
2405 CHECK_STRING (string);
2406 return make_char (lookup_composite_char (XSTRING_DATA (string),
2407 XSTRING_LENGTH (string)));
2410 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2411 Return a string of the characters comprising a composite character.
2419 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2420 signal_simple_error ("Must be composite char", ch);
2421 return composite_char_string (emch);
2423 #endif /* ENABLE_COMPOSITE_CHARS */
2426 /************************************************************************/
2427 /* initialization */
2428 /************************************************************************/
2431 syms_of_mule_charset (void)
2433 DEFSUBR (Fcharsetp);
2434 DEFSUBR (Ffind_charset);
2435 DEFSUBR (Fget_charset);
2436 DEFSUBR (Fcharset_list);
2437 DEFSUBR (Fcharset_name);
2438 DEFSUBR (Fmake_charset);
2439 DEFSUBR (Fmake_reverse_direction_charset);
2440 /* DEFSUBR (Freverse_direction_charset); */
2441 DEFSUBR (Fdefine_charset_alias);
2442 DEFSUBR (Fcharset_from_attributes);
2443 DEFSUBR (Fcharset_short_name);
2444 DEFSUBR (Fcharset_long_name);
2445 DEFSUBR (Fcharset_description);
2446 DEFSUBR (Fcharset_dimension);
2447 DEFSUBR (Fcharset_property);
2448 DEFSUBR (Fcharset_id);
2449 DEFSUBR (Fset_charset_ccl_program);
2450 DEFSUBR (Fset_charset_registry);
2452 DEFSUBR (Fchar_attribute_alist);
2453 DEFSUBR (Fget_char_attribute);
2454 DEFSUBR (Fput_char_attribute);
2455 DEFSUBR (Fdefine_char);
2456 DEFSUBR (Fchar_variants);
2457 DEFSUBR (Fget_composite_char);
2458 DEFSUBR (Fcharset_mapping_table);
2459 DEFSUBR (Fset_charset_mapping_table);
2462 DEFSUBR (Fmake_char);
2463 DEFSUBR (Fchar_charset);
2464 DEFSUBR (Fsplit_char);
2466 #ifdef ENABLE_COMPOSITE_CHARS
2467 DEFSUBR (Fmake_composite_char);
2468 DEFSUBR (Fcomposite_char_string);
2471 defsymbol (&Qcharsetp, "charsetp");
2472 defsymbol (&Qregistry, "registry");
2473 defsymbol (&Qfinal, "final");
2474 defsymbol (&Qgraphic, "graphic");
2475 defsymbol (&Qdirection, "direction");
2476 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2477 defsymbol (&Qshort_name, "short-name");
2478 defsymbol (&Qlong_name, "long-name");
2480 defsymbol (&Ql2r, "l2r");
2481 defsymbol (&Qr2l, "r2l");
2483 /* Charsets, compatible with FSF 20.3
2484 Naming convention is Script-Charset[-Edition] */
2485 defsymbol (&Qascii, "ascii");
2486 defsymbol (&Qcontrol_1, "control-1");
2487 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2488 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2489 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2490 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2491 defsymbol (&Qthai_tis620, "thai-tis620");
2492 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2493 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2494 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2495 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2496 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2497 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2498 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2499 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2500 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2501 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2502 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2503 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2504 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2505 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2507 defsymbol (&Q_ucs, "->ucs");
2508 defsymbol (&Q_decomposition, "->decomposition");
2509 defsymbol (&Qwide, "wide");
2510 defsymbol (&Qnarrow, "narrow");
2511 defsymbol (&Qcompat, "compat");
2512 defsymbol (&QnoBreak, "noBreak");
2513 defsymbol (&Qsuper, "super");
2514 defsymbol (&Qsub, "sub");
2515 defsymbol (&Qfraction, "fraction");
2516 defsymbol (&Qucs, "ucs");
2517 defsymbol (&Qucs_bmp, "ucs-bmp");
2518 defsymbol (&Qlatin_viscii, "latin-viscii");
2519 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2520 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2521 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2522 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2523 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2524 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2525 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2527 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2528 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2530 defsymbol (&Qcomposite, "composite");
2534 vars_of_mule_charset (void)
2541 /* Table of charsets indexed by leading byte. */
2542 for (i = 0; i < countof (charset_by_leading_byte); i++)
2543 charset_by_leading_byte[i] = Qnil;
2546 /* Table of charsets indexed by type/final-byte. */
2547 for (i = 0; i < countof (charset_by_attributes); i++)
2548 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2549 charset_by_attributes[i][j] = Qnil;
2551 /* Table of charsets indexed by type/final-byte/direction. */
2552 for (i = 0; i < countof (charset_by_attributes); i++)
2553 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2554 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2555 charset_by_attributes[i][j][k] = Qnil;
2559 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2561 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2562 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2566 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2567 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2568 Leading-code of private TYPE9N charset of column-width 1.
2570 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2574 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2575 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2576 Version number of UTF-2000.
2579 staticpro (&Vcharacter_attribute_table);
2580 Vcharacter_attribute_table = make_char_code_table (Qnil);
2582 staticpro (&Vcharacter_composition_table);
2583 Vcharacter_composition_table = make_char_code_table (Qnil);
2585 staticpro (&Vcharacter_variant_table);
2586 Vcharacter_variant_table = make_char_code_table (Qnil);
2588 Vdefault_coded_charset_priority_list = Qnil;
2589 DEFVAR_LISP ("default-coded-charset-priority-list",
2590 &Vdefault_coded_charset_priority_list /*
2591 Default order of preferred coded-character-sets.
2597 complex_vars_of_mule_charset (void)
2599 staticpro (&Vcharset_hash_table);
2600 Vcharset_hash_table =
2601 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2603 /* Predefined character sets. We store them into variables for
2608 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2609 CHARSET_TYPE_256X256, 1, 2, 0,
2610 CHARSET_LEFT_TO_RIGHT,
2611 build_string ("BMP"),
2612 build_string ("BMP"),
2613 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2614 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2615 Qnil, 0, 0xFFFF, 0, 0);
2617 # define MIN_CHAR_THAI 0
2618 # define MAX_CHAR_THAI 0
2619 # define MIN_CHAR_GREEK 0
2620 # define MAX_CHAR_GREEK 0
2621 # define MIN_CHAR_HEBREW 0
2622 # define MAX_CHAR_HEBREW 0
2623 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2624 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2625 # define MIN_CHAR_CYRILLIC 0
2626 # define MAX_CHAR_CYRILLIC 0
2629 make_charset (LEADING_BYTE_ASCII, Qascii,
2630 CHARSET_TYPE_94, 1, 0, 'B',
2631 CHARSET_LEFT_TO_RIGHT,
2632 build_string ("ASCII"),
2633 build_string ("ASCII)"),
2634 build_string ("ASCII (ISO646 IRV)"),
2635 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2636 Qnil, 0, 0x7F, 0, 0);
2637 Vcharset_control_1 =
2638 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2639 CHARSET_TYPE_94, 1, 1, 0,
2640 CHARSET_LEFT_TO_RIGHT,
2641 build_string ("C1"),
2642 build_string ("Control characters"),
2643 build_string ("Control characters 128-191"),
2645 Qnil, 0x80, 0x9F, 0, 0);
2646 Vcharset_latin_iso8859_1 =
2647 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2648 CHARSET_TYPE_96, 1, 1, 'A',
2649 CHARSET_LEFT_TO_RIGHT,
2650 build_string ("Latin-1"),
2651 build_string ("ISO8859-1 (Latin-1)"),
2652 build_string ("ISO8859-1 (Latin-1)"),
2653 build_string ("iso8859-1"),
2654 Qnil, 0xA0, 0xFF, 0, 32);
2655 Vcharset_latin_iso8859_2 =
2656 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2657 CHARSET_TYPE_96, 1, 1, 'B',
2658 CHARSET_LEFT_TO_RIGHT,
2659 build_string ("Latin-2"),
2660 build_string ("ISO8859-2 (Latin-2)"),
2661 build_string ("ISO8859-2 (Latin-2)"),
2662 build_string ("iso8859-2"),
2664 Vcharset_latin_iso8859_3 =
2665 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2666 CHARSET_TYPE_96, 1, 1, 'C',
2667 CHARSET_LEFT_TO_RIGHT,
2668 build_string ("Latin-3"),
2669 build_string ("ISO8859-3 (Latin-3)"),
2670 build_string ("ISO8859-3 (Latin-3)"),
2671 build_string ("iso8859-3"),
2673 Vcharset_latin_iso8859_4 =
2674 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2675 CHARSET_TYPE_96, 1, 1, 'D',
2676 CHARSET_LEFT_TO_RIGHT,
2677 build_string ("Latin-4"),
2678 build_string ("ISO8859-4 (Latin-4)"),
2679 build_string ("ISO8859-4 (Latin-4)"),
2680 build_string ("iso8859-4"),
2682 Vcharset_thai_tis620 =
2683 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2684 CHARSET_TYPE_96, 1, 1, 'T',
2685 CHARSET_LEFT_TO_RIGHT,
2686 build_string ("TIS620"),
2687 build_string ("TIS620 (Thai)"),
2688 build_string ("TIS620.2529 (Thai)"),
2689 build_string ("tis620"),
2690 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2691 Vcharset_greek_iso8859_7 =
2692 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2693 CHARSET_TYPE_96, 1, 1, 'F',
2694 CHARSET_LEFT_TO_RIGHT,
2695 build_string ("ISO8859-7"),
2696 build_string ("ISO8859-7 (Greek)"),
2697 build_string ("ISO8859-7 (Greek)"),
2698 build_string ("iso8859-7"),
2699 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2700 Vcharset_arabic_iso8859_6 =
2701 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2702 CHARSET_TYPE_96, 1, 1, 'G',
2703 CHARSET_RIGHT_TO_LEFT,
2704 build_string ("ISO8859-6"),
2705 build_string ("ISO8859-6 (Arabic)"),
2706 build_string ("ISO8859-6 (Arabic)"),
2707 build_string ("iso8859-6"),
2709 Vcharset_hebrew_iso8859_8 =
2710 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2711 CHARSET_TYPE_96, 1, 1, 'H',
2712 CHARSET_RIGHT_TO_LEFT,
2713 build_string ("ISO8859-8"),
2714 build_string ("ISO8859-8 (Hebrew)"),
2715 build_string ("ISO8859-8 (Hebrew)"),
2716 build_string ("iso8859-8"),
2717 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2718 Vcharset_katakana_jisx0201 =
2719 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2720 CHARSET_TYPE_94, 1, 1, 'I',
2721 CHARSET_LEFT_TO_RIGHT,
2722 build_string ("JISX0201 Kana"),
2723 build_string ("JISX0201.1976 (Japanese Kana)"),
2724 build_string ("JISX0201.1976 Japanese Kana"),
2725 build_string ("jisx0201\\.1976"),
2727 MIN_CHAR_HALFWIDTH_KATAKANA,
2728 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2729 Vcharset_latin_jisx0201 =
2730 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2731 CHARSET_TYPE_94, 1, 0, 'J',
2732 CHARSET_LEFT_TO_RIGHT,
2733 build_string ("JISX0201 Roman"),
2734 build_string ("JISX0201.1976 (Japanese Roman)"),
2735 build_string ("JISX0201.1976 Japanese Roman"),
2736 build_string ("jisx0201\\.1976"),
2738 Vcharset_cyrillic_iso8859_5 =
2739 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2740 CHARSET_TYPE_96, 1, 1, 'L',
2741 CHARSET_LEFT_TO_RIGHT,
2742 build_string ("ISO8859-5"),
2743 build_string ("ISO8859-5 (Cyrillic)"),
2744 build_string ("ISO8859-5 (Cyrillic)"),
2745 build_string ("iso8859-5"),
2746 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2747 Vcharset_latin_iso8859_9 =
2748 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2749 CHARSET_TYPE_96, 1, 1, 'M',
2750 CHARSET_LEFT_TO_RIGHT,
2751 build_string ("Latin-5"),
2752 build_string ("ISO8859-9 (Latin-5)"),
2753 build_string ("ISO8859-9 (Latin-5)"),
2754 build_string ("iso8859-9"),
2756 Vcharset_japanese_jisx0208_1978 =
2757 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2758 CHARSET_TYPE_94X94, 2, 0, '@',
2759 CHARSET_LEFT_TO_RIGHT,
2760 build_string ("JIS X0208:1978"),
2761 build_string ("JIS X0208:1978 (Japanese)"),
2763 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2764 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2766 Vcharset_chinese_gb2312 =
2767 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2768 CHARSET_TYPE_94X94, 2, 0, 'A',
2769 CHARSET_LEFT_TO_RIGHT,
2770 build_string ("GB2312"),
2771 build_string ("GB2312)"),
2772 build_string ("GB2312 Chinese simplified"),
2773 build_string ("gb2312"),
2775 Vcharset_japanese_jisx0208 =
2776 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2777 CHARSET_TYPE_94X94, 2, 0, 'B',
2778 CHARSET_LEFT_TO_RIGHT,
2779 build_string ("JISX0208"),
2780 build_string ("JIS X0208:1983 (Japanese)"),
2781 build_string ("JIS X0208:1983 Japanese Kanji"),
2782 build_string ("jisx0208\\.1983"),
2784 Vcharset_korean_ksc5601 =
2785 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2786 CHARSET_TYPE_94X94, 2, 0, 'C',
2787 CHARSET_LEFT_TO_RIGHT,
2788 build_string ("KSC5601"),
2789 build_string ("KSC5601 (Korean"),
2790 build_string ("KSC5601 Korean Hangul and Hanja"),
2791 build_string ("ksc5601"),
2793 Vcharset_japanese_jisx0212 =
2794 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2795 CHARSET_TYPE_94X94, 2, 0, 'D',
2796 CHARSET_LEFT_TO_RIGHT,
2797 build_string ("JISX0212"),
2798 build_string ("JISX0212 (Japanese)"),
2799 build_string ("JISX0212 Japanese Supplement"),
2800 build_string ("jisx0212"),
2803 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2804 Vcharset_chinese_cns11643_1 =
2805 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2806 CHARSET_TYPE_94X94, 2, 0, 'G',
2807 CHARSET_LEFT_TO_RIGHT,
2808 build_string ("CNS11643-1"),
2809 build_string ("CNS11643-1 (Chinese traditional)"),
2811 ("CNS 11643 Plane 1 Chinese traditional"),
2812 build_string (CHINESE_CNS_PLANE_RE("1")),
2814 Vcharset_chinese_cns11643_2 =
2815 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2816 CHARSET_TYPE_94X94, 2, 0, 'H',
2817 CHARSET_LEFT_TO_RIGHT,
2818 build_string ("CNS11643-2"),
2819 build_string ("CNS11643-2 (Chinese traditional)"),
2821 ("CNS 11643 Plane 2 Chinese traditional"),
2822 build_string (CHINESE_CNS_PLANE_RE("2")),
2825 Vcharset_latin_viscii_lower =
2826 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2827 CHARSET_TYPE_96, 1, 1, '1',
2828 CHARSET_LEFT_TO_RIGHT,
2829 build_string ("VISCII lower"),
2830 build_string ("VISCII lower (Vietnamese)"),
2831 build_string ("VISCII lower (Vietnamese)"),
2832 build_string ("MULEVISCII-LOWER"),
2834 Vcharset_latin_viscii_upper =
2835 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2836 CHARSET_TYPE_96, 1, 1, '2',
2837 CHARSET_LEFT_TO_RIGHT,
2838 build_string ("VISCII upper"),
2839 build_string ("VISCII upper (Vietnamese)"),
2840 build_string ("VISCII upper (Vietnamese)"),
2841 build_string ("MULEVISCII-UPPER"),
2843 Vcharset_latin_viscii =
2844 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2845 CHARSET_TYPE_256, 1, 2, 0,
2846 CHARSET_LEFT_TO_RIGHT,
2847 build_string ("VISCII"),
2848 build_string ("VISCII 1.1 (Vietnamese)"),
2849 build_string ("VISCII 1.1 (Vietnamese)"),
2850 build_string ("VISCII1\\.1"),
2852 Vcharset_ethiopic_ucs =
2853 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2854 CHARSET_TYPE_256X256, 2, 2, 0,
2855 CHARSET_LEFT_TO_RIGHT,
2856 build_string ("Ethiopic (UCS)"),
2857 build_string ("Ethiopic (UCS)"),
2858 build_string ("Ethiopic of UCS"),
2859 build_string ("Ethiopic-Unicode"),
2860 Qnil, 0x1200, 0x137F, 0x1200, 0);
2861 Vcharset_hiragana_jisx0208 =
2862 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2863 CHARSET_TYPE_94X94, 2, 0, 'B',
2864 CHARSET_LEFT_TO_RIGHT,
2865 build_string ("Hiragana"),
2866 build_string ("Hiragana of JIS X0208"),
2867 build_string ("Japanese Hiragana of JIS X0208"),
2868 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2869 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2870 (0x24 - 33) * 94 + (0x21 - 33), 33);
2871 Vcharset_katakana_jisx0208 =
2872 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2873 CHARSET_TYPE_94X94, 2, 0, 'B',
2874 CHARSET_LEFT_TO_RIGHT,
2875 build_string ("Katakana"),
2876 build_string ("Katakana of JIS X0208"),
2877 build_string ("Japanese Katakana of JIS X0208"),
2878 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2879 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2880 (0x25 - 33) * 94 + (0x21 - 33), 33);
2882 Vcharset_chinese_big5_1 =
2883 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2884 CHARSET_TYPE_94X94, 2, 0, '0',
2885 CHARSET_LEFT_TO_RIGHT,
2886 build_string ("Big5"),
2887 build_string ("Big5 (Level-1)"),
2889 ("Big5 Level-1 Chinese traditional"),
2890 build_string ("big5"),
2892 Vcharset_chinese_big5_2 =
2893 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2894 CHARSET_TYPE_94X94, 2, 0, '1',
2895 CHARSET_LEFT_TO_RIGHT,
2896 build_string ("Big5"),
2897 build_string ("Big5 (Level-2)"),
2899 ("Big5 Level-2 Chinese traditional"),
2900 build_string ("big5"),
2903 #ifdef ENABLE_COMPOSITE_CHARS
2904 /* #### For simplicity, we put composite chars into a 96x96 charset.
2905 This is going to lead to problems because you can run out of
2906 room, esp. as we don't yet recycle numbers. */
2907 Vcharset_composite =
2908 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2909 CHARSET_TYPE_96X96, 2, 0, 0,
2910 CHARSET_LEFT_TO_RIGHT,
2911 build_string ("Composite"),
2912 build_string ("Composite characters"),
2913 build_string ("Composite characters"),
2916 composite_char_row_next = 32;
2917 composite_char_col_next = 32;
2919 Vcomposite_char_string2char_hash_table =
2920 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2921 Vcomposite_char_char2string_hash_table =
2922 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2923 staticpro (&Vcomposite_char_string2char_hash_table);
2924 staticpro (&Vcomposite_char_char2string_hash_table);
2925 #endif /* ENABLE_COMPOSITE_CHARS */