1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
391 Lisp_Object QnoBreak;
393 Lisp_Object Qfraction;
396 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
402 else if (EQ (v, Qwide))
404 else if (EQ (v, Qnarrow))
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qsuper))
412 else if (EQ (v, Qfraction))
415 signal_simple_error (err_msg, err_arg);
418 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
419 Return character corresponding with list.
423 Lisp_Object table = Vcharacter_composition_table;
424 Lisp_Object rest = list;
428 Lisp_Object v = Fcar (rest);
430 Emchar c = to_char_code (v, "Invalid value for composition", list);
432 ret = get_char_code_table (c, table);
437 if (!CHAR_CODE_TABLE_P (ret))
442 else if (!CONSP (rest))
444 else if (CHAR_CODE_TABLE_P (ret))
447 signal_simple_error ("Invalid table is found with", list);
449 signal_simple_error ("Invalid value for composition", list);
452 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
453 Return variants of CHARACTER.
457 CHECK_CHAR (character);
458 return Fcopy_list (get_char_code_table (XCHAR (character),
459 Vcharacter_variant_table));
462 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
463 Return the alist of attributes of CHARACTER.
467 CHECK_CHAR (character);
468 return Fcopy_alist (get_char_code_table (XCHAR (character),
469 Vcharacter_attribute_table));
472 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
473 Return the value of CHARACTER's ATTRIBUTE.
475 (character, attribute))
478 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
484 if (!NILP (ccs = Ffind_charset (attribute)))
487 return Fcdr (Fassq (attribute, ret));
491 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
494 Emchar char_code = XCHAR (character);
496 = get_char_code_table (char_code, Vcharacter_attribute_table);
499 cell = Fassq (attribute, ret);
503 ret = Fcons (Fcons (attribute, value), ret);
505 else if (!EQ (Fcdr (cell), value))
507 Fsetcdr (cell, value);
509 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
513 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
514 Store CHARACTER's ATTRIBUTE with VALUE.
516 (character, attribute, value))
520 ccs = Ffind_charset (attribute);
524 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
529 /* ad-hoc method for `ascii' */
530 if ((XCHARSET_CHARS (ccs) == 94) &&
531 (XCHARSET_BYTE_OFFSET (ccs) != 33))
532 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
534 ccs_len = XCHARSET_CHARS (ccs);
537 signal_simple_error ("Invalid value for coded-charset",
541 rest = Fget_char_attribute (character, attribute);
548 Lisp_Object ei = Fcar (rest);
550 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
551 nv = XVECTOR_DATA(v)[i];
558 XVECTOR_DATA(v)[i] = Qnil;
559 v = XCHARSET_DECODING_TABLE (ccs);
564 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
571 Lisp_Object ei = Fcar (rest);
574 signal_simple_error ("Invalid value for coded-charset", value);
576 if ((i < 0) || (255 < i))
577 signal_simple_error ("Invalid value for coded-charset", value);
578 if (XCHARSET_GRAPHIC (ccs) == 1)
580 i -= XCHARSET_BYTE_OFFSET (ccs);
581 nv = XVECTOR_DATA(v)[i];
587 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
594 XVECTOR_DATA(v)[i] = character;
596 else if (EQ (attribute, Q_decomposition))
598 Lisp_Object rest = value;
599 Lisp_Object table = Vcharacter_composition_table;
602 signal_simple_error ("Invalid value for ->decomposition",
607 Lisp_Object v = Fcar (rest);
610 = to_char_code (v, "Invalid value for ->decomposition", value);
615 put_char_code_table (c, character, table);
620 ntable = get_char_code_table (c, table);
621 if (!CHAR_CODE_TABLE_P (ntable))
623 ntable = make_char_code_table (Qnil);
624 put_char_code_table (c, ntable, table);
630 else if (EQ (attribute, Q_ucs))
636 signal_simple_error ("Invalid value for ->ucs", value);
640 ret = get_char_code_table (c, Vcharacter_variant_table);
641 if (NILP (Fmemq (character, ret)))
643 put_char_code_table (c, Fcons (character, ret),
644 Vcharacter_variant_table);
647 return put_char_attribute (character, attribute, value);
652 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
653 Store character's ATTRIBUTES.
657 Lisp_Object rest = attributes;
658 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
659 Lisp_Object character;
665 Lisp_Object cell = Fcar (rest);
669 signal_simple_error ("Invalid argument", attributes);
670 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
671 && XCHARSET_FINAL (ccs))
675 if (XCHARSET_DIMENSION (ccs) == 1)
677 Lisp_Object eb1 = Fcar (Fcdr (cell));
681 signal_simple_error ("Invalid argument", attributes);
683 switch (XCHARSET_CHARS (ccs))
687 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
691 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
697 else if (XCHARSET_DIMENSION (ccs) == 2)
699 Lisp_Object eb1 = Fcar (Fcdr (cell));
700 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
704 signal_simple_error ("Invalid argument", attributes);
707 signal_simple_error ("Invalid argument", attributes);
709 switch (XCHARSET_CHARS (ccs))
712 code = MIN_CHAR_94x94
713 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
714 + (b1 - 33) * 94 + (b2 - 33);
717 code = MIN_CHAR_96x96
718 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
719 + (b1 - 32) * 96 + (b2 - 32);
730 character = make_char (code);
731 goto setup_attributes;
737 else if (!INTP (code))
738 signal_simple_error ("Invalid argument", attributes);
740 character = make_char (XINT (code));
746 Lisp_Object cell = Fcar (rest);
749 signal_simple_error ("Invalid argument", attributes);
750 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
754 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
757 Lisp_Object Vutf_2000_version;
761 int leading_code_private_11;
764 Lisp_Object Qcharsetp;
766 /* Qdoc_string, Qdimension, Qchars defined in general.c */
767 Lisp_Object Qregistry, Qfinal, Qgraphic;
768 Lisp_Object Qdirection;
769 Lisp_Object Qreverse_direction_charset;
770 Lisp_Object Qleading_byte;
771 Lisp_Object Qshort_name, Qlong_name;
787 Qjapanese_jisx0208_1978,
799 Qvietnamese_viscii_lower,
800 Qvietnamese_viscii_upper,
809 Lisp_Object Ql2r, Qr2l;
811 Lisp_Object Vcharset_hash_table;
814 static Charset_ID next_allocated_leading_byte;
816 static Charset_ID next_allocated_1_byte_leading_byte;
817 static Charset_ID next_allocated_2_byte_leading_byte;
820 /* Composite characters are characters constructed by overstriking two
821 or more regular characters.
823 1) The old Mule implementation involves storing composite characters
824 in a buffer as a tag followed by all of the actual characters
825 used to make up the composite character. I think this is a bad
826 idea; it greatly complicates code that wants to handle strings
827 one character at a time because it has to deal with the possibility
828 of great big ungainly characters. It's much more reasonable to
829 simply store an index into a table of composite characters.
831 2) The current implementation only allows for 16,384 separate
832 composite characters over the lifetime of the XEmacs process.
833 This could become a potential problem if the user
834 edited lots of different files that use composite characters.
835 Due to FSF bogosity, increasing the number of allowable
836 composite characters under Mule would decrease the number
837 of possible faces that can exist. Mule already has shrunk
838 this to 2048, and further shrinkage would become uncomfortable.
839 No such problems exist in XEmacs.
841 Composite characters could be represented as 0x80 C1 C2 C3,
842 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
843 for slightly under 2^20 (one million) composite characters
844 over the XEmacs process lifetime, and you only need to
845 increase the size of a Mule character from 19 to 21 bits.
846 Or you could use 0x80 C1 C2 C3 C4, allowing for about
847 85 million (slightly over 2^26) composite characters. */
850 /************************************************************************/
851 /* Basic Emchar functions */
852 /************************************************************************/
854 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
855 string in STR. Returns the number of bytes stored.
856 Do not call this directly. Use the macro set_charptr_emchar() instead.
860 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
875 else if ( c <= 0x7ff )
877 *p++ = (c >> 6) | 0xc0;
878 *p++ = (c & 0x3f) | 0x80;
880 else if ( c <= 0xffff )
882 *p++ = (c >> 12) | 0xe0;
883 *p++ = ((c >> 6) & 0x3f) | 0x80;
884 *p++ = (c & 0x3f) | 0x80;
886 else if ( c <= 0x1fffff )
888 *p++ = (c >> 18) | 0xf0;
889 *p++ = ((c >> 12) & 0x3f) | 0x80;
890 *p++ = ((c >> 6) & 0x3f) | 0x80;
891 *p++ = (c & 0x3f) | 0x80;
893 else if ( c <= 0x3ffffff )
895 *p++ = (c >> 24) | 0xf8;
896 *p++ = ((c >> 18) & 0x3f) | 0x80;
897 *p++ = ((c >> 12) & 0x3f) | 0x80;
898 *p++ = ((c >> 6) & 0x3f) | 0x80;
899 *p++ = (c & 0x3f) | 0x80;
903 *p++ = (c >> 30) | 0xfc;
904 *p++ = ((c >> 24) & 0x3f) | 0x80;
905 *p++ = ((c >> 18) & 0x3f) | 0x80;
906 *p++ = ((c >> 12) & 0x3f) | 0x80;
907 *p++ = ((c >> 6) & 0x3f) | 0x80;
908 *p++ = (c & 0x3f) | 0x80;
911 BREAKUP_CHAR (c, charset, c1, c2);
912 lb = CHAR_LEADING_BYTE (c);
913 if (LEADING_BYTE_PRIVATE_P (lb))
914 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
916 if (EQ (charset, Vcharset_control_1))
925 /* Return the first character from a Mule-encoded string in STR,
926 assuming it's non-ASCII. Do not call this directly.
927 Use the macro charptr_emchar() instead. */
930 non_ascii_charptr_emchar (CONST Bufbyte *str)
943 else if ( b >= 0xf8 )
948 else if ( b >= 0xf0 )
953 else if ( b >= 0xe0 )
958 else if ( b >= 0xc0 )
968 for( ; len > 0; len-- )
971 ch = ( ch << 6 ) | ( b & 0x3f );
975 Bufbyte i0 = *str, i1, i2 = 0;
978 if (i0 == LEADING_BYTE_CONTROL_1)
979 return (Emchar) (*++str - 0x20);
981 if (LEADING_BYTE_PREFIX_P (i0))
986 charset = CHARSET_BY_LEADING_BYTE (i0);
987 if (XCHARSET_DIMENSION (charset) == 2)
990 return MAKE_CHAR (charset, i1, i2);
994 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
995 Do not call this directly. Use the macro valid_char_p() instead. */
999 non_ascii_valid_char_p (Emchar ch)
1003 /* Must have only lowest 19 bits set */
1007 f1 = CHAR_FIELD1 (ch);
1008 f2 = CHAR_FIELD2 (ch);
1009 f3 = CHAR_FIELD3 (ch);
1013 Lisp_Object charset;
1015 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1016 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1017 f2 > MAX_CHAR_FIELD2_PRIVATE)
1022 if (f3 != 0x20 && f3 != 0x7F)
1026 NOTE: This takes advantage of the fact that
1027 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1028 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1030 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1031 return (XCHARSET_CHARS (charset) == 96);
1035 Lisp_Object charset;
1037 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1038 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1039 f1 > MAX_CHAR_FIELD1_PRIVATE)
1041 if (f2 < 0x20 || f3 < 0x20)
1044 #ifdef ENABLE_COMPOSITE_CHARS
1045 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1047 if (UNBOUNDP (Fgethash (make_int (ch),
1048 Vcomposite_char_char2string_hash_table,
1053 #endif /* ENABLE_COMPOSITE_CHARS */
1055 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1058 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1060 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1063 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1065 return (XCHARSET_CHARS (charset) == 96);
1071 /************************************************************************/
1072 /* Basic string functions */
1073 /************************************************************************/
1075 /* Copy the character pointed to by PTR into STR, assuming it's
1076 non-ASCII. Do not call this directly. Use the macro
1077 charptr_copy_char() instead. */
1080 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1082 Bufbyte *strptr = str;
1084 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1086 /* Notice fallthrough. */
1088 case 6: *++strptr = *ptr++;
1089 case 5: *++strptr = *ptr++;
1091 case 4: *++strptr = *ptr++;
1092 case 3: *++strptr = *ptr++;
1093 case 2: *++strptr = *ptr;
1098 return strptr + 1 - str;
1102 /************************************************************************/
1103 /* streams of Emchars */
1104 /************************************************************************/
1106 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1107 The functions below are not meant to be called directly; use
1108 the macros in insdel.h. */
1111 Lstream_get_emchar_1 (Lstream *stream, int ch)
1113 Bufbyte str[MAX_EMCHAR_LEN];
1114 Bufbyte *strptr = str;
1116 str[0] = (Bufbyte) ch;
1117 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1119 /* Notice fallthrough. */
1122 ch = Lstream_getc (stream);
1124 *++strptr = (Bufbyte) ch;
1126 ch = Lstream_getc (stream);
1128 *++strptr = (Bufbyte) ch;
1131 ch = Lstream_getc (stream);
1133 *++strptr = (Bufbyte) ch;
1135 ch = Lstream_getc (stream);
1137 *++strptr = (Bufbyte) ch;
1139 ch = Lstream_getc (stream);
1141 *++strptr = (Bufbyte) ch;
1146 return charptr_emchar (str);
1150 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1152 Bufbyte str[MAX_EMCHAR_LEN];
1153 Bytecount len = set_charptr_emchar (str, ch);
1154 return Lstream_write (stream, str, len);
1158 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1160 Bufbyte str[MAX_EMCHAR_LEN];
1161 Bytecount len = set_charptr_emchar (str, ch);
1162 Lstream_unread (stream, str, len);
1166 /************************************************************************/
1167 /* charset object */
1168 /************************************************************************/
1171 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1173 struct Lisp_Charset *cs = XCHARSET (obj);
1175 markobj (cs->short_name);
1176 markobj (cs->long_name);
1177 markobj (cs->doc_string);
1178 markobj (cs->registry);
1179 markobj (cs->ccl_program);
1181 markobj (cs->decoding_table);
1187 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1189 struct Lisp_Charset *cs = XCHARSET (obj);
1193 error ("printing unreadable object #<charset %s 0x%x>",
1194 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1197 write_c_string ("#<charset ", printcharfun);
1198 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1199 write_c_string (" ", printcharfun);
1200 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1201 write_c_string (" ", printcharfun);
1202 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1203 write_c_string (" ", printcharfun);
1204 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1205 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1206 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1207 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1208 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1210 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1211 CHARSET_COLUMNS (cs),
1212 CHARSET_GRAPHIC (cs),
1213 CHARSET_FINAL (cs));
1214 write_c_string (buf, printcharfun);
1215 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1216 sprintf (buf, " 0x%x>", cs->header.uid);
1217 write_c_string (buf, printcharfun);
1220 static const struct lrecord_description charset_description[] = {
1221 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1223 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1228 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1229 mark_charset, print_charset, 0, 0, 0,
1230 charset_description,
1231 struct Lisp_Charset);
1233 /* Make a new charset. */
1236 make_charset (Charset_ID id, Lisp_Object name,
1237 unsigned char type, unsigned char columns, unsigned char graphic,
1238 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1239 Lisp_Object long_name, Lisp_Object doc,
1241 Lisp_Object decoding_table,
1242 Emchar ucs_min, Emchar ucs_max,
1243 Emchar code_offset, unsigned char byte_offset)
1246 struct Lisp_Charset *cs =
1247 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1248 XSETCHARSET (obj, cs);
1250 CHARSET_ID (cs) = id;
1251 CHARSET_NAME (cs) = name;
1252 CHARSET_SHORT_NAME (cs) = short_name;
1253 CHARSET_LONG_NAME (cs) = long_name;
1254 CHARSET_DIRECTION (cs) = direction;
1255 CHARSET_TYPE (cs) = type;
1256 CHARSET_COLUMNS (cs) = columns;
1257 CHARSET_GRAPHIC (cs) = graphic;
1258 CHARSET_FINAL (cs) = final;
1259 CHARSET_DOC_STRING (cs) = doc;
1260 CHARSET_REGISTRY (cs) = reg;
1261 CHARSET_CCL_PROGRAM (cs) = Qnil;
1262 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1264 CHARSET_DECODING_TABLE(cs) = Qnil;
1265 CHARSET_UCS_MIN(cs) = ucs_min;
1266 CHARSET_UCS_MAX(cs) = ucs_max;
1267 CHARSET_CODE_OFFSET(cs) = code_offset;
1268 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1271 switch (CHARSET_TYPE (cs))
1273 case CHARSET_TYPE_94:
1274 CHARSET_DIMENSION (cs) = 1;
1275 CHARSET_CHARS (cs) = 94;
1277 case CHARSET_TYPE_96:
1278 CHARSET_DIMENSION (cs) = 1;
1279 CHARSET_CHARS (cs) = 96;
1281 case CHARSET_TYPE_94X94:
1282 CHARSET_DIMENSION (cs) = 2;
1283 CHARSET_CHARS (cs) = 94;
1285 case CHARSET_TYPE_96X96:
1286 CHARSET_DIMENSION (cs) = 2;
1287 CHARSET_CHARS (cs) = 96;
1290 case CHARSET_TYPE_128:
1291 CHARSET_DIMENSION (cs) = 1;
1292 CHARSET_CHARS (cs) = 128;
1294 case CHARSET_TYPE_128X128:
1295 CHARSET_DIMENSION (cs) = 2;
1296 CHARSET_CHARS (cs) = 128;
1298 case CHARSET_TYPE_256:
1299 CHARSET_DIMENSION (cs) = 1;
1300 CHARSET_CHARS (cs) = 256;
1302 case CHARSET_TYPE_256X256:
1303 CHARSET_DIMENSION (cs) = 2;
1304 CHARSET_CHARS (cs) = 256;
1310 if (id == LEADING_BYTE_ASCII)
1311 CHARSET_REP_BYTES (cs) = 1;
1313 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1315 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1320 /* some charsets do not have final characters. This includes
1321 ASCII, Control-1, Composite, and the two faux private
1324 if (code_offset == 0)
1326 assert (NILP (charset_by_attributes[type][final]));
1327 charset_by_attributes[type][final] = obj;
1330 assert (NILP (charset_by_attributes[type][final][direction]));
1331 charset_by_attributes[type][final][direction] = obj;
1335 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1336 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1339 /* official leading byte */
1340 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1343 /* Some charsets are "faux" and don't have names or really exist at
1344 all except in the leading-byte table. */
1346 Fputhash (name, obj, Vcharset_hash_table);
1351 get_unallocated_leading_byte (int dimension)
1356 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1359 lb = next_allocated_leading_byte++;
1363 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1366 lb = next_allocated_1_byte_leading_byte++;
1370 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1373 lb = next_allocated_2_byte_leading_byte++;
1379 ("No more character sets free for this dimension",
1380 make_int (dimension));
1387 range_charset_code_point (Lisp_Object charset, Emchar ch)
1391 if ((XCHARSET_UCS_MIN (charset) <= ch)
1392 && (ch <= XCHARSET_UCS_MAX (charset)))
1394 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1396 if (XCHARSET_DIMENSION (charset) == 1)
1397 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1398 else if (XCHARSET_DIMENSION (charset) == 2)
1399 return list2 (make_int (d / XCHARSET_CHARS (charset)
1400 + XCHARSET_BYTE_OFFSET (charset)),
1401 make_int (d % XCHARSET_CHARS (charset)
1402 + XCHARSET_BYTE_OFFSET (charset)));
1403 else if (XCHARSET_DIMENSION (charset) == 3)
1404 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1405 * XCHARSET_CHARS (charset))
1406 + XCHARSET_BYTE_OFFSET (charset)),
1407 make_int (d / XCHARSET_CHARS (charset)
1408 % XCHARSET_CHARS (charset)
1409 + XCHARSET_BYTE_OFFSET (charset)),
1410 make_int (d % XCHARSET_CHARS (charset)
1411 + XCHARSET_BYTE_OFFSET (charset)));
1412 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1413 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1414 * XCHARSET_CHARS (charset)
1415 * XCHARSET_CHARS (charset))
1416 + XCHARSET_BYTE_OFFSET (charset)),
1417 make_int (d / (XCHARSET_CHARS (charset)
1418 * XCHARSET_CHARS (charset))
1419 % XCHARSET_CHARS (charset)
1420 + XCHARSET_BYTE_OFFSET (charset)),
1421 make_int (d / XCHARSET_CHARS (charset)
1422 % XCHARSET_CHARS (charset)
1423 + XCHARSET_BYTE_OFFSET (charset)),
1424 make_int (d % XCHARSET_CHARS (charset)
1425 + XCHARSET_BYTE_OFFSET (charset)));
1427 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1429 if (XCHARSET_DIMENSION (charset) == 1)
1431 if (XCHARSET_CHARS (charset) == 94)
1433 if (((d = ch - (MIN_CHAR_94
1434 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1436 return list1 (make_int (d + 33));
1438 else if (XCHARSET_CHARS (charset) == 96)
1440 if (((d = ch - (MIN_CHAR_96
1441 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1443 return list1 (make_int (d + 32));
1448 else if (XCHARSET_DIMENSION (charset) == 2)
1450 if (XCHARSET_CHARS (charset) == 94)
1452 if (((d = ch - (MIN_CHAR_94x94
1453 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1456 return list2 (make_int ((d / 94) + 33),
1457 make_int (d % 94 + 33));
1459 else if (XCHARSET_CHARS (charset) == 96)
1461 if (((d = ch - (MIN_CHAR_96x96
1462 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1465 return list2 (make_int ((d / 96) + 32),
1466 make_int (d % 96 + 32));
1474 split_builtin_char (Emchar c)
1476 if (c < MIN_CHAR_OBS_94x94)
1478 if (c <= MAX_CHAR_BASIC_LATIN)
1480 return list2 (Vcharset_ascii, make_int (c));
1484 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1488 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1490 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1492 return list2 (Vcharset_greek_iso8859_7,
1493 make_int (c - MIN_CHAR_GREEK + 0x20));
1495 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1497 return list2 (Vcharset_cyrillic_iso8859_5,
1498 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1500 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1502 return list2 (Vcharset_hebrew_iso8859_8,
1503 make_int (c - MIN_CHAR_HEBREW + 0x20));
1505 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1507 return list2 (Vcharset_thai_tis620,
1508 make_int (c - MIN_CHAR_THAI + 0x20));
1510 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1511 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1513 return list2 (Vcharset_katakana_jisx0201,
1514 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1518 return list3 (Vcharset_ucs_bmp,
1519 make_int (c >> 8), make_int (c & 0xff));
1522 else if (c <= MAX_CHAR_OBS_94x94)
1524 return list3 (CHARSET_BY_ATTRIBUTES
1525 (CHARSET_TYPE_94X94,
1526 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1527 CHARSET_LEFT_TO_RIGHT),
1528 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1529 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1531 else if (c <= MAX_CHAR_94)
1533 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1534 ((c - MIN_CHAR_94) / 94) + '0',
1535 CHARSET_LEFT_TO_RIGHT),
1536 make_int (((c - MIN_CHAR_94) % 94) + 33));
1538 else if (c <= MAX_CHAR_96)
1540 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1541 ((c - MIN_CHAR_96) / 96) + '0',
1542 CHARSET_LEFT_TO_RIGHT),
1543 make_int (((c - MIN_CHAR_96) % 96) + 32));
1545 else if (c <= MAX_CHAR_94x94)
1547 return list3 (CHARSET_BY_ATTRIBUTES
1548 (CHARSET_TYPE_94X94,
1549 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1550 CHARSET_LEFT_TO_RIGHT),
1551 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1552 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1554 else if (c <= MAX_CHAR_96x96)
1556 return list3 (CHARSET_BY_ATTRIBUTES
1557 (CHARSET_TYPE_96X96,
1558 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1559 CHARSET_LEFT_TO_RIGHT),
1560 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1561 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1570 charset_code_point (Lisp_Object charset, Emchar ch)
1572 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1574 if (!EQ (cdef, Qnil))
1576 Lisp_Object field = Fassq (charset, cdef);
1578 if (!EQ (field, Qnil))
1579 return Fcdr (field);
1581 return range_charset_code_point (charset, ch);
1584 Lisp_Object Vdefault_coded_charset_priority_list;
1588 /************************************************************************/
1589 /* Basic charset Lisp functions */
1590 /************************************************************************/
1592 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1593 Return non-nil if OBJECT is a charset.
1597 return CHARSETP (object) ? Qt : Qnil;
1600 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1601 Retrieve the charset of the given name.
1602 If CHARSET-OR-NAME is a charset object, it is simply returned.
1603 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1604 nil is returned. Otherwise the associated charset object is returned.
1608 if (CHARSETP (charset_or_name))
1609 return charset_or_name;
1611 CHECK_SYMBOL (charset_or_name);
1612 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1615 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1616 Retrieve the charset of the given name.
1617 Same as `find-charset' except an error is signalled if there is no such
1618 charset instead of returning nil.
1622 Lisp_Object charset = Ffind_charset (name);
1625 signal_simple_error ("No such charset", name);
1629 /* We store the charsets in hash tables with the names as the key and the
1630 actual charset object as the value. Occasionally we need to use them
1631 in a list format. These routines provide us with that. */
1632 struct charset_list_closure
1634 Lisp_Object *charset_list;
1638 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1639 void *charset_list_closure)
1641 /* This function can GC */
1642 struct charset_list_closure *chcl =
1643 (struct charset_list_closure*) charset_list_closure;
1644 Lisp_Object *charset_list = chcl->charset_list;
1646 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1650 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1651 Return a list of the names of all defined charsets.
1655 Lisp_Object charset_list = Qnil;
1656 struct gcpro gcpro1;
1657 struct charset_list_closure charset_list_closure;
1659 GCPRO1 (charset_list);
1660 charset_list_closure.charset_list = &charset_list;
1661 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1662 &charset_list_closure);
1665 return charset_list;
1668 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1669 Return the name of the given charset.
1673 return XCHARSET_NAME (Fget_charset (charset));
1676 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1677 Define a new character set.
1678 This function is for use with Mule support.
1679 NAME is a symbol, the name by which the character set is normally referred.
1680 DOC-STRING is a string describing the character set.
1681 PROPS is a property list, describing the specific nature of the
1682 character set. Recognized properties are:
1684 'short-name Short version of the charset name (ex: Latin-1)
1685 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1686 'registry A regular expression matching the font registry field for
1688 'dimension Number of octets used to index a character in this charset.
1689 Either 1 or 2. Defaults to 1.
1690 'columns Number of columns used to display a character in this charset.
1691 Only used in TTY mode. (Under X, the actual width of a
1692 character can be derived from the font used to display the
1693 characters.) If unspecified, defaults to the dimension
1694 (this is almost always the correct value).
1695 'chars Number of characters in each dimension (94 or 96).
1696 Defaults to 94. Note that if the dimension is 2, the
1697 character set thus described is 94x94 or 96x96.
1698 'final Final byte of ISO 2022 escape sequence. Must be
1699 supplied. Each combination of (DIMENSION, CHARS) defines a
1700 separate namespace for final bytes. Note that ISO
1701 2022 restricts the final byte to the range
1702 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1703 dimension == 2. Note also that final bytes in the range
1704 0x30 - 0x3F are reserved for user-defined (not official)
1706 'graphic 0 (use left half of font on output) or 1 (use right half
1707 of font on output). Defaults to 0. For example, for
1708 a font whose registry is ISO8859-1, the left half
1709 (octets 0x20 - 0x7F) is the `ascii' character set, while
1710 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1711 character set. With 'graphic set to 0, the octets
1712 will have their high bit cleared; with it set to 1,
1713 the octets will have their high bit set.
1714 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1716 'ccl-program A compiled CCL program used to convert a character in
1717 this charset into an index into the font. This is in
1718 addition to the 'graphic property. The CCL program
1719 is passed the octets of the character, with the high
1720 bit cleared and set depending upon whether the value
1721 of the 'graphic property is 0 or 1.
1723 (name, doc_string, props))
1725 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1726 int direction = CHARSET_LEFT_TO_RIGHT;
1728 Lisp_Object registry = Qnil;
1729 Lisp_Object charset;
1730 Lisp_Object rest, keyword, value;
1731 Lisp_Object ccl_program = Qnil;
1732 Lisp_Object short_name = Qnil, long_name = Qnil;
1733 int byte_offset = -1;
1735 CHECK_SYMBOL (name);
1736 if (!NILP (doc_string))
1737 CHECK_STRING (doc_string);
1739 charset = Ffind_charset (name);
1740 if (!NILP (charset))
1741 signal_simple_error ("Cannot redefine existing charset", name);
1743 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1745 if (EQ (keyword, Qshort_name))
1747 CHECK_STRING (value);
1751 if (EQ (keyword, Qlong_name))
1753 CHECK_STRING (value);
1757 else if (EQ (keyword, Qdimension))
1760 dimension = XINT (value);
1761 if (dimension < 1 || dimension > 2)
1762 signal_simple_error ("Invalid value for 'dimension", value);
1765 else if (EQ (keyword, Qchars))
1768 chars = XINT (value);
1769 if (chars != 94 && chars != 96)
1770 signal_simple_error ("Invalid value for 'chars", value);
1773 else if (EQ (keyword, Qcolumns))
1776 columns = XINT (value);
1777 if (columns != 1 && columns != 2)
1778 signal_simple_error ("Invalid value for 'columns", value);
1781 else if (EQ (keyword, Qgraphic))
1784 graphic = XINT (value);
1786 if (graphic < 0 || graphic > 2)
1788 if (graphic < 0 || graphic > 1)
1790 signal_simple_error ("Invalid value for 'graphic", value);
1793 else if (EQ (keyword, Qregistry))
1795 CHECK_STRING (value);
1799 else if (EQ (keyword, Qdirection))
1801 if (EQ (value, Ql2r))
1802 direction = CHARSET_LEFT_TO_RIGHT;
1803 else if (EQ (value, Qr2l))
1804 direction = CHARSET_RIGHT_TO_LEFT;
1806 signal_simple_error ("Invalid value for 'direction", value);
1809 else if (EQ (keyword, Qfinal))
1811 CHECK_CHAR_COERCE_INT (value);
1812 final = XCHAR (value);
1813 if (final < '0' || final > '~')
1814 signal_simple_error ("Invalid value for 'final", value);
1817 else if (EQ (keyword, Qccl_program))
1819 CHECK_VECTOR (value);
1820 ccl_program = value;
1824 signal_simple_error ("Unrecognized property", keyword);
1828 error ("'final must be specified");
1829 if (dimension == 2 && final > 0x5F)
1831 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1835 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1837 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1839 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1840 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1842 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1844 id = get_unallocated_leading_byte (dimension);
1846 if (NILP (doc_string))
1847 doc_string = build_string ("");
1849 if (NILP (registry))
1850 registry = build_string ("");
1852 if (NILP (short_name))
1853 XSETSTRING (short_name, XSYMBOL (name)->name);
1855 if (NILP (long_name))
1856 long_name = doc_string;
1859 columns = dimension;
1861 if (byte_offset < 0)
1865 else if (chars == 96)
1871 charset = make_charset (id, name, type, columns, graphic,
1872 final, direction, short_name, long_name,
1873 doc_string, registry,
1874 Qnil, 0, 0, 0, byte_offset);
1875 if (!NILP (ccl_program))
1876 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1880 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1882 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1883 NEW-NAME is the name of the new charset. Return the new charset.
1885 (charset, new_name))
1887 Lisp_Object new_charset = Qnil;
1888 int id, dimension, columns, graphic, final;
1889 int direction, type;
1890 Lisp_Object registry, doc_string, short_name, long_name;
1891 struct Lisp_Charset *cs;
1893 charset = Fget_charset (charset);
1894 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1895 signal_simple_error ("Charset already has reverse-direction charset",
1898 CHECK_SYMBOL (new_name);
1899 if (!NILP (Ffind_charset (new_name)))
1900 signal_simple_error ("Cannot redefine existing charset", new_name);
1902 cs = XCHARSET (charset);
1904 type = CHARSET_TYPE (cs);
1905 columns = CHARSET_COLUMNS (cs);
1906 dimension = CHARSET_DIMENSION (cs);
1907 id = get_unallocated_leading_byte (dimension);
1909 graphic = CHARSET_GRAPHIC (cs);
1910 final = CHARSET_FINAL (cs);
1911 direction = CHARSET_RIGHT_TO_LEFT;
1912 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1913 direction = CHARSET_LEFT_TO_RIGHT;
1914 doc_string = CHARSET_DOC_STRING (cs);
1915 short_name = CHARSET_SHORT_NAME (cs);
1916 long_name = CHARSET_LONG_NAME (cs);
1917 registry = CHARSET_REGISTRY (cs);
1919 new_charset = make_charset (id, new_name, type, columns,
1920 graphic, final, direction, short_name, long_name,
1921 doc_string, registry,
1923 CHARSET_DECODING_TABLE(cs),
1924 CHARSET_UCS_MIN(cs),
1925 CHARSET_UCS_MAX(cs),
1926 CHARSET_CODE_OFFSET(cs),
1927 CHARSET_BYTE_OFFSET(cs)
1933 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1934 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1939 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1940 Define symbol ALIAS as an alias for CHARSET.
1944 CHECK_SYMBOL (alias);
1945 charset = Fget_charset (charset);
1946 return Fputhash (alias, charset, Vcharset_hash_table);
1949 /* #### Reverse direction charsets not yet implemented. */
1951 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1953 Return the reverse-direction charset parallel to CHARSET, if any.
1954 This is the charset with the same properties (in particular, the same
1955 dimension, number of characters per dimension, and final byte) as
1956 CHARSET but whose characters are displayed in the opposite direction.
1960 charset = Fget_charset (charset);
1961 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1965 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1966 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1967 If DIRECTION is omitted, both directions will be checked (left-to-right
1968 will be returned if character sets exist for both directions).
1970 (dimension, chars, final, direction))
1972 int dm, ch, fi, di = -1;
1974 Lisp_Object obj = Qnil;
1976 CHECK_INT (dimension);
1977 dm = XINT (dimension);
1978 if (dm < 1 || dm > 2)
1979 signal_simple_error ("Invalid value for DIMENSION", dimension);
1983 if (ch != 94 && ch != 96)
1984 signal_simple_error ("Invalid value for CHARS", chars);
1986 CHECK_CHAR_COERCE_INT (final);
1988 if (fi < '0' || fi > '~')
1989 signal_simple_error ("Invalid value for FINAL", final);
1991 if (EQ (direction, Ql2r))
1992 di = CHARSET_LEFT_TO_RIGHT;
1993 else if (EQ (direction, Qr2l))
1994 di = CHARSET_RIGHT_TO_LEFT;
1995 else if (!NILP (direction))
1996 signal_simple_error ("Invalid value for DIRECTION", direction);
1998 if (dm == 2 && fi > 0x5F)
2000 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2003 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2005 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2009 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2011 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2014 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2017 return XCHARSET_NAME (obj);
2021 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2022 Return short name of CHARSET.
2026 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2029 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2030 Return long name of CHARSET.
2034 return XCHARSET_LONG_NAME (Fget_charset (charset));
2037 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2038 Return description of CHARSET.
2042 return XCHARSET_DOC_STRING (Fget_charset (charset));
2045 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2046 Return dimension of CHARSET.
2050 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2053 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2054 Return property PROP of CHARSET.
2055 Recognized properties are those listed in `make-charset', as well as
2056 'name and 'doc-string.
2060 struct Lisp_Charset *cs;
2062 charset = Fget_charset (charset);
2063 cs = XCHARSET (charset);
2065 CHECK_SYMBOL (prop);
2066 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2067 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2068 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2069 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2070 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2071 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2072 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2073 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2074 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2075 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2076 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2077 if (EQ (prop, Qdirection))
2078 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2079 if (EQ (prop, Qreverse_direction_charset))
2081 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2085 return XCHARSET_NAME (obj);
2087 signal_simple_error ("Unrecognized charset property name", prop);
2088 return Qnil; /* not reached */
2091 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2092 Return charset identification number of CHARSET.
2096 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2099 /* #### We need to figure out which properties we really want to
2102 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2103 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2105 (charset, ccl_program))
2107 charset = Fget_charset (charset);
2108 CHECK_VECTOR (ccl_program);
2109 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2114 invalidate_charset_font_caches (Lisp_Object charset)
2116 /* Invalidate font cache entries for charset on all devices. */
2117 Lisp_Object devcons, concons, hash_table;
2118 DEVICE_LOOP_NO_BREAK (devcons, concons)
2120 struct device *d = XDEVICE (XCAR (devcons));
2121 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2122 if (!UNBOUNDP (hash_table))
2123 Fclrhash (hash_table);
2127 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2128 Set the 'registry property of CHARSET to REGISTRY.
2130 (charset, registry))
2132 charset = Fget_charset (charset);
2133 CHECK_STRING (registry);
2134 XCHARSET_REGISTRY (charset) = registry;
2135 invalidate_charset_font_caches (charset);
2136 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2141 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2142 Return mapping-table of CHARSET.
2146 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2149 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2150 Set mapping-table of CHARSET to TABLE.
2154 struct Lisp_Charset *cs;
2155 Lisp_Object old_table;
2158 charset = Fget_charset (charset);
2159 cs = XCHARSET (charset);
2161 if (EQ (table, Qnil))
2163 CHARSET_DECODING_TABLE(cs) = table;
2166 else if (VECTORP (table))
2170 /* ad-hoc method for `ascii' */
2171 if ((CHARSET_CHARS (cs) == 94) &&
2172 (CHARSET_BYTE_OFFSET (cs) != 33))
2173 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2175 ccs_len = CHARSET_CHARS (cs);
2177 if (XVECTOR_LENGTH (table) > ccs_len)
2178 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2179 old_table = CHARSET_DECODING_TABLE(cs);
2180 CHARSET_DECODING_TABLE(cs) = table;
2183 signal_error (Qwrong_type_argument,
2184 list2 (build_translated_string ("vector-or-nil-p"),
2186 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2188 switch (CHARSET_DIMENSION (cs))
2191 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2193 Lisp_Object c = XVECTOR_DATA(table)[i];
2198 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2202 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2204 Lisp_Object v = XVECTOR_DATA(table)[i];
2210 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2212 CHARSET_DECODING_TABLE(cs) = old_table;
2213 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2215 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2217 Lisp_Object c = XVECTOR_DATA(v)[j];
2220 put_char_attribute (c, charset,
2223 (i + CHARSET_BYTE_OFFSET (cs)),
2225 (j + CHARSET_BYTE_OFFSET (cs))));
2229 put_char_attribute (v, charset,
2231 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2240 /************************************************************************/
2241 /* Lisp primitives for working with characters */
2242 /************************************************************************/
2244 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2245 Make a character from CHARSET and octets ARG1 and ARG2.
2246 ARG2 is required only for characters from two-dimensional charsets.
2247 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2248 character s with caron.
2250 (charset, arg1, arg2))
2252 struct Lisp_Charset *cs;
2254 int lowlim, highlim;
2256 charset = Fget_charset (charset);
2257 cs = XCHARSET (charset);
2259 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2260 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2262 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2264 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2265 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2268 /* It is useful (and safe, according to Olivier Galibert) to strip
2269 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2270 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2271 Latin 2 code of the character. */
2279 if (a1 < lowlim || a1 > highlim)
2280 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2282 if (CHARSET_DIMENSION (cs) == 1)
2286 ("Charset is of dimension one; second octet must be nil", arg2);
2287 return make_char (MAKE_CHAR (charset, a1, 0));
2296 a2 = XINT (arg2) & 0x7f;
2298 if (a2 < lowlim || a2 > highlim)
2299 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2301 return make_char (MAKE_CHAR (charset, a1, a2));
2304 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2305 Return the character set of char CH.
2309 CHECK_CHAR_COERCE_INT (ch);
2311 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2314 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2315 Return list of charset and one or two position-codes of CHAR.
2319 /* This function can GC */
2320 struct gcpro gcpro1, gcpro2;
2321 Lisp_Object charset = Qnil;
2322 Lisp_Object rc = Qnil;
2325 GCPRO2 (charset, rc);
2326 CHECK_CHAR_COERCE_INT (character);
2328 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2330 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2332 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2336 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2344 #ifdef ENABLE_COMPOSITE_CHARS
2345 /************************************************************************/
2346 /* composite character functions */
2347 /************************************************************************/
2350 lookup_composite_char (Bufbyte *str, int len)
2352 Lisp_Object lispstr = make_string (str, len);
2353 Lisp_Object ch = Fgethash (lispstr,
2354 Vcomposite_char_string2char_hash_table,
2360 if (composite_char_row_next >= 128)
2361 signal_simple_error ("No more composite chars available", lispstr);
2362 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2363 composite_char_col_next);
2364 Fputhash (make_char (emch), lispstr,
2365 Vcomposite_char_char2string_hash_table);
2366 Fputhash (lispstr, make_char (emch),
2367 Vcomposite_char_string2char_hash_table);
2368 composite_char_col_next++;
2369 if (composite_char_col_next >= 128)
2371 composite_char_col_next = 32;
2372 composite_char_row_next++;
2381 composite_char_string (Emchar ch)
2383 Lisp_Object str = Fgethash (make_char (ch),
2384 Vcomposite_char_char2string_hash_table,
2386 assert (!UNBOUNDP (str));
2390 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2391 Convert a string into a single composite character.
2392 The character is the result of overstriking all the characters in
2397 CHECK_STRING (string);
2398 return make_char (lookup_composite_char (XSTRING_DATA (string),
2399 XSTRING_LENGTH (string)));
2402 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2403 Return a string of the characters comprising a composite character.
2411 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2412 signal_simple_error ("Must be composite char", ch);
2413 return composite_char_string (emch);
2415 #endif /* ENABLE_COMPOSITE_CHARS */
2418 /************************************************************************/
2419 /* initialization */
2420 /************************************************************************/
2423 syms_of_mule_charset (void)
2425 DEFSUBR (Fcharsetp);
2426 DEFSUBR (Ffind_charset);
2427 DEFSUBR (Fget_charset);
2428 DEFSUBR (Fcharset_list);
2429 DEFSUBR (Fcharset_name);
2430 DEFSUBR (Fmake_charset);
2431 DEFSUBR (Fmake_reverse_direction_charset);
2432 /* DEFSUBR (Freverse_direction_charset); */
2433 DEFSUBR (Fdefine_charset_alias);
2434 DEFSUBR (Fcharset_from_attributes);
2435 DEFSUBR (Fcharset_short_name);
2436 DEFSUBR (Fcharset_long_name);
2437 DEFSUBR (Fcharset_description);
2438 DEFSUBR (Fcharset_dimension);
2439 DEFSUBR (Fcharset_property);
2440 DEFSUBR (Fcharset_id);
2441 DEFSUBR (Fset_charset_ccl_program);
2442 DEFSUBR (Fset_charset_registry);
2444 DEFSUBR (Fchar_attribute_alist);
2445 DEFSUBR (Fget_char_attribute);
2446 DEFSUBR (Fput_char_attribute);
2447 DEFSUBR (Fdefine_char);
2448 DEFSUBR (Fchar_variants);
2449 DEFSUBR (Fget_composite_char);
2450 DEFSUBR (Fcharset_mapping_table);
2451 DEFSUBR (Fset_charset_mapping_table);
2454 DEFSUBR (Fmake_char);
2455 DEFSUBR (Fchar_charset);
2456 DEFSUBR (Fsplit_char);
2458 #ifdef ENABLE_COMPOSITE_CHARS
2459 DEFSUBR (Fmake_composite_char);
2460 DEFSUBR (Fcomposite_char_string);
2463 defsymbol (&Qcharsetp, "charsetp");
2464 defsymbol (&Qregistry, "registry");
2465 defsymbol (&Qfinal, "final");
2466 defsymbol (&Qgraphic, "graphic");
2467 defsymbol (&Qdirection, "direction");
2468 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2469 defsymbol (&Qshort_name, "short-name");
2470 defsymbol (&Qlong_name, "long-name");
2472 defsymbol (&Ql2r, "l2r");
2473 defsymbol (&Qr2l, "r2l");
2475 /* Charsets, compatible with FSF 20.3
2476 Naming convention is Script-Charset[-Edition] */
2477 defsymbol (&Qascii, "ascii");
2478 defsymbol (&Qcontrol_1, "control-1");
2479 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2480 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2481 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2482 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2483 defsymbol (&Qthai_tis620, "thai-tis620");
2484 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2485 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2486 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2487 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2488 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2489 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2490 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2491 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2492 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2493 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2494 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2495 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2496 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2497 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2499 defsymbol (&Q_ucs, "->ucs");
2500 defsymbol (&Q_decomposition, "->decomposition");
2501 defsymbol (&Qwide, "wide");
2502 defsymbol (&Qnarrow, "narrow");
2503 defsymbol (&Qcompat, "compat");
2504 defsymbol (&QnoBreak, "noBreak");
2505 defsymbol (&Qsuper, "super");
2506 defsymbol (&Qfraction, "fraction");
2507 defsymbol (&Qucs, "ucs");
2508 defsymbol (&Qucs_bmp, "ucs-bmp");
2509 defsymbol (&Qlatin_viscii, "latin-viscii");
2510 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2511 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2512 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2513 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2514 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2515 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2516 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2518 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2519 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2521 defsymbol (&Qcomposite, "composite");
2525 vars_of_mule_charset (void)
2532 /* Table of charsets indexed by leading byte. */
2533 for (i = 0; i < countof (charset_by_leading_byte); i++)
2534 charset_by_leading_byte[i] = Qnil;
2537 /* Table of charsets indexed by type/final-byte. */
2538 for (i = 0; i < countof (charset_by_attributes); i++)
2539 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2540 charset_by_attributes[i][j] = Qnil;
2542 /* Table of charsets indexed by type/final-byte/direction. */
2543 for (i = 0; i < countof (charset_by_attributes); i++)
2544 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2545 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2546 charset_by_attributes[i][j][k] = Qnil;
2550 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2552 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2553 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2557 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2558 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2559 Leading-code of private TYPE9N charset of column-width 1.
2561 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2565 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2566 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2567 Version number of UTF-2000.
2570 staticpro (&Vcharacter_attribute_table);
2571 Vcharacter_attribute_table = make_char_code_table (Qnil);
2573 staticpro (&Vcharacter_composition_table);
2574 Vcharacter_composition_table = make_char_code_table (Qnil);
2576 staticpro (&Vcharacter_variant_table);
2577 Vcharacter_variant_table = make_char_code_table (Qnil);
2579 Vdefault_coded_charset_priority_list = Qnil;
2580 DEFVAR_LISP ("default-coded-charset-priority-list",
2581 &Vdefault_coded_charset_priority_list /*
2582 Default order of preferred coded-character-sets.
2588 complex_vars_of_mule_charset (void)
2590 staticpro (&Vcharset_hash_table);
2591 Vcharset_hash_table =
2592 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2594 /* Predefined character sets. We store them into variables for
2599 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2600 CHARSET_TYPE_256X256, 1, 2, 0,
2601 CHARSET_LEFT_TO_RIGHT,
2602 build_string ("BMP"),
2603 build_string ("BMP"),
2604 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2605 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2606 Qnil, 0, 0xFFFF, 0, 0);
2608 # define MIN_CHAR_THAI 0
2609 # define MAX_CHAR_THAI 0
2610 # define MIN_CHAR_GREEK 0
2611 # define MAX_CHAR_GREEK 0
2612 # define MIN_CHAR_HEBREW 0
2613 # define MAX_CHAR_HEBREW 0
2614 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2615 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2616 # define MIN_CHAR_CYRILLIC 0
2617 # define MAX_CHAR_CYRILLIC 0
2620 make_charset (LEADING_BYTE_ASCII, Qascii,
2621 CHARSET_TYPE_94, 1, 0, 'B',
2622 CHARSET_LEFT_TO_RIGHT,
2623 build_string ("ASCII"),
2624 build_string ("ASCII)"),
2625 build_string ("ASCII (ISO646 IRV)"),
2626 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2627 Qnil, 0, 0x7F, 0, 0);
2628 Vcharset_control_1 =
2629 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2630 CHARSET_TYPE_94, 1, 1, 0,
2631 CHARSET_LEFT_TO_RIGHT,
2632 build_string ("C1"),
2633 build_string ("Control characters"),
2634 build_string ("Control characters 128-191"),
2636 Qnil, 0x80, 0x9F, 0, 0);
2637 Vcharset_latin_iso8859_1 =
2638 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2639 CHARSET_TYPE_96, 1, 1, 'A',
2640 CHARSET_LEFT_TO_RIGHT,
2641 build_string ("Latin-1"),
2642 build_string ("ISO8859-1 (Latin-1)"),
2643 build_string ("ISO8859-1 (Latin-1)"),
2644 build_string ("iso8859-1"),
2645 Qnil, 0xA0, 0xFF, 0, 32);
2646 Vcharset_latin_iso8859_2 =
2647 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2648 CHARSET_TYPE_96, 1, 1, 'B',
2649 CHARSET_LEFT_TO_RIGHT,
2650 build_string ("Latin-2"),
2651 build_string ("ISO8859-2 (Latin-2)"),
2652 build_string ("ISO8859-2 (Latin-2)"),
2653 build_string ("iso8859-2"),
2655 Vcharset_latin_iso8859_3 =
2656 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2657 CHARSET_TYPE_96, 1, 1, 'C',
2658 CHARSET_LEFT_TO_RIGHT,
2659 build_string ("Latin-3"),
2660 build_string ("ISO8859-3 (Latin-3)"),
2661 build_string ("ISO8859-3 (Latin-3)"),
2662 build_string ("iso8859-3"),
2664 Vcharset_latin_iso8859_4 =
2665 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2666 CHARSET_TYPE_96, 1, 1, 'D',
2667 CHARSET_LEFT_TO_RIGHT,
2668 build_string ("Latin-4"),
2669 build_string ("ISO8859-4 (Latin-4)"),
2670 build_string ("ISO8859-4 (Latin-4)"),
2671 build_string ("iso8859-4"),
2673 Vcharset_thai_tis620 =
2674 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2675 CHARSET_TYPE_96, 1, 1, 'T',
2676 CHARSET_LEFT_TO_RIGHT,
2677 build_string ("TIS620"),
2678 build_string ("TIS620 (Thai)"),
2679 build_string ("TIS620.2529 (Thai)"),
2680 build_string ("tis620"),
2681 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2682 Vcharset_greek_iso8859_7 =
2683 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2684 CHARSET_TYPE_96, 1, 1, 'F',
2685 CHARSET_LEFT_TO_RIGHT,
2686 build_string ("ISO8859-7"),
2687 build_string ("ISO8859-7 (Greek)"),
2688 build_string ("ISO8859-7 (Greek)"),
2689 build_string ("iso8859-7"),
2690 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2691 Vcharset_arabic_iso8859_6 =
2692 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2693 CHARSET_TYPE_96, 1, 1, 'G',
2694 CHARSET_RIGHT_TO_LEFT,
2695 build_string ("ISO8859-6"),
2696 build_string ("ISO8859-6 (Arabic)"),
2697 build_string ("ISO8859-6 (Arabic)"),
2698 build_string ("iso8859-6"),
2700 Vcharset_hebrew_iso8859_8 =
2701 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2702 CHARSET_TYPE_96, 1, 1, 'H',
2703 CHARSET_RIGHT_TO_LEFT,
2704 build_string ("ISO8859-8"),
2705 build_string ("ISO8859-8 (Hebrew)"),
2706 build_string ("ISO8859-8 (Hebrew)"),
2707 build_string ("iso8859-8"),
2708 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2709 Vcharset_katakana_jisx0201 =
2710 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2711 CHARSET_TYPE_94, 1, 1, 'I',
2712 CHARSET_LEFT_TO_RIGHT,
2713 build_string ("JISX0201 Kana"),
2714 build_string ("JISX0201.1976 (Japanese Kana)"),
2715 build_string ("JISX0201.1976 Japanese Kana"),
2716 build_string ("jisx0201\\.1976"),
2718 MIN_CHAR_HALFWIDTH_KATAKANA,
2719 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2720 Vcharset_latin_jisx0201 =
2721 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2722 CHARSET_TYPE_94, 1, 0, 'J',
2723 CHARSET_LEFT_TO_RIGHT,
2724 build_string ("JISX0201 Roman"),
2725 build_string ("JISX0201.1976 (Japanese Roman)"),
2726 build_string ("JISX0201.1976 Japanese Roman"),
2727 build_string ("jisx0201\\.1976"),
2729 Vcharset_cyrillic_iso8859_5 =
2730 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2731 CHARSET_TYPE_96, 1, 1, 'L',
2732 CHARSET_LEFT_TO_RIGHT,
2733 build_string ("ISO8859-5"),
2734 build_string ("ISO8859-5 (Cyrillic)"),
2735 build_string ("ISO8859-5 (Cyrillic)"),
2736 build_string ("iso8859-5"),
2737 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2738 Vcharset_latin_iso8859_9 =
2739 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2740 CHARSET_TYPE_96, 1, 1, 'M',
2741 CHARSET_LEFT_TO_RIGHT,
2742 build_string ("Latin-5"),
2743 build_string ("ISO8859-9 (Latin-5)"),
2744 build_string ("ISO8859-9 (Latin-5)"),
2745 build_string ("iso8859-9"),
2747 Vcharset_japanese_jisx0208_1978 =
2748 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2749 CHARSET_TYPE_94X94, 2, 0, '@',
2750 CHARSET_LEFT_TO_RIGHT,
2751 build_string ("JIS X0208:1978"),
2752 build_string ("JIS X0208:1978 (Japanese)"),
2754 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2755 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2757 Vcharset_chinese_gb2312 =
2758 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2759 CHARSET_TYPE_94X94, 2, 0, 'A',
2760 CHARSET_LEFT_TO_RIGHT,
2761 build_string ("GB2312"),
2762 build_string ("GB2312)"),
2763 build_string ("GB2312 Chinese simplified"),
2764 build_string ("gb2312"),
2766 Vcharset_japanese_jisx0208 =
2767 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2768 CHARSET_TYPE_94X94, 2, 0, 'B',
2769 CHARSET_LEFT_TO_RIGHT,
2770 build_string ("JISX0208"),
2771 build_string ("JIS X0208:1983 (Japanese)"),
2772 build_string ("JIS X0208:1983 Japanese Kanji"),
2773 build_string ("jisx0208\\.1983"),
2775 Vcharset_korean_ksc5601 =
2776 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2777 CHARSET_TYPE_94X94, 2, 0, 'C',
2778 CHARSET_LEFT_TO_RIGHT,
2779 build_string ("KSC5601"),
2780 build_string ("KSC5601 (Korean"),
2781 build_string ("KSC5601 Korean Hangul and Hanja"),
2782 build_string ("ksc5601"),
2784 Vcharset_japanese_jisx0212 =
2785 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2786 CHARSET_TYPE_94X94, 2, 0, 'D',
2787 CHARSET_LEFT_TO_RIGHT,
2788 build_string ("JISX0212"),
2789 build_string ("JISX0212 (Japanese)"),
2790 build_string ("JISX0212 Japanese Supplement"),
2791 build_string ("jisx0212"),
2794 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2795 Vcharset_chinese_cns11643_1 =
2796 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2797 CHARSET_TYPE_94X94, 2, 0, 'G',
2798 CHARSET_LEFT_TO_RIGHT,
2799 build_string ("CNS11643-1"),
2800 build_string ("CNS11643-1 (Chinese traditional)"),
2802 ("CNS 11643 Plane 1 Chinese traditional"),
2803 build_string (CHINESE_CNS_PLANE_RE("1")),
2805 Vcharset_chinese_cns11643_2 =
2806 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2807 CHARSET_TYPE_94X94, 2, 0, 'H',
2808 CHARSET_LEFT_TO_RIGHT,
2809 build_string ("CNS11643-2"),
2810 build_string ("CNS11643-2 (Chinese traditional)"),
2812 ("CNS 11643 Plane 2 Chinese traditional"),
2813 build_string (CHINESE_CNS_PLANE_RE("2")),
2816 Vcharset_latin_viscii_lower =
2817 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2818 CHARSET_TYPE_96, 1, 1, '1',
2819 CHARSET_LEFT_TO_RIGHT,
2820 build_string ("VISCII lower"),
2821 build_string ("VISCII lower (Vietnamese)"),
2822 build_string ("VISCII lower (Vietnamese)"),
2823 build_string ("MULEVISCII-LOWER"),
2825 Vcharset_latin_viscii_upper =
2826 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2827 CHARSET_TYPE_96, 1, 1, '2',
2828 CHARSET_LEFT_TO_RIGHT,
2829 build_string ("VISCII upper"),
2830 build_string ("VISCII upper (Vietnamese)"),
2831 build_string ("VISCII upper (Vietnamese)"),
2832 build_string ("MULEVISCII-UPPER"),
2834 Vcharset_latin_viscii =
2835 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2836 CHARSET_TYPE_256, 1, 2, 0,
2837 CHARSET_LEFT_TO_RIGHT,
2838 build_string ("VISCII"),
2839 build_string ("VISCII 1.1 (Vietnamese)"),
2840 build_string ("VISCII 1.1 (Vietnamese)"),
2841 build_string ("VISCII1\\.1"),
2843 Vcharset_ethiopic_ucs =
2844 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2845 CHARSET_TYPE_256X256, 2, 2, 0,
2846 CHARSET_LEFT_TO_RIGHT,
2847 build_string ("Ethiopic (UCS)"),
2848 build_string ("Ethiopic (UCS)"),
2849 build_string ("Ethiopic of UCS"),
2850 build_string ("Ethiopic-Unicode"),
2851 Qnil, 0x1200, 0x137F, 0x1200, 0);
2852 Vcharset_hiragana_jisx0208 =
2853 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2854 CHARSET_TYPE_94X94, 2, 0, 'B',
2855 CHARSET_LEFT_TO_RIGHT,
2856 build_string ("Hiragana"),
2857 build_string ("Hiragana of JIS X0208"),
2858 build_string ("Japanese Hiragana of JIS X0208"),
2859 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2860 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2861 (0x24 - 33) * 94 + (0x21 - 33), 33);
2862 Vcharset_katakana_jisx0208 =
2863 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2864 CHARSET_TYPE_94X94, 2, 0, 'B',
2865 CHARSET_LEFT_TO_RIGHT,
2866 build_string ("Katakana"),
2867 build_string ("Katakana of JIS X0208"),
2868 build_string ("Japanese Katakana of JIS X0208"),
2869 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2870 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2871 (0x25 - 33) * 94 + (0x21 - 33), 33);
2873 Vcharset_chinese_big5_1 =
2874 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2875 CHARSET_TYPE_94X94, 2, 0, '0',
2876 CHARSET_LEFT_TO_RIGHT,
2877 build_string ("Big5"),
2878 build_string ("Big5 (Level-1)"),
2880 ("Big5 Level-1 Chinese traditional"),
2881 build_string ("big5"),
2883 Vcharset_chinese_big5_2 =
2884 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2885 CHARSET_TYPE_94X94, 2, 0, '1',
2886 CHARSET_LEFT_TO_RIGHT,
2887 build_string ("Big5"),
2888 build_string ("Big5 (Level-2)"),
2890 ("Big5 Level-2 Chinese traditional"),
2891 build_string ("big5"),
2894 #ifdef ENABLE_COMPOSITE_CHARS
2895 /* #### For simplicity, we put composite chars into a 96x96 charset.
2896 This is going to lead to problems because you can run out of
2897 room, esp. as we don't yet recycle numbers. */
2898 Vcharset_composite =
2899 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2900 CHARSET_TYPE_96X96, 2, 0, 0,
2901 CHARSET_LEFT_TO_RIGHT,
2902 build_string ("Composite"),
2903 build_string ("Composite characters"),
2904 build_string ("Composite characters"),
2907 composite_char_row_next = 32;
2908 composite_char_col_next = 32;
2910 Vcomposite_char_string2char_hash_table =
2911 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2912 Vcomposite_char_char2string_hash_table =
2913 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2914 staticpro (&Vcomposite_char_string2char_hash_table);
2915 staticpro (&Vcomposite_char_char2string_hash_table);
2916 #endif /* ENABLE_COMPOSITE_CHARS */