1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
391 Lisp_Object QnoBreak;
393 Lisp_Object Qfraction;
396 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
402 else if (EQ (v, Qwide))
404 else if (EQ (v, Qnarrow))
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qsuper))
412 else if (EQ (v, Qfraction))
415 signal_simple_error (err_msg, err_arg);
418 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
419 Return character corresponding with list.
423 Lisp_Object table = Vcharacter_composition_table;
424 Lisp_Object rest = list;
428 Lisp_Object v = Fcar (rest);
430 Emchar c = to_char_code (v, "Invalid value for composition", list);
432 ret = get_char_code_table (c, table);
437 if (!CHAR_CODE_TABLE_P (ret))
442 else if (!CONSP (rest))
444 else if (CHAR_CODE_TABLE_P (ret))
447 signal_simple_error ("Invalid table is found with", list);
449 signal_simple_error ("Invalid value for composition", list);
452 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
453 Return variants of CHARACTER.
457 CHECK_CHAR (character);
458 return Fcopy_list (get_char_code_table (XCHAR (character),
459 Vcharacter_variant_table));
462 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
463 Return the alist of attributes of CHARACTER.
467 CHECK_CHAR (character);
468 return Fcopy_alist (get_char_code_table (XCHAR (character),
469 Vcharacter_attribute_table));
472 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
473 Return the value of CHARACTER's ATTRIBUTE.
475 (character, attribute))
478 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
484 if (!NILP (ccs = Ffind_charset (attribute)))
487 return Fcdr (Fassq (attribute, ret));
491 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
494 Emchar char_code = XCHAR (character);
496 = get_char_code_table (char_code, Vcharacter_attribute_table);
499 cell = Fassq (attribute, ret);
503 ret = Fcons (Fcons (attribute, value), ret);
505 else if (!EQ (Fcdr (cell), value))
507 Fsetcdr (cell, value);
509 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
513 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
514 Store CHARACTER's ATTRIBUTE with VALUE.
516 (character, attribute, value))
520 ccs = Ffind_charset (attribute);
524 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
529 /* ad-hoc method for `ascii' */
530 if ((XCHARSET_CHARS (ccs) == 94) &&
531 (XCHARSET_BYTE_OFFSET (ccs) != 33))
532 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
534 ccs_len = XCHARSET_CHARS (ccs);
537 signal_simple_error ("Invalid value for coded-charset",
541 rest = Fget_char_attribute (character, attribute);
548 Lisp_Object ei = Fcar (rest);
550 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
551 nv = XVECTOR_DATA(v)[i];
558 XVECTOR_DATA(v)[i] = Qnil;
559 v = XCHARSET_DECODING_TABLE (ccs);
564 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
567 if (XCHARSET_GRAPHIC (ccs) == 1)
568 value = Fcopy_list (value);
573 Lisp_Object ei = Fcar (rest);
576 signal_simple_error ("Invalid value for coded-charset", value);
578 if ((i < 0) || (255 < i))
579 signal_simple_error ("Invalid value for coded-charset", value);
580 if (XCHARSET_GRAPHIC (ccs) == 1)
583 Fsetcar (rest, make_int (i));
585 i -= XCHARSET_BYTE_OFFSET (ccs);
586 nv = XVECTOR_DATA(v)[i];
592 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
599 XVECTOR_DATA(v)[i] = character;
601 else if (EQ (attribute, Q_decomposition))
603 Lisp_Object rest = value;
604 Lisp_Object table = Vcharacter_composition_table;
607 signal_simple_error ("Invalid value for ->decomposition",
612 Lisp_Object v = Fcar (rest);
615 = to_char_code (v, "Invalid value for ->decomposition", value);
620 put_char_code_table (c, character, table);
625 ntable = get_char_code_table (c, table);
626 if (!CHAR_CODE_TABLE_P (ntable))
628 ntable = make_char_code_table (Qnil);
629 put_char_code_table (c, ntable, table);
635 else if (EQ (attribute, Q_ucs))
641 signal_simple_error ("Invalid value for ->ucs", value);
645 ret = get_char_code_table (c, Vcharacter_variant_table);
646 if (NILP (Fmemq (character, ret)))
648 put_char_code_table (c, Fcons (character, ret),
649 Vcharacter_variant_table);
652 return put_char_attribute (character, attribute, value);
657 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
658 Store character's ATTRIBUTES.
662 Lisp_Object rest = attributes;
663 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
664 Lisp_Object character;
670 Lisp_Object cell = Fcar (rest);
674 signal_simple_error ("Invalid argument", attributes);
675 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
676 && XCHARSET_FINAL (ccs))
680 if (XCHARSET_DIMENSION (ccs) == 1)
682 Lisp_Object eb1 = Fcar (Fcdr (cell));
686 signal_simple_error ("Invalid argument", attributes);
688 switch (XCHARSET_CHARS (ccs))
692 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
696 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
702 else if (XCHARSET_DIMENSION (ccs) == 2)
704 Lisp_Object eb1 = Fcar (Fcdr (cell));
705 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
709 signal_simple_error ("Invalid argument", attributes);
712 signal_simple_error ("Invalid argument", attributes);
714 switch (XCHARSET_CHARS (ccs))
717 code = MIN_CHAR_94x94
718 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
719 + (b1 - 33) * 94 + (b2 - 33);
722 code = MIN_CHAR_96x96
723 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
724 + (b1 - 32) * 96 + (b2 - 32);
735 character = make_char (code);
736 goto setup_attributes;
742 else if (!INTP (code))
743 signal_simple_error ("Invalid argument", attributes);
745 character = make_char (XINT (code));
751 Lisp_Object cell = Fcar (rest);
754 signal_simple_error ("Invalid argument", attributes);
755 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
759 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
762 Lisp_Object Vutf_2000_version;
766 int leading_code_private_11;
769 Lisp_Object Qcharsetp;
771 /* Qdoc_string, Qdimension, Qchars defined in general.c */
772 Lisp_Object Qregistry, Qfinal, Qgraphic;
773 Lisp_Object Qdirection;
774 Lisp_Object Qreverse_direction_charset;
775 Lisp_Object Qleading_byte;
776 Lisp_Object Qshort_name, Qlong_name;
792 Qjapanese_jisx0208_1978,
804 Qvietnamese_viscii_lower,
805 Qvietnamese_viscii_upper,
814 Lisp_Object Ql2r, Qr2l;
816 Lisp_Object Vcharset_hash_table;
819 static Charset_ID next_allocated_leading_byte;
821 static Charset_ID next_allocated_1_byte_leading_byte;
822 static Charset_ID next_allocated_2_byte_leading_byte;
825 /* Composite characters are characters constructed by overstriking two
826 or more regular characters.
828 1) The old Mule implementation involves storing composite characters
829 in a buffer as a tag followed by all of the actual characters
830 used to make up the composite character. I think this is a bad
831 idea; it greatly complicates code that wants to handle strings
832 one character at a time because it has to deal with the possibility
833 of great big ungainly characters. It's much more reasonable to
834 simply store an index into a table of composite characters.
836 2) The current implementation only allows for 16,384 separate
837 composite characters over the lifetime of the XEmacs process.
838 This could become a potential problem if the user
839 edited lots of different files that use composite characters.
840 Due to FSF bogosity, increasing the number of allowable
841 composite characters under Mule would decrease the number
842 of possible faces that can exist. Mule already has shrunk
843 this to 2048, and further shrinkage would become uncomfortable.
844 No such problems exist in XEmacs.
846 Composite characters could be represented as 0x80 C1 C2 C3,
847 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
848 for slightly under 2^20 (one million) composite characters
849 over the XEmacs process lifetime, and you only need to
850 increase the size of a Mule character from 19 to 21 bits.
851 Or you could use 0x80 C1 C2 C3 C4, allowing for about
852 85 million (slightly over 2^26) composite characters. */
855 /************************************************************************/
856 /* Basic Emchar functions */
857 /************************************************************************/
859 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
860 string in STR. Returns the number of bytes stored.
861 Do not call this directly. Use the macro set_charptr_emchar() instead.
865 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
880 else if ( c <= 0x7ff )
882 *p++ = (c >> 6) | 0xc0;
883 *p++ = (c & 0x3f) | 0x80;
885 else if ( c <= 0xffff )
887 *p++ = (c >> 12) | 0xe0;
888 *p++ = ((c >> 6) & 0x3f) | 0x80;
889 *p++ = (c & 0x3f) | 0x80;
891 else if ( c <= 0x1fffff )
893 *p++ = (c >> 18) | 0xf0;
894 *p++ = ((c >> 12) & 0x3f) | 0x80;
895 *p++ = ((c >> 6) & 0x3f) | 0x80;
896 *p++ = (c & 0x3f) | 0x80;
898 else if ( c <= 0x3ffffff )
900 *p++ = (c >> 24) | 0xf8;
901 *p++ = ((c >> 18) & 0x3f) | 0x80;
902 *p++ = ((c >> 12) & 0x3f) | 0x80;
903 *p++ = ((c >> 6) & 0x3f) | 0x80;
904 *p++ = (c & 0x3f) | 0x80;
908 *p++ = (c >> 30) | 0xfc;
909 *p++ = ((c >> 24) & 0x3f) | 0x80;
910 *p++ = ((c >> 18) & 0x3f) | 0x80;
911 *p++ = ((c >> 12) & 0x3f) | 0x80;
912 *p++ = ((c >> 6) & 0x3f) | 0x80;
913 *p++ = (c & 0x3f) | 0x80;
916 BREAKUP_CHAR (c, charset, c1, c2);
917 lb = CHAR_LEADING_BYTE (c);
918 if (LEADING_BYTE_PRIVATE_P (lb))
919 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
921 if (EQ (charset, Vcharset_control_1))
930 /* Return the first character from a Mule-encoded string in STR,
931 assuming it's non-ASCII. Do not call this directly.
932 Use the macro charptr_emchar() instead. */
935 non_ascii_charptr_emchar (CONST Bufbyte *str)
948 else if ( b >= 0xf8 )
953 else if ( b >= 0xf0 )
958 else if ( b >= 0xe0 )
963 else if ( b >= 0xc0 )
973 for( ; len > 0; len-- )
976 ch = ( ch << 6 ) | ( b & 0x3f );
980 Bufbyte i0 = *str, i1, i2 = 0;
983 if (i0 == LEADING_BYTE_CONTROL_1)
984 return (Emchar) (*++str - 0x20);
986 if (LEADING_BYTE_PREFIX_P (i0))
991 charset = CHARSET_BY_LEADING_BYTE (i0);
992 if (XCHARSET_DIMENSION (charset) == 2)
995 return MAKE_CHAR (charset, i1, i2);
999 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1000 Do not call this directly. Use the macro valid_char_p() instead. */
1004 non_ascii_valid_char_p (Emchar ch)
1008 /* Must have only lowest 19 bits set */
1012 f1 = CHAR_FIELD1 (ch);
1013 f2 = CHAR_FIELD2 (ch);
1014 f3 = CHAR_FIELD3 (ch);
1018 Lisp_Object charset;
1020 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1021 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1022 f2 > MAX_CHAR_FIELD2_PRIVATE)
1027 if (f3 != 0x20 && f3 != 0x7F)
1031 NOTE: This takes advantage of the fact that
1032 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1033 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1035 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1036 return (XCHARSET_CHARS (charset) == 96);
1040 Lisp_Object charset;
1042 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1043 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1044 f1 > MAX_CHAR_FIELD1_PRIVATE)
1046 if (f2 < 0x20 || f3 < 0x20)
1049 #ifdef ENABLE_COMPOSITE_CHARS
1050 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1052 if (UNBOUNDP (Fgethash (make_int (ch),
1053 Vcomposite_char_char2string_hash_table,
1058 #endif /* ENABLE_COMPOSITE_CHARS */
1060 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1063 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1065 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1068 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1070 return (XCHARSET_CHARS (charset) == 96);
1076 /************************************************************************/
1077 /* Basic string functions */
1078 /************************************************************************/
1080 /* Copy the character pointed to by PTR into STR, assuming it's
1081 non-ASCII. Do not call this directly. Use the macro
1082 charptr_copy_char() instead. */
1085 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1087 Bufbyte *strptr = str;
1089 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1091 /* Notice fallthrough. */
1093 case 6: *++strptr = *ptr++;
1094 case 5: *++strptr = *ptr++;
1096 case 4: *++strptr = *ptr++;
1097 case 3: *++strptr = *ptr++;
1098 case 2: *++strptr = *ptr;
1103 return strptr + 1 - str;
1107 /************************************************************************/
1108 /* streams of Emchars */
1109 /************************************************************************/
1111 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1112 The functions below are not meant to be called directly; use
1113 the macros in insdel.h. */
1116 Lstream_get_emchar_1 (Lstream *stream, int ch)
1118 Bufbyte str[MAX_EMCHAR_LEN];
1119 Bufbyte *strptr = str;
1121 str[0] = (Bufbyte) ch;
1122 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1124 /* Notice fallthrough. */
1127 ch = Lstream_getc (stream);
1129 *++strptr = (Bufbyte) ch;
1131 ch = Lstream_getc (stream);
1133 *++strptr = (Bufbyte) ch;
1136 ch = Lstream_getc (stream);
1138 *++strptr = (Bufbyte) ch;
1140 ch = Lstream_getc (stream);
1142 *++strptr = (Bufbyte) ch;
1144 ch = Lstream_getc (stream);
1146 *++strptr = (Bufbyte) ch;
1151 return charptr_emchar (str);
1155 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1157 Bufbyte str[MAX_EMCHAR_LEN];
1158 Bytecount len = set_charptr_emchar (str, ch);
1159 return Lstream_write (stream, str, len);
1163 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1165 Bufbyte str[MAX_EMCHAR_LEN];
1166 Bytecount len = set_charptr_emchar (str, ch);
1167 Lstream_unread (stream, str, len);
1171 /************************************************************************/
1172 /* charset object */
1173 /************************************************************************/
1176 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1178 struct Lisp_Charset *cs = XCHARSET (obj);
1180 markobj (cs->short_name);
1181 markobj (cs->long_name);
1182 markobj (cs->doc_string);
1183 markobj (cs->registry);
1184 markobj (cs->ccl_program);
1186 markobj (cs->decoding_table);
1192 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1194 struct Lisp_Charset *cs = XCHARSET (obj);
1198 error ("printing unreadable object #<charset %s 0x%x>",
1199 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1202 write_c_string ("#<charset ", printcharfun);
1203 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1204 write_c_string (" ", printcharfun);
1205 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1206 write_c_string (" ", printcharfun);
1207 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1208 write_c_string (" ", printcharfun);
1209 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1210 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1211 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1212 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1213 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1215 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1216 CHARSET_COLUMNS (cs),
1217 CHARSET_GRAPHIC (cs),
1218 CHARSET_FINAL (cs));
1219 write_c_string (buf, printcharfun);
1220 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1221 sprintf (buf, " 0x%x>", cs->header.uid);
1222 write_c_string (buf, printcharfun);
1225 static const struct lrecord_description charset_description[] = {
1226 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1228 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1233 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1234 mark_charset, print_charset, 0, 0, 0,
1235 charset_description,
1236 struct Lisp_Charset);
1238 /* Make a new charset. */
1241 make_charset (Charset_ID id, Lisp_Object name,
1242 unsigned char type, unsigned char columns, unsigned char graphic,
1243 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1244 Lisp_Object long_name, Lisp_Object doc,
1246 Lisp_Object decoding_table,
1247 Emchar ucs_min, Emchar ucs_max,
1248 Emchar code_offset, unsigned char byte_offset)
1251 struct Lisp_Charset *cs =
1252 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1253 XSETCHARSET (obj, cs);
1255 CHARSET_ID (cs) = id;
1256 CHARSET_NAME (cs) = name;
1257 CHARSET_SHORT_NAME (cs) = short_name;
1258 CHARSET_LONG_NAME (cs) = long_name;
1259 CHARSET_DIRECTION (cs) = direction;
1260 CHARSET_TYPE (cs) = type;
1261 CHARSET_COLUMNS (cs) = columns;
1262 CHARSET_GRAPHIC (cs) = graphic;
1263 CHARSET_FINAL (cs) = final;
1264 CHARSET_DOC_STRING (cs) = doc;
1265 CHARSET_REGISTRY (cs) = reg;
1266 CHARSET_CCL_PROGRAM (cs) = Qnil;
1267 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1269 CHARSET_DECODING_TABLE(cs) = Qnil;
1270 CHARSET_UCS_MIN(cs) = ucs_min;
1271 CHARSET_UCS_MAX(cs) = ucs_max;
1272 CHARSET_CODE_OFFSET(cs) = code_offset;
1273 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1276 switch (CHARSET_TYPE (cs))
1278 case CHARSET_TYPE_94:
1279 CHARSET_DIMENSION (cs) = 1;
1280 CHARSET_CHARS (cs) = 94;
1282 case CHARSET_TYPE_96:
1283 CHARSET_DIMENSION (cs) = 1;
1284 CHARSET_CHARS (cs) = 96;
1286 case CHARSET_TYPE_94X94:
1287 CHARSET_DIMENSION (cs) = 2;
1288 CHARSET_CHARS (cs) = 94;
1290 case CHARSET_TYPE_96X96:
1291 CHARSET_DIMENSION (cs) = 2;
1292 CHARSET_CHARS (cs) = 96;
1295 case CHARSET_TYPE_128:
1296 CHARSET_DIMENSION (cs) = 1;
1297 CHARSET_CHARS (cs) = 128;
1299 case CHARSET_TYPE_128X128:
1300 CHARSET_DIMENSION (cs) = 2;
1301 CHARSET_CHARS (cs) = 128;
1303 case CHARSET_TYPE_256:
1304 CHARSET_DIMENSION (cs) = 1;
1305 CHARSET_CHARS (cs) = 256;
1307 case CHARSET_TYPE_256X256:
1308 CHARSET_DIMENSION (cs) = 2;
1309 CHARSET_CHARS (cs) = 256;
1315 if (id == LEADING_BYTE_ASCII)
1316 CHARSET_REP_BYTES (cs) = 1;
1318 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1320 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1325 /* some charsets do not have final characters. This includes
1326 ASCII, Control-1, Composite, and the two faux private
1329 if (code_offset == 0)
1331 assert (NILP (charset_by_attributes[type][final]));
1332 charset_by_attributes[type][final] = obj;
1335 assert (NILP (charset_by_attributes[type][final][direction]));
1336 charset_by_attributes[type][final][direction] = obj;
1340 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1341 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1344 /* official leading byte */
1345 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1348 /* Some charsets are "faux" and don't have names or really exist at
1349 all except in the leading-byte table. */
1351 Fputhash (name, obj, Vcharset_hash_table);
1356 get_unallocated_leading_byte (int dimension)
1361 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1364 lb = next_allocated_leading_byte++;
1368 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1371 lb = next_allocated_1_byte_leading_byte++;
1375 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1378 lb = next_allocated_2_byte_leading_byte++;
1384 ("No more character sets free for this dimension",
1385 make_int (dimension));
1392 range_charset_code_point (Lisp_Object charset, Emchar ch)
1396 if ((XCHARSET_UCS_MIN (charset) <= ch)
1397 && (ch <= XCHARSET_UCS_MAX (charset)))
1399 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1401 if (XCHARSET_DIMENSION (charset) == 1)
1402 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1403 else if (XCHARSET_DIMENSION (charset) == 2)
1404 return list2 (make_int (d / XCHARSET_CHARS (charset)
1405 + XCHARSET_BYTE_OFFSET (charset)),
1406 make_int (d % XCHARSET_CHARS (charset)
1407 + XCHARSET_BYTE_OFFSET (charset)));
1408 else if (XCHARSET_DIMENSION (charset) == 3)
1409 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1410 * XCHARSET_CHARS (charset))
1411 + XCHARSET_BYTE_OFFSET (charset)),
1412 make_int (d / XCHARSET_CHARS (charset)
1413 % XCHARSET_CHARS (charset)
1414 + XCHARSET_BYTE_OFFSET (charset)),
1415 make_int (d % XCHARSET_CHARS (charset)
1416 + XCHARSET_BYTE_OFFSET (charset)));
1417 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1418 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1419 * XCHARSET_CHARS (charset)
1420 * XCHARSET_CHARS (charset))
1421 + XCHARSET_BYTE_OFFSET (charset)),
1422 make_int (d / (XCHARSET_CHARS (charset)
1423 * XCHARSET_CHARS (charset))
1424 % XCHARSET_CHARS (charset)
1425 + XCHARSET_BYTE_OFFSET (charset)),
1426 make_int (d / XCHARSET_CHARS (charset)
1427 % XCHARSET_CHARS (charset)
1428 + XCHARSET_BYTE_OFFSET (charset)),
1429 make_int (d % XCHARSET_CHARS (charset)
1430 + XCHARSET_BYTE_OFFSET (charset)));
1432 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1434 if (XCHARSET_DIMENSION (charset) == 1)
1436 if (XCHARSET_CHARS (charset) == 94)
1438 if (((d = ch - (MIN_CHAR_94
1439 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1441 return list1 (make_int (d + 33));
1443 else if (XCHARSET_CHARS (charset) == 96)
1445 if (((d = ch - (MIN_CHAR_96
1446 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1448 return list1 (make_int (d + 32));
1453 else if (XCHARSET_DIMENSION (charset) == 2)
1455 if (XCHARSET_CHARS (charset) == 94)
1457 if (((d = ch - (MIN_CHAR_94x94
1458 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1461 return list2 (make_int ((d / 94) + 33),
1462 make_int (d % 94 + 33));
1464 else if (XCHARSET_CHARS (charset) == 96)
1466 if (((d = ch - (MIN_CHAR_96x96
1467 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1470 return list2 (make_int ((d / 96) + 32),
1471 make_int (d % 96 + 32));
1479 split_builtin_char (Emchar c)
1481 if (c < MIN_CHAR_OBS_94x94)
1483 if (c <= MAX_CHAR_BASIC_LATIN)
1485 return list2 (Vcharset_ascii, make_int (c));
1489 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1493 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1495 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1497 return list2 (Vcharset_greek_iso8859_7,
1498 make_int (c - MIN_CHAR_GREEK + 0x20));
1500 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1502 return list2 (Vcharset_cyrillic_iso8859_5,
1503 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1505 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1507 return list2 (Vcharset_hebrew_iso8859_8,
1508 make_int (c - MIN_CHAR_HEBREW + 0x20));
1510 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1512 return list2 (Vcharset_thai_tis620,
1513 make_int (c - MIN_CHAR_THAI + 0x20));
1515 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1516 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1518 return list2 (Vcharset_katakana_jisx0201,
1519 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1523 return list3 (Vcharset_ucs_bmp,
1524 make_int (c >> 8), make_int (c & 0xff));
1527 else if (c <= MAX_CHAR_OBS_94x94)
1529 return list3 (CHARSET_BY_ATTRIBUTES
1530 (CHARSET_TYPE_94X94,
1531 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1532 CHARSET_LEFT_TO_RIGHT),
1533 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1534 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1536 else if (c <= MAX_CHAR_94)
1538 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1539 ((c - MIN_CHAR_94) / 94) + '0',
1540 CHARSET_LEFT_TO_RIGHT),
1541 make_int (((c - MIN_CHAR_94) % 94) + 33));
1543 else if (c <= MAX_CHAR_96)
1545 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1546 ((c - MIN_CHAR_96) / 96) + '0',
1547 CHARSET_LEFT_TO_RIGHT),
1548 make_int (((c - MIN_CHAR_96) % 96) + 32));
1550 else if (c <= MAX_CHAR_94x94)
1552 return list3 (CHARSET_BY_ATTRIBUTES
1553 (CHARSET_TYPE_94X94,
1554 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1555 CHARSET_LEFT_TO_RIGHT),
1556 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1557 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1559 else if (c <= MAX_CHAR_96x96)
1561 return list3 (CHARSET_BY_ATTRIBUTES
1562 (CHARSET_TYPE_96X96,
1563 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1564 CHARSET_LEFT_TO_RIGHT),
1565 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1566 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1575 charset_code_point (Lisp_Object charset, Emchar ch)
1577 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1579 if (!EQ (cdef, Qnil))
1581 Lisp_Object field = Fassq (charset, cdef);
1583 if (!EQ (field, Qnil))
1584 return Fcdr (field);
1586 return range_charset_code_point (charset, ch);
1589 Lisp_Object Vdefault_coded_charset_priority_list;
1593 /************************************************************************/
1594 /* Basic charset Lisp functions */
1595 /************************************************************************/
1597 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1598 Return non-nil if OBJECT is a charset.
1602 return CHARSETP (object) ? Qt : Qnil;
1605 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1606 Retrieve the charset of the given name.
1607 If CHARSET-OR-NAME is a charset object, it is simply returned.
1608 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1609 nil is returned. Otherwise the associated charset object is returned.
1613 if (CHARSETP (charset_or_name))
1614 return charset_or_name;
1616 CHECK_SYMBOL (charset_or_name);
1617 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1620 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1621 Retrieve the charset of the given name.
1622 Same as `find-charset' except an error is signalled if there is no such
1623 charset instead of returning nil.
1627 Lisp_Object charset = Ffind_charset (name);
1630 signal_simple_error ("No such charset", name);
1634 /* We store the charsets in hash tables with the names as the key and the
1635 actual charset object as the value. Occasionally we need to use them
1636 in a list format. These routines provide us with that. */
1637 struct charset_list_closure
1639 Lisp_Object *charset_list;
1643 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1644 void *charset_list_closure)
1646 /* This function can GC */
1647 struct charset_list_closure *chcl =
1648 (struct charset_list_closure*) charset_list_closure;
1649 Lisp_Object *charset_list = chcl->charset_list;
1651 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1655 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1656 Return a list of the names of all defined charsets.
1660 Lisp_Object charset_list = Qnil;
1661 struct gcpro gcpro1;
1662 struct charset_list_closure charset_list_closure;
1664 GCPRO1 (charset_list);
1665 charset_list_closure.charset_list = &charset_list;
1666 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1667 &charset_list_closure);
1670 return charset_list;
1673 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1674 Return the name of the given charset.
1678 return XCHARSET_NAME (Fget_charset (charset));
1681 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1682 Define a new character set.
1683 This function is for use with Mule support.
1684 NAME is a symbol, the name by which the character set is normally referred.
1685 DOC-STRING is a string describing the character set.
1686 PROPS is a property list, describing the specific nature of the
1687 character set. Recognized properties are:
1689 'short-name Short version of the charset name (ex: Latin-1)
1690 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1691 'registry A regular expression matching the font registry field for
1693 'dimension Number of octets used to index a character in this charset.
1694 Either 1 or 2. Defaults to 1.
1695 'columns Number of columns used to display a character in this charset.
1696 Only used in TTY mode. (Under X, the actual width of a
1697 character can be derived from the font used to display the
1698 characters.) If unspecified, defaults to the dimension
1699 (this is almost always the correct value).
1700 'chars Number of characters in each dimension (94 or 96).
1701 Defaults to 94. Note that if the dimension is 2, the
1702 character set thus described is 94x94 or 96x96.
1703 'final Final byte of ISO 2022 escape sequence. Must be
1704 supplied. Each combination of (DIMENSION, CHARS) defines a
1705 separate namespace for final bytes. Note that ISO
1706 2022 restricts the final byte to the range
1707 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1708 dimension == 2. Note also that final bytes in the range
1709 0x30 - 0x3F are reserved for user-defined (not official)
1711 'graphic 0 (use left half of font on output) or 1 (use right half
1712 of font on output). Defaults to 0. For example, for
1713 a font whose registry is ISO8859-1, the left half
1714 (octets 0x20 - 0x7F) is the `ascii' character set, while
1715 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1716 character set. With 'graphic set to 0, the octets
1717 will have their high bit cleared; with it set to 1,
1718 the octets will have their high bit set.
1719 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1721 'ccl-program A compiled CCL program used to convert a character in
1722 this charset into an index into the font. This is in
1723 addition to the 'graphic property. The CCL program
1724 is passed the octets of the character, with the high
1725 bit cleared and set depending upon whether the value
1726 of the 'graphic property is 0 or 1.
1728 (name, doc_string, props))
1730 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1731 int direction = CHARSET_LEFT_TO_RIGHT;
1733 Lisp_Object registry = Qnil;
1734 Lisp_Object charset;
1735 Lisp_Object rest, keyword, value;
1736 Lisp_Object ccl_program = Qnil;
1737 Lisp_Object short_name = Qnil, long_name = Qnil;
1738 int byte_offset = -1;
1740 CHECK_SYMBOL (name);
1741 if (!NILP (doc_string))
1742 CHECK_STRING (doc_string);
1744 charset = Ffind_charset (name);
1745 if (!NILP (charset))
1746 signal_simple_error ("Cannot redefine existing charset", name);
1748 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1750 if (EQ (keyword, Qshort_name))
1752 CHECK_STRING (value);
1756 if (EQ (keyword, Qlong_name))
1758 CHECK_STRING (value);
1762 else if (EQ (keyword, Qdimension))
1765 dimension = XINT (value);
1766 if (dimension < 1 || dimension > 2)
1767 signal_simple_error ("Invalid value for 'dimension", value);
1770 else if (EQ (keyword, Qchars))
1773 chars = XINT (value);
1774 if (chars != 94 && chars != 96)
1775 signal_simple_error ("Invalid value for 'chars", value);
1778 else if (EQ (keyword, Qcolumns))
1781 columns = XINT (value);
1782 if (columns != 1 && columns != 2)
1783 signal_simple_error ("Invalid value for 'columns", value);
1786 else if (EQ (keyword, Qgraphic))
1789 graphic = XINT (value);
1791 if (graphic < 0 || graphic > 2)
1793 if (graphic < 0 || graphic > 1)
1795 signal_simple_error ("Invalid value for 'graphic", value);
1798 else if (EQ (keyword, Qregistry))
1800 CHECK_STRING (value);
1804 else if (EQ (keyword, Qdirection))
1806 if (EQ (value, Ql2r))
1807 direction = CHARSET_LEFT_TO_RIGHT;
1808 else if (EQ (value, Qr2l))
1809 direction = CHARSET_RIGHT_TO_LEFT;
1811 signal_simple_error ("Invalid value for 'direction", value);
1814 else if (EQ (keyword, Qfinal))
1816 CHECK_CHAR_COERCE_INT (value);
1817 final = XCHAR (value);
1818 if (final < '0' || final > '~')
1819 signal_simple_error ("Invalid value for 'final", value);
1822 else if (EQ (keyword, Qccl_program))
1824 CHECK_VECTOR (value);
1825 ccl_program = value;
1829 signal_simple_error ("Unrecognized property", keyword);
1833 error ("'final must be specified");
1834 if (dimension == 2 && final > 0x5F)
1836 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1840 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1842 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1844 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1845 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1847 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1849 id = get_unallocated_leading_byte (dimension);
1851 if (NILP (doc_string))
1852 doc_string = build_string ("");
1854 if (NILP (registry))
1855 registry = build_string ("");
1857 if (NILP (short_name))
1858 XSETSTRING (short_name, XSYMBOL (name)->name);
1860 if (NILP (long_name))
1861 long_name = doc_string;
1864 columns = dimension;
1866 if (byte_offset < 0)
1870 else if (chars == 96)
1876 charset = make_charset (id, name, type, columns, graphic,
1877 final, direction, short_name, long_name,
1878 doc_string, registry,
1879 Qnil, 0, 0, 0, byte_offset);
1880 if (!NILP (ccl_program))
1881 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1885 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1887 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1888 NEW-NAME is the name of the new charset. Return the new charset.
1890 (charset, new_name))
1892 Lisp_Object new_charset = Qnil;
1893 int id, dimension, columns, graphic, final;
1894 int direction, type;
1895 Lisp_Object registry, doc_string, short_name, long_name;
1896 struct Lisp_Charset *cs;
1898 charset = Fget_charset (charset);
1899 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1900 signal_simple_error ("Charset already has reverse-direction charset",
1903 CHECK_SYMBOL (new_name);
1904 if (!NILP (Ffind_charset (new_name)))
1905 signal_simple_error ("Cannot redefine existing charset", new_name);
1907 cs = XCHARSET (charset);
1909 type = CHARSET_TYPE (cs);
1910 columns = CHARSET_COLUMNS (cs);
1911 dimension = CHARSET_DIMENSION (cs);
1912 id = get_unallocated_leading_byte (dimension);
1914 graphic = CHARSET_GRAPHIC (cs);
1915 final = CHARSET_FINAL (cs);
1916 direction = CHARSET_RIGHT_TO_LEFT;
1917 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1918 direction = CHARSET_LEFT_TO_RIGHT;
1919 doc_string = CHARSET_DOC_STRING (cs);
1920 short_name = CHARSET_SHORT_NAME (cs);
1921 long_name = CHARSET_LONG_NAME (cs);
1922 registry = CHARSET_REGISTRY (cs);
1924 new_charset = make_charset (id, new_name, type, columns,
1925 graphic, final, direction, short_name, long_name,
1926 doc_string, registry,
1928 CHARSET_DECODING_TABLE(cs),
1929 CHARSET_UCS_MIN(cs),
1930 CHARSET_UCS_MAX(cs),
1931 CHARSET_CODE_OFFSET(cs),
1932 CHARSET_BYTE_OFFSET(cs)
1938 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1939 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1944 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1945 Define symbol ALIAS as an alias for CHARSET.
1949 CHECK_SYMBOL (alias);
1950 charset = Fget_charset (charset);
1951 return Fputhash (alias, charset, Vcharset_hash_table);
1954 /* #### Reverse direction charsets not yet implemented. */
1956 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1958 Return the reverse-direction charset parallel to CHARSET, if any.
1959 This is the charset with the same properties (in particular, the same
1960 dimension, number of characters per dimension, and final byte) as
1961 CHARSET but whose characters are displayed in the opposite direction.
1965 charset = Fget_charset (charset);
1966 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1970 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1971 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1972 If DIRECTION is omitted, both directions will be checked (left-to-right
1973 will be returned if character sets exist for both directions).
1975 (dimension, chars, final, direction))
1977 int dm, ch, fi, di = -1;
1979 Lisp_Object obj = Qnil;
1981 CHECK_INT (dimension);
1982 dm = XINT (dimension);
1983 if (dm < 1 || dm > 2)
1984 signal_simple_error ("Invalid value for DIMENSION", dimension);
1988 if (ch != 94 && ch != 96)
1989 signal_simple_error ("Invalid value for CHARS", chars);
1991 CHECK_CHAR_COERCE_INT (final);
1993 if (fi < '0' || fi > '~')
1994 signal_simple_error ("Invalid value for FINAL", final);
1996 if (EQ (direction, Ql2r))
1997 di = CHARSET_LEFT_TO_RIGHT;
1998 else if (EQ (direction, Qr2l))
1999 di = CHARSET_RIGHT_TO_LEFT;
2000 else if (!NILP (direction))
2001 signal_simple_error ("Invalid value for DIRECTION", direction);
2003 if (dm == 2 && fi > 0x5F)
2005 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2008 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2010 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2014 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2016 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2019 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2022 return XCHARSET_NAME (obj);
2026 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2027 Return short name of CHARSET.
2031 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2034 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2035 Return long name of CHARSET.
2039 return XCHARSET_LONG_NAME (Fget_charset (charset));
2042 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2043 Return description of CHARSET.
2047 return XCHARSET_DOC_STRING (Fget_charset (charset));
2050 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2051 Return dimension of CHARSET.
2055 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2058 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2059 Return property PROP of CHARSET.
2060 Recognized properties are those listed in `make-charset', as well as
2061 'name and 'doc-string.
2065 struct Lisp_Charset *cs;
2067 charset = Fget_charset (charset);
2068 cs = XCHARSET (charset);
2070 CHECK_SYMBOL (prop);
2071 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2072 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2073 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2074 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2075 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2076 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2077 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2078 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2079 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2080 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2081 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2082 if (EQ (prop, Qdirection))
2083 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2084 if (EQ (prop, Qreverse_direction_charset))
2086 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2090 return XCHARSET_NAME (obj);
2092 signal_simple_error ("Unrecognized charset property name", prop);
2093 return Qnil; /* not reached */
2096 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2097 Return charset identification number of CHARSET.
2101 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2104 /* #### We need to figure out which properties we really want to
2107 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2108 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2110 (charset, ccl_program))
2112 charset = Fget_charset (charset);
2113 CHECK_VECTOR (ccl_program);
2114 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2119 invalidate_charset_font_caches (Lisp_Object charset)
2121 /* Invalidate font cache entries for charset on all devices. */
2122 Lisp_Object devcons, concons, hash_table;
2123 DEVICE_LOOP_NO_BREAK (devcons, concons)
2125 struct device *d = XDEVICE (XCAR (devcons));
2126 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2127 if (!UNBOUNDP (hash_table))
2128 Fclrhash (hash_table);
2132 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2133 Set the 'registry property of CHARSET to REGISTRY.
2135 (charset, registry))
2137 charset = Fget_charset (charset);
2138 CHECK_STRING (registry);
2139 XCHARSET_REGISTRY (charset) = registry;
2140 invalidate_charset_font_caches (charset);
2141 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2146 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2147 Return mapping-table of CHARSET.
2151 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2154 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2155 Set mapping-table of CHARSET to TABLE.
2159 struct Lisp_Charset *cs;
2160 Lisp_Object old_table;
2163 charset = Fget_charset (charset);
2164 cs = XCHARSET (charset);
2166 if (EQ (table, Qnil))
2168 CHARSET_DECODING_TABLE(cs) = table;
2171 else if (VECTORP (table))
2175 /* ad-hoc method for `ascii' */
2176 if ((CHARSET_CHARS (cs) == 94) &&
2177 (CHARSET_BYTE_OFFSET (cs) != 33))
2178 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2180 ccs_len = CHARSET_CHARS (cs);
2182 if (XVECTOR_LENGTH (table) > ccs_len)
2183 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2184 old_table = CHARSET_DECODING_TABLE(cs);
2185 CHARSET_DECODING_TABLE(cs) = table;
2188 signal_error (Qwrong_type_argument,
2189 list2 (build_translated_string ("vector-or-nil-p"),
2191 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2193 switch (CHARSET_DIMENSION (cs))
2196 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2198 Lisp_Object c = XVECTOR_DATA(table)[i];
2203 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2207 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2209 Lisp_Object v = XVECTOR_DATA(table)[i];
2215 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2217 CHARSET_DECODING_TABLE(cs) = old_table;
2218 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2220 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2222 Lisp_Object c = XVECTOR_DATA(v)[j];
2225 put_char_attribute (c, charset,
2228 (i + CHARSET_BYTE_OFFSET (cs)),
2230 (j + CHARSET_BYTE_OFFSET (cs))));
2234 put_char_attribute (v, charset,
2236 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2245 /************************************************************************/
2246 /* Lisp primitives for working with characters */
2247 /************************************************************************/
2249 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2250 Make a character from CHARSET and octets ARG1 and ARG2.
2251 ARG2 is required only for characters from two-dimensional charsets.
2252 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2253 character s with caron.
2255 (charset, arg1, arg2))
2257 struct Lisp_Charset *cs;
2259 int lowlim, highlim;
2261 charset = Fget_charset (charset);
2262 cs = XCHARSET (charset);
2264 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2265 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2267 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2269 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2270 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2273 /* It is useful (and safe, according to Olivier Galibert) to strip
2274 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2275 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2276 Latin 2 code of the character. */
2284 if (a1 < lowlim || a1 > highlim)
2285 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2287 if (CHARSET_DIMENSION (cs) == 1)
2291 ("Charset is of dimension one; second octet must be nil", arg2);
2292 return make_char (MAKE_CHAR (charset, a1, 0));
2301 a2 = XINT (arg2) & 0x7f;
2303 if (a2 < lowlim || a2 > highlim)
2304 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2306 return make_char (MAKE_CHAR (charset, a1, a2));
2309 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2310 Return the character set of char CH.
2314 CHECK_CHAR_COERCE_INT (ch);
2316 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2319 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2320 Return list of charset and one or two position-codes of CHAR.
2324 /* This function can GC */
2325 struct gcpro gcpro1, gcpro2;
2326 Lisp_Object charset = Qnil;
2327 Lisp_Object rc = Qnil;
2330 GCPRO2 (charset, rc);
2331 CHECK_CHAR_COERCE_INT (character);
2333 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2335 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2337 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2341 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2349 #ifdef ENABLE_COMPOSITE_CHARS
2350 /************************************************************************/
2351 /* composite character functions */
2352 /************************************************************************/
2355 lookup_composite_char (Bufbyte *str, int len)
2357 Lisp_Object lispstr = make_string (str, len);
2358 Lisp_Object ch = Fgethash (lispstr,
2359 Vcomposite_char_string2char_hash_table,
2365 if (composite_char_row_next >= 128)
2366 signal_simple_error ("No more composite chars available", lispstr);
2367 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2368 composite_char_col_next);
2369 Fputhash (make_char (emch), lispstr,
2370 Vcomposite_char_char2string_hash_table);
2371 Fputhash (lispstr, make_char (emch),
2372 Vcomposite_char_string2char_hash_table);
2373 composite_char_col_next++;
2374 if (composite_char_col_next >= 128)
2376 composite_char_col_next = 32;
2377 composite_char_row_next++;
2386 composite_char_string (Emchar ch)
2388 Lisp_Object str = Fgethash (make_char (ch),
2389 Vcomposite_char_char2string_hash_table,
2391 assert (!UNBOUNDP (str));
2395 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2396 Convert a string into a single composite character.
2397 The character is the result of overstriking all the characters in
2402 CHECK_STRING (string);
2403 return make_char (lookup_composite_char (XSTRING_DATA (string),
2404 XSTRING_LENGTH (string)));
2407 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2408 Return a string of the characters comprising a composite character.
2416 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2417 signal_simple_error ("Must be composite char", ch);
2418 return composite_char_string (emch);
2420 #endif /* ENABLE_COMPOSITE_CHARS */
2423 /************************************************************************/
2424 /* initialization */
2425 /************************************************************************/
2428 syms_of_mule_charset (void)
2430 DEFSUBR (Fcharsetp);
2431 DEFSUBR (Ffind_charset);
2432 DEFSUBR (Fget_charset);
2433 DEFSUBR (Fcharset_list);
2434 DEFSUBR (Fcharset_name);
2435 DEFSUBR (Fmake_charset);
2436 DEFSUBR (Fmake_reverse_direction_charset);
2437 /* DEFSUBR (Freverse_direction_charset); */
2438 DEFSUBR (Fdefine_charset_alias);
2439 DEFSUBR (Fcharset_from_attributes);
2440 DEFSUBR (Fcharset_short_name);
2441 DEFSUBR (Fcharset_long_name);
2442 DEFSUBR (Fcharset_description);
2443 DEFSUBR (Fcharset_dimension);
2444 DEFSUBR (Fcharset_property);
2445 DEFSUBR (Fcharset_id);
2446 DEFSUBR (Fset_charset_ccl_program);
2447 DEFSUBR (Fset_charset_registry);
2449 DEFSUBR (Fchar_attribute_alist);
2450 DEFSUBR (Fget_char_attribute);
2451 DEFSUBR (Fput_char_attribute);
2452 DEFSUBR (Fdefine_char);
2453 DEFSUBR (Fchar_variants);
2454 DEFSUBR (Fget_composite_char);
2455 DEFSUBR (Fcharset_mapping_table);
2456 DEFSUBR (Fset_charset_mapping_table);
2459 DEFSUBR (Fmake_char);
2460 DEFSUBR (Fchar_charset);
2461 DEFSUBR (Fsplit_char);
2463 #ifdef ENABLE_COMPOSITE_CHARS
2464 DEFSUBR (Fmake_composite_char);
2465 DEFSUBR (Fcomposite_char_string);
2468 defsymbol (&Qcharsetp, "charsetp");
2469 defsymbol (&Qregistry, "registry");
2470 defsymbol (&Qfinal, "final");
2471 defsymbol (&Qgraphic, "graphic");
2472 defsymbol (&Qdirection, "direction");
2473 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2474 defsymbol (&Qshort_name, "short-name");
2475 defsymbol (&Qlong_name, "long-name");
2477 defsymbol (&Ql2r, "l2r");
2478 defsymbol (&Qr2l, "r2l");
2480 /* Charsets, compatible with FSF 20.3
2481 Naming convention is Script-Charset[-Edition] */
2482 defsymbol (&Qascii, "ascii");
2483 defsymbol (&Qcontrol_1, "control-1");
2484 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2485 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2486 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2487 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2488 defsymbol (&Qthai_tis620, "thai-tis620");
2489 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2490 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2491 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2492 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2493 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2494 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2495 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2496 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2497 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2498 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2499 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2500 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2501 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2502 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2504 defsymbol (&Q_ucs, "->ucs");
2505 defsymbol (&Q_decomposition, "->decomposition");
2506 defsymbol (&Qwide, "wide");
2507 defsymbol (&Qnarrow, "narrow");
2508 defsymbol (&Qcompat, "compat");
2509 defsymbol (&QnoBreak, "noBreak");
2510 defsymbol (&Qsuper, "super");
2511 defsymbol (&Qfraction, "fraction");
2512 defsymbol (&Qucs, "ucs");
2513 defsymbol (&Qucs_bmp, "ucs-bmp");
2514 defsymbol (&Qlatin_viscii, "latin-viscii");
2515 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2516 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2517 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2518 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2519 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2520 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2521 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2523 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2524 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2526 defsymbol (&Qcomposite, "composite");
2530 vars_of_mule_charset (void)
2537 /* Table of charsets indexed by leading byte. */
2538 for (i = 0; i < countof (charset_by_leading_byte); i++)
2539 charset_by_leading_byte[i] = Qnil;
2542 /* Table of charsets indexed by type/final-byte. */
2543 for (i = 0; i < countof (charset_by_attributes); i++)
2544 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2545 charset_by_attributes[i][j] = Qnil;
2547 /* Table of charsets indexed by type/final-byte/direction. */
2548 for (i = 0; i < countof (charset_by_attributes); i++)
2549 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2550 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2551 charset_by_attributes[i][j][k] = Qnil;
2555 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2557 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2558 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2562 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2563 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2564 Leading-code of private TYPE9N charset of column-width 1.
2566 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2570 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2571 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2572 Version number of UTF-2000.
2575 staticpro (&Vcharacter_attribute_table);
2576 Vcharacter_attribute_table = make_char_code_table (Qnil);
2578 staticpro (&Vcharacter_composition_table);
2579 Vcharacter_composition_table = make_char_code_table (Qnil);
2581 staticpro (&Vcharacter_variant_table);
2582 Vcharacter_variant_table = make_char_code_table (Qnil);
2584 Vdefault_coded_charset_priority_list = Qnil;
2585 DEFVAR_LISP ("default-coded-charset-priority-list",
2586 &Vdefault_coded_charset_priority_list /*
2587 Default order of preferred coded-character-sets.
2593 complex_vars_of_mule_charset (void)
2595 staticpro (&Vcharset_hash_table);
2596 Vcharset_hash_table =
2597 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2599 /* Predefined character sets. We store them into variables for
2604 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2605 CHARSET_TYPE_256X256, 1, 2, 0,
2606 CHARSET_LEFT_TO_RIGHT,
2607 build_string ("BMP"),
2608 build_string ("BMP"),
2609 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2610 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2611 Qnil, 0, 0xFFFF, 0, 0);
2613 # define MIN_CHAR_THAI 0
2614 # define MAX_CHAR_THAI 0
2615 # define MIN_CHAR_GREEK 0
2616 # define MAX_CHAR_GREEK 0
2617 # define MIN_CHAR_HEBREW 0
2618 # define MAX_CHAR_HEBREW 0
2619 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2620 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2621 # define MIN_CHAR_CYRILLIC 0
2622 # define MAX_CHAR_CYRILLIC 0
2625 make_charset (LEADING_BYTE_ASCII, Qascii,
2626 CHARSET_TYPE_94, 1, 0, 'B',
2627 CHARSET_LEFT_TO_RIGHT,
2628 build_string ("ASCII"),
2629 build_string ("ASCII)"),
2630 build_string ("ASCII (ISO646 IRV)"),
2631 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2632 Qnil, 0, 0x7F, 0, 0);
2633 Vcharset_control_1 =
2634 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2635 CHARSET_TYPE_94, 1, 1, 0,
2636 CHARSET_LEFT_TO_RIGHT,
2637 build_string ("C1"),
2638 build_string ("Control characters"),
2639 build_string ("Control characters 128-191"),
2641 Qnil, 0x80, 0x9F, 0, 0);
2642 Vcharset_latin_iso8859_1 =
2643 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2644 CHARSET_TYPE_96, 1, 1, 'A',
2645 CHARSET_LEFT_TO_RIGHT,
2646 build_string ("Latin-1"),
2647 build_string ("ISO8859-1 (Latin-1)"),
2648 build_string ("ISO8859-1 (Latin-1)"),
2649 build_string ("iso8859-1"),
2650 Qnil, 0xA0, 0xFF, 0, 32);
2651 Vcharset_latin_iso8859_2 =
2652 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2653 CHARSET_TYPE_96, 1, 1, 'B',
2654 CHARSET_LEFT_TO_RIGHT,
2655 build_string ("Latin-2"),
2656 build_string ("ISO8859-2 (Latin-2)"),
2657 build_string ("ISO8859-2 (Latin-2)"),
2658 build_string ("iso8859-2"),
2660 Vcharset_latin_iso8859_3 =
2661 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2662 CHARSET_TYPE_96, 1, 1, 'C',
2663 CHARSET_LEFT_TO_RIGHT,
2664 build_string ("Latin-3"),
2665 build_string ("ISO8859-3 (Latin-3)"),
2666 build_string ("ISO8859-3 (Latin-3)"),
2667 build_string ("iso8859-3"),
2669 Vcharset_latin_iso8859_4 =
2670 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2671 CHARSET_TYPE_96, 1, 1, 'D',
2672 CHARSET_LEFT_TO_RIGHT,
2673 build_string ("Latin-4"),
2674 build_string ("ISO8859-4 (Latin-4)"),
2675 build_string ("ISO8859-4 (Latin-4)"),
2676 build_string ("iso8859-4"),
2678 Vcharset_thai_tis620 =
2679 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2680 CHARSET_TYPE_96, 1, 1, 'T',
2681 CHARSET_LEFT_TO_RIGHT,
2682 build_string ("TIS620"),
2683 build_string ("TIS620 (Thai)"),
2684 build_string ("TIS620.2529 (Thai)"),
2685 build_string ("tis620"),
2686 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2687 Vcharset_greek_iso8859_7 =
2688 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2689 CHARSET_TYPE_96, 1, 1, 'F',
2690 CHARSET_LEFT_TO_RIGHT,
2691 build_string ("ISO8859-7"),
2692 build_string ("ISO8859-7 (Greek)"),
2693 build_string ("ISO8859-7 (Greek)"),
2694 build_string ("iso8859-7"),
2695 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2696 Vcharset_arabic_iso8859_6 =
2697 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2698 CHARSET_TYPE_96, 1, 1, 'G',
2699 CHARSET_RIGHT_TO_LEFT,
2700 build_string ("ISO8859-6"),
2701 build_string ("ISO8859-6 (Arabic)"),
2702 build_string ("ISO8859-6 (Arabic)"),
2703 build_string ("iso8859-6"),
2705 Vcharset_hebrew_iso8859_8 =
2706 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2707 CHARSET_TYPE_96, 1, 1, 'H',
2708 CHARSET_RIGHT_TO_LEFT,
2709 build_string ("ISO8859-8"),
2710 build_string ("ISO8859-8 (Hebrew)"),
2711 build_string ("ISO8859-8 (Hebrew)"),
2712 build_string ("iso8859-8"),
2713 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2714 Vcharset_katakana_jisx0201 =
2715 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2716 CHARSET_TYPE_94, 1, 1, 'I',
2717 CHARSET_LEFT_TO_RIGHT,
2718 build_string ("JISX0201 Kana"),
2719 build_string ("JISX0201.1976 (Japanese Kana)"),
2720 build_string ("JISX0201.1976 Japanese Kana"),
2721 build_string ("jisx0201\\.1976"),
2723 MIN_CHAR_HALFWIDTH_KATAKANA,
2724 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2725 Vcharset_latin_jisx0201 =
2726 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2727 CHARSET_TYPE_94, 1, 0, 'J',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("JISX0201 Roman"),
2730 build_string ("JISX0201.1976 (Japanese Roman)"),
2731 build_string ("JISX0201.1976 Japanese Roman"),
2732 build_string ("jisx0201\\.1976"),
2734 Vcharset_cyrillic_iso8859_5 =
2735 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2736 CHARSET_TYPE_96, 1, 1, 'L',
2737 CHARSET_LEFT_TO_RIGHT,
2738 build_string ("ISO8859-5"),
2739 build_string ("ISO8859-5 (Cyrillic)"),
2740 build_string ("ISO8859-5 (Cyrillic)"),
2741 build_string ("iso8859-5"),
2742 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2743 Vcharset_latin_iso8859_9 =
2744 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2745 CHARSET_TYPE_96, 1, 1, 'M',
2746 CHARSET_LEFT_TO_RIGHT,
2747 build_string ("Latin-5"),
2748 build_string ("ISO8859-9 (Latin-5)"),
2749 build_string ("ISO8859-9 (Latin-5)"),
2750 build_string ("iso8859-9"),
2752 Vcharset_japanese_jisx0208_1978 =
2753 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2754 CHARSET_TYPE_94X94, 2, 0, '@',
2755 CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JIS X0208:1978"),
2757 build_string ("JIS X0208:1978 (Japanese)"),
2759 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2760 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2762 Vcharset_chinese_gb2312 =
2763 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2764 CHARSET_TYPE_94X94, 2, 0, 'A',
2765 CHARSET_LEFT_TO_RIGHT,
2766 build_string ("GB2312"),
2767 build_string ("GB2312)"),
2768 build_string ("GB2312 Chinese simplified"),
2769 build_string ("gb2312"),
2771 Vcharset_japanese_jisx0208 =
2772 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2773 CHARSET_TYPE_94X94, 2, 0, 'B',
2774 CHARSET_LEFT_TO_RIGHT,
2775 build_string ("JISX0208"),
2776 build_string ("JIS X0208:1983 (Japanese)"),
2777 build_string ("JIS X0208:1983 Japanese Kanji"),
2778 build_string ("jisx0208\\.1983"),
2780 Vcharset_korean_ksc5601 =
2781 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2782 CHARSET_TYPE_94X94, 2, 0, 'C',
2783 CHARSET_LEFT_TO_RIGHT,
2784 build_string ("KSC5601"),
2785 build_string ("KSC5601 (Korean"),
2786 build_string ("KSC5601 Korean Hangul and Hanja"),
2787 build_string ("ksc5601"),
2789 Vcharset_japanese_jisx0212 =
2790 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2791 CHARSET_TYPE_94X94, 2, 0, 'D',
2792 CHARSET_LEFT_TO_RIGHT,
2793 build_string ("JISX0212"),
2794 build_string ("JISX0212 (Japanese)"),
2795 build_string ("JISX0212 Japanese Supplement"),
2796 build_string ("jisx0212"),
2799 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2800 Vcharset_chinese_cns11643_1 =
2801 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2802 CHARSET_TYPE_94X94, 2, 0, 'G',
2803 CHARSET_LEFT_TO_RIGHT,
2804 build_string ("CNS11643-1"),
2805 build_string ("CNS11643-1 (Chinese traditional)"),
2807 ("CNS 11643 Plane 1 Chinese traditional"),
2808 build_string (CHINESE_CNS_PLANE_RE("1")),
2810 Vcharset_chinese_cns11643_2 =
2811 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2812 CHARSET_TYPE_94X94, 2, 0, 'H',
2813 CHARSET_LEFT_TO_RIGHT,
2814 build_string ("CNS11643-2"),
2815 build_string ("CNS11643-2 (Chinese traditional)"),
2817 ("CNS 11643 Plane 2 Chinese traditional"),
2818 build_string (CHINESE_CNS_PLANE_RE("2")),
2821 Vcharset_latin_viscii_lower =
2822 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2823 CHARSET_TYPE_96, 1, 1, '1',
2824 CHARSET_LEFT_TO_RIGHT,
2825 build_string ("VISCII lower"),
2826 build_string ("VISCII lower (Vietnamese)"),
2827 build_string ("VISCII lower (Vietnamese)"),
2828 build_string ("MULEVISCII-LOWER"),
2830 Vcharset_latin_viscii_upper =
2831 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2832 CHARSET_TYPE_96, 1, 1, '2',
2833 CHARSET_LEFT_TO_RIGHT,
2834 build_string ("VISCII upper"),
2835 build_string ("VISCII upper (Vietnamese)"),
2836 build_string ("VISCII upper (Vietnamese)"),
2837 build_string ("MULEVISCII-UPPER"),
2839 Vcharset_latin_viscii =
2840 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2841 CHARSET_TYPE_256, 1, 2, 0,
2842 CHARSET_LEFT_TO_RIGHT,
2843 build_string ("VISCII"),
2844 build_string ("VISCII 1.1 (Vietnamese)"),
2845 build_string ("VISCII 1.1 (Vietnamese)"),
2846 build_string ("VISCII1\\.1"),
2848 Vcharset_ethiopic_ucs =
2849 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2850 CHARSET_TYPE_256X256, 2, 2, 0,
2851 CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Ethiopic (UCS)"),
2853 build_string ("Ethiopic (UCS)"),
2854 build_string ("Ethiopic of UCS"),
2855 build_string ("Ethiopic-Unicode"),
2856 Qnil, 0x1200, 0x137F, 0x1200, 0);
2857 Vcharset_hiragana_jisx0208 =
2858 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2859 CHARSET_TYPE_94X94, 2, 0, 'B',
2860 CHARSET_LEFT_TO_RIGHT,
2861 build_string ("Hiragana"),
2862 build_string ("Hiragana of JIS X0208"),
2863 build_string ("Japanese Hiragana of JIS X0208"),
2864 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2865 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2866 (0x24 - 33) * 94 + (0x21 - 33), 33);
2867 Vcharset_katakana_jisx0208 =
2868 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2869 CHARSET_TYPE_94X94, 2, 0, 'B',
2870 CHARSET_LEFT_TO_RIGHT,
2871 build_string ("Katakana"),
2872 build_string ("Katakana of JIS X0208"),
2873 build_string ("Japanese Katakana of JIS X0208"),
2874 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2875 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2876 (0x25 - 33) * 94 + (0x21 - 33), 33);
2878 Vcharset_chinese_big5_1 =
2879 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2880 CHARSET_TYPE_94X94, 2, 0, '0',
2881 CHARSET_LEFT_TO_RIGHT,
2882 build_string ("Big5"),
2883 build_string ("Big5 (Level-1)"),
2885 ("Big5 Level-1 Chinese traditional"),
2886 build_string ("big5"),
2888 Vcharset_chinese_big5_2 =
2889 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2890 CHARSET_TYPE_94X94, 2, 0, '1',
2891 CHARSET_LEFT_TO_RIGHT,
2892 build_string ("Big5"),
2893 build_string ("Big5 (Level-2)"),
2895 ("Big5 Level-2 Chinese traditional"),
2896 build_string ("big5"),
2899 #ifdef ENABLE_COMPOSITE_CHARS
2900 /* #### For simplicity, we put composite chars into a 96x96 charset.
2901 This is going to lead to problems because you can run out of
2902 room, esp. as we don't yet recycle numbers. */
2903 Vcharset_composite =
2904 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2905 CHARSET_TYPE_96X96, 2, 0, 0,
2906 CHARSET_LEFT_TO_RIGHT,
2907 build_string ("Composite"),
2908 build_string ("Composite characters"),
2909 build_string ("Composite characters"),
2912 composite_char_row_next = 32;
2913 composite_char_col_next = 32;
2915 Vcomposite_char_string2char_hash_table =
2916 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2917 Vcomposite_char_char2string_hash_table =
2918 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2919 staticpro (&Vcomposite_char_string2char_hash_table);
2920 staticpro (&Vcomposite_char_char2string_hash_table);
2921 #endif /* ENABLE_COMPOSITE_CHARS */