1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
391 Lisp_Object QnoBreak;
393 Lisp_Object Qfraction;
396 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
402 else if (EQ (v, Qwide))
404 else if (EQ (v, Qnarrow))
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qsuper))
412 else if (EQ (v, Qfraction))
415 signal_simple_error (err_msg, err_arg);
418 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
419 Return character corresponding with list.
423 Lisp_Object table = Vcharacter_composition_table;
424 Lisp_Object rest = list;
428 Lisp_Object v = Fcar (rest);
430 Emchar c = to_char_code (v, "Invalid value for composition", list);
432 ret = get_char_code_table (c, table);
437 if (!CHAR_CODE_TABLE_P (ret))
442 else if (!CONSP (rest))
444 else if (CHAR_CODE_TABLE_P (ret))
447 signal_simple_error ("Invalid table is found with", list);
449 signal_simple_error ("Invalid value for composition", list);
452 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
453 Return variants of CHARACTER.
457 CHECK_CHAR (character);
458 return Fcopy_list (get_char_code_table (XCHAR (character),
459 Vcharacter_variant_table));
462 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
463 Return the alist of attributes of CHARACTER.
467 CHECK_CHAR (character);
468 return Fcopy_alist (get_char_code_table (XCHAR (character),
469 Vcharacter_attribute_table));
472 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
473 Return the value of CHARACTER's ATTRIBUTE.
475 (character, attribute))
478 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
484 if (!NILP (ccs = Ffind_charset (attribute)))
487 return Fcdr (Fassq (attribute, ret));
491 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
494 Emchar char_code = XCHAR (character);
496 = get_char_code_table (char_code, Vcharacter_attribute_table);
499 cell = Fassq (attribute, ret);
503 ret = Fcons (Fcons (attribute, value), ret);
505 else if (!EQ (Fcdr (cell), value))
507 Fsetcdr (cell, value);
509 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
513 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
514 Store CHARACTER's ATTRIBUTE with VALUE.
516 (character, attribute, value))
520 ccs = Ffind_charset (attribute);
524 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
529 /* ad-hoc method for `ascii' */
530 if ((XCHARSET_CHARS (ccs) == 94) &&
531 (XCHARSET_BYTE_OFFSET (ccs) != 33))
532 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
534 ccs_len = XCHARSET_CHARS (ccs);
537 signal_simple_error ("Invalid value for coded-charset",
541 rest = Fget_char_attribute (character, attribute);
548 Lisp_Object ei = Fcar (rest);
550 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
551 nv = XVECTOR_DATA(v)[i];
558 XVECTOR_DATA(v)[i] = Qnil;
559 v = XCHARSET_DECODING_TABLE (ccs);
564 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
571 Lisp_Object ei = Fcar (rest);
574 signal_simple_error ("Invalid value for coded-charset",
576 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
577 nv = XVECTOR_DATA(v)[i];
583 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
590 XVECTOR_DATA(v)[i] = character;
592 else if (EQ (attribute, Q_decomposition))
594 Lisp_Object rest = value;
595 Lisp_Object table = Vcharacter_composition_table;
598 signal_simple_error ("Invalid value for ->decomposition",
603 Lisp_Object v = Fcar (rest);
606 = to_char_code (v, "Invalid value for ->decomposition", value);
611 put_char_code_table (c, character, table);
616 ntable = get_char_code_table (c, table);
617 if (!CHAR_CODE_TABLE_P (ntable))
619 ntable = make_char_code_table (Qnil);
620 put_char_code_table (c, ntable, table);
626 else if (EQ (attribute, Q_ucs))
632 signal_simple_error ("Invalid value for ->ucs", value);
636 ret = get_char_code_table (c, Vcharacter_variant_table);
637 if (NILP (Fmemq (character, ret)))
639 put_char_code_table (c, Fcons (character, ret),
640 Vcharacter_variant_table);
643 return put_char_attribute (character, attribute, value);
648 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
649 Store character's ATTRIBUTES.
653 Lisp_Object rest = attributes;
654 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
655 Lisp_Object character;
661 Lisp_Object cell = Fcar (rest);
665 signal_simple_error ("Invalid argument", attributes);
666 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
667 && XCHARSET_FINAL (ccs))
671 if (XCHARSET_DIMENSION (ccs) == 1)
673 Lisp_Object eb1 = Fcar (Fcdr (cell));
677 signal_simple_error ("Invalid argument", attributes);
679 switch (XCHARSET_CHARS (ccs))
683 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
687 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
693 else if (XCHARSET_DIMENSION (ccs) == 2)
695 Lisp_Object eb1 = Fcar (Fcdr (cell));
696 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
700 signal_simple_error ("Invalid argument", attributes);
703 signal_simple_error ("Invalid argument", attributes);
705 switch (XCHARSET_CHARS (ccs))
708 code = MIN_CHAR_94x94
709 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
710 + (b1 - 33) * 94 + (b2 - 33);
713 code = MIN_CHAR_96x96
714 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
715 + (b1 - 32) * 96 + (b2 - 32);
726 character = make_char (code);
727 goto setup_attributes;
733 else if (!INTP (code))
734 signal_simple_error ("Invalid argument", attributes);
736 character = make_char (XINT (code));
742 Lisp_Object cell = Fcar (rest);
745 signal_simple_error ("Invalid argument", attributes);
746 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
750 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
753 Lisp_Object Vutf_2000_version;
757 int leading_code_private_11;
760 Lisp_Object Qcharsetp;
762 /* Qdoc_string, Qdimension, Qchars defined in general.c */
763 Lisp_Object Qregistry, Qfinal, Qgraphic;
764 Lisp_Object Qdirection;
765 Lisp_Object Qreverse_direction_charset;
766 Lisp_Object Qleading_byte;
767 Lisp_Object Qshort_name, Qlong_name;
783 Qjapanese_jisx0208_1978,
795 Qvietnamese_viscii_lower,
796 Qvietnamese_viscii_upper,
805 Lisp_Object Ql2r, Qr2l;
807 Lisp_Object Vcharset_hash_table;
810 static Charset_ID next_allocated_leading_byte;
812 static Charset_ID next_allocated_1_byte_leading_byte;
813 static Charset_ID next_allocated_2_byte_leading_byte;
816 /* Composite characters are characters constructed by overstriking two
817 or more regular characters.
819 1) The old Mule implementation involves storing composite characters
820 in a buffer as a tag followed by all of the actual characters
821 used to make up the composite character. I think this is a bad
822 idea; it greatly complicates code that wants to handle strings
823 one character at a time because it has to deal with the possibility
824 of great big ungainly characters. It's much more reasonable to
825 simply store an index into a table of composite characters.
827 2) The current implementation only allows for 16,384 separate
828 composite characters over the lifetime of the XEmacs process.
829 This could become a potential problem if the user
830 edited lots of different files that use composite characters.
831 Due to FSF bogosity, increasing the number of allowable
832 composite characters under Mule would decrease the number
833 of possible faces that can exist. Mule already has shrunk
834 this to 2048, and further shrinkage would become uncomfortable.
835 No such problems exist in XEmacs.
837 Composite characters could be represented as 0x80 C1 C2 C3,
838 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
839 for slightly under 2^20 (one million) composite characters
840 over the XEmacs process lifetime, and you only need to
841 increase the size of a Mule character from 19 to 21 bits.
842 Or you could use 0x80 C1 C2 C3 C4, allowing for about
843 85 million (slightly over 2^26) composite characters. */
846 /************************************************************************/
847 /* Basic Emchar functions */
848 /************************************************************************/
850 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
851 string in STR. Returns the number of bytes stored.
852 Do not call this directly. Use the macro set_charptr_emchar() instead.
856 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
871 else if ( c <= 0x7ff )
873 *p++ = (c >> 6) | 0xc0;
874 *p++ = (c & 0x3f) | 0x80;
876 else if ( c <= 0xffff )
878 *p++ = (c >> 12) | 0xe0;
879 *p++ = ((c >> 6) & 0x3f) | 0x80;
880 *p++ = (c & 0x3f) | 0x80;
882 else if ( c <= 0x1fffff )
884 *p++ = (c >> 18) | 0xf0;
885 *p++ = ((c >> 12) & 0x3f) | 0x80;
886 *p++ = ((c >> 6) & 0x3f) | 0x80;
887 *p++ = (c & 0x3f) | 0x80;
889 else if ( c <= 0x3ffffff )
891 *p++ = (c >> 24) | 0xf8;
892 *p++ = ((c >> 18) & 0x3f) | 0x80;
893 *p++ = ((c >> 12) & 0x3f) | 0x80;
894 *p++ = ((c >> 6) & 0x3f) | 0x80;
895 *p++ = (c & 0x3f) | 0x80;
899 *p++ = (c >> 30) | 0xfc;
900 *p++ = ((c >> 24) & 0x3f) | 0x80;
901 *p++ = ((c >> 18) & 0x3f) | 0x80;
902 *p++ = ((c >> 12) & 0x3f) | 0x80;
903 *p++ = ((c >> 6) & 0x3f) | 0x80;
904 *p++ = (c & 0x3f) | 0x80;
907 BREAKUP_CHAR (c, charset, c1, c2);
908 lb = CHAR_LEADING_BYTE (c);
909 if (LEADING_BYTE_PRIVATE_P (lb))
910 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
912 if (EQ (charset, Vcharset_control_1))
921 /* Return the first character from a Mule-encoded string in STR,
922 assuming it's non-ASCII. Do not call this directly.
923 Use the macro charptr_emchar() instead. */
926 non_ascii_charptr_emchar (CONST Bufbyte *str)
939 else if ( b >= 0xf8 )
944 else if ( b >= 0xf0 )
949 else if ( b >= 0xe0 )
954 else if ( b >= 0xc0 )
964 for( ; len > 0; len-- )
967 ch = ( ch << 6 ) | ( b & 0x3f );
971 Bufbyte i0 = *str, i1, i2 = 0;
974 if (i0 == LEADING_BYTE_CONTROL_1)
975 return (Emchar) (*++str - 0x20);
977 if (LEADING_BYTE_PREFIX_P (i0))
982 charset = CHARSET_BY_LEADING_BYTE (i0);
983 if (XCHARSET_DIMENSION (charset) == 2)
986 return MAKE_CHAR (charset, i1, i2);
990 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
991 Do not call this directly. Use the macro valid_char_p() instead. */
995 non_ascii_valid_char_p (Emchar ch)
999 /* Must have only lowest 19 bits set */
1003 f1 = CHAR_FIELD1 (ch);
1004 f2 = CHAR_FIELD2 (ch);
1005 f3 = CHAR_FIELD3 (ch);
1009 Lisp_Object charset;
1011 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1012 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1013 f2 > MAX_CHAR_FIELD2_PRIVATE)
1018 if (f3 != 0x20 && f3 != 0x7F)
1022 NOTE: This takes advantage of the fact that
1023 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1024 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1026 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1027 return (XCHARSET_CHARS (charset) == 96);
1031 Lisp_Object charset;
1033 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1034 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1035 f1 > MAX_CHAR_FIELD1_PRIVATE)
1037 if (f2 < 0x20 || f3 < 0x20)
1040 #ifdef ENABLE_COMPOSITE_CHARS
1041 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1043 if (UNBOUNDP (Fgethash (make_int (ch),
1044 Vcomposite_char_char2string_hash_table,
1049 #endif /* ENABLE_COMPOSITE_CHARS */
1051 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1054 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1056 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1059 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1061 return (XCHARSET_CHARS (charset) == 96);
1067 /************************************************************************/
1068 /* Basic string functions */
1069 /************************************************************************/
1071 /* Copy the character pointed to by PTR into STR, assuming it's
1072 non-ASCII. Do not call this directly. Use the macro
1073 charptr_copy_char() instead. */
1076 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1078 Bufbyte *strptr = str;
1080 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1082 /* Notice fallthrough. */
1084 case 6: *++strptr = *ptr++;
1085 case 5: *++strptr = *ptr++;
1087 case 4: *++strptr = *ptr++;
1088 case 3: *++strptr = *ptr++;
1089 case 2: *++strptr = *ptr;
1094 return strptr + 1 - str;
1098 /************************************************************************/
1099 /* streams of Emchars */
1100 /************************************************************************/
1102 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1103 The functions below are not meant to be called directly; use
1104 the macros in insdel.h. */
1107 Lstream_get_emchar_1 (Lstream *stream, int ch)
1109 Bufbyte str[MAX_EMCHAR_LEN];
1110 Bufbyte *strptr = str;
1112 str[0] = (Bufbyte) ch;
1113 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1115 /* Notice fallthrough. */
1118 ch = Lstream_getc (stream);
1120 *++strptr = (Bufbyte) ch;
1122 ch = Lstream_getc (stream);
1124 *++strptr = (Bufbyte) ch;
1127 ch = Lstream_getc (stream);
1129 *++strptr = (Bufbyte) ch;
1131 ch = Lstream_getc (stream);
1133 *++strptr = (Bufbyte) ch;
1135 ch = Lstream_getc (stream);
1137 *++strptr = (Bufbyte) ch;
1142 return charptr_emchar (str);
1146 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1148 Bufbyte str[MAX_EMCHAR_LEN];
1149 Bytecount len = set_charptr_emchar (str, ch);
1150 return Lstream_write (stream, str, len);
1154 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1156 Bufbyte str[MAX_EMCHAR_LEN];
1157 Bytecount len = set_charptr_emchar (str, ch);
1158 Lstream_unread (stream, str, len);
1162 /************************************************************************/
1163 /* charset object */
1164 /************************************************************************/
1167 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1169 struct Lisp_Charset *cs = XCHARSET (obj);
1171 markobj (cs->short_name);
1172 markobj (cs->long_name);
1173 markobj (cs->doc_string);
1174 markobj (cs->registry);
1175 markobj (cs->ccl_program);
1177 markobj (cs->decoding_table);
1183 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1185 struct Lisp_Charset *cs = XCHARSET (obj);
1189 error ("printing unreadable object #<charset %s 0x%x>",
1190 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1193 write_c_string ("#<charset ", printcharfun);
1194 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1195 write_c_string (" ", printcharfun);
1196 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1197 write_c_string (" ", printcharfun);
1198 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1199 write_c_string (" ", printcharfun);
1200 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1201 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1202 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1203 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1204 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1206 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1207 CHARSET_COLUMNS (cs),
1208 CHARSET_GRAPHIC (cs),
1209 CHARSET_FINAL (cs));
1210 write_c_string (buf, printcharfun);
1211 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1212 sprintf (buf, " 0x%x>", cs->header.uid);
1213 write_c_string (buf, printcharfun);
1216 static const struct lrecord_description charset_description[] = {
1217 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1219 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1224 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1225 mark_charset, print_charset, 0, 0, 0,
1226 charset_description,
1227 struct Lisp_Charset);
1229 /* Make a new charset. */
1232 make_charset (Charset_ID id, Lisp_Object name,
1233 unsigned char type, unsigned char columns, unsigned char graphic,
1234 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1235 Lisp_Object long_name, Lisp_Object doc,
1237 Lisp_Object decoding_table,
1238 Emchar ucs_min, Emchar ucs_max,
1239 Emchar code_offset, unsigned char byte_offset)
1242 struct Lisp_Charset *cs =
1243 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1244 XSETCHARSET (obj, cs);
1246 CHARSET_ID (cs) = id;
1247 CHARSET_NAME (cs) = name;
1248 CHARSET_SHORT_NAME (cs) = short_name;
1249 CHARSET_LONG_NAME (cs) = long_name;
1250 CHARSET_DIRECTION (cs) = direction;
1251 CHARSET_TYPE (cs) = type;
1252 CHARSET_COLUMNS (cs) = columns;
1253 CHARSET_GRAPHIC (cs) = graphic;
1254 CHARSET_FINAL (cs) = final;
1255 CHARSET_DOC_STRING (cs) = doc;
1256 CHARSET_REGISTRY (cs) = reg;
1257 CHARSET_CCL_PROGRAM (cs) = Qnil;
1258 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1260 CHARSET_DECODING_TABLE(cs) = Qnil;
1261 CHARSET_UCS_MIN(cs) = ucs_min;
1262 CHARSET_UCS_MAX(cs) = ucs_max;
1263 CHARSET_CODE_OFFSET(cs) = code_offset;
1264 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1267 switch (CHARSET_TYPE (cs))
1269 case CHARSET_TYPE_94:
1270 CHARSET_DIMENSION (cs) = 1;
1271 CHARSET_CHARS (cs) = 94;
1273 case CHARSET_TYPE_96:
1274 CHARSET_DIMENSION (cs) = 1;
1275 CHARSET_CHARS (cs) = 96;
1277 case CHARSET_TYPE_94X94:
1278 CHARSET_DIMENSION (cs) = 2;
1279 CHARSET_CHARS (cs) = 94;
1281 case CHARSET_TYPE_96X96:
1282 CHARSET_DIMENSION (cs) = 2;
1283 CHARSET_CHARS (cs) = 96;
1286 case CHARSET_TYPE_128:
1287 CHARSET_DIMENSION (cs) = 1;
1288 CHARSET_CHARS (cs) = 128;
1290 case CHARSET_TYPE_128X128:
1291 CHARSET_DIMENSION (cs) = 2;
1292 CHARSET_CHARS (cs) = 128;
1294 case CHARSET_TYPE_256:
1295 CHARSET_DIMENSION (cs) = 1;
1296 CHARSET_CHARS (cs) = 256;
1298 case CHARSET_TYPE_256X256:
1299 CHARSET_DIMENSION (cs) = 2;
1300 CHARSET_CHARS (cs) = 256;
1306 if (id == LEADING_BYTE_ASCII)
1307 CHARSET_REP_BYTES (cs) = 1;
1309 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1311 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1316 /* some charsets do not have final characters. This includes
1317 ASCII, Control-1, Composite, and the two faux private
1320 if (code_offset == 0)
1322 assert (NILP (charset_by_attributes[type][final]));
1323 charset_by_attributes[type][final] = obj;
1326 assert (NILP (charset_by_attributes[type][final][direction]));
1327 charset_by_attributes[type][final][direction] = obj;
1331 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1332 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1335 /* official leading byte */
1336 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1339 /* Some charsets are "faux" and don't have names or really exist at
1340 all except in the leading-byte table. */
1342 Fputhash (name, obj, Vcharset_hash_table);
1347 get_unallocated_leading_byte (int dimension)
1352 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1355 lb = next_allocated_leading_byte++;
1359 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1362 lb = next_allocated_1_byte_leading_byte++;
1366 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1369 lb = next_allocated_2_byte_leading_byte++;
1375 ("No more character sets free for this dimension",
1376 make_int (dimension));
1383 range_charset_code_point (Lisp_Object charset, Emchar ch)
1387 if ((XCHARSET_UCS_MIN (charset) <= ch)
1388 && (ch <= XCHARSET_UCS_MAX (charset)))
1390 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1392 if (XCHARSET_DIMENSION (charset) == 1)
1393 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1394 else if (XCHARSET_DIMENSION (charset) == 2)
1395 return list2 (make_int (d / XCHARSET_CHARS (charset)
1396 + XCHARSET_BYTE_OFFSET (charset)),
1397 make_int (d % XCHARSET_CHARS (charset)
1398 + XCHARSET_BYTE_OFFSET (charset)));
1399 else if (XCHARSET_DIMENSION (charset) == 3)
1400 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1401 * XCHARSET_CHARS (charset))
1402 + XCHARSET_BYTE_OFFSET (charset)),
1403 make_int (d / XCHARSET_CHARS (charset)
1404 % XCHARSET_CHARS (charset)
1405 + XCHARSET_BYTE_OFFSET (charset)),
1406 make_int (d % XCHARSET_CHARS (charset)
1407 + XCHARSET_BYTE_OFFSET (charset)));
1408 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1409 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1410 * XCHARSET_CHARS (charset)
1411 * XCHARSET_CHARS (charset))
1412 + XCHARSET_BYTE_OFFSET (charset)),
1413 make_int (d / (XCHARSET_CHARS (charset)
1414 * XCHARSET_CHARS (charset))
1415 % XCHARSET_CHARS (charset)
1416 + XCHARSET_BYTE_OFFSET (charset)),
1417 make_int (d / XCHARSET_CHARS (charset)
1418 % XCHARSET_CHARS (charset)
1419 + XCHARSET_BYTE_OFFSET (charset)),
1420 make_int (d % XCHARSET_CHARS (charset)
1421 + XCHARSET_BYTE_OFFSET (charset)));
1423 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1425 if (XCHARSET_DIMENSION (charset) == 1)
1427 if (XCHARSET_CHARS (charset) == 94)
1429 if (((d = ch - (MIN_CHAR_94
1430 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1432 return list1 (make_int (d + 33));
1434 else if (XCHARSET_CHARS (charset) == 96)
1436 if (((d = ch - (MIN_CHAR_96
1437 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1439 return list1 (make_int (d + 32));
1444 else if (XCHARSET_DIMENSION (charset) == 2)
1446 if (XCHARSET_CHARS (charset) == 94)
1448 if (((d = ch - (MIN_CHAR_94x94
1449 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1452 return list2 (make_int ((d / 94) + 33),
1453 make_int (d % 94 + 33));
1455 else if (XCHARSET_CHARS (charset) == 96)
1457 if (((d = ch - (MIN_CHAR_96x96
1458 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1461 return list2 (make_int ((d / 96) + 32),
1462 make_int (d % 96 + 32));
1470 split_builtin_char (Emchar c)
1472 if (c < MIN_CHAR_OBS_94x94)
1474 if (c <= MAX_CHAR_BASIC_LATIN)
1476 return list2 (Vcharset_ascii, make_int (c));
1480 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1484 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1486 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1488 return list2 (Vcharset_greek_iso8859_7,
1489 make_int (c - MIN_CHAR_GREEK + 0x20));
1491 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1493 return list2 (Vcharset_cyrillic_iso8859_5,
1494 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1496 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1498 return list2 (Vcharset_hebrew_iso8859_8,
1499 make_int (c - MIN_CHAR_HEBREW + 0x20));
1501 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1503 return list2 (Vcharset_thai_tis620,
1504 make_int (c - MIN_CHAR_THAI + 0x20));
1506 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1507 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1509 return list2 (Vcharset_katakana_jisx0201,
1510 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1514 return list3 (Vcharset_ucs_bmp,
1515 make_int (c >> 8), make_int (c & 0xff));
1518 else if (c <= MAX_CHAR_OBS_94x94)
1520 return list3 (CHARSET_BY_ATTRIBUTES
1521 (CHARSET_TYPE_94X94,
1522 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1523 CHARSET_LEFT_TO_RIGHT),
1524 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1525 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1527 else if (c <= MAX_CHAR_94)
1529 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1530 ((c - MIN_CHAR_94) / 94) + '0',
1531 CHARSET_LEFT_TO_RIGHT),
1532 make_int (((c - MIN_CHAR_94) % 94) + 33));
1534 else if (c <= MAX_CHAR_96)
1536 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1537 ((c - MIN_CHAR_96) / 96) + '0',
1538 CHARSET_LEFT_TO_RIGHT),
1539 make_int (((c - MIN_CHAR_96) % 96) + 32));
1541 else if (c <= MAX_CHAR_94x94)
1543 return list3 (CHARSET_BY_ATTRIBUTES
1544 (CHARSET_TYPE_94X94,
1545 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1546 CHARSET_LEFT_TO_RIGHT),
1547 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1548 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1550 else if (c <= MAX_CHAR_96x96)
1552 return list3 (CHARSET_BY_ATTRIBUTES
1553 (CHARSET_TYPE_96X96,
1554 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1555 CHARSET_LEFT_TO_RIGHT),
1556 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1557 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1566 charset_code_point (Lisp_Object charset, Emchar ch)
1568 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1570 if (!EQ (cdef, Qnil))
1572 Lisp_Object field = Fassq (charset, cdef);
1574 if (!EQ (field, Qnil))
1575 return Fcdr (field);
1577 return range_charset_code_point (charset, ch);
1580 Lisp_Object Vdefault_coded_charset_priority_list;
1584 /************************************************************************/
1585 /* Basic charset Lisp functions */
1586 /************************************************************************/
1588 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1589 Return non-nil if OBJECT is a charset.
1593 return CHARSETP (object) ? Qt : Qnil;
1596 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1597 Retrieve the charset of the given name.
1598 If CHARSET-OR-NAME is a charset object, it is simply returned.
1599 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1600 nil is returned. Otherwise the associated charset object is returned.
1604 if (CHARSETP (charset_or_name))
1605 return charset_or_name;
1607 CHECK_SYMBOL (charset_or_name);
1608 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1611 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1612 Retrieve the charset of the given name.
1613 Same as `find-charset' except an error is signalled if there is no such
1614 charset instead of returning nil.
1618 Lisp_Object charset = Ffind_charset (name);
1621 signal_simple_error ("No such charset", name);
1625 /* We store the charsets in hash tables with the names as the key and the
1626 actual charset object as the value. Occasionally we need to use them
1627 in a list format. These routines provide us with that. */
1628 struct charset_list_closure
1630 Lisp_Object *charset_list;
1634 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1635 void *charset_list_closure)
1637 /* This function can GC */
1638 struct charset_list_closure *chcl =
1639 (struct charset_list_closure*) charset_list_closure;
1640 Lisp_Object *charset_list = chcl->charset_list;
1642 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1646 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1647 Return a list of the names of all defined charsets.
1651 Lisp_Object charset_list = Qnil;
1652 struct gcpro gcpro1;
1653 struct charset_list_closure charset_list_closure;
1655 GCPRO1 (charset_list);
1656 charset_list_closure.charset_list = &charset_list;
1657 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1658 &charset_list_closure);
1661 return charset_list;
1664 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1665 Return the name of the given charset.
1669 return XCHARSET_NAME (Fget_charset (charset));
1672 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1673 Define a new character set.
1674 This function is for use with Mule support.
1675 NAME is a symbol, the name by which the character set is normally referred.
1676 DOC-STRING is a string describing the character set.
1677 PROPS is a property list, describing the specific nature of the
1678 character set. Recognized properties are:
1680 'short-name Short version of the charset name (ex: Latin-1)
1681 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1682 'registry A regular expression matching the font registry field for
1684 'dimension Number of octets used to index a character in this charset.
1685 Either 1 or 2. Defaults to 1.
1686 'columns Number of columns used to display a character in this charset.
1687 Only used in TTY mode. (Under X, the actual width of a
1688 character can be derived from the font used to display the
1689 characters.) If unspecified, defaults to the dimension
1690 (this is almost always the correct value).
1691 'chars Number of characters in each dimension (94 or 96).
1692 Defaults to 94. Note that if the dimension is 2, the
1693 character set thus described is 94x94 or 96x96.
1694 'final Final byte of ISO 2022 escape sequence. Must be
1695 supplied. Each combination of (DIMENSION, CHARS) defines a
1696 separate namespace for final bytes. Note that ISO
1697 2022 restricts the final byte to the range
1698 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1699 dimension == 2. Note also that final bytes in the range
1700 0x30 - 0x3F are reserved for user-defined (not official)
1702 'graphic 0 (use left half of font on output) or 1 (use right half
1703 of font on output). Defaults to 0. For example, for
1704 a font whose registry is ISO8859-1, the left half
1705 (octets 0x20 - 0x7F) is the `ascii' character set, while
1706 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1707 character set. With 'graphic set to 0, the octets
1708 will have their high bit cleared; with it set to 1,
1709 the octets will have their high bit set.
1710 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1712 'ccl-program A compiled CCL program used to convert a character in
1713 this charset into an index into the font. This is in
1714 addition to the 'graphic property. The CCL program
1715 is passed the octets of the character, with the high
1716 bit cleared and set depending upon whether the value
1717 of the 'graphic property is 0 or 1.
1719 (name, doc_string, props))
1721 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1722 int direction = CHARSET_LEFT_TO_RIGHT;
1724 Lisp_Object registry = Qnil;
1725 Lisp_Object charset;
1726 Lisp_Object rest, keyword, value;
1727 Lisp_Object ccl_program = Qnil;
1728 Lisp_Object short_name = Qnil, long_name = Qnil;
1729 int byte_offset = -1;
1731 CHECK_SYMBOL (name);
1732 if (!NILP (doc_string))
1733 CHECK_STRING (doc_string);
1735 charset = Ffind_charset (name);
1736 if (!NILP (charset))
1737 signal_simple_error ("Cannot redefine existing charset", name);
1739 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1741 if (EQ (keyword, Qshort_name))
1743 CHECK_STRING (value);
1747 if (EQ (keyword, Qlong_name))
1749 CHECK_STRING (value);
1753 else if (EQ (keyword, Qdimension))
1756 dimension = XINT (value);
1757 if (dimension < 1 || dimension > 2)
1758 signal_simple_error ("Invalid value for 'dimension", value);
1761 else if (EQ (keyword, Qchars))
1764 chars = XINT (value);
1765 if (chars != 94 && chars != 96)
1766 signal_simple_error ("Invalid value for 'chars", value);
1769 else if (EQ (keyword, Qcolumns))
1772 columns = XINT (value);
1773 if (columns != 1 && columns != 2)
1774 signal_simple_error ("Invalid value for 'columns", value);
1777 else if (EQ (keyword, Qgraphic))
1780 graphic = XINT (value);
1782 if (graphic < 0 || graphic > 2)
1784 if (graphic < 0 || graphic > 1)
1786 signal_simple_error ("Invalid value for 'graphic", value);
1789 else if (EQ (keyword, Qregistry))
1791 CHECK_STRING (value);
1795 else if (EQ (keyword, Qdirection))
1797 if (EQ (value, Ql2r))
1798 direction = CHARSET_LEFT_TO_RIGHT;
1799 else if (EQ (value, Qr2l))
1800 direction = CHARSET_RIGHT_TO_LEFT;
1802 signal_simple_error ("Invalid value for 'direction", value);
1805 else if (EQ (keyword, Qfinal))
1807 CHECK_CHAR_COERCE_INT (value);
1808 final = XCHAR (value);
1809 if (final < '0' || final > '~')
1810 signal_simple_error ("Invalid value for 'final", value);
1813 else if (EQ (keyword, Qccl_program))
1815 CHECK_VECTOR (value);
1816 ccl_program = value;
1820 signal_simple_error ("Unrecognized property", keyword);
1824 error ("'final must be specified");
1825 if (dimension == 2 && final > 0x5F)
1827 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1831 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1833 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1835 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1836 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1838 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1840 id = get_unallocated_leading_byte (dimension);
1842 if (NILP (doc_string))
1843 doc_string = build_string ("");
1845 if (NILP (registry))
1846 registry = build_string ("");
1848 if (NILP (short_name))
1849 XSETSTRING (short_name, XSYMBOL (name)->name);
1851 if (NILP (long_name))
1852 long_name = doc_string;
1855 columns = dimension;
1857 if (byte_offset < 0)
1861 else if (chars == 96)
1867 charset = make_charset (id, name, type, columns, graphic,
1868 final, direction, short_name, long_name,
1869 doc_string, registry,
1870 Qnil, 0, 0, 0, byte_offset);
1871 if (!NILP (ccl_program))
1872 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1876 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1878 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1879 NEW-NAME is the name of the new charset. Return the new charset.
1881 (charset, new_name))
1883 Lisp_Object new_charset = Qnil;
1884 int id, dimension, columns, graphic, final;
1885 int direction, type;
1886 Lisp_Object registry, doc_string, short_name, long_name;
1887 struct Lisp_Charset *cs;
1889 charset = Fget_charset (charset);
1890 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1891 signal_simple_error ("Charset already has reverse-direction charset",
1894 CHECK_SYMBOL (new_name);
1895 if (!NILP (Ffind_charset (new_name)))
1896 signal_simple_error ("Cannot redefine existing charset", new_name);
1898 cs = XCHARSET (charset);
1900 type = CHARSET_TYPE (cs);
1901 columns = CHARSET_COLUMNS (cs);
1902 dimension = CHARSET_DIMENSION (cs);
1903 id = get_unallocated_leading_byte (dimension);
1905 graphic = CHARSET_GRAPHIC (cs);
1906 final = CHARSET_FINAL (cs);
1907 direction = CHARSET_RIGHT_TO_LEFT;
1908 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1909 direction = CHARSET_LEFT_TO_RIGHT;
1910 doc_string = CHARSET_DOC_STRING (cs);
1911 short_name = CHARSET_SHORT_NAME (cs);
1912 long_name = CHARSET_LONG_NAME (cs);
1913 registry = CHARSET_REGISTRY (cs);
1915 new_charset = make_charset (id, new_name, type, columns,
1916 graphic, final, direction, short_name, long_name,
1917 doc_string, registry,
1919 CHARSET_DECODING_TABLE(cs),
1920 CHARSET_UCS_MIN(cs),
1921 CHARSET_UCS_MAX(cs),
1922 CHARSET_CODE_OFFSET(cs),
1923 CHARSET_BYTE_OFFSET(cs)
1929 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1930 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1935 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1936 Define symbol ALIAS as an alias for CHARSET.
1940 CHECK_SYMBOL (alias);
1941 charset = Fget_charset (charset);
1942 return Fputhash (alias, charset, Vcharset_hash_table);
1945 /* #### Reverse direction charsets not yet implemented. */
1947 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1949 Return the reverse-direction charset parallel to CHARSET, if any.
1950 This is the charset with the same properties (in particular, the same
1951 dimension, number of characters per dimension, and final byte) as
1952 CHARSET but whose characters are displayed in the opposite direction.
1956 charset = Fget_charset (charset);
1957 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1961 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1962 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1963 If DIRECTION is omitted, both directions will be checked (left-to-right
1964 will be returned if character sets exist for both directions).
1966 (dimension, chars, final, direction))
1968 int dm, ch, fi, di = -1;
1970 Lisp_Object obj = Qnil;
1972 CHECK_INT (dimension);
1973 dm = XINT (dimension);
1974 if (dm < 1 || dm > 2)
1975 signal_simple_error ("Invalid value for DIMENSION", dimension);
1979 if (ch != 94 && ch != 96)
1980 signal_simple_error ("Invalid value for CHARS", chars);
1982 CHECK_CHAR_COERCE_INT (final);
1984 if (fi < '0' || fi > '~')
1985 signal_simple_error ("Invalid value for FINAL", final);
1987 if (EQ (direction, Ql2r))
1988 di = CHARSET_LEFT_TO_RIGHT;
1989 else if (EQ (direction, Qr2l))
1990 di = CHARSET_RIGHT_TO_LEFT;
1991 else if (!NILP (direction))
1992 signal_simple_error ("Invalid value for DIRECTION", direction);
1994 if (dm == 2 && fi > 0x5F)
1996 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1999 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2001 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2005 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2007 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2010 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2013 return XCHARSET_NAME (obj);
2017 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2018 Return short name of CHARSET.
2022 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2025 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2026 Return long name of CHARSET.
2030 return XCHARSET_LONG_NAME (Fget_charset (charset));
2033 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2034 Return description of CHARSET.
2038 return XCHARSET_DOC_STRING (Fget_charset (charset));
2041 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2042 Return dimension of CHARSET.
2046 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2049 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2050 Return property PROP of CHARSET.
2051 Recognized properties are those listed in `make-charset', as well as
2052 'name and 'doc-string.
2056 struct Lisp_Charset *cs;
2058 charset = Fget_charset (charset);
2059 cs = XCHARSET (charset);
2061 CHECK_SYMBOL (prop);
2062 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2063 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2064 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2065 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2066 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2067 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2068 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2069 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2070 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2071 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2072 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2073 if (EQ (prop, Qdirection))
2074 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2075 if (EQ (prop, Qreverse_direction_charset))
2077 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2081 return XCHARSET_NAME (obj);
2083 signal_simple_error ("Unrecognized charset property name", prop);
2084 return Qnil; /* not reached */
2087 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2088 Return charset identification number of CHARSET.
2092 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2095 /* #### We need to figure out which properties we really want to
2098 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2099 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2101 (charset, ccl_program))
2103 charset = Fget_charset (charset);
2104 CHECK_VECTOR (ccl_program);
2105 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2110 invalidate_charset_font_caches (Lisp_Object charset)
2112 /* Invalidate font cache entries for charset on all devices. */
2113 Lisp_Object devcons, concons, hash_table;
2114 DEVICE_LOOP_NO_BREAK (devcons, concons)
2116 struct device *d = XDEVICE (XCAR (devcons));
2117 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2118 if (!UNBOUNDP (hash_table))
2119 Fclrhash (hash_table);
2123 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2124 Set the 'registry property of CHARSET to REGISTRY.
2126 (charset, registry))
2128 charset = Fget_charset (charset);
2129 CHECK_STRING (registry);
2130 XCHARSET_REGISTRY (charset) = registry;
2131 invalidate_charset_font_caches (charset);
2132 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2137 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2138 Return mapping-table of CHARSET.
2142 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2145 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2146 Set mapping-table of CHARSET to TABLE.
2150 struct Lisp_Charset *cs;
2151 Lisp_Object old_table;
2154 charset = Fget_charset (charset);
2155 cs = XCHARSET (charset);
2157 if (EQ (table, Qnil))
2159 CHARSET_DECODING_TABLE(cs) = table;
2162 else if (VECTORP (table))
2166 /* ad-hoc method for `ascii' */
2167 if ((CHARSET_CHARS (cs) == 94) &&
2168 (CHARSET_BYTE_OFFSET (cs) != 33))
2169 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2171 ccs_len = CHARSET_CHARS (cs);
2173 if (XVECTOR_LENGTH (table) > ccs_len)
2174 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2175 old_table = CHARSET_DECODING_TABLE(cs);
2176 CHARSET_DECODING_TABLE(cs) = table;
2179 signal_error (Qwrong_type_argument,
2180 list2 (build_translated_string ("vector-or-nil-p"),
2182 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2184 switch (CHARSET_DIMENSION (cs))
2187 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2189 Lisp_Object c = XVECTOR_DATA(table)[i];
2194 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2198 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2200 Lisp_Object v = XVECTOR_DATA(table)[i];
2206 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2208 CHARSET_DECODING_TABLE(cs) = old_table;
2209 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2211 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2213 Lisp_Object c = XVECTOR_DATA(v)[j];
2216 put_char_attribute (c, charset,
2219 (i + CHARSET_BYTE_OFFSET (cs)),
2221 (j + CHARSET_BYTE_OFFSET (cs))));
2225 put_char_attribute (v, charset,
2227 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2236 /************************************************************************/
2237 /* Lisp primitives for working with characters */
2238 /************************************************************************/
2240 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2241 Make a character from CHARSET and octets ARG1 and ARG2.
2242 ARG2 is required only for characters from two-dimensional charsets.
2243 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2244 character s with caron.
2246 (charset, arg1, arg2))
2248 struct Lisp_Charset *cs;
2250 int lowlim, highlim;
2252 charset = Fget_charset (charset);
2253 cs = XCHARSET (charset);
2255 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2256 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2258 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2260 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2261 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2264 /* It is useful (and safe, according to Olivier Galibert) to strip
2265 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2266 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2267 Latin 2 code of the character. */
2275 if (a1 < lowlim || a1 > highlim)
2276 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2278 if (CHARSET_DIMENSION (cs) == 1)
2282 ("Charset is of dimension one; second octet must be nil", arg2);
2283 return make_char (MAKE_CHAR (charset, a1, 0));
2292 a2 = XINT (arg2) & 0x7f;
2294 if (a2 < lowlim || a2 > highlim)
2295 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2297 return make_char (MAKE_CHAR (charset, a1, a2));
2300 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2301 Return the character set of char CH.
2305 CHECK_CHAR_COERCE_INT (ch);
2307 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2310 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2311 Return list of charset and one or two position-codes of CHAR.
2315 /* This function can GC */
2316 struct gcpro gcpro1, gcpro2;
2317 Lisp_Object charset = Qnil;
2318 Lisp_Object rc = Qnil;
2321 GCPRO2 (charset, rc);
2322 CHECK_CHAR_COERCE_INT (character);
2324 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2326 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2328 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2332 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2340 #ifdef ENABLE_COMPOSITE_CHARS
2341 /************************************************************************/
2342 /* composite character functions */
2343 /************************************************************************/
2346 lookup_composite_char (Bufbyte *str, int len)
2348 Lisp_Object lispstr = make_string (str, len);
2349 Lisp_Object ch = Fgethash (lispstr,
2350 Vcomposite_char_string2char_hash_table,
2356 if (composite_char_row_next >= 128)
2357 signal_simple_error ("No more composite chars available", lispstr);
2358 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2359 composite_char_col_next);
2360 Fputhash (make_char (emch), lispstr,
2361 Vcomposite_char_char2string_hash_table);
2362 Fputhash (lispstr, make_char (emch),
2363 Vcomposite_char_string2char_hash_table);
2364 composite_char_col_next++;
2365 if (composite_char_col_next >= 128)
2367 composite_char_col_next = 32;
2368 composite_char_row_next++;
2377 composite_char_string (Emchar ch)
2379 Lisp_Object str = Fgethash (make_char (ch),
2380 Vcomposite_char_char2string_hash_table,
2382 assert (!UNBOUNDP (str));
2386 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2387 Convert a string into a single composite character.
2388 The character is the result of overstriking all the characters in
2393 CHECK_STRING (string);
2394 return make_char (lookup_composite_char (XSTRING_DATA (string),
2395 XSTRING_LENGTH (string)));
2398 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2399 Return a string of the characters comprising a composite character.
2407 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2408 signal_simple_error ("Must be composite char", ch);
2409 return composite_char_string (emch);
2411 #endif /* ENABLE_COMPOSITE_CHARS */
2414 /************************************************************************/
2415 /* initialization */
2416 /************************************************************************/
2419 syms_of_mule_charset (void)
2421 DEFSUBR (Fcharsetp);
2422 DEFSUBR (Ffind_charset);
2423 DEFSUBR (Fget_charset);
2424 DEFSUBR (Fcharset_list);
2425 DEFSUBR (Fcharset_name);
2426 DEFSUBR (Fmake_charset);
2427 DEFSUBR (Fmake_reverse_direction_charset);
2428 /* DEFSUBR (Freverse_direction_charset); */
2429 DEFSUBR (Fdefine_charset_alias);
2430 DEFSUBR (Fcharset_from_attributes);
2431 DEFSUBR (Fcharset_short_name);
2432 DEFSUBR (Fcharset_long_name);
2433 DEFSUBR (Fcharset_description);
2434 DEFSUBR (Fcharset_dimension);
2435 DEFSUBR (Fcharset_property);
2436 DEFSUBR (Fcharset_id);
2437 DEFSUBR (Fset_charset_ccl_program);
2438 DEFSUBR (Fset_charset_registry);
2440 DEFSUBR (Fchar_attribute_alist);
2441 DEFSUBR (Fget_char_attribute);
2442 DEFSUBR (Fput_char_attribute);
2443 DEFSUBR (Fdefine_char);
2444 DEFSUBR (Fchar_variants);
2445 DEFSUBR (Fget_composite_char);
2446 DEFSUBR (Fcharset_mapping_table);
2447 DEFSUBR (Fset_charset_mapping_table);
2450 DEFSUBR (Fmake_char);
2451 DEFSUBR (Fchar_charset);
2452 DEFSUBR (Fsplit_char);
2454 #ifdef ENABLE_COMPOSITE_CHARS
2455 DEFSUBR (Fmake_composite_char);
2456 DEFSUBR (Fcomposite_char_string);
2459 defsymbol (&Qcharsetp, "charsetp");
2460 defsymbol (&Qregistry, "registry");
2461 defsymbol (&Qfinal, "final");
2462 defsymbol (&Qgraphic, "graphic");
2463 defsymbol (&Qdirection, "direction");
2464 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2465 defsymbol (&Qshort_name, "short-name");
2466 defsymbol (&Qlong_name, "long-name");
2468 defsymbol (&Ql2r, "l2r");
2469 defsymbol (&Qr2l, "r2l");
2471 /* Charsets, compatible with FSF 20.3
2472 Naming convention is Script-Charset[-Edition] */
2473 defsymbol (&Qascii, "ascii");
2474 defsymbol (&Qcontrol_1, "control-1");
2475 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2476 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2477 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2478 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2479 defsymbol (&Qthai_tis620, "thai-tis620");
2480 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2481 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2482 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2483 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2484 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2485 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2486 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2487 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2488 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2489 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2490 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2491 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2492 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2493 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2495 defsymbol (&Q_ucs, "->ucs");
2496 defsymbol (&Q_decomposition, "->decomposition");
2497 defsymbol (&Qwide, "wide");
2498 defsymbol (&Qnarrow, "narrow");
2499 defsymbol (&Qcompat, "compat");
2500 defsymbol (&QnoBreak, "noBreak");
2501 defsymbol (&Qsuper, "super");
2502 defsymbol (&Qfraction, "fraction");
2503 defsymbol (&Qucs, "ucs");
2504 defsymbol (&Qucs_bmp, "ucs-bmp");
2505 defsymbol (&Qlatin_viscii, "latin-viscii");
2506 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2507 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2508 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2509 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2510 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2511 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2512 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2514 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2515 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2517 defsymbol (&Qcomposite, "composite");
2521 vars_of_mule_charset (void)
2528 /* Table of charsets indexed by leading byte. */
2529 for (i = 0; i < countof (charset_by_leading_byte); i++)
2530 charset_by_leading_byte[i] = Qnil;
2533 /* Table of charsets indexed by type/final-byte. */
2534 for (i = 0; i < countof (charset_by_attributes); i++)
2535 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2536 charset_by_attributes[i][j] = Qnil;
2538 /* Table of charsets indexed by type/final-byte/direction. */
2539 for (i = 0; i < countof (charset_by_attributes); i++)
2540 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2541 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2542 charset_by_attributes[i][j][k] = Qnil;
2546 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2548 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2549 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2553 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2554 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2555 Leading-code of private TYPE9N charset of column-width 1.
2557 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2561 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2562 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2563 Version number of UTF-2000.
2566 staticpro (&Vcharacter_attribute_table);
2567 Vcharacter_attribute_table = make_char_code_table (Qnil);
2569 staticpro (&Vcharacter_composition_table);
2570 Vcharacter_composition_table = make_char_code_table (Qnil);
2572 staticpro (&Vcharacter_variant_table);
2573 Vcharacter_variant_table = make_char_code_table (Qnil);
2575 Vdefault_coded_charset_priority_list = Qnil;
2576 DEFVAR_LISP ("default-coded-charset-priority-list",
2577 &Vdefault_coded_charset_priority_list /*
2578 Default order of preferred coded-character-sets.
2584 complex_vars_of_mule_charset (void)
2586 staticpro (&Vcharset_hash_table);
2587 Vcharset_hash_table =
2588 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2590 /* Predefined character sets. We store them into variables for
2595 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2596 CHARSET_TYPE_256X256, 1, 2, 0,
2597 CHARSET_LEFT_TO_RIGHT,
2598 build_string ("BMP"),
2599 build_string ("BMP"),
2600 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2601 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2602 Qnil, 0, 0xFFFF, 0, 0);
2604 # define MIN_CHAR_THAI 0
2605 # define MAX_CHAR_THAI 0
2606 # define MIN_CHAR_GREEK 0
2607 # define MAX_CHAR_GREEK 0
2608 # define MIN_CHAR_HEBREW 0
2609 # define MAX_CHAR_HEBREW 0
2610 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2611 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2612 # define MIN_CHAR_CYRILLIC 0
2613 # define MAX_CHAR_CYRILLIC 0
2616 make_charset (LEADING_BYTE_ASCII, Qascii,
2617 CHARSET_TYPE_94, 1, 0, 'B',
2618 CHARSET_LEFT_TO_RIGHT,
2619 build_string ("ASCII"),
2620 build_string ("ASCII)"),
2621 build_string ("ASCII (ISO646 IRV)"),
2622 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2623 Qnil, 0, 0x7F, 0, 0);
2624 Vcharset_control_1 =
2625 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2626 CHARSET_TYPE_94, 1, 1, 0,
2627 CHARSET_LEFT_TO_RIGHT,
2628 build_string ("C1"),
2629 build_string ("Control characters"),
2630 build_string ("Control characters 128-191"),
2632 Qnil, 0x80, 0x9F, 0, 0);
2633 Vcharset_latin_iso8859_1 =
2634 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2635 CHARSET_TYPE_96, 1, 1, 'A',
2636 CHARSET_LEFT_TO_RIGHT,
2637 build_string ("Latin-1"),
2638 build_string ("ISO8859-1 (Latin-1)"),
2639 build_string ("ISO8859-1 (Latin-1)"),
2640 build_string ("iso8859-1"),
2641 Qnil, 0xA0, 0xFF, 0, 32);
2642 Vcharset_latin_iso8859_2 =
2643 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2644 CHARSET_TYPE_96, 1, 1, 'B',
2645 CHARSET_LEFT_TO_RIGHT,
2646 build_string ("Latin-2"),
2647 build_string ("ISO8859-2 (Latin-2)"),
2648 build_string ("ISO8859-2 (Latin-2)"),
2649 build_string ("iso8859-2"),
2651 Vcharset_latin_iso8859_3 =
2652 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2653 CHARSET_TYPE_96, 1, 1, 'C',
2654 CHARSET_LEFT_TO_RIGHT,
2655 build_string ("Latin-3"),
2656 build_string ("ISO8859-3 (Latin-3)"),
2657 build_string ("ISO8859-3 (Latin-3)"),
2658 build_string ("iso8859-3"),
2660 Vcharset_latin_iso8859_4 =
2661 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2662 CHARSET_TYPE_96, 1, 1, 'D',
2663 CHARSET_LEFT_TO_RIGHT,
2664 build_string ("Latin-4"),
2665 build_string ("ISO8859-4 (Latin-4)"),
2666 build_string ("ISO8859-4 (Latin-4)"),
2667 build_string ("iso8859-4"),
2669 Vcharset_thai_tis620 =
2670 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2671 CHARSET_TYPE_96, 1, 1, 'T',
2672 CHARSET_LEFT_TO_RIGHT,
2673 build_string ("TIS620"),
2674 build_string ("TIS620 (Thai)"),
2675 build_string ("TIS620.2529 (Thai)"),
2676 build_string ("tis620"),
2677 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2678 Vcharset_greek_iso8859_7 =
2679 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2680 CHARSET_TYPE_96, 1, 1, 'F',
2681 CHARSET_LEFT_TO_RIGHT,
2682 build_string ("ISO8859-7"),
2683 build_string ("ISO8859-7 (Greek)"),
2684 build_string ("ISO8859-7 (Greek)"),
2685 build_string ("iso8859-7"),
2686 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2687 Vcharset_arabic_iso8859_6 =
2688 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2689 CHARSET_TYPE_96, 1, 1, 'G',
2690 CHARSET_RIGHT_TO_LEFT,
2691 build_string ("ISO8859-6"),
2692 build_string ("ISO8859-6 (Arabic)"),
2693 build_string ("ISO8859-6 (Arabic)"),
2694 build_string ("iso8859-6"),
2696 Vcharset_hebrew_iso8859_8 =
2697 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2698 CHARSET_TYPE_96, 1, 1, 'H',
2699 CHARSET_RIGHT_TO_LEFT,
2700 build_string ("ISO8859-8"),
2701 build_string ("ISO8859-8 (Hebrew)"),
2702 build_string ("ISO8859-8 (Hebrew)"),
2703 build_string ("iso8859-8"),
2704 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2705 Vcharset_katakana_jisx0201 =
2706 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2707 CHARSET_TYPE_94, 1, 1, 'I',
2708 CHARSET_LEFT_TO_RIGHT,
2709 build_string ("JISX0201 Kana"),
2710 build_string ("JISX0201.1976 (Japanese Kana)"),
2711 build_string ("JISX0201.1976 Japanese Kana"),
2712 build_string ("jisx0201\\.1976"),
2714 MIN_CHAR_HALFWIDTH_KATAKANA,
2715 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2716 Vcharset_latin_jisx0201 =
2717 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2718 CHARSET_TYPE_94, 1, 0, 'J',
2719 CHARSET_LEFT_TO_RIGHT,
2720 build_string ("JISX0201 Roman"),
2721 build_string ("JISX0201.1976 (Japanese Roman)"),
2722 build_string ("JISX0201.1976 Japanese Roman"),
2723 build_string ("jisx0201\\.1976"),
2725 Vcharset_cyrillic_iso8859_5 =
2726 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2727 CHARSET_TYPE_96, 1, 1, 'L',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("ISO8859-5"),
2730 build_string ("ISO8859-5 (Cyrillic)"),
2731 build_string ("ISO8859-5 (Cyrillic)"),
2732 build_string ("iso8859-5"),
2733 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2734 Vcharset_latin_iso8859_9 =
2735 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2736 CHARSET_TYPE_96, 1, 1, 'M',
2737 CHARSET_LEFT_TO_RIGHT,
2738 build_string ("Latin-5"),
2739 build_string ("ISO8859-9 (Latin-5)"),
2740 build_string ("ISO8859-9 (Latin-5)"),
2741 build_string ("iso8859-9"),
2743 Vcharset_japanese_jisx0208_1978 =
2744 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2745 CHARSET_TYPE_94X94, 2, 0, '@',
2746 CHARSET_LEFT_TO_RIGHT,
2747 build_string ("JIS X0208:1978"),
2748 build_string ("JIS X0208:1978 (Japanese)"),
2750 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2751 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2753 Vcharset_chinese_gb2312 =
2754 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2755 CHARSET_TYPE_94X94, 2, 0, 'A',
2756 CHARSET_LEFT_TO_RIGHT,
2757 build_string ("GB2312"),
2758 build_string ("GB2312)"),
2759 build_string ("GB2312 Chinese simplified"),
2760 build_string ("gb2312"),
2762 Vcharset_japanese_jisx0208 =
2763 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2764 CHARSET_TYPE_94X94, 2, 0, 'B',
2765 CHARSET_LEFT_TO_RIGHT,
2766 build_string ("JISX0208"),
2767 build_string ("JIS X0208:1983 (Japanese)"),
2768 build_string ("JIS X0208:1983 Japanese Kanji"),
2769 build_string ("jisx0208\\.1983"),
2771 Vcharset_korean_ksc5601 =
2772 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2773 CHARSET_TYPE_94X94, 2, 0, 'C',
2774 CHARSET_LEFT_TO_RIGHT,
2775 build_string ("KSC5601"),
2776 build_string ("KSC5601 (Korean"),
2777 build_string ("KSC5601 Korean Hangul and Hanja"),
2778 build_string ("ksc5601"),
2780 Vcharset_japanese_jisx0212 =
2781 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2782 CHARSET_TYPE_94X94, 2, 0, 'D',
2783 CHARSET_LEFT_TO_RIGHT,
2784 build_string ("JISX0212"),
2785 build_string ("JISX0212 (Japanese)"),
2786 build_string ("JISX0212 Japanese Supplement"),
2787 build_string ("jisx0212"),
2790 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2791 Vcharset_chinese_cns11643_1 =
2792 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2793 CHARSET_TYPE_94X94, 2, 0, 'G',
2794 CHARSET_LEFT_TO_RIGHT,
2795 build_string ("CNS11643-1"),
2796 build_string ("CNS11643-1 (Chinese traditional)"),
2798 ("CNS 11643 Plane 1 Chinese traditional"),
2799 build_string (CHINESE_CNS_PLANE_RE("1")),
2801 Vcharset_chinese_cns11643_2 =
2802 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2803 CHARSET_TYPE_94X94, 2, 0, 'H',
2804 CHARSET_LEFT_TO_RIGHT,
2805 build_string ("CNS11643-2"),
2806 build_string ("CNS11643-2 (Chinese traditional)"),
2808 ("CNS 11643 Plane 2 Chinese traditional"),
2809 build_string (CHINESE_CNS_PLANE_RE("2")),
2812 Vcharset_latin_viscii_lower =
2813 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2814 CHARSET_TYPE_96, 1, 1, '1',
2815 CHARSET_LEFT_TO_RIGHT,
2816 build_string ("VISCII lower"),
2817 build_string ("VISCII lower (Vietnamese)"),
2818 build_string ("VISCII lower (Vietnamese)"),
2819 build_string ("MULEVISCII-LOWER"),
2821 Vcharset_latin_viscii_upper =
2822 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2823 CHARSET_TYPE_96, 1, 1, '2',
2824 CHARSET_LEFT_TO_RIGHT,
2825 build_string ("VISCII upper"),
2826 build_string ("VISCII upper (Vietnamese)"),
2827 build_string ("VISCII upper (Vietnamese)"),
2828 build_string ("MULEVISCII-UPPER"),
2830 Vcharset_latin_viscii =
2831 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2832 CHARSET_TYPE_256, 1, 2, 0,
2833 CHARSET_LEFT_TO_RIGHT,
2834 build_string ("VISCII"),
2835 build_string ("VISCII 1.1 (Vietnamese)"),
2836 build_string ("VISCII 1.1 (Vietnamese)"),
2837 build_string ("VISCII1\\.1"),
2839 Vcharset_ethiopic_ucs =
2840 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2841 CHARSET_TYPE_256X256, 2, 2, 0,
2842 CHARSET_LEFT_TO_RIGHT,
2843 build_string ("Ethiopic (UCS)"),
2844 build_string ("Ethiopic (UCS)"),
2845 build_string ("Ethiopic of UCS"),
2846 build_string ("Ethiopic-Unicode"),
2847 Qnil, 0x1200, 0x137F, 0x1200, 0);
2848 Vcharset_hiragana_jisx0208 =
2849 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2850 CHARSET_TYPE_94X94, 2, 0, 'B',
2851 CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Hiragana"),
2853 build_string ("Hiragana of JIS X0208"),
2854 build_string ("Japanese Hiragana of JIS X0208"),
2855 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2856 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2857 (0x24 - 33) * 94 + (0x21 - 33), 33);
2858 Vcharset_katakana_jisx0208 =
2859 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2860 CHARSET_TYPE_94X94, 2, 0, 'B',
2861 CHARSET_LEFT_TO_RIGHT,
2862 build_string ("Katakana"),
2863 build_string ("Katakana of JIS X0208"),
2864 build_string ("Japanese Katakana of JIS X0208"),
2865 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2866 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2867 (0x25 - 33) * 94 + (0x21 - 33), 33);
2869 Vcharset_chinese_big5_1 =
2870 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2871 CHARSET_TYPE_94X94, 2, 0, '0',
2872 CHARSET_LEFT_TO_RIGHT,
2873 build_string ("Big5"),
2874 build_string ("Big5 (Level-1)"),
2876 ("Big5 Level-1 Chinese traditional"),
2877 build_string ("big5"),
2879 Vcharset_chinese_big5_2 =
2880 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2881 CHARSET_TYPE_94X94, 2, 0, '1',
2882 CHARSET_LEFT_TO_RIGHT,
2883 build_string ("Big5"),
2884 build_string ("Big5 (Level-2)"),
2886 ("Big5 Level-2 Chinese traditional"),
2887 build_string ("big5"),
2890 #ifdef ENABLE_COMPOSITE_CHARS
2891 /* #### For simplicity, we put composite chars into a 96x96 charset.
2892 This is going to lead to problems because you can run out of
2893 room, esp. as we don't yet recycle numbers. */
2894 Vcharset_composite =
2895 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2896 CHARSET_TYPE_96X96, 2, 0, 0,
2897 CHARSET_LEFT_TO_RIGHT,
2898 build_string ("Composite"),
2899 build_string ("Composite characters"),
2900 build_string ("Composite characters"),
2903 composite_char_row_next = 32;
2904 composite_char_col_next = 32;
2906 Vcomposite_char_string2char_hash_table =
2907 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2908 Vcomposite_char_char2string_hash_table =
2909 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2910 staticpro (&Vcomposite_char_string2char_hash_table);
2911 staticpro (&Vcomposite_char_char2string_hash_table);
2912 #endif /* ENABLE_COMPOSITE_CHARS */