1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ideograph_daikanwa;
66 Lisp_Object Vcharset_ethiopic_ucs;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 unsigned int code = ch;
299 struct Lisp_Char_Byte_Table* cpt
300 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
301 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
303 if (CHAR_BYTE_TABLE_P (ret))
304 cpt = XCHAR_BYTE_TABLE (ret);
308 ret = cpt->property [(unsigned char) (code >> 16)];
309 if (CHAR_BYTE_TABLE_P (ret))
310 cpt = XCHAR_BYTE_TABLE (ret);
314 ret = cpt->property [(unsigned char) (code >> 8)];
315 if (CHAR_BYTE_TABLE_P (ret))
316 cpt = XCHAR_BYTE_TABLE (ret);
320 return cpt->property [(unsigned char) code];
324 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
326 unsigned int code = ch;
327 struct Lisp_Char_Byte_Table* cpt1
328 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
329 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
331 if (CHAR_BYTE_TABLE_P (ret))
333 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
335 ret = cpt2->property[(unsigned char)(code >> 16)];
336 if (CHAR_BYTE_TABLE_P (ret))
338 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
340 ret = cpt3->property[(unsigned char)(code >> 8)];
341 if (CHAR_BYTE_TABLE_P (ret))
343 struct Lisp_Char_Byte_Table* cpt4
344 = XCHAR_BYTE_TABLE (ret);
346 cpt4->property[(unsigned char)code] = value;
348 else if (!EQ (ret, value))
350 Lisp_Object cpt4 = make_char_byte_table (ret);
352 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
353 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
356 else if (!EQ (ret, value))
358 Lisp_Object cpt3 = make_char_byte_table (ret);
359 Lisp_Object cpt4 = make_char_byte_table (ret);
361 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
362 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
364 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
367 else if (!EQ (ret, value))
369 Lisp_Object cpt2 = make_char_byte_table (ret);
370 Lisp_Object cpt3 = make_char_byte_table (ret);
371 Lisp_Object cpt4 = make_char_byte_table (ret);
373 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
374 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
375 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
376 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
381 Lisp_Object Vcharacter_attribute_table;
382 Lisp_Object Vcharacter_composition_table;
383 Lisp_Object Vcharacter_variant_table;
385 Lisp_Object Q_decomposition;
388 Lisp_Object QnoBreak;
389 Lisp_Object Qfraction;
399 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
405 else if (EQ (v, Qcompat))
407 else if (EQ (v, QnoBreak))
409 else if (EQ (v, Qfraction))
411 else if (EQ (v, Qsuper))
413 else if (EQ (v, Qsub))
415 else if (EQ (v, Qcircle))
417 else if (EQ (v, Qsquare))
419 else if (EQ (v, Qwide))
421 else if (EQ (v, Qnarrow))
423 else if (EQ (v, Qfont))
426 signal_simple_error (err_msg, err_arg);
429 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
430 Return character corresponding with list.
434 Lisp_Object table = Vcharacter_composition_table;
435 Lisp_Object rest = list;
439 Lisp_Object v = Fcar (rest);
441 Emchar c = to_char_code (v, "Invalid value for composition", list);
443 ret = get_char_code_table (c, table);
448 if (!CHAR_CODE_TABLE_P (ret))
453 else if (!CONSP (rest))
455 else if (CHAR_CODE_TABLE_P (ret))
458 signal_simple_error ("Invalid table is found with", list);
460 signal_simple_error ("Invalid value for composition", list);
463 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
464 Return variants of CHARACTER.
468 CHECK_CHAR (character);
469 return Fcopy_list (get_char_code_table (XCHAR (character),
470 Vcharacter_variant_table));
473 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
474 Return the alist of attributes of CHARACTER.
478 CHECK_CHAR (character);
479 return Fcopy_alist (get_char_code_table (XCHAR (character),
480 Vcharacter_attribute_table));
483 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
484 Return the value of CHARACTER's ATTRIBUTE.
486 (character, attribute))
491 CHECK_CHAR (character);
492 ret = get_char_code_table (XCHAR (character),
493 Vcharacter_attribute_table);
497 if (!NILP (ccs = Ffind_charset (attribute)))
500 return Fcdr (Fassq (attribute, ret));
504 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
507 Emchar char_code = XCHAR (character);
509 = get_char_code_table (char_code, Vcharacter_attribute_table);
512 cell = Fassq (attribute, ret);
516 ret = Fcons (Fcons (attribute, value), ret);
518 else if (!EQ (Fcdr (cell), value))
520 Fsetcdr (cell, value);
522 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
526 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
527 Store CHARACTER's ATTRIBUTE with VALUE.
529 (character, attribute, value))
533 CHECK_CHAR (character);
534 ccs = Ffind_charset (attribute);
538 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
543 /* ad-hoc method for `ascii' */
544 if ((XCHARSET_CHARS (ccs) == 94) &&
545 (XCHARSET_BYTE_OFFSET (ccs) != 33))
546 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
548 ccs_len = XCHARSET_CHARS (ccs);
551 signal_simple_error ("Invalid value for coded-charset",
555 rest = Fget_char_attribute (character, attribute);
562 Lisp_Object ei = Fcar (rest);
564 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
565 nv = XVECTOR_DATA(v)[i];
572 XVECTOR_DATA(v)[i] = Qnil;
573 v = XCHARSET_DECODING_TABLE (ccs);
578 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
581 if (XCHARSET_GRAPHIC (ccs) == 1)
582 value = Fcopy_list (value);
587 Lisp_Object ei = Fcar (rest);
590 signal_simple_error ("Invalid value for coded-charset", value);
592 if ((i < 0) || (255 < i))
593 signal_simple_error ("Invalid value for coded-charset", value);
594 if (XCHARSET_GRAPHIC (ccs) == 1)
597 Fsetcar (rest, make_int (i));
599 i -= XCHARSET_BYTE_OFFSET (ccs);
600 nv = XVECTOR_DATA(v)[i];
606 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
613 XVECTOR_DATA(v)[i] = character;
615 else if (EQ (attribute, Q_decomposition))
617 Lisp_Object rest = value;
618 Lisp_Object table = Vcharacter_composition_table;
621 signal_simple_error ("Invalid value for ->decomposition",
626 Lisp_Object v = Fcar (rest);
629 = to_char_code (v, "Invalid value for ->decomposition", value);
634 put_char_code_table (c, character, table);
639 ntable = get_char_code_table (c, table);
640 if (!CHAR_CODE_TABLE_P (ntable))
642 ntable = make_char_code_table (Qnil);
643 put_char_code_table (c, ntable, table);
649 else if (EQ (attribute, Q_ucs))
655 signal_simple_error ("Invalid value for ->ucs", value);
659 ret = get_char_code_table (c, Vcharacter_variant_table);
660 if (NILP (Fmemq (character, ret)))
662 put_char_code_table (c, Fcons (character, ret),
663 Vcharacter_variant_table);
666 return put_char_attribute (character, attribute, value);
671 EXFUN (Fmake_char, 3);
673 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
674 Store character's ATTRIBUTES.
678 Lisp_Object rest = attributes;
679 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
680 Lisp_Object character;
686 Lisp_Object cell = Fcar (rest);
690 signal_simple_error ("Invalid argument", attributes);
691 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
694 character = Fmake_char (ccs, Fcar (cell),
696 goto setup_attributes;
702 else if (!INTP (code))
703 signal_simple_error ("Invalid argument", attributes);
705 character = make_char (XINT (code));
711 Lisp_Object cell = Fcar (rest);
714 signal_simple_error ("Invalid argument", attributes);
715 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
719 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
722 Lisp_Object Vutf_2000_version;
726 int leading_code_private_11;
729 Lisp_Object Qcharsetp;
731 /* Qdoc_string, Qdimension, Qchars defined in general.c */
732 Lisp_Object Qregistry, Qfinal, Qgraphic;
733 Lisp_Object Qdirection;
734 Lisp_Object Qreverse_direction_charset;
735 Lisp_Object Qleading_byte;
736 Lisp_Object Qshort_name, Qlong_name;
752 Qjapanese_jisx0208_1978,
764 Qvietnamese_viscii_lower,
765 Qvietnamese_viscii_upper,
773 Lisp_Object Ql2r, Qr2l;
775 Lisp_Object Vcharset_hash_table;
778 static Charset_ID next_allocated_leading_byte;
780 static Charset_ID next_allocated_1_byte_leading_byte;
781 static Charset_ID next_allocated_2_byte_leading_byte;
784 /* Composite characters are characters constructed by overstriking two
785 or more regular characters.
787 1) The old Mule implementation involves storing composite characters
788 in a buffer as a tag followed by all of the actual characters
789 used to make up the composite character. I think this is a bad
790 idea; it greatly complicates code that wants to handle strings
791 one character at a time because it has to deal with the possibility
792 of great big ungainly characters. It's much more reasonable to
793 simply store an index into a table of composite characters.
795 2) The current implementation only allows for 16,384 separate
796 composite characters over the lifetime of the XEmacs process.
797 This could become a potential problem if the user
798 edited lots of different files that use composite characters.
799 Due to FSF bogosity, increasing the number of allowable
800 composite characters under Mule would decrease the number
801 of possible faces that can exist. Mule already has shrunk
802 this to 2048, and further shrinkage would become uncomfortable.
803 No such problems exist in XEmacs.
805 Composite characters could be represented as 0x80 C1 C2 C3,
806 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
807 for slightly under 2^20 (one million) composite characters
808 over the XEmacs process lifetime, and you only need to
809 increase the size of a Mule character from 19 to 21 bits.
810 Or you could use 0x80 C1 C2 C3 C4, allowing for about
811 85 million (slightly over 2^26) composite characters. */
814 /************************************************************************/
815 /* Basic Emchar functions */
816 /************************************************************************/
818 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
819 string in STR. Returns the number of bytes stored.
820 Do not call this directly. Use the macro set_charptr_emchar() instead.
824 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
839 else if ( c <= 0x7ff )
841 *p++ = (c >> 6) | 0xc0;
842 *p++ = (c & 0x3f) | 0x80;
844 else if ( c <= 0xffff )
846 *p++ = (c >> 12) | 0xe0;
847 *p++ = ((c >> 6) & 0x3f) | 0x80;
848 *p++ = (c & 0x3f) | 0x80;
850 else if ( c <= 0x1fffff )
852 *p++ = (c >> 18) | 0xf0;
853 *p++ = ((c >> 12) & 0x3f) | 0x80;
854 *p++ = ((c >> 6) & 0x3f) | 0x80;
855 *p++ = (c & 0x3f) | 0x80;
857 else if ( c <= 0x3ffffff )
859 *p++ = (c >> 24) | 0xf8;
860 *p++ = ((c >> 18) & 0x3f) | 0x80;
861 *p++ = ((c >> 12) & 0x3f) | 0x80;
862 *p++ = ((c >> 6) & 0x3f) | 0x80;
863 *p++ = (c & 0x3f) | 0x80;
867 *p++ = (c >> 30) | 0xfc;
868 *p++ = ((c >> 24) & 0x3f) | 0x80;
869 *p++ = ((c >> 18) & 0x3f) | 0x80;
870 *p++ = ((c >> 12) & 0x3f) | 0x80;
871 *p++ = ((c >> 6) & 0x3f) | 0x80;
872 *p++ = (c & 0x3f) | 0x80;
875 BREAKUP_CHAR (c, charset, c1, c2);
876 lb = CHAR_LEADING_BYTE (c);
877 if (LEADING_BYTE_PRIVATE_P (lb))
878 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
880 if (EQ (charset, Vcharset_control_1))
889 /* Return the first character from a Mule-encoded string in STR,
890 assuming it's non-ASCII. Do not call this directly.
891 Use the macro charptr_emchar() instead. */
894 non_ascii_charptr_emchar (CONST Bufbyte *str)
907 else if ( b >= 0xf8 )
912 else if ( b >= 0xf0 )
917 else if ( b >= 0xe0 )
922 else if ( b >= 0xc0 )
932 for( ; len > 0; len-- )
935 ch = ( ch << 6 ) | ( b & 0x3f );
939 Bufbyte i0 = *str, i1, i2 = 0;
942 if (i0 == LEADING_BYTE_CONTROL_1)
943 return (Emchar) (*++str - 0x20);
945 if (LEADING_BYTE_PREFIX_P (i0))
950 charset = CHARSET_BY_LEADING_BYTE (i0);
951 if (XCHARSET_DIMENSION (charset) == 2)
954 return MAKE_CHAR (charset, i1, i2);
958 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
959 Do not call this directly. Use the macro valid_char_p() instead. */
963 non_ascii_valid_char_p (Emchar ch)
967 /* Must have only lowest 19 bits set */
971 f1 = CHAR_FIELD1 (ch);
972 f2 = CHAR_FIELD2 (ch);
973 f3 = CHAR_FIELD3 (ch);
979 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
980 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
981 f2 > MAX_CHAR_FIELD2_PRIVATE)
986 if (f3 != 0x20 && f3 != 0x7F)
990 NOTE: This takes advantage of the fact that
991 FIELD2_TO_OFFICIAL_LEADING_BYTE and
992 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
994 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
995 return (XCHARSET_CHARS (charset) == 96);
1001 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1002 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1003 f1 > MAX_CHAR_FIELD1_PRIVATE)
1005 if (f2 < 0x20 || f3 < 0x20)
1008 #ifdef ENABLE_COMPOSITE_CHARS
1009 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1011 if (UNBOUNDP (Fgethash (make_int (ch),
1012 Vcomposite_char_char2string_hash_table,
1017 #endif /* ENABLE_COMPOSITE_CHARS */
1019 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1022 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1024 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1027 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1029 return (XCHARSET_CHARS (charset) == 96);
1035 /************************************************************************/
1036 /* Basic string functions */
1037 /************************************************************************/
1039 /* Copy the character pointed to by PTR into STR, assuming it's
1040 non-ASCII. Do not call this directly. Use the macro
1041 charptr_copy_char() instead. */
1044 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1046 Bufbyte *strptr = str;
1048 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1050 /* Notice fallthrough. */
1052 case 6: *++strptr = *ptr++;
1053 case 5: *++strptr = *ptr++;
1055 case 4: *++strptr = *ptr++;
1056 case 3: *++strptr = *ptr++;
1057 case 2: *++strptr = *ptr;
1062 return strptr + 1 - str;
1066 /************************************************************************/
1067 /* streams of Emchars */
1068 /************************************************************************/
1070 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1071 The functions below are not meant to be called directly; use
1072 the macros in insdel.h. */
1075 Lstream_get_emchar_1 (Lstream *stream, int ch)
1077 Bufbyte str[MAX_EMCHAR_LEN];
1078 Bufbyte *strptr = str;
1080 str[0] = (Bufbyte) ch;
1081 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1083 /* Notice fallthrough. */
1086 ch = Lstream_getc (stream);
1088 *++strptr = (Bufbyte) ch;
1090 ch = Lstream_getc (stream);
1092 *++strptr = (Bufbyte) ch;
1095 ch = Lstream_getc (stream);
1097 *++strptr = (Bufbyte) ch;
1099 ch = Lstream_getc (stream);
1101 *++strptr = (Bufbyte) ch;
1103 ch = Lstream_getc (stream);
1105 *++strptr = (Bufbyte) ch;
1110 return charptr_emchar (str);
1114 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1116 Bufbyte str[MAX_EMCHAR_LEN];
1117 Bytecount len = set_charptr_emchar (str, ch);
1118 return Lstream_write (stream, str, len);
1122 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1124 Bufbyte str[MAX_EMCHAR_LEN];
1125 Bytecount len = set_charptr_emchar (str, ch);
1126 Lstream_unread (stream, str, len);
1130 /************************************************************************/
1131 /* charset object */
1132 /************************************************************************/
1135 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1137 struct Lisp_Charset *cs = XCHARSET (obj);
1139 markobj (cs->short_name);
1140 markobj (cs->long_name);
1141 markobj (cs->doc_string);
1142 markobj (cs->registry);
1143 markobj (cs->ccl_program);
1145 markobj (cs->decoding_table);
1151 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1153 struct Lisp_Charset *cs = XCHARSET (obj);
1157 error ("printing unreadable object #<charset %s 0x%x>",
1158 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1161 write_c_string ("#<charset ", printcharfun);
1162 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1163 write_c_string (" ", printcharfun);
1164 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1165 write_c_string (" ", printcharfun);
1166 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1167 write_c_string (" ", printcharfun);
1168 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1169 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1170 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1171 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1172 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1174 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1175 CHARSET_COLUMNS (cs),
1176 CHARSET_GRAPHIC (cs),
1177 CHARSET_FINAL (cs));
1178 write_c_string (buf, printcharfun);
1179 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1180 sprintf (buf, " 0x%x>", cs->header.uid);
1181 write_c_string (buf, printcharfun);
1184 static const struct lrecord_description charset_description[] = {
1185 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1187 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1192 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1193 mark_charset, print_charset, 0, 0, 0,
1194 charset_description,
1195 struct Lisp_Charset);
1197 /* Make a new charset. */
1200 make_charset (Charset_ID id, Lisp_Object name,
1201 unsigned char type, unsigned char columns, unsigned char graphic,
1202 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1203 Lisp_Object long_name, Lisp_Object doc,
1205 Lisp_Object decoding_table,
1206 Emchar ucs_min, Emchar ucs_max,
1207 Emchar code_offset, unsigned char byte_offset)
1210 struct Lisp_Charset *cs =
1211 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1212 XSETCHARSET (obj, cs);
1214 CHARSET_ID (cs) = id;
1215 CHARSET_NAME (cs) = name;
1216 CHARSET_SHORT_NAME (cs) = short_name;
1217 CHARSET_LONG_NAME (cs) = long_name;
1218 CHARSET_DIRECTION (cs) = direction;
1219 CHARSET_TYPE (cs) = type;
1220 CHARSET_COLUMNS (cs) = columns;
1221 CHARSET_GRAPHIC (cs) = graphic;
1222 CHARSET_FINAL (cs) = final;
1223 CHARSET_DOC_STRING (cs) = doc;
1224 CHARSET_REGISTRY (cs) = reg;
1225 CHARSET_CCL_PROGRAM (cs) = Qnil;
1226 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1228 CHARSET_DECODING_TABLE(cs) = Qnil;
1229 CHARSET_UCS_MIN(cs) = ucs_min;
1230 CHARSET_UCS_MAX(cs) = ucs_max;
1231 CHARSET_CODE_OFFSET(cs) = code_offset;
1232 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1235 switch (CHARSET_TYPE (cs))
1237 case CHARSET_TYPE_94:
1238 CHARSET_DIMENSION (cs) = 1;
1239 CHARSET_CHARS (cs) = 94;
1241 case CHARSET_TYPE_96:
1242 CHARSET_DIMENSION (cs) = 1;
1243 CHARSET_CHARS (cs) = 96;
1245 case CHARSET_TYPE_94X94:
1246 CHARSET_DIMENSION (cs) = 2;
1247 CHARSET_CHARS (cs) = 94;
1249 case CHARSET_TYPE_96X96:
1250 CHARSET_DIMENSION (cs) = 2;
1251 CHARSET_CHARS (cs) = 96;
1254 case CHARSET_TYPE_128:
1255 CHARSET_DIMENSION (cs) = 1;
1256 CHARSET_CHARS (cs) = 128;
1258 case CHARSET_TYPE_128X128:
1259 CHARSET_DIMENSION (cs) = 2;
1260 CHARSET_CHARS (cs) = 128;
1262 case CHARSET_TYPE_256:
1263 CHARSET_DIMENSION (cs) = 1;
1264 CHARSET_CHARS (cs) = 256;
1266 case CHARSET_TYPE_256X256:
1267 CHARSET_DIMENSION (cs) = 2;
1268 CHARSET_CHARS (cs) = 256;
1274 if (id == LEADING_BYTE_ASCII)
1275 CHARSET_REP_BYTES (cs) = 1;
1277 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1279 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1284 /* some charsets do not have final characters. This includes
1285 ASCII, Control-1, Composite, and the two faux private
1288 if (code_offset == 0)
1290 assert (NILP (charset_by_attributes[type][final]));
1291 charset_by_attributes[type][final] = obj;
1294 assert (NILP (charset_by_attributes[type][final][direction]));
1295 charset_by_attributes[type][final][direction] = obj;
1299 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1300 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1303 /* official leading byte */
1304 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1307 /* Some charsets are "faux" and don't have names or really exist at
1308 all except in the leading-byte table. */
1310 Fputhash (name, obj, Vcharset_hash_table);
1315 get_unallocated_leading_byte (int dimension)
1320 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1323 lb = next_allocated_leading_byte++;
1327 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1330 lb = next_allocated_1_byte_leading_byte++;
1334 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1337 lb = next_allocated_2_byte_leading_byte++;
1343 ("No more character sets free for this dimension",
1344 make_int (dimension));
1351 range_charset_code_point (Lisp_Object charset, Emchar ch)
1355 if ((XCHARSET_UCS_MIN (charset) <= ch)
1356 && (ch <= XCHARSET_UCS_MAX (charset)))
1358 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1360 if (XCHARSET_DIMENSION (charset) == 1)
1361 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1362 else if (XCHARSET_DIMENSION (charset) == 2)
1363 return list2 (make_int (d / XCHARSET_CHARS (charset)
1364 + XCHARSET_BYTE_OFFSET (charset)),
1365 make_int (d % XCHARSET_CHARS (charset)
1366 + XCHARSET_BYTE_OFFSET (charset)));
1367 else if (XCHARSET_DIMENSION (charset) == 3)
1368 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1369 * XCHARSET_CHARS (charset))
1370 + XCHARSET_BYTE_OFFSET (charset)),
1371 make_int (d / XCHARSET_CHARS (charset)
1372 % XCHARSET_CHARS (charset)
1373 + XCHARSET_BYTE_OFFSET (charset)),
1374 make_int (d % XCHARSET_CHARS (charset)
1375 + XCHARSET_BYTE_OFFSET (charset)));
1376 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1377 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1378 * XCHARSET_CHARS (charset)
1379 * XCHARSET_CHARS (charset))
1380 + XCHARSET_BYTE_OFFSET (charset)),
1381 make_int (d / (XCHARSET_CHARS (charset)
1382 * XCHARSET_CHARS (charset))
1383 % XCHARSET_CHARS (charset)
1384 + XCHARSET_BYTE_OFFSET (charset)),
1385 make_int (d / XCHARSET_CHARS (charset)
1386 % XCHARSET_CHARS (charset)
1387 + XCHARSET_BYTE_OFFSET (charset)),
1388 make_int (d % XCHARSET_CHARS (charset)
1389 + XCHARSET_BYTE_OFFSET (charset)));
1391 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1393 if (XCHARSET_DIMENSION (charset) == 1)
1395 if (XCHARSET_CHARS (charset) == 94)
1397 if (((d = ch - (MIN_CHAR_94
1398 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1400 return list1 (make_int (d + 33));
1402 else if (XCHARSET_CHARS (charset) == 96)
1404 if (((d = ch - (MIN_CHAR_96
1405 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1407 return list1 (make_int (d + 32));
1412 else if (XCHARSET_DIMENSION (charset) == 2)
1414 if (XCHARSET_CHARS (charset) == 94)
1416 if (((d = ch - (MIN_CHAR_94x94
1417 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1420 return list2 (make_int ((d / 94) + 33),
1421 make_int (d % 94 + 33));
1423 else if (XCHARSET_CHARS (charset) == 96)
1425 if (((d = ch - (MIN_CHAR_96x96
1426 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1429 return list2 (make_int ((d / 96) + 32),
1430 make_int (d % 96 + 32));
1438 split_builtin_char (Emchar c)
1440 if (c < MIN_CHAR_OBS_94x94)
1442 if (c <= MAX_CHAR_BASIC_LATIN)
1444 return list2 (Vcharset_ascii, make_int (c));
1448 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1452 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1454 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1456 return list2 (Vcharset_greek_iso8859_7,
1457 make_int (c - MIN_CHAR_GREEK + 0x20));
1459 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1461 return list2 (Vcharset_cyrillic_iso8859_5,
1462 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1464 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1466 return list2 (Vcharset_hebrew_iso8859_8,
1467 make_int (c - MIN_CHAR_HEBREW + 0x20));
1469 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1471 return list2 (Vcharset_thai_tis620,
1472 make_int (c - MIN_CHAR_THAI + 0x20));
1474 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1475 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1477 return list2 (Vcharset_katakana_jisx0201,
1478 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1482 return list3 (Vcharset_ucs_bmp,
1483 make_int (c >> 8), make_int (c & 0xff));
1486 else if (c <= MAX_CHAR_OBS_94x94)
1488 return list3 (CHARSET_BY_ATTRIBUTES
1489 (CHARSET_TYPE_94X94,
1490 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1491 CHARSET_LEFT_TO_RIGHT),
1492 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1493 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1495 else if (c <= MAX_CHAR_DAIKANWA)
1497 return list3 (Vcharset_ideograph_daikanwa,
1498 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1499 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
1501 else if (c <= MAX_CHAR_94)
1503 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1504 ((c - MIN_CHAR_94) / 94) + '0',
1505 CHARSET_LEFT_TO_RIGHT),
1506 make_int (((c - MIN_CHAR_94) % 94) + 33));
1508 else if (c <= MAX_CHAR_96)
1510 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1511 ((c - MIN_CHAR_96) / 96) + '0',
1512 CHARSET_LEFT_TO_RIGHT),
1513 make_int (((c - MIN_CHAR_96) % 96) + 32));
1515 else if (c <= MAX_CHAR_94x94)
1517 return list3 (CHARSET_BY_ATTRIBUTES
1518 (CHARSET_TYPE_94X94,
1519 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1520 CHARSET_LEFT_TO_RIGHT),
1521 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1522 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1524 else if (c <= MAX_CHAR_96x96)
1526 return list3 (CHARSET_BY_ATTRIBUTES
1527 (CHARSET_TYPE_96X96,
1528 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1529 CHARSET_LEFT_TO_RIGHT),
1530 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1531 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1540 charset_code_point (Lisp_Object charset, Emchar ch)
1542 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1544 if (!EQ (cdef, Qnil))
1546 Lisp_Object field = Fassq (charset, cdef);
1548 if (!EQ (field, Qnil))
1549 return Fcdr (field);
1551 return range_charset_code_point (charset, ch);
1554 Lisp_Object Vdefault_coded_charset_priority_list;
1558 /************************************************************************/
1559 /* Basic charset Lisp functions */
1560 /************************************************************************/
1562 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1563 Return non-nil if OBJECT is a charset.
1567 return CHARSETP (object) ? Qt : Qnil;
1570 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1571 Retrieve the charset of the given name.
1572 If CHARSET-OR-NAME is a charset object, it is simply returned.
1573 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1574 nil is returned. Otherwise the associated charset object is returned.
1578 if (CHARSETP (charset_or_name))
1579 return charset_or_name;
1581 CHECK_SYMBOL (charset_or_name);
1582 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1585 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1586 Retrieve the charset of the given name.
1587 Same as `find-charset' except an error is signalled if there is no such
1588 charset instead of returning nil.
1592 Lisp_Object charset = Ffind_charset (name);
1595 signal_simple_error ("No such charset", name);
1599 /* We store the charsets in hash tables with the names as the key and the
1600 actual charset object as the value. Occasionally we need to use them
1601 in a list format. These routines provide us with that. */
1602 struct charset_list_closure
1604 Lisp_Object *charset_list;
1608 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1609 void *charset_list_closure)
1611 /* This function can GC */
1612 struct charset_list_closure *chcl =
1613 (struct charset_list_closure*) charset_list_closure;
1614 Lisp_Object *charset_list = chcl->charset_list;
1616 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1620 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1621 Return a list of the names of all defined charsets.
1625 Lisp_Object charset_list = Qnil;
1626 struct gcpro gcpro1;
1627 struct charset_list_closure charset_list_closure;
1629 GCPRO1 (charset_list);
1630 charset_list_closure.charset_list = &charset_list;
1631 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1632 &charset_list_closure);
1635 return charset_list;
1638 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1639 Return the name of the given charset.
1643 return XCHARSET_NAME (Fget_charset (charset));
1646 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1647 Define a new character set.
1648 This function is for use with Mule support.
1649 NAME is a symbol, the name by which the character set is normally referred.
1650 DOC-STRING is a string describing the character set.
1651 PROPS is a property list, describing the specific nature of the
1652 character set. Recognized properties are:
1654 'short-name Short version of the charset name (ex: Latin-1)
1655 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1656 'registry A regular expression matching the font registry field for
1658 'dimension Number of octets used to index a character in this charset.
1659 Either 1 or 2. Defaults to 1.
1660 'columns Number of columns used to display a character in this charset.
1661 Only used in TTY mode. (Under X, the actual width of a
1662 character can be derived from the font used to display the
1663 characters.) If unspecified, defaults to the dimension
1664 (this is almost always the correct value).
1665 'chars Number of characters in each dimension (94 or 96).
1666 Defaults to 94. Note that if the dimension is 2, the
1667 character set thus described is 94x94 or 96x96.
1668 'final Final byte of ISO 2022 escape sequence. Must be
1669 supplied. Each combination of (DIMENSION, CHARS) defines a
1670 separate namespace for final bytes. Note that ISO
1671 2022 restricts the final byte to the range
1672 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1673 dimension == 2. Note also that final bytes in the range
1674 0x30 - 0x3F are reserved for user-defined (not official)
1676 'graphic 0 (use left half of font on output) or 1 (use right half
1677 of font on output). Defaults to 0. For example, for
1678 a font whose registry is ISO8859-1, the left half
1679 (octets 0x20 - 0x7F) is the `ascii' character set, while
1680 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1681 character set. With 'graphic set to 0, the octets
1682 will have their high bit cleared; with it set to 1,
1683 the octets will have their high bit set.
1684 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1686 'ccl-program A compiled CCL program used to convert a character in
1687 this charset into an index into the font. This is in
1688 addition to the 'graphic property. The CCL program
1689 is passed the octets of the character, with the high
1690 bit cleared and set depending upon whether the value
1691 of the 'graphic property is 0 or 1.
1693 (name, doc_string, props))
1695 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1696 int direction = CHARSET_LEFT_TO_RIGHT;
1698 Lisp_Object registry = Qnil;
1699 Lisp_Object charset;
1700 Lisp_Object rest, keyword, value;
1701 Lisp_Object ccl_program = Qnil;
1702 Lisp_Object short_name = Qnil, long_name = Qnil;
1703 int byte_offset = -1;
1705 CHECK_SYMBOL (name);
1706 if (!NILP (doc_string))
1707 CHECK_STRING (doc_string);
1709 charset = Ffind_charset (name);
1710 if (!NILP (charset))
1711 signal_simple_error ("Cannot redefine existing charset", name);
1713 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1715 if (EQ (keyword, Qshort_name))
1717 CHECK_STRING (value);
1721 if (EQ (keyword, Qlong_name))
1723 CHECK_STRING (value);
1727 else if (EQ (keyword, Qdimension))
1730 dimension = XINT (value);
1731 if (dimension < 1 || dimension > 2)
1732 signal_simple_error ("Invalid value for 'dimension", value);
1735 else if (EQ (keyword, Qchars))
1738 chars = XINT (value);
1739 if (chars != 94 && chars != 96)
1740 signal_simple_error ("Invalid value for 'chars", value);
1743 else if (EQ (keyword, Qcolumns))
1746 columns = XINT (value);
1747 if (columns != 1 && columns != 2)
1748 signal_simple_error ("Invalid value for 'columns", value);
1751 else if (EQ (keyword, Qgraphic))
1754 graphic = XINT (value);
1756 if (graphic < 0 || graphic > 2)
1758 if (graphic < 0 || graphic > 1)
1760 signal_simple_error ("Invalid value for 'graphic", value);
1763 else if (EQ (keyword, Qregistry))
1765 CHECK_STRING (value);
1769 else if (EQ (keyword, Qdirection))
1771 if (EQ (value, Ql2r))
1772 direction = CHARSET_LEFT_TO_RIGHT;
1773 else if (EQ (value, Qr2l))
1774 direction = CHARSET_RIGHT_TO_LEFT;
1776 signal_simple_error ("Invalid value for 'direction", value);
1779 else if (EQ (keyword, Qfinal))
1781 CHECK_CHAR_COERCE_INT (value);
1782 final = XCHAR (value);
1783 if (final < '0' || final > '~')
1784 signal_simple_error ("Invalid value for 'final", value);
1787 else if (EQ (keyword, Qccl_program))
1789 CHECK_VECTOR (value);
1790 ccl_program = value;
1794 signal_simple_error ("Unrecognized property", keyword);
1798 error ("'final must be specified");
1799 if (dimension == 2 && final > 0x5F)
1801 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1805 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1807 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1809 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1810 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1812 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1814 id = get_unallocated_leading_byte (dimension);
1816 if (NILP (doc_string))
1817 doc_string = build_string ("");
1819 if (NILP (registry))
1820 registry = build_string ("");
1822 if (NILP (short_name))
1823 XSETSTRING (short_name, XSYMBOL (name)->name);
1825 if (NILP (long_name))
1826 long_name = doc_string;
1829 columns = dimension;
1831 if (byte_offset < 0)
1835 else if (chars == 96)
1841 charset = make_charset (id, name, type, columns, graphic,
1842 final, direction, short_name, long_name,
1843 doc_string, registry,
1844 Qnil, 0, 0, 0, byte_offset);
1845 if (!NILP (ccl_program))
1846 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1850 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1852 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1853 NEW-NAME is the name of the new charset. Return the new charset.
1855 (charset, new_name))
1857 Lisp_Object new_charset = Qnil;
1858 int id, dimension, columns, graphic, final;
1859 int direction, type;
1860 Lisp_Object registry, doc_string, short_name, long_name;
1861 struct Lisp_Charset *cs;
1863 charset = Fget_charset (charset);
1864 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1865 signal_simple_error ("Charset already has reverse-direction charset",
1868 CHECK_SYMBOL (new_name);
1869 if (!NILP (Ffind_charset (new_name)))
1870 signal_simple_error ("Cannot redefine existing charset", new_name);
1872 cs = XCHARSET (charset);
1874 type = CHARSET_TYPE (cs);
1875 columns = CHARSET_COLUMNS (cs);
1876 dimension = CHARSET_DIMENSION (cs);
1877 id = get_unallocated_leading_byte (dimension);
1879 graphic = CHARSET_GRAPHIC (cs);
1880 final = CHARSET_FINAL (cs);
1881 direction = CHARSET_RIGHT_TO_LEFT;
1882 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1883 direction = CHARSET_LEFT_TO_RIGHT;
1884 doc_string = CHARSET_DOC_STRING (cs);
1885 short_name = CHARSET_SHORT_NAME (cs);
1886 long_name = CHARSET_LONG_NAME (cs);
1887 registry = CHARSET_REGISTRY (cs);
1889 new_charset = make_charset (id, new_name, type, columns,
1890 graphic, final, direction, short_name, long_name,
1891 doc_string, registry,
1893 CHARSET_DECODING_TABLE(cs),
1894 CHARSET_UCS_MIN(cs),
1895 CHARSET_UCS_MAX(cs),
1896 CHARSET_CODE_OFFSET(cs),
1897 CHARSET_BYTE_OFFSET(cs)
1903 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1904 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1909 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1910 Define symbol ALIAS as an alias for CHARSET.
1914 CHECK_SYMBOL (alias);
1915 charset = Fget_charset (charset);
1916 return Fputhash (alias, charset, Vcharset_hash_table);
1919 /* #### Reverse direction charsets not yet implemented. */
1921 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1923 Return the reverse-direction charset parallel to CHARSET, if any.
1924 This is the charset with the same properties (in particular, the same
1925 dimension, number of characters per dimension, and final byte) as
1926 CHARSET but whose characters are displayed in the opposite direction.
1930 charset = Fget_charset (charset);
1931 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1935 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1936 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1937 If DIRECTION is omitted, both directions will be checked (left-to-right
1938 will be returned if character sets exist for both directions).
1940 (dimension, chars, final, direction))
1942 int dm, ch, fi, di = -1;
1944 Lisp_Object obj = Qnil;
1946 CHECK_INT (dimension);
1947 dm = XINT (dimension);
1948 if (dm < 1 || dm > 2)
1949 signal_simple_error ("Invalid value for DIMENSION", dimension);
1953 if (ch != 94 && ch != 96)
1954 signal_simple_error ("Invalid value for CHARS", chars);
1956 CHECK_CHAR_COERCE_INT (final);
1958 if (fi < '0' || fi > '~')
1959 signal_simple_error ("Invalid value for FINAL", final);
1961 if (EQ (direction, Ql2r))
1962 di = CHARSET_LEFT_TO_RIGHT;
1963 else if (EQ (direction, Qr2l))
1964 di = CHARSET_RIGHT_TO_LEFT;
1965 else if (!NILP (direction))
1966 signal_simple_error ("Invalid value for DIRECTION", direction);
1968 if (dm == 2 && fi > 0x5F)
1970 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1973 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1975 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1979 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1981 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1984 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1987 return XCHARSET_NAME (obj);
1991 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1992 Return short name of CHARSET.
1996 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1999 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2000 Return long name of CHARSET.
2004 return XCHARSET_LONG_NAME (Fget_charset (charset));
2007 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2008 Return description of CHARSET.
2012 return XCHARSET_DOC_STRING (Fget_charset (charset));
2015 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2016 Return dimension of CHARSET.
2020 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2023 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2024 Return property PROP of CHARSET.
2025 Recognized properties are those listed in `make-charset', as well as
2026 'name and 'doc-string.
2030 struct Lisp_Charset *cs;
2032 charset = Fget_charset (charset);
2033 cs = XCHARSET (charset);
2035 CHECK_SYMBOL (prop);
2036 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2037 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2038 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2039 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2040 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2041 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2042 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2043 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2044 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2045 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2046 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2047 if (EQ (prop, Qdirection))
2048 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2049 if (EQ (prop, Qreverse_direction_charset))
2051 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2055 return XCHARSET_NAME (obj);
2057 signal_simple_error ("Unrecognized charset property name", prop);
2058 return Qnil; /* not reached */
2061 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2062 Return charset identification number of CHARSET.
2066 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2069 /* #### We need to figure out which properties we really want to
2072 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2073 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2075 (charset, ccl_program))
2077 charset = Fget_charset (charset);
2078 CHECK_VECTOR (ccl_program);
2079 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2084 invalidate_charset_font_caches (Lisp_Object charset)
2086 /* Invalidate font cache entries for charset on all devices. */
2087 Lisp_Object devcons, concons, hash_table;
2088 DEVICE_LOOP_NO_BREAK (devcons, concons)
2090 struct device *d = XDEVICE (XCAR (devcons));
2091 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2092 if (!UNBOUNDP (hash_table))
2093 Fclrhash (hash_table);
2097 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2098 Set the 'registry property of CHARSET to REGISTRY.
2100 (charset, registry))
2102 charset = Fget_charset (charset);
2103 CHECK_STRING (registry);
2104 XCHARSET_REGISTRY (charset) = registry;
2105 invalidate_charset_font_caches (charset);
2106 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2111 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2112 Return mapping-table of CHARSET.
2116 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2119 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2120 Set mapping-table of CHARSET to TABLE.
2124 struct Lisp_Charset *cs;
2125 Lisp_Object old_table;
2128 charset = Fget_charset (charset);
2129 cs = XCHARSET (charset);
2131 if (EQ (table, Qnil))
2133 CHARSET_DECODING_TABLE(cs) = table;
2136 else if (VECTORP (table))
2140 /* ad-hoc method for `ascii' */
2141 if ((CHARSET_CHARS (cs) == 94) &&
2142 (CHARSET_BYTE_OFFSET (cs) != 33))
2143 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2145 ccs_len = CHARSET_CHARS (cs);
2147 if (XVECTOR_LENGTH (table) > ccs_len)
2148 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2149 old_table = CHARSET_DECODING_TABLE(cs);
2150 CHARSET_DECODING_TABLE(cs) = table;
2153 signal_error (Qwrong_type_argument,
2154 list2 (build_translated_string ("vector-or-nil-p"),
2156 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2158 switch (CHARSET_DIMENSION (cs))
2161 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2163 Lisp_Object c = XVECTOR_DATA(table)[i];
2168 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2172 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2174 Lisp_Object v = XVECTOR_DATA(table)[i];
2180 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2182 CHARSET_DECODING_TABLE(cs) = old_table;
2183 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2185 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2187 Lisp_Object c = XVECTOR_DATA(v)[j];
2190 put_char_attribute (c, charset,
2193 (i + CHARSET_BYTE_OFFSET (cs)),
2195 (j + CHARSET_BYTE_OFFSET (cs))));
2199 put_char_attribute (v, charset,
2201 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2210 /************************************************************************/
2211 /* Lisp primitives for working with characters */
2212 /************************************************************************/
2214 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2215 Make a character from CHARSET and octets ARG1 and ARG2.
2216 ARG2 is required only for characters from two-dimensional charsets.
2217 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2218 character s with caron.
2220 (charset, arg1, arg2))
2222 struct Lisp_Charset *cs;
2224 int lowlim, highlim;
2226 charset = Fget_charset (charset);
2227 cs = XCHARSET (charset);
2229 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2230 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2232 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2234 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2235 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2238 /* It is useful (and safe, according to Olivier Galibert) to strip
2239 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2240 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2241 Latin 2 code of the character. */
2249 if (a1 < lowlim || a1 > highlim)
2250 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2252 if (CHARSET_DIMENSION (cs) == 1)
2256 ("Charset is of dimension one; second octet must be nil", arg2);
2257 return make_char (MAKE_CHAR (charset, a1, 0));
2266 a2 = XINT (arg2) & 0x7f;
2268 if (a2 < lowlim || a2 > highlim)
2269 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2271 return make_char (MAKE_CHAR (charset, a1, a2));
2274 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2275 Return the character set of char CH.
2279 CHECK_CHAR_COERCE_INT (ch);
2281 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2284 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2285 Return list of charset and one or two position-codes of CHAR.
2291 Lisp_Object charset;
2293 CHECK_CHAR_COERCE_INT (character);
2294 ret = SPLIT_CHAR (XCHAR (character));
2295 charset = Fcar (ret);
2296 if (CHARSETP (charset))
2297 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2301 /* This function can GC */
2302 struct gcpro gcpro1, gcpro2;
2303 Lisp_Object charset = Qnil;
2304 Lisp_Object rc = Qnil;
2307 GCPRO2 (charset, rc);
2308 CHECK_CHAR_COERCE_INT (character);
2310 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2312 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2314 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2318 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2326 #ifdef ENABLE_COMPOSITE_CHARS
2327 /************************************************************************/
2328 /* composite character functions */
2329 /************************************************************************/
2332 lookup_composite_char (Bufbyte *str, int len)
2334 Lisp_Object lispstr = make_string (str, len);
2335 Lisp_Object ch = Fgethash (lispstr,
2336 Vcomposite_char_string2char_hash_table,
2342 if (composite_char_row_next >= 128)
2343 signal_simple_error ("No more composite chars available", lispstr);
2344 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2345 composite_char_col_next);
2346 Fputhash (make_char (emch), lispstr,
2347 Vcomposite_char_char2string_hash_table);
2348 Fputhash (lispstr, make_char (emch),
2349 Vcomposite_char_string2char_hash_table);
2350 composite_char_col_next++;
2351 if (composite_char_col_next >= 128)
2353 composite_char_col_next = 32;
2354 composite_char_row_next++;
2363 composite_char_string (Emchar ch)
2365 Lisp_Object str = Fgethash (make_char (ch),
2366 Vcomposite_char_char2string_hash_table,
2368 assert (!UNBOUNDP (str));
2372 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2373 Convert a string into a single composite character.
2374 The character is the result of overstriking all the characters in
2379 CHECK_STRING (string);
2380 return make_char (lookup_composite_char (XSTRING_DATA (string),
2381 XSTRING_LENGTH (string)));
2384 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2385 Return a string of the characters comprising a composite character.
2393 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2394 signal_simple_error ("Must be composite char", ch);
2395 return composite_char_string (emch);
2397 #endif /* ENABLE_COMPOSITE_CHARS */
2400 /************************************************************************/
2401 /* initialization */
2402 /************************************************************************/
2405 syms_of_mule_charset (void)
2407 DEFSUBR (Fcharsetp);
2408 DEFSUBR (Ffind_charset);
2409 DEFSUBR (Fget_charset);
2410 DEFSUBR (Fcharset_list);
2411 DEFSUBR (Fcharset_name);
2412 DEFSUBR (Fmake_charset);
2413 DEFSUBR (Fmake_reverse_direction_charset);
2414 /* DEFSUBR (Freverse_direction_charset); */
2415 DEFSUBR (Fdefine_charset_alias);
2416 DEFSUBR (Fcharset_from_attributes);
2417 DEFSUBR (Fcharset_short_name);
2418 DEFSUBR (Fcharset_long_name);
2419 DEFSUBR (Fcharset_description);
2420 DEFSUBR (Fcharset_dimension);
2421 DEFSUBR (Fcharset_property);
2422 DEFSUBR (Fcharset_id);
2423 DEFSUBR (Fset_charset_ccl_program);
2424 DEFSUBR (Fset_charset_registry);
2426 DEFSUBR (Fchar_attribute_alist);
2427 DEFSUBR (Fget_char_attribute);
2428 DEFSUBR (Fput_char_attribute);
2429 DEFSUBR (Fdefine_char);
2430 DEFSUBR (Fchar_variants);
2431 DEFSUBR (Fget_composite_char);
2432 DEFSUBR (Fcharset_mapping_table);
2433 DEFSUBR (Fset_charset_mapping_table);
2436 DEFSUBR (Fmake_char);
2437 DEFSUBR (Fchar_charset);
2438 DEFSUBR (Fsplit_char);
2440 #ifdef ENABLE_COMPOSITE_CHARS
2441 DEFSUBR (Fmake_composite_char);
2442 DEFSUBR (Fcomposite_char_string);
2445 defsymbol (&Qcharsetp, "charsetp");
2446 defsymbol (&Qregistry, "registry");
2447 defsymbol (&Qfinal, "final");
2448 defsymbol (&Qgraphic, "graphic");
2449 defsymbol (&Qdirection, "direction");
2450 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2451 defsymbol (&Qshort_name, "short-name");
2452 defsymbol (&Qlong_name, "long-name");
2454 defsymbol (&Ql2r, "l2r");
2455 defsymbol (&Qr2l, "r2l");
2457 /* Charsets, compatible with FSF 20.3
2458 Naming convention is Script-Charset[-Edition] */
2459 defsymbol (&Qascii, "ascii");
2460 defsymbol (&Qcontrol_1, "control-1");
2461 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2462 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2463 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2464 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2465 defsymbol (&Qthai_tis620, "thai-tis620");
2466 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2467 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2468 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2469 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2470 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2471 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2472 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2473 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2474 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2475 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2476 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2477 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2478 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2479 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2481 defsymbol (&Q_ucs, "->ucs");
2482 defsymbol (&Q_decomposition, "->decomposition");
2483 defsymbol (&Qcompat, "compat");
2484 defsymbol (&QnoBreak, "noBreak");
2485 defsymbol (&Qfraction, "fraction");
2486 defsymbol (&Qsuper, "super");
2487 defsymbol (&Qsub, "sub");
2488 defsymbol (&Qcircle, "circle");
2489 defsymbol (&Qsquare, "square");
2490 defsymbol (&Qwide, "wide");
2491 defsymbol (&Qnarrow, "narrow");
2492 defsymbol (&Qfont, "font");
2493 defsymbol (&Qucs, "ucs");
2494 defsymbol (&Qucs_bmp, "ucs-bmp");
2495 defsymbol (&Qlatin_viscii, "latin-viscii");
2496 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2497 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2498 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2499 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2500 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2501 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2503 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2504 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2506 defsymbol (&Qcomposite, "composite");
2510 vars_of_mule_charset (void)
2517 /* Table of charsets indexed by leading byte. */
2518 for (i = 0; i < countof (charset_by_leading_byte); i++)
2519 charset_by_leading_byte[i] = Qnil;
2522 /* Table of charsets indexed by type/final-byte. */
2523 for (i = 0; i < countof (charset_by_attributes); i++)
2524 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2525 charset_by_attributes[i][j] = Qnil;
2527 /* Table of charsets indexed by type/final-byte/direction. */
2528 for (i = 0; i < countof (charset_by_attributes); i++)
2529 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2530 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2531 charset_by_attributes[i][j][k] = Qnil;
2535 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2537 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2538 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2542 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2543 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2544 Leading-code of private TYPE9N charset of column-width 1.
2546 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2550 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2551 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2552 Version number of UTF-2000.
2555 staticpro (&Vcharacter_attribute_table);
2556 Vcharacter_attribute_table = make_char_code_table (Qnil);
2558 staticpro (&Vcharacter_composition_table);
2559 Vcharacter_composition_table = make_char_code_table (Qnil);
2561 staticpro (&Vcharacter_variant_table);
2562 Vcharacter_variant_table = make_char_code_table (Qnil);
2564 Vdefault_coded_charset_priority_list = Qnil;
2565 DEFVAR_LISP ("default-coded-charset-priority-list",
2566 &Vdefault_coded_charset_priority_list /*
2567 Default order of preferred coded-character-sets.
2573 complex_vars_of_mule_charset (void)
2575 staticpro (&Vcharset_hash_table);
2576 Vcharset_hash_table =
2577 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2579 /* Predefined character sets. We store them into variables for
2584 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2585 CHARSET_TYPE_256X256, 1, 2, 0,
2586 CHARSET_LEFT_TO_RIGHT,
2587 build_string ("BMP"),
2588 build_string ("BMP"),
2589 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2590 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2591 Qnil, 0, 0xFFFF, 0, 0);
2593 # define MIN_CHAR_THAI 0
2594 # define MAX_CHAR_THAI 0
2595 # define MIN_CHAR_GREEK 0
2596 # define MAX_CHAR_GREEK 0
2597 # define MIN_CHAR_HEBREW 0
2598 # define MAX_CHAR_HEBREW 0
2599 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2600 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2601 # define MIN_CHAR_CYRILLIC 0
2602 # define MAX_CHAR_CYRILLIC 0
2605 make_charset (LEADING_BYTE_ASCII, Qascii,
2606 CHARSET_TYPE_94, 1, 0, 'B',
2607 CHARSET_LEFT_TO_RIGHT,
2608 build_string ("ASCII"),
2609 build_string ("ASCII)"),
2610 build_string ("ASCII (ISO646 IRV)"),
2611 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2612 Qnil, 0, 0x7F, 0, 0);
2613 Vcharset_control_1 =
2614 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2615 CHARSET_TYPE_94, 1, 1, 0,
2616 CHARSET_LEFT_TO_RIGHT,
2617 build_string ("C1"),
2618 build_string ("Control characters"),
2619 build_string ("Control characters 128-191"),
2621 Qnil, 0x80, 0x9F, 0, 0);
2622 Vcharset_latin_iso8859_1 =
2623 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2624 CHARSET_TYPE_96, 1, 1, 'A',
2625 CHARSET_LEFT_TO_RIGHT,
2626 build_string ("Latin-1"),
2627 build_string ("ISO8859-1 (Latin-1)"),
2628 build_string ("ISO8859-1 (Latin-1)"),
2629 build_string ("iso8859-1"),
2630 Qnil, 0xA0, 0xFF, 0, 32);
2631 Vcharset_latin_iso8859_2 =
2632 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2633 CHARSET_TYPE_96, 1, 1, 'B',
2634 CHARSET_LEFT_TO_RIGHT,
2635 build_string ("Latin-2"),
2636 build_string ("ISO8859-2 (Latin-2)"),
2637 build_string ("ISO8859-2 (Latin-2)"),
2638 build_string ("iso8859-2"),
2640 Vcharset_latin_iso8859_3 =
2641 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2642 CHARSET_TYPE_96, 1, 1, 'C',
2643 CHARSET_LEFT_TO_RIGHT,
2644 build_string ("Latin-3"),
2645 build_string ("ISO8859-3 (Latin-3)"),
2646 build_string ("ISO8859-3 (Latin-3)"),
2647 build_string ("iso8859-3"),
2649 Vcharset_latin_iso8859_4 =
2650 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2651 CHARSET_TYPE_96, 1, 1, 'D',
2652 CHARSET_LEFT_TO_RIGHT,
2653 build_string ("Latin-4"),
2654 build_string ("ISO8859-4 (Latin-4)"),
2655 build_string ("ISO8859-4 (Latin-4)"),
2656 build_string ("iso8859-4"),
2658 Vcharset_thai_tis620 =
2659 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2660 CHARSET_TYPE_96, 1, 1, 'T',
2661 CHARSET_LEFT_TO_RIGHT,
2662 build_string ("TIS620"),
2663 build_string ("TIS620 (Thai)"),
2664 build_string ("TIS620.2529 (Thai)"),
2665 build_string ("tis620"),
2666 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2667 Vcharset_greek_iso8859_7 =
2668 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2669 CHARSET_TYPE_96, 1, 1, 'F',
2670 CHARSET_LEFT_TO_RIGHT,
2671 build_string ("ISO8859-7"),
2672 build_string ("ISO8859-7 (Greek)"),
2673 build_string ("ISO8859-7 (Greek)"),
2674 build_string ("iso8859-7"),
2675 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2676 Vcharset_arabic_iso8859_6 =
2677 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2678 CHARSET_TYPE_96, 1, 1, 'G',
2679 CHARSET_RIGHT_TO_LEFT,
2680 build_string ("ISO8859-6"),
2681 build_string ("ISO8859-6 (Arabic)"),
2682 build_string ("ISO8859-6 (Arabic)"),
2683 build_string ("iso8859-6"),
2685 Vcharset_hebrew_iso8859_8 =
2686 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2687 CHARSET_TYPE_96, 1, 1, 'H',
2688 CHARSET_RIGHT_TO_LEFT,
2689 build_string ("ISO8859-8"),
2690 build_string ("ISO8859-8 (Hebrew)"),
2691 build_string ("ISO8859-8 (Hebrew)"),
2692 build_string ("iso8859-8"),
2693 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2694 Vcharset_katakana_jisx0201 =
2695 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2696 CHARSET_TYPE_94, 1, 1, 'I',
2697 CHARSET_LEFT_TO_RIGHT,
2698 build_string ("JISX0201 Kana"),
2699 build_string ("JISX0201.1976 (Japanese Kana)"),
2700 build_string ("JISX0201.1976 Japanese Kana"),
2701 build_string ("jisx0201\\.1976"),
2703 MIN_CHAR_HALFWIDTH_KATAKANA,
2704 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2705 Vcharset_latin_jisx0201 =
2706 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2707 CHARSET_TYPE_94, 1, 0, 'J',
2708 CHARSET_LEFT_TO_RIGHT,
2709 build_string ("JISX0201 Roman"),
2710 build_string ("JISX0201.1976 (Japanese Roman)"),
2711 build_string ("JISX0201.1976 Japanese Roman"),
2712 build_string ("jisx0201\\.1976"),
2714 Vcharset_cyrillic_iso8859_5 =
2715 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2716 CHARSET_TYPE_96, 1, 1, 'L',
2717 CHARSET_LEFT_TO_RIGHT,
2718 build_string ("ISO8859-5"),
2719 build_string ("ISO8859-5 (Cyrillic)"),
2720 build_string ("ISO8859-5 (Cyrillic)"),
2721 build_string ("iso8859-5"),
2722 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2723 Vcharset_latin_iso8859_9 =
2724 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2725 CHARSET_TYPE_96, 1, 1, 'M',
2726 CHARSET_LEFT_TO_RIGHT,
2727 build_string ("Latin-5"),
2728 build_string ("ISO8859-9 (Latin-5)"),
2729 build_string ("ISO8859-9 (Latin-5)"),
2730 build_string ("iso8859-9"),
2732 Vcharset_japanese_jisx0208_1978 =
2733 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2734 CHARSET_TYPE_94X94, 2, 0, '@',
2735 CHARSET_LEFT_TO_RIGHT,
2736 build_string ("JIS X0208:1978"),
2737 build_string ("JIS X0208:1978 (Japanese)"),
2739 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2740 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2742 Vcharset_chinese_gb2312 =
2743 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2744 CHARSET_TYPE_94X94, 2, 0, 'A',
2745 CHARSET_LEFT_TO_RIGHT,
2746 build_string ("GB2312"),
2747 build_string ("GB2312)"),
2748 build_string ("GB2312 Chinese simplified"),
2749 build_string ("gb2312"),
2751 Vcharset_japanese_jisx0208 =
2752 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2753 CHARSET_TYPE_94X94, 2, 0, 'B',
2754 CHARSET_LEFT_TO_RIGHT,
2755 build_string ("JISX0208"),
2756 build_string ("JIS X0208:1983 (Japanese)"),
2757 build_string ("JIS X0208:1983 Japanese Kanji"),
2758 build_string ("jisx0208\\.1983"),
2760 Vcharset_korean_ksc5601 =
2761 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2762 CHARSET_TYPE_94X94, 2, 0, 'C',
2763 CHARSET_LEFT_TO_RIGHT,
2764 build_string ("KSC5601"),
2765 build_string ("KSC5601 (Korean"),
2766 build_string ("KSC5601 Korean Hangul and Hanja"),
2767 build_string ("ksc5601"),
2769 Vcharset_japanese_jisx0212 =
2770 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2771 CHARSET_TYPE_94X94, 2, 0, 'D',
2772 CHARSET_LEFT_TO_RIGHT,
2773 build_string ("JISX0212"),
2774 build_string ("JISX0212 (Japanese)"),
2775 build_string ("JISX0212 Japanese Supplement"),
2776 build_string ("jisx0212"),
2779 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2780 Vcharset_chinese_cns11643_1 =
2781 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2782 CHARSET_TYPE_94X94, 2, 0, 'G',
2783 CHARSET_LEFT_TO_RIGHT,
2784 build_string ("CNS11643-1"),
2785 build_string ("CNS11643-1 (Chinese traditional)"),
2787 ("CNS 11643 Plane 1 Chinese traditional"),
2788 build_string (CHINESE_CNS_PLANE_RE("1")),
2790 Vcharset_chinese_cns11643_2 =
2791 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2792 CHARSET_TYPE_94X94, 2, 0, 'H',
2793 CHARSET_LEFT_TO_RIGHT,
2794 build_string ("CNS11643-2"),
2795 build_string ("CNS11643-2 (Chinese traditional)"),
2797 ("CNS 11643 Plane 2 Chinese traditional"),
2798 build_string (CHINESE_CNS_PLANE_RE("2")),
2801 Vcharset_latin_viscii_lower =
2802 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2803 CHARSET_TYPE_96, 1, 1, '1',
2804 CHARSET_LEFT_TO_RIGHT,
2805 build_string ("VISCII lower"),
2806 build_string ("VISCII lower (Vietnamese)"),
2807 build_string ("VISCII lower (Vietnamese)"),
2808 build_string ("MULEVISCII-LOWER"),
2810 Vcharset_latin_viscii_upper =
2811 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2812 CHARSET_TYPE_96, 1, 1, '2',
2813 CHARSET_LEFT_TO_RIGHT,
2814 build_string ("VISCII upper"),
2815 build_string ("VISCII upper (Vietnamese)"),
2816 build_string ("VISCII upper (Vietnamese)"),
2817 build_string ("MULEVISCII-UPPER"),
2819 Vcharset_latin_viscii =
2820 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2821 CHARSET_TYPE_256, 1, 2, 0,
2822 CHARSET_LEFT_TO_RIGHT,
2823 build_string ("VISCII"),
2824 build_string ("VISCII 1.1 (Vietnamese)"),
2825 build_string ("VISCII 1.1 (Vietnamese)"),
2826 build_string ("VISCII1\\.1"),
2828 Vcharset_ideograph_daikanwa =
2829 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2830 CHARSET_TYPE_256X256, 2, 2, 0,
2831 CHARSET_LEFT_TO_RIGHT,
2832 build_string ("Daikanwa"),
2833 build_string ("Morohashi's Daikanwa"),
2834 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2835 build_string ("Daikanwa"),
2836 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2837 Vcharset_ethiopic_ucs =
2838 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2839 CHARSET_TYPE_256X256, 2, 2, 0,
2840 CHARSET_LEFT_TO_RIGHT,
2841 build_string ("Ethiopic (UCS)"),
2842 build_string ("Ethiopic (UCS)"),
2843 build_string ("Ethiopic of UCS"),
2844 build_string ("Ethiopic-Unicode"),
2845 Qnil, 0x1200, 0x137F, 0x1200, 0);
2847 Vcharset_chinese_big5_1 =
2848 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2849 CHARSET_TYPE_94X94, 2, 0, '0',
2850 CHARSET_LEFT_TO_RIGHT,
2851 build_string ("Big5"),
2852 build_string ("Big5 (Level-1)"),
2854 ("Big5 Level-1 Chinese traditional"),
2855 build_string ("big5"),
2857 Vcharset_chinese_big5_2 =
2858 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2859 CHARSET_TYPE_94X94, 2, 0, '1',
2860 CHARSET_LEFT_TO_RIGHT,
2861 build_string ("Big5"),
2862 build_string ("Big5 (Level-2)"),
2864 ("Big5 Level-2 Chinese traditional"),
2865 build_string ("big5"),
2868 #ifdef ENABLE_COMPOSITE_CHARS
2869 /* #### For simplicity, we put composite chars into a 96x96 charset.
2870 This is going to lead to problems because you can run out of
2871 room, esp. as we don't yet recycle numbers. */
2872 Vcharset_composite =
2873 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2874 CHARSET_TYPE_96X96, 2, 0, 0,
2875 CHARSET_LEFT_TO_RIGHT,
2876 build_string ("Composite"),
2877 build_string ("Composite characters"),
2878 build_string ("Composite characters"),
2881 composite_char_row_next = 32;
2882 composite_char_col_next = 32;
2884 Vcomposite_char_string2char_hash_table =
2885 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2886 Vcomposite_char_char2string_hash_table =
2887 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2888 staticpro (&Vcomposite_char_string2char_hash_table);
2889 staticpro (&Vcomposite_char_char2string_hash_table);
2890 #endif /* ENABLE_COMPOSITE_CHARS */