1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo;
70 Lisp_Object Vcharset_mojikyo_pj_1;
71 Lisp_Object Vcharset_mojikyo_pj_2;
72 Lisp_Object Vcharset_mojikyo_pj_3;
73 Lisp_Object Vcharset_mojikyo_pj_4;
74 Lisp_Object Vcharset_mojikyo_pj_5;
75 Lisp_Object Vcharset_mojikyo_pj_6;
76 Lisp_Object Vcharset_mojikyo_pj_7;
77 Lisp_Object Vcharset_mojikyo_pj_8;
78 Lisp_Object Vcharset_mojikyo_pj_9;
79 Lisp_Object Vcharset_mojikyo_pj_10;
80 Lisp_Object Vcharset_mojikyo_pj_11;
81 Lisp_Object Vcharset_mojikyo_pj_12;
82 Lisp_Object Vcharset_mojikyo_pj_13;
83 Lisp_Object Vcharset_mojikyo_pj_14;
84 Lisp_Object Vcharset_mojikyo_pj_15;
85 Lisp_Object Vcharset_mojikyo_pj_16;
86 Lisp_Object Vcharset_mojikyo_pj_17;
87 Lisp_Object Vcharset_mojikyo_pj_18;
88 Lisp_Object Vcharset_mojikyo_pj_19;
89 Lisp_Object Vcharset_mojikyo_pj_20;
90 Lisp_Object Vcharset_mojikyo_pj_21;
91 Lisp_Object Vcharset_ethiopic_ucs;
93 Lisp_Object Vcharset_chinese_big5_1;
94 Lisp_Object Vcharset_chinese_big5_2;
96 #ifdef ENABLE_COMPOSITE_CHARS
97 Lisp_Object Vcharset_composite;
99 /* Hash tables for composite chars. One maps string representing
100 composed chars to their equivalent chars; one goes the
102 Lisp_Object Vcomposite_char_char2string_hash_table;
103 Lisp_Object Vcomposite_char_string2char_hash_table;
105 static int composite_char_row_next;
106 static int composite_char_col_next;
108 #endif /* ENABLE_COMPOSITE_CHARS */
110 struct charset_lookup *chlook;
112 static const struct lrecord_description charset_lookup_description_1[] = {
113 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
122 static const struct struct_description charset_lookup_description = {
123 sizeof(struct charset_lookup),
124 charset_lookup_description_1
128 /* Table of number of bytes in the string representation of a character
129 indexed by the first byte of that representation.
131 rep_bytes_by_first_byte(c) is more efficient than the equivalent
132 canonical computation:
134 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
136 const Bytecount rep_bytes_by_first_byte[0xA0] =
137 { /* 0x00 - 0x7f are for straight ASCII */
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 /* 0x80 - 0x8f are for Dimension-1 official charsets */
148 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
152 /* 0x90 - 0x9d are for Dimension-2 official charsets */
153 /* 0x9e is for Dimension-1 private charsets */
154 /* 0x9f is for Dimension-2 private charsets */
155 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
162 mark_char_byte_table (Lisp_Object obj)
164 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
167 for (i = 0; i < 256; i++)
169 mark_object (cte->property[i]);
175 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
177 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
178 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
181 for (i = 0; i < 256; i++)
182 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
184 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
186 if (!char_byte_table_equal (cte1->property[i],
187 cte2->property[i], depth + 1))
194 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
200 char_byte_table_hash (Lisp_Object obj, int depth)
202 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
204 return internal_array_hash (cte->property, 256, depth);
207 static const struct lrecord_description char_byte_table_description[] = {
208 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
212 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
213 mark_char_byte_table,
214 internal_object_printer,
215 0, char_byte_table_equal,
216 char_byte_table_hash,
217 char_byte_table_description,
218 struct Lisp_Char_Byte_Table);
221 make_char_byte_table (Lisp_Object initval)
225 struct Lisp_Char_Byte_Table *cte =
226 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
227 &lrecord_char_byte_table);
229 for (i = 0; i < 256; i++)
230 cte->property[i] = initval;
232 XSETCHAR_BYTE_TABLE (obj, cte);
237 copy_char_byte_table (Lisp_Object entry)
239 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
242 struct Lisp_Char_Byte_Table *ctenew =
243 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
244 &lrecord_char_byte_table);
246 for (i = 0; i < 256; i++)
248 Lisp_Object new = cte->property[i];
249 if (CHAR_BYTE_TABLE_P (new))
250 ctenew->property[i] = copy_char_byte_table (new);
252 ctenew->property[i] = new;
255 XSETCHAR_BYTE_TABLE (obj, ctenew);
261 mark_char_code_table (Lisp_Object obj)
263 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
269 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
271 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
272 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
274 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
278 char_code_table_hash (Lisp_Object obj, int depth)
280 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
282 return char_code_table_hash (cte->table, depth + 1);
285 static const struct lrecord_description char_code_table_description[] = {
286 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
290 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
291 mark_char_code_table,
292 internal_object_printer,
293 0, char_code_table_equal,
294 char_code_table_hash,
295 char_code_table_description,
296 struct Lisp_Char_Code_Table);
299 make_char_code_table (Lisp_Object initval)
302 struct Lisp_Char_Code_Table *cte =
303 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
304 &lrecord_char_code_table);
306 cte->table = make_char_byte_table (initval);
308 XSETCHAR_CODE_TABLE (obj, cte);
313 copy_char_code_table (Lisp_Object entry)
315 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
317 struct Lisp_Char_Code_Table *ctenew =
318 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
319 &lrecord_char_code_table);
321 ctenew->table = copy_char_byte_table (cte->table);
322 XSETCHAR_CODE_TABLE (obj, ctenew);
328 get_char_code_table (Emchar ch, Lisp_Object table)
330 unsigned int code = ch;
331 struct Lisp_Char_Byte_Table* cpt
332 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
333 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
335 if (CHAR_BYTE_TABLE_P (ret))
336 cpt = XCHAR_BYTE_TABLE (ret);
340 ret = cpt->property [(unsigned char) (code >> 16)];
341 if (CHAR_BYTE_TABLE_P (ret))
342 cpt = XCHAR_BYTE_TABLE (ret);
346 ret = cpt->property [(unsigned char) (code >> 8)];
347 if (CHAR_BYTE_TABLE_P (ret))
348 cpt = XCHAR_BYTE_TABLE (ret);
352 return cpt->property [(unsigned char) code];
356 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
358 unsigned int code = ch;
359 struct Lisp_Char_Byte_Table* cpt1
360 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
361 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
363 if (CHAR_BYTE_TABLE_P (ret))
365 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
367 ret = cpt2->property[(unsigned char)(code >> 16)];
368 if (CHAR_BYTE_TABLE_P (ret))
370 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
372 ret = cpt3->property[(unsigned char)(code >> 8)];
373 if (CHAR_BYTE_TABLE_P (ret))
375 struct Lisp_Char_Byte_Table* cpt4
376 = XCHAR_BYTE_TABLE (ret);
378 cpt4->property[(unsigned char)code] = value;
380 else if (!EQ (ret, value))
382 Lisp_Object cpt4 = make_char_byte_table (ret);
384 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
385 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
388 else if (!EQ (ret, value))
390 Lisp_Object cpt3 = make_char_byte_table (ret);
391 Lisp_Object cpt4 = make_char_byte_table (ret);
393 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
394 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
396 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
399 else if (!EQ (ret, value))
401 Lisp_Object cpt2 = make_char_byte_table (ret);
402 Lisp_Object cpt3 = make_char_byte_table (ret);
403 Lisp_Object cpt4 = make_char_byte_table (ret);
405 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
406 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
407 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
408 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
413 Lisp_Object Vcharacter_attribute_table;
414 Lisp_Object Vcharacter_composition_table;
415 Lisp_Object Vcharacter_variant_table;
417 Lisp_Object Q_decomposition;
420 Lisp_Object Qisolated;
421 Lisp_Object Qinitial;
424 Lisp_Object Qvertical;
425 Lisp_Object QnoBreak;
426 Lisp_Object Qfraction;
437 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
443 else if (EQ (v, Qcompat))
445 else if (EQ (v, Qisolated))
447 else if (EQ (v, Qinitial))
449 else if (EQ (v, Qmedial))
451 else if (EQ (v, Qfinal))
453 else if (EQ (v, Qvertical))
455 else if (EQ (v, QnoBreak))
457 else if (EQ (v, Qfraction))
459 else if (EQ (v, Qsuper))
461 else if (EQ (v, Qsub))
463 else if (EQ (v, Qcircle))
465 else if (EQ (v, Qsquare))
467 else if (EQ (v, Qwide))
469 else if (EQ (v, Qnarrow))
471 else if (EQ (v, Qsmall))
473 else if (EQ (v, Qfont))
476 signal_simple_error (err_msg, err_arg);
479 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
480 Return character corresponding with list.
484 Lisp_Object table = Vcharacter_composition_table;
485 Lisp_Object rest = list;
489 Lisp_Object v = Fcar (rest);
491 Emchar c = to_char_code (v, "Invalid value for composition", list);
493 ret = get_char_code_table (c, table);
498 if (!CHAR_CODE_TABLE_P (ret))
503 else if (!CONSP (rest))
505 else if (CHAR_CODE_TABLE_P (ret))
508 signal_simple_error ("Invalid table is found with", list);
510 signal_simple_error ("Invalid value for composition", list);
513 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
514 Return variants of CHARACTER.
518 CHECK_CHAR (character);
519 return Fcopy_list (get_char_code_table (XCHAR (character),
520 Vcharacter_variant_table));
523 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
524 Return the alist of attributes of CHARACTER.
528 CHECK_CHAR (character);
529 return Fcopy_alist (get_char_code_table (XCHAR (character),
530 Vcharacter_attribute_table));
533 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
534 Return the value of CHARACTER's ATTRIBUTE.
536 (character, attribute))
541 CHECK_CHAR (character);
542 ret = get_char_code_table (XCHAR (character),
543 Vcharacter_attribute_table);
547 if (!NILP (ccs = Ffind_charset (attribute)))
550 return Fcdr (Fassq (attribute, ret));
554 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
557 Emchar char_code = XCHAR (character);
559 = get_char_code_table (char_code, Vcharacter_attribute_table);
562 cell = Fassq (attribute, ret);
566 ret = Fcons (Fcons (attribute, value), ret);
568 else if (!EQ (Fcdr (cell), value))
570 Fsetcdr (cell, value);
572 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
577 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
579 Emchar char_code = XCHAR (character);
581 = get_char_code_table (char_code, Vcharacter_attribute_table);
583 if (EQ (attribute, Fcar (Fcar (alist))))
585 alist = Fcdr (alist);
589 Lisp_Object pr = alist;
590 Lisp_Object r = Fcdr (alist);
594 if (EQ (attribute, Fcar (Fcar (r))))
596 XCDR (pr) = Fcdr (r);
603 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
607 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
608 Store CHARACTER's ATTRIBUTE with VALUE.
610 (character, attribute, value))
614 CHECK_CHAR (character);
615 ccs = Ffind_charset (attribute);
618 Lisp_Object cpos, rest;
619 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
626 /* ad-hoc method for `ascii' */
627 if ((XCHARSET_CHARS (ccs) == 94) &&
628 (XCHARSET_BYTE_OFFSET (ccs) != 33))
629 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
631 ccs_len = XCHARSET_CHARS (ccs);
635 Lisp_Object ret = Fcar (value);
638 signal_simple_error ("Invalid value for coded-charset", value);
639 code_point = XINT (ret);
640 if (XCHARSET_GRAPHIC (ccs) == 1)
648 signal_simple_error ("Invalid value for coded-charset", value);
651 signal_simple_error ("Invalid value for coded-charset", value);
653 if (XCHARSET_GRAPHIC (ccs) == 1)
655 code_point = (code_point << 8) | i;
658 value = make_int (code_point);
660 else if (INTP (value))
662 if (XCHARSET_GRAPHIC (ccs) == 1)
663 value = make_int (XINT (value) & 0x7F7F7F7F);
666 signal_simple_error ("Invalid value for coded-charset", value);
669 cpos = Fget_char_attribute (character, attribute);
674 dim = XCHARSET_DIMENSION (ccs);
675 code_point = XINT (cpos);
679 i = ((code_point >> (8 * dim)) & 255)
680 - XCHARSET_BYTE_OFFSET (ccs);
681 nv = XVECTOR_DATA(v)[i];
687 XVECTOR_DATA(v)[i] = Qnil;
688 v = XCHARSET_DECODING_TABLE (ccs);
693 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
696 dim = XCHARSET_DIMENSION (ccs);
697 code_point = XINT (value);
702 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
703 nv = XVECTOR_DATA(v)[i];
707 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
713 XVECTOR_DATA(v)[i] = character;
715 else if (EQ (attribute, Q_decomposition))
717 Lisp_Object rest = value;
718 Lisp_Object table = Vcharacter_composition_table;
721 signal_simple_error ("Invalid value for ->decomposition",
726 Lisp_Object v = Fcar (rest);
729 = to_char_code (v, "Invalid value for ->decomposition", value);
734 put_char_code_table (c, character, table);
739 ntable = get_char_code_table (c, table);
740 if (!CHAR_CODE_TABLE_P (ntable))
742 ntable = make_char_code_table (Qnil);
743 put_char_code_table (c, ntable, table);
749 else if (EQ (attribute, Q_ucs))
755 signal_simple_error ("Invalid value for ->ucs", value);
759 ret = get_char_code_table (c, Vcharacter_variant_table);
760 if (NILP (Fmemq (character, ret)))
762 put_char_code_table (c, Fcons (character, ret),
763 Vcharacter_variant_table);
766 return put_char_attribute (character, attribute, value);
769 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
770 Remove CHARACTER's ATTRIBUTE.
772 (character, attribute))
776 CHECK_CHAR (character);
777 ccs = Ffind_charset (attribute);
781 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
788 /* ad-hoc method for `ascii' */
789 if ((XCHARSET_CHARS (ccs) == 94) &&
790 (XCHARSET_BYTE_OFFSET (ccs) != 33))
791 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
793 ccs_len = XCHARSET_CHARS (ccs);
796 cpos = Fget_char_attribute (character, attribute);
801 dim = XCHARSET_DIMENSION (ccs);
802 code_point = XINT (cpos);
806 i = ((code_point >> (8 * dim)) & 255)
807 - XCHARSET_BYTE_OFFSET (ccs);
808 nv = XVECTOR_DATA(v)[i];
814 XVECTOR_DATA(v)[i] = Qnil;
815 v = XCHARSET_DECODING_TABLE (ccs);
819 return remove_char_attribute (character, attribute);
824 EXFUN (Fmake_char, 3);
825 EXFUN (Fdecode_char, 2);
827 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
828 Store character's ATTRIBUTES.
832 Lisp_Object rest = attributes;
833 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
834 Lisp_Object character;
840 Lisp_Object cell = Fcar (rest);
844 signal_simple_error ("Invalid argument", attributes);
845 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
846 && ((XCHARSET_FINAL (ccs) != 0) ||
847 (XCHARSET_UCS_MAX (ccs) > 0)) )
851 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
853 character = Fdecode_char (ccs, cell);
854 goto setup_attributes;
858 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
861 signal_simple_error ("Invalid argument", attributes);
863 character = make_char (XINT (code) + 0x100000);
864 goto setup_attributes;
868 else if (!INTP (code))
869 signal_simple_error ("Invalid argument", attributes);
871 character = make_char (XINT (code));
877 Lisp_Object cell = Fcar (rest);
880 signal_simple_error ("Invalid argument", attributes);
881 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
885 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
888 Lisp_Object Vutf_2000_version;
892 int leading_code_private_11;
895 Lisp_Object Qcharsetp;
897 /* Qdoc_string, Qdimension, Qchars defined in general.c */
898 Lisp_Object Qregistry, Qfinal, Qgraphic;
899 Lisp_Object Qdirection;
900 Lisp_Object Qreverse_direction_charset;
901 Lisp_Object Qleading_byte;
902 Lisp_Object Qshort_name, Qlong_name;
918 Qjapanese_jisx0208_1978,
921 Qjapanese_jisx0208_1990,
931 Qvietnamese_viscii_lower,
932 Qvietnamese_viscii_upper,
962 Lisp_Object Ql2r, Qr2l;
964 Lisp_Object Vcharset_hash_table;
967 static Charset_ID next_allocated_leading_byte;
969 static Charset_ID next_allocated_1_byte_leading_byte;
970 static Charset_ID next_allocated_2_byte_leading_byte;
973 /* Composite characters are characters constructed by overstriking two
974 or more regular characters.
976 1) The old Mule implementation involves storing composite characters
977 in a buffer as a tag followed by all of the actual characters
978 used to make up the composite character. I think this is a bad
979 idea; it greatly complicates code that wants to handle strings
980 one character at a time because it has to deal with the possibility
981 of great big ungainly characters. It's much more reasonable to
982 simply store an index into a table of composite characters.
984 2) The current implementation only allows for 16,384 separate
985 composite characters over the lifetime of the XEmacs process.
986 This could become a potential problem if the user
987 edited lots of different files that use composite characters.
988 Due to FSF bogosity, increasing the number of allowable
989 composite characters under Mule would decrease the number
990 of possible faces that can exist. Mule already has shrunk
991 this to 2048, and further shrinkage would become uncomfortable.
992 No such problems exist in XEmacs.
994 Composite characters could be represented as 0x80 C1 C2 C3,
995 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
996 for slightly under 2^20 (one million) composite characters
997 over the XEmacs process lifetime, and you only need to
998 increase the size of a Mule character from 19 to 21 bits.
999 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1000 85 million (slightly over 2^26) composite characters. */
1003 /************************************************************************/
1004 /* Basic Emchar functions */
1005 /************************************************************************/
1007 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1008 string in STR. Returns the number of bytes stored.
1009 Do not call this directly. Use the macro set_charptr_emchar() instead.
1013 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1019 Lisp_Object charset;
1028 else if ( c <= 0x7ff )
1030 *p++ = (c >> 6) | 0xc0;
1031 *p++ = (c & 0x3f) | 0x80;
1033 else if ( c <= 0xffff )
1035 *p++ = (c >> 12) | 0xe0;
1036 *p++ = ((c >> 6) & 0x3f) | 0x80;
1037 *p++ = (c & 0x3f) | 0x80;
1039 else if ( c <= 0x1fffff )
1041 *p++ = (c >> 18) | 0xf0;
1042 *p++ = ((c >> 12) & 0x3f) | 0x80;
1043 *p++ = ((c >> 6) & 0x3f) | 0x80;
1044 *p++ = (c & 0x3f) | 0x80;
1046 else if ( c <= 0x3ffffff )
1048 *p++ = (c >> 24) | 0xf8;
1049 *p++ = ((c >> 18) & 0x3f) | 0x80;
1050 *p++ = ((c >> 12) & 0x3f) | 0x80;
1051 *p++ = ((c >> 6) & 0x3f) | 0x80;
1052 *p++ = (c & 0x3f) | 0x80;
1056 *p++ = (c >> 30) | 0xfc;
1057 *p++ = ((c >> 24) & 0x3f) | 0x80;
1058 *p++ = ((c >> 18) & 0x3f) | 0x80;
1059 *p++ = ((c >> 12) & 0x3f) | 0x80;
1060 *p++ = ((c >> 6) & 0x3f) | 0x80;
1061 *p++ = (c & 0x3f) | 0x80;
1064 BREAKUP_CHAR (c, charset, c1, c2);
1065 lb = CHAR_LEADING_BYTE (c);
1066 if (LEADING_BYTE_PRIVATE_P (lb))
1067 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1069 if (EQ (charset, Vcharset_control_1))
1078 /* Return the first character from a Mule-encoded string in STR,
1079 assuming it's non-ASCII. Do not call this directly.
1080 Use the macro charptr_emchar() instead. */
1083 non_ascii_charptr_emchar (CONST Bufbyte *str)
1096 else if ( b >= 0xf8 )
1101 else if ( b >= 0xf0 )
1106 else if ( b >= 0xe0 )
1111 else if ( b >= 0xc0 )
1121 for( ; len > 0; len-- )
1124 ch = ( ch << 6 ) | ( b & 0x3f );
1128 Bufbyte i0 = *str, i1, i2 = 0;
1129 Lisp_Object charset;
1131 if (i0 == LEADING_BYTE_CONTROL_1)
1132 return (Emchar) (*++str - 0x20);
1134 if (LEADING_BYTE_PREFIX_P (i0))
1139 charset = CHARSET_BY_LEADING_BYTE (i0);
1140 if (XCHARSET_DIMENSION (charset) == 2)
1143 return MAKE_CHAR (charset, i1, i2);
1147 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1148 Do not call this directly. Use the macro valid_char_p() instead. */
1152 non_ascii_valid_char_p (Emchar ch)
1156 /* Must have only lowest 19 bits set */
1160 f1 = CHAR_FIELD1 (ch);
1161 f2 = CHAR_FIELD2 (ch);
1162 f3 = CHAR_FIELD3 (ch);
1166 Lisp_Object charset;
1168 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1169 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1170 f2 > MAX_CHAR_FIELD2_PRIVATE)
1175 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1176 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1180 NOTE: This takes advantage of the fact that
1181 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1182 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1184 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1185 if (EQ (charset, Qnil))
1187 return (XCHARSET_CHARS (charset) == 96);
1191 Lisp_Object charset;
1193 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1194 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1195 f1 > MAX_CHAR_FIELD1_PRIVATE)
1197 if (f2 < 0x20 || f3 < 0x20)
1200 #ifdef ENABLE_COMPOSITE_CHARS
1201 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1203 if (UNBOUNDP (Fgethash (make_int (ch),
1204 Vcomposite_char_char2string_hash_table,
1209 #endif /* ENABLE_COMPOSITE_CHARS */
1211 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1212 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1215 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1217 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1220 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1222 if (EQ (charset, Qnil))
1224 return (XCHARSET_CHARS (charset) == 96);
1230 /************************************************************************/
1231 /* Basic string functions */
1232 /************************************************************************/
1234 /* Copy the character pointed to by PTR into STR, assuming it's
1235 non-ASCII. Do not call this directly. Use the macro
1236 charptr_copy_char() instead. */
1239 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1241 Bufbyte *strptr = str;
1243 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1245 /* Notice fallthrough. */
1247 case 6: *++strptr = *ptr++;
1248 case 5: *++strptr = *ptr++;
1250 case 4: *++strptr = *ptr++;
1251 case 3: *++strptr = *ptr++;
1252 case 2: *++strptr = *ptr;
1257 return strptr + 1 - str;
1261 /************************************************************************/
1262 /* streams of Emchars */
1263 /************************************************************************/
1265 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1266 The functions below are not meant to be called directly; use
1267 the macros in insdel.h. */
1270 Lstream_get_emchar_1 (Lstream *stream, int ch)
1272 Bufbyte str[MAX_EMCHAR_LEN];
1273 Bufbyte *strptr = str;
1275 str[0] = (Bufbyte) ch;
1276 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1278 /* Notice fallthrough. */
1281 ch = Lstream_getc (stream);
1283 *++strptr = (Bufbyte) ch;
1285 ch = Lstream_getc (stream);
1287 *++strptr = (Bufbyte) ch;
1290 ch = Lstream_getc (stream);
1292 *++strptr = (Bufbyte) ch;
1294 ch = Lstream_getc (stream);
1296 *++strptr = (Bufbyte) ch;
1298 ch = Lstream_getc (stream);
1300 *++strptr = (Bufbyte) ch;
1305 return charptr_emchar (str);
1309 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1311 Bufbyte str[MAX_EMCHAR_LEN];
1312 Bytecount len = set_charptr_emchar (str, ch);
1313 return Lstream_write (stream, str, len);
1317 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1319 Bufbyte str[MAX_EMCHAR_LEN];
1320 Bytecount len = set_charptr_emchar (str, ch);
1321 Lstream_unread (stream, str, len);
1325 /************************************************************************/
1326 /* charset object */
1327 /************************************************************************/
1330 mark_charset (Lisp_Object obj)
1332 struct Lisp_Charset *cs = XCHARSET (obj);
1334 mark_object (cs->short_name);
1335 mark_object (cs->long_name);
1336 mark_object (cs->doc_string);
1337 mark_object (cs->registry);
1338 mark_object (cs->ccl_program);
1340 mark_object (cs->decoding_table);
1346 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1348 struct Lisp_Charset *cs = XCHARSET (obj);
1352 error ("printing unreadable object #<charset %s 0x%x>",
1353 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1356 write_c_string ("#<charset ", printcharfun);
1357 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1358 write_c_string (" ", printcharfun);
1359 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1360 write_c_string (" ", printcharfun);
1361 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1362 write_c_string (" ", printcharfun);
1363 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1364 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1366 CHARSET_DIMENSION (cs),
1367 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1368 CHARSET_COLUMNS (cs),
1369 CHARSET_GRAPHIC (cs),
1370 CHARSET_FINAL (cs));
1371 write_c_string (buf, printcharfun);
1372 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1373 sprintf (buf, " 0x%x>", cs->header.uid);
1374 write_c_string (buf, printcharfun);
1377 static const struct lrecord_description charset_description[] = {
1378 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1380 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1385 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1386 mark_charset, print_charset, 0, 0, 0,
1387 charset_description,
1388 struct Lisp_Charset);
1389 /* Make a new charset. */
1392 make_charset (Charset_ID id, Lisp_Object name,
1393 unsigned short chars, unsigned char dimension,
1394 unsigned char columns, unsigned char graphic,
1395 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1396 Lisp_Object long_name, Lisp_Object doc,
1398 Lisp_Object decoding_table,
1399 Emchar ucs_min, Emchar ucs_max,
1400 Emchar code_offset, unsigned char byte_offset)
1402 unsigned char type = 0;
1404 struct Lisp_Charset *cs =
1405 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1406 XSETCHARSET (obj, cs);
1408 CHARSET_ID (cs) = id;
1409 CHARSET_NAME (cs) = name;
1410 CHARSET_SHORT_NAME (cs) = short_name;
1411 CHARSET_LONG_NAME (cs) = long_name;
1412 CHARSET_CHARS (cs) = chars;
1413 CHARSET_DIMENSION (cs) = dimension;
1414 CHARSET_DIRECTION (cs) = direction;
1415 CHARSET_COLUMNS (cs) = columns;
1416 CHARSET_GRAPHIC (cs) = graphic;
1417 CHARSET_FINAL (cs) = final;
1418 CHARSET_DOC_STRING (cs) = doc;
1419 CHARSET_REGISTRY (cs) = reg;
1420 CHARSET_CCL_PROGRAM (cs) = Qnil;
1421 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1423 CHARSET_DECODING_TABLE(cs) = Qnil;
1424 CHARSET_UCS_MIN(cs) = ucs_min;
1425 CHARSET_UCS_MAX(cs) = ucs_max;
1426 CHARSET_CODE_OFFSET(cs) = code_offset;
1427 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1430 switch (CHARSET_CHARS (cs))
1433 switch (CHARSET_DIMENSION (cs))
1436 type = CHARSET_TYPE_94;
1439 type = CHARSET_TYPE_94X94;
1444 switch (CHARSET_DIMENSION (cs))
1447 type = CHARSET_TYPE_96;
1450 type = CHARSET_TYPE_96X96;
1456 switch (CHARSET_DIMENSION (cs))
1459 type = CHARSET_TYPE_128;
1462 type = CHARSET_TYPE_128X128;
1467 switch (CHARSET_DIMENSION (cs))
1470 type = CHARSET_TYPE_256;
1473 type = CHARSET_TYPE_256X256;
1480 CHARSET_TYPE (cs) = type;
1484 if (id == LEADING_BYTE_ASCII)
1485 CHARSET_REP_BYTES (cs) = 1;
1487 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1489 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1494 /* some charsets do not have final characters. This includes
1495 ASCII, Control-1, Composite, and the two faux private
1498 if (code_offset == 0)
1500 assert (NILP (chlook->charset_by_attributes[type][final]));
1501 chlook->charset_by_attributes[type][final] = obj;
1504 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1505 chlook->charset_by_attributes[type][final][direction] = obj;
1509 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1510 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1512 /* Some charsets are "faux" and don't have names or really exist at
1513 all except in the leading-byte table. */
1515 Fputhash (name, obj, Vcharset_hash_table);
1520 get_unallocated_leading_byte (int dimension)
1525 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1528 lb = next_allocated_leading_byte++;
1532 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1535 lb = next_allocated_1_byte_leading_byte++;
1539 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1542 lb = next_allocated_2_byte_leading_byte++;
1548 ("No more character sets free for this dimension",
1549 make_int (dimension));
1556 make_builtin_char (Lisp_Object charset, int c1, int c2)
1558 if (XCHARSET_UCS_MAX (charset))
1561 = (XCHARSET_DIMENSION (charset) == 1
1563 c1 - XCHARSET_BYTE_OFFSET (charset)
1565 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1566 + c2 - XCHARSET_BYTE_OFFSET (charset))
1567 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1568 if ((code < XCHARSET_UCS_MIN (charset))
1569 || (XCHARSET_UCS_MAX (charset) < code))
1570 signal_simple_error ("Arguments makes invalid character",
1574 else if (XCHARSET_DIMENSION (charset) == 1)
1576 switch (XCHARSET_CHARS (charset))
1580 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1583 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1590 switch (XCHARSET_CHARS (charset))
1593 return MIN_CHAR_94x94
1594 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1595 + (c1 - 33) * 94 + (c2 - 33);
1597 return MIN_CHAR_96x96
1598 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1599 + (c1 - 32) * 96 + (c2 - 32);
1607 range_charset_code_point (Lisp_Object charset, Emchar ch)
1611 if ((XCHARSET_UCS_MIN (charset) <= ch)
1612 && (ch <= XCHARSET_UCS_MAX (charset)))
1614 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1616 if (XCHARSET_CHARS (charset) == 256)
1618 else if (XCHARSET_DIMENSION (charset) == 1)
1619 return d + XCHARSET_BYTE_OFFSET (charset);
1620 else if (XCHARSET_DIMENSION (charset) == 2)
1622 ((d / XCHARSET_CHARS (charset)
1623 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1624 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1625 else if (XCHARSET_DIMENSION (charset) == 3)
1627 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1628 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1629 | ((d / XCHARSET_CHARS (charset)
1630 % XCHARSET_CHARS (charset)
1631 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1632 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1633 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1635 ((d / (XCHARSET_CHARS (charset)
1636 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1637 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1638 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1639 % XCHARSET_CHARS (charset)
1640 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1641 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1642 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1643 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1645 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1647 if (XCHARSET_DIMENSION (charset) == 1)
1649 if (XCHARSET_CHARS (charset) == 94)
1651 if (((d = ch - (MIN_CHAR_94
1652 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1656 else if (XCHARSET_CHARS (charset) == 96)
1658 if (((d = ch - (MIN_CHAR_96
1659 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1666 else if (XCHARSET_DIMENSION (charset) == 2)
1668 if (XCHARSET_CHARS (charset) == 94)
1670 if (((d = ch - (MIN_CHAR_94x94
1671 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1674 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1676 else if (XCHARSET_CHARS (charset) == 96)
1678 if (((d = ch - (MIN_CHAR_96x96
1679 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1682 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1692 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1694 if (c <= MAX_CHAR_BASIC_LATIN)
1696 *charset = Vcharset_ascii;
1701 *charset = Vcharset_control_1;
1706 *charset = Vcharset_latin_iso8859_1;
1710 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1712 *charset = Vcharset_greek_iso8859_7;
1713 return c - MIN_CHAR_GREEK + 0x20;
1715 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1717 *charset = Vcharset_cyrillic_iso8859_5;
1718 return c - MIN_CHAR_CYRILLIC + 0x20;
1721 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1723 *charset = Vcharset_hebrew_iso8859_8;
1724 return c - MIN_CHAR_HEBREW + 0x20;
1726 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1728 *charset = Vcharset_thai_tis620;
1729 return c - MIN_CHAR_THAI + 0x20;
1732 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1733 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1735 return list2 (Vcharset_katakana_jisx0201,
1736 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1739 else if (c <= MAX_CHAR_BMP)
1741 *charset = Vcharset_ucs_bmp;
1744 else if (c < MIN_CHAR_DAIKANWA)
1746 *charset = Vcharset_ucs;
1749 else if (c <= MAX_CHAR_DAIKANWA)
1751 *charset = Vcharset_ideograph_daikanwa;
1752 return c - MIN_CHAR_DAIKANWA;
1754 else if (c < MIN_CHAR_94)
1756 *charset = Vcharset_ucs;
1759 else if (c <= MAX_CHAR_94)
1761 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1762 ((c - MIN_CHAR_94) / 94) + '0',
1763 CHARSET_LEFT_TO_RIGHT);
1764 if (!NILP (*charset))
1765 return ((c - MIN_CHAR_94) % 94) + 33;
1768 *charset = Vcharset_ucs;
1772 else if (c <= MAX_CHAR_96)
1774 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1775 ((c - MIN_CHAR_96) / 96) + '0',
1776 CHARSET_LEFT_TO_RIGHT);
1777 if (!NILP (*charset))
1778 return ((c - MIN_CHAR_96) % 96) + 32;
1781 *charset = Vcharset_ucs;
1785 else if (c <= MAX_CHAR_94x94)
1788 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1789 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1790 CHARSET_LEFT_TO_RIGHT);
1791 if (!NILP (*charset))
1792 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1793 | (((c - MIN_CHAR_94x94) % 94) + 33);
1796 *charset = Vcharset_ucs;
1800 else if (c <= MAX_CHAR_96x96)
1803 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1804 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1805 CHARSET_LEFT_TO_RIGHT);
1806 if (!NILP (*charset))
1807 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1808 | (((c - MIN_CHAR_96x96) % 96) + 32);
1811 *charset = Vcharset_ucs;
1817 *charset = Vcharset_ucs;
1822 Lisp_Object Vdefault_coded_charset_priority_list;
1826 /************************************************************************/
1827 /* Basic charset Lisp functions */
1828 /************************************************************************/
1830 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1831 Return non-nil if OBJECT is a charset.
1835 return CHARSETP (object) ? Qt : Qnil;
1838 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1839 Retrieve the charset of the given name.
1840 If CHARSET-OR-NAME is a charset object, it is simply returned.
1841 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1842 nil is returned. Otherwise the associated charset object is returned.
1846 if (CHARSETP (charset_or_name))
1847 return charset_or_name;
1849 CHECK_SYMBOL (charset_or_name);
1850 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1853 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1854 Retrieve the charset of the given name.
1855 Same as `find-charset' except an error is signalled if there is no such
1856 charset instead of returning nil.
1860 Lisp_Object charset = Ffind_charset (name);
1863 signal_simple_error ("No such charset", name);
1867 /* We store the charsets in hash tables with the names as the key and the
1868 actual charset object as the value. Occasionally we need to use them
1869 in a list format. These routines provide us with that. */
1870 struct charset_list_closure
1872 Lisp_Object *charset_list;
1876 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1877 void *charset_list_closure)
1879 /* This function can GC */
1880 struct charset_list_closure *chcl =
1881 (struct charset_list_closure*) charset_list_closure;
1882 Lisp_Object *charset_list = chcl->charset_list;
1884 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1888 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1889 Return a list of the names of all defined charsets.
1893 Lisp_Object charset_list = Qnil;
1894 struct gcpro gcpro1;
1895 struct charset_list_closure charset_list_closure;
1897 GCPRO1 (charset_list);
1898 charset_list_closure.charset_list = &charset_list;
1899 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1900 &charset_list_closure);
1903 return charset_list;
1906 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1907 Return the name of the given charset.
1911 return XCHARSET_NAME (Fget_charset (charset));
1914 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1915 Define a new character set.
1916 This function is for use with Mule support.
1917 NAME is a symbol, the name by which the character set is normally referred.
1918 DOC-STRING is a string describing the character set.
1919 PROPS is a property list, describing the specific nature of the
1920 character set. Recognized properties are:
1922 'short-name Short version of the charset name (ex: Latin-1)
1923 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1924 'registry A regular expression matching the font registry field for
1926 'dimension Number of octets used to index a character in this charset.
1927 Either 1 or 2. Defaults to 1.
1928 'columns Number of columns used to display a character in this charset.
1929 Only used in TTY mode. (Under X, the actual width of a
1930 character can be derived from the font used to display the
1931 characters.) If unspecified, defaults to the dimension
1932 (this is almost always the correct value).
1933 'chars Number of characters in each dimension (94 or 96).
1934 Defaults to 94. Note that if the dimension is 2, the
1935 character set thus described is 94x94 or 96x96.
1936 'final Final byte of ISO 2022 escape sequence. Must be
1937 supplied. Each combination of (DIMENSION, CHARS) defines a
1938 separate namespace for final bytes. Note that ISO
1939 2022 restricts the final byte to the range
1940 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1941 dimension == 2. Note also that final bytes in the range
1942 0x30 - 0x3F are reserved for user-defined (not official)
1944 'graphic 0 (use left half of font on output) or 1 (use right half
1945 of font on output). Defaults to 0. For example, for
1946 a font whose registry is ISO8859-1, the left half
1947 (octets 0x20 - 0x7F) is the `ascii' character set, while
1948 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1949 character set. With 'graphic set to 0, the octets
1950 will have their high bit cleared; with it set to 1,
1951 the octets will have their high bit set.
1952 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1954 'ccl-program A compiled CCL program used to convert a character in
1955 this charset into an index into the font. This is in
1956 addition to the 'graphic property. The CCL program
1957 is passed the octets of the character, with the high
1958 bit cleared and set depending upon whether the value
1959 of the 'graphic property is 0 or 1.
1961 (name, doc_string, props))
1963 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1964 int direction = CHARSET_LEFT_TO_RIGHT;
1966 Lisp_Object registry = Qnil;
1967 Lisp_Object charset;
1968 Lisp_Object rest, keyword, value;
1969 Lisp_Object ccl_program = Qnil;
1970 Lisp_Object short_name = Qnil, long_name = Qnil;
1971 int byte_offset = -1;
1973 CHECK_SYMBOL (name);
1974 if (!NILP (doc_string))
1975 CHECK_STRING (doc_string);
1977 charset = Ffind_charset (name);
1978 if (!NILP (charset))
1979 signal_simple_error ("Cannot redefine existing charset", name);
1981 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1983 if (EQ (keyword, Qshort_name))
1985 CHECK_STRING (value);
1989 if (EQ (keyword, Qlong_name))
1991 CHECK_STRING (value);
1995 else if (EQ (keyword, Qdimension))
1998 dimension = XINT (value);
1999 if (dimension < 1 || dimension > 2)
2000 signal_simple_error ("Invalid value for 'dimension", value);
2003 else if (EQ (keyword, Qchars))
2006 chars = XINT (value);
2007 if (chars != 94 && chars != 96)
2008 signal_simple_error ("Invalid value for 'chars", value);
2011 else if (EQ (keyword, Qcolumns))
2014 columns = XINT (value);
2015 if (columns != 1 && columns != 2)
2016 signal_simple_error ("Invalid value for 'columns", value);
2019 else if (EQ (keyword, Qgraphic))
2022 graphic = XINT (value);
2024 if (graphic < 0 || graphic > 2)
2026 if (graphic < 0 || graphic > 1)
2028 signal_simple_error ("Invalid value for 'graphic", value);
2031 else if (EQ (keyword, Qregistry))
2033 CHECK_STRING (value);
2037 else if (EQ (keyword, Qdirection))
2039 if (EQ (value, Ql2r))
2040 direction = CHARSET_LEFT_TO_RIGHT;
2041 else if (EQ (value, Qr2l))
2042 direction = CHARSET_RIGHT_TO_LEFT;
2044 signal_simple_error ("Invalid value for 'direction", value);
2047 else if (EQ (keyword, Qfinal))
2049 CHECK_CHAR_COERCE_INT (value);
2050 final = XCHAR (value);
2051 if (final < '0' || final > '~')
2052 signal_simple_error ("Invalid value for 'final", value);
2055 else if (EQ (keyword, Qccl_program))
2057 CHECK_VECTOR (value);
2058 ccl_program = value;
2062 signal_simple_error ("Unrecognized property", keyword);
2066 error ("'final must be specified");
2067 if (dimension == 2 && final > 0x5F)
2069 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2073 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2075 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2077 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2078 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2080 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2082 id = get_unallocated_leading_byte (dimension);
2084 if (NILP (doc_string))
2085 doc_string = build_string ("");
2087 if (NILP (registry))
2088 registry = build_string ("");
2090 if (NILP (short_name))
2091 XSETSTRING (short_name, XSYMBOL (name)->name);
2093 if (NILP (long_name))
2094 long_name = doc_string;
2097 columns = dimension;
2099 if (byte_offset < 0)
2103 else if (chars == 96)
2109 charset = make_charset (id, name, chars, dimension, columns, graphic,
2110 final, direction, short_name, long_name,
2111 doc_string, registry,
2112 Qnil, 0, 0, 0, byte_offset);
2113 if (!NILP (ccl_program))
2114 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2118 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2120 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2121 NEW-NAME is the name of the new charset. Return the new charset.
2123 (charset, new_name))
2125 Lisp_Object new_charset = Qnil;
2126 int id, chars, dimension, columns, graphic, final;
2128 Lisp_Object registry, doc_string, short_name, long_name;
2129 struct Lisp_Charset *cs;
2131 charset = Fget_charset (charset);
2132 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2133 signal_simple_error ("Charset already has reverse-direction charset",
2136 CHECK_SYMBOL (new_name);
2137 if (!NILP (Ffind_charset (new_name)))
2138 signal_simple_error ("Cannot redefine existing charset", new_name);
2140 cs = XCHARSET (charset);
2142 chars = CHARSET_CHARS (cs);
2143 dimension = CHARSET_DIMENSION (cs);
2144 columns = CHARSET_COLUMNS (cs);
2145 id = get_unallocated_leading_byte (dimension);
2147 graphic = CHARSET_GRAPHIC (cs);
2148 final = CHARSET_FINAL (cs);
2149 direction = CHARSET_RIGHT_TO_LEFT;
2150 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2151 direction = CHARSET_LEFT_TO_RIGHT;
2152 doc_string = CHARSET_DOC_STRING (cs);
2153 short_name = CHARSET_SHORT_NAME (cs);
2154 long_name = CHARSET_LONG_NAME (cs);
2155 registry = CHARSET_REGISTRY (cs);
2157 new_charset = make_charset (id, new_name, chars, dimension, columns,
2158 graphic, final, direction, short_name, long_name,
2159 doc_string, registry,
2161 CHARSET_DECODING_TABLE(cs),
2162 CHARSET_UCS_MIN(cs),
2163 CHARSET_UCS_MAX(cs),
2164 CHARSET_CODE_OFFSET(cs),
2165 CHARSET_BYTE_OFFSET(cs)
2171 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2172 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2177 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2178 Define symbol ALIAS as an alias for CHARSET.
2182 CHECK_SYMBOL (alias);
2183 charset = Fget_charset (charset);
2184 return Fputhash (alias, charset, Vcharset_hash_table);
2187 /* #### Reverse direction charsets not yet implemented. */
2189 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2191 Return the reverse-direction charset parallel to CHARSET, if any.
2192 This is the charset with the same properties (in particular, the same
2193 dimension, number of characters per dimension, and final byte) as
2194 CHARSET but whose characters are displayed in the opposite direction.
2198 charset = Fget_charset (charset);
2199 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2203 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2204 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2205 If DIRECTION is omitted, both directions will be checked (left-to-right
2206 will be returned if character sets exist for both directions).
2208 (dimension, chars, final, direction))
2210 int dm, ch, fi, di = -1;
2212 Lisp_Object obj = Qnil;
2214 CHECK_INT (dimension);
2215 dm = XINT (dimension);
2216 if (dm < 1 || dm > 2)
2217 signal_simple_error ("Invalid value for DIMENSION", dimension);
2221 if (ch != 94 && ch != 96)
2222 signal_simple_error ("Invalid value for CHARS", chars);
2224 CHECK_CHAR_COERCE_INT (final);
2226 if (fi < '0' || fi > '~')
2227 signal_simple_error ("Invalid value for FINAL", final);
2229 if (EQ (direction, Ql2r))
2230 di = CHARSET_LEFT_TO_RIGHT;
2231 else if (EQ (direction, Qr2l))
2232 di = CHARSET_RIGHT_TO_LEFT;
2233 else if (!NILP (direction))
2234 signal_simple_error ("Invalid value for DIRECTION", direction);
2236 if (dm == 2 && fi > 0x5F)
2238 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2241 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2243 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2247 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2249 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2252 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2255 return XCHARSET_NAME (obj);
2259 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2260 Return short name of CHARSET.
2264 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2267 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2268 Return long name of CHARSET.
2272 return XCHARSET_LONG_NAME (Fget_charset (charset));
2275 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2276 Return description of CHARSET.
2280 return XCHARSET_DOC_STRING (Fget_charset (charset));
2283 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2284 Return dimension of CHARSET.
2288 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2291 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2292 Return property PROP of CHARSET.
2293 Recognized properties are those listed in `make-charset', as well as
2294 'name and 'doc-string.
2298 struct Lisp_Charset *cs;
2300 charset = Fget_charset (charset);
2301 cs = XCHARSET (charset);
2303 CHECK_SYMBOL (prop);
2304 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2305 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2306 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2307 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2308 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2309 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2310 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2311 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2312 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2313 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2314 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2315 if (EQ (prop, Qdirection))
2316 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2317 if (EQ (prop, Qreverse_direction_charset))
2319 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2323 return XCHARSET_NAME (obj);
2325 signal_simple_error ("Unrecognized charset property name", prop);
2326 return Qnil; /* not reached */
2329 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2330 Return charset identification number of CHARSET.
2334 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2337 /* #### We need to figure out which properties we really want to
2340 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2341 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2343 (charset, ccl_program))
2345 charset = Fget_charset (charset);
2346 CHECK_VECTOR (ccl_program);
2347 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2352 invalidate_charset_font_caches (Lisp_Object charset)
2354 /* Invalidate font cache entries for charset on all devices. */
2355 Lisp_Object devcons, concons, hash_table;
2356 DEVICE_LOOP_NO_BREAK (devcons, concons)
2358 struct device *d = XDEVICE (XCAR (devcons));
2359 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2360 if (!UNBOUNDP (hash_table))
2361 Fclrhash (hash_table);
2365 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2366 Set the 'registry property of CHARSET to REGISTRY.
2368 (charset, registry))
2370 charset = Fget_charset (charset);
2371 CHECK_STRING (registry);
2372 XCHARSET_REGISTRY (charset) = registry;
2373 invalidate_charset_font_caches (charset);
2374 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2379 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2380 Return mapping-table of CHARSET.
2384 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2387 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2388 Set mapping-table of CHARSET to TABLE.
2392 struct Lisp_Charset *cs;
2393 Lisp_Object old_table;
2396 charset = Fget_charset (charset);
2397 cs = XCHARSET (charset);
2399 if (EQ (table, Qnil))
2401 CHARSET_DECODING_TABLE(cs) = table;
2404 else if (VECTORP (table))
2408 /* ad-hoc method for `ascii' */
2409 if ((CHARSET_CHARS (cs) == 94) &&
2410 (CHARSET_BYTE_OFFSET (cs) != 33))
2411 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2413 ccs_len = CHARSET_CHARS (cs);
2415 if (XVECTOR_LENGTH (table) > ccs_len)
2416 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2417 old_table = CHARSET_DECODING_TABLE(cs);
2418 CHARSET_DECODING_TABLE(cs) = table;
2421 signal_error (Qwrong_type_argument,
2422 list2 (build_translated_string ("vector-or-nil-p"),
2424 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2426 switch (CHARSET_DIMENSION (cs))
2429 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2431 Lisp_Object c = XVECTOR_DATA(table)[i];
2436 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2440 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2442 Lisp_Object v = XVECTOR_DATA(table)[i];
2448 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2450 CHARSET_DECODING_TABLE(cs) = old_table;
2451 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2453 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2455 Lisp_Object c = XVECTOR_DATA(v)[j];
2460 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2461 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2465 put_char_attribute (v, charset,
2466 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2475 /************************************************************************/
2476 /* Lisp primitives for working with characters */
2477 /************************************************************************/
2480 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2481 Make a character from CHARSET and code-point CODE.
2487 charset = Fget_charset (charset);
2490 if (XCHARSET_GRAPHIC (charset) == 1)
2492 return make_char (DECODE_CHAR (charset, c));
2496 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2497 Make a character from CHARSET and octets ARG1 and ARG2.
2498 ARG2 is required only for characters from two-dimensional charsets.
2499 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2500 character s with caron.
2502 (charset, arg1, arg2))
2504 struct Lisp_Charset *cs;
2506 int lowlim, highlim;
2508 charset = Fget_charset (charset);
2509 cs = XCHARSET (charset);
2511 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2512 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2514 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2516 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2517 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2520 /* It is useful (and safe, according to Olivier Galibert) to strip
2521 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2522 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2523 Latin 2 code of the character. */
2531 if (a1 < lowlim || a1 > highlim)
2532 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2534 if (CHARSET_DIMENSION (cs) == 1)
2538 ("Charset is of dimension one; second octet must be nil", arg2);
2539 return make_char (MAKE_CHAR (charset, a1, 0));
2548 a2 = XINT (arg2) & 0x7f;
2550 if (a2 < lowlim || a2 > highlim)
2551 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2553 return make_char (MAKE_CHAR (charset, a1, a2));
2556 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2557 Return the character set of char CH.
2561 CHECK_CHAR_COERCE_INT (ch);
2563 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2566 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2567 Return the octet numbered N (should be 0 or 1) of char CH.
2568 N defaults to 0 if omitted.
2572 Lisp_Object charset;
2575 CHECK_CHAR_COERCE_INT (ch);
2577 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2579 if (NILP (n) || EQ (n, Qzero))
2580 return make_int (octet0);
2581 else if (EQ (n, make_int (1)))
2582 return make_int (octet1);
2584 signal_simple_error ("Octet number must be 0 or 1", n);
2587 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2588 Return list of charset and one or two position-codes of CHAR.
2592 /* This function can GC */
2593 struct gcpro gcpro1, gcpro2;
2594 Lisp_Object charset = Qnil;
2595 Lisp_Object rc = Qnil;
2603 GCPRO2 (charset, rc);
2604 CHECK_CHAR_COERCE_INT (character);
2607 code_point = ENCODE_CHAR (XCHAR (character), charset);
2608 dimension = XCHARSET_DIMENSION (charset);
2609 while (dimension > 0)
2611 rc = Fcons (make_int (code_point & 255), rc);
2615 rc = Fcons (XCHARSET_NAME (charset), rc);
2617 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2619 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2621 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2625 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2634 #ifdef ENABLE_COMPOSITE_CHARS
2635 /************************************************************************/
2636 /* composite character functions */
2637 /************************************************************************/
2640 lookup_composite_char (Bufbyte *str, int len)
2642 Lisp_Object lispstr = make_string (str, len);
2643 Lisp_Object ch = Fgethash (lispstr,
2644 Vcomposite_char_string2char_hash_table,
2650 if (composite_char_row_next >= 128)
2651 signal_simple_error ("No more composite chars available", lispstr);
2652 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2653 composite_char_col_next);
2654 Fputhash (make_char (emch), lispstr,
2655 Vcomposite_char_char2string_hash_table);
2656 Fputhash (lispstr, make_char (emch),
2657 Vcomposite_char_string2char_hash_table);
2658 composite_char_col_next++;
2659 if (composite_char_col_next >= 128)
2661 composite_char_col_next = 32;
2662 composite_char_row_next++;
2671 composite_char_string (Emchar ch)
2673 Lisp_Object str = Fgethash (make_char (ch),
2674 Vcomposite_char_char2string_hash_table,
2676 assert (!UNBOUNDP (str));
2680 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2681 Convert a string into a single composite character.
2682 The character is the result of overstriking all the characters in
2687 CHECK_STRING (string);
2688 return make_char (lookup_composite_char (XSTRING_DATA (string),
2689 XSTRING_LENGTH (string)));
2692 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2693 Return a string of the characters comprising a composite character.
2701 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2702 signal_simple_error ("Must be composite char", ch);
2703 return composite_char_string (emch);
2705 #endif /* ENABLE_COMPOSITE_CHARS */
2708 /************************************************************************/
2709 /* initialization */
2710 /************************************************************************/
2713 syms_of_mule_charset (void)
2715 DEFSUBR (Fcharsetp);
2716 DEFSUBR (Ffind_charset);
2717 DEFSUBR (Fget_charset);
2718 DEFSUBR (Fcharset_list);
2719 DEFSUBR (Fcharset_name);
2720 DEFSUBR (Fmake_charset);
2721 DEFSUBR (Fmake_reverse_direction_charset);
2722 /* DEFSUBR (Freverse_direction_charset); */
2723 DEFSUBR (Fdefine_charset_alias);
2724 DEFSUBR (Fcharset_from_attributes);
2725 DEFSUBR (Fcharset_short_name);
2726 DEFSUBR (Fcharset_long_name);
2727 DEFSUBR (Fcharset_description);
2728 DEFSUBR (Fcharset_dimension);
2729 DEFSUBR (Fcharset_property);
2730 DEFSUBR (Fcharset_id);
2731 DEFSUBR (Fset_charset_ccl_program);
2732 DEFSUBR (Fset_charset_registry);
2734 DEFSUBR (Fchar_attribute_alist);
2735 DEFSUBR (Fget_char_attribute);
2736 DEFSUBR (Fput_char_attribute);
2737 DEFSUBR (Fremove_char_attribute);
2738 DEFSUBR (Fdefine_char);
2739 DEFSUBR (Fchar_variants);
2740 DEFSUBR (Fget_composite_char);
2741 DEFSUBR (Fcharset_mapping_table);
2742 DEFSUBR (Fset_charset_mapping_table);
2746 DEFSUBR (Fdecode_char);
2748 DEFSUBR (Fmake_char);
2749 DEFSUBR (Fchar_charset);
2750 DEFSUBR (Fchar_octet);
2751 DEFSUBR (Fsplit_char);
2753 #ifdef ENABLE_COMPOSITE_CHARS
2754 DEFSUBR (Fmake_composite_char);
2755 DEFSUBR (Fcomposite_char_string);
2758 defsymbol (&Qcharsetp, "charsetp");
2759 defsymbol (&Qregistry, "registry");
2760 defsymbol (&Qfinal, "final");
2761 defsymbol (&Qgraphic, "graphic");
2762 defsymbol (&Qdirection, "direction");
2763 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2764 defsymbol (&Qshort_name, "short-name");
2765 defsymbol (&Qlong_name, "long-name");
2767 defsymbol (&Ql2r, "l2r");
2768 defsymbol (&Qr2l, "r2l");
2770 /* Charsets, compatible with FSF 20.3
2771 Naming convention is Script-Charset[-Edition] */
2772 defsymbol (&Qascii, "ascii");
2773 defsymbol (&Qcontrol_1, "control-1");
2774 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2775 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2776 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2777 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2778 defsymbol (&Qthai_tis620, "thai-tis620");
2779 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2780 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2781 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2782 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2783 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2784 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2785 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2786 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2787 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2788 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2789 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2790 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2791 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2792 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2793 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2795 defsymbol (&Q_ucs, "->ucs");
2796 defsymbol (&Q_decomposition, "->decomposition");
2797 defsymbol (&Qcompat, "compat");
2798 defsymbol (&Qisolated, "isolated");
2799 defsymbol (&Qinitial, "initial");
2800 defsymbol (&Qmedial, "medial");
2801 defsymbol (&Qfinal, "final");
2802 defsymbol (&Qvertical, "vertical");
2803 defsymbol (&QnoBreak, "noBreak");
2804 defsymbol (&Qfraction, "fraction");
2805 defsymbol (&Qsuper, "super");
2806 defsymbol (&Qsub, "sub");
2807 defsymbol (&Qcircle, "circle");
2808 defsymbol (&Qsquare, "square");
2809 defsymbol (&Qwide, "wide");
2810 defsymbol (&Qnarrow, "narrow");
2811 defsymbol (&Qsmall, "small");
2812 defsymbol (&Qfont, "font");
2813 defsymbol (&Qucs, "ucs");
2814 defsymbol (&Qucs_bmp, "ucs-bmp");
2815 defsymbol (&Qlatin_viscii, "latin-viscii");
2816 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2817 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2818 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2819 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2820 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2821 defsymbol (&Qmojikyo, "mojikyo");
2822 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2823 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2824 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2825 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2826 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2827 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2828 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2829 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2830 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2831 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2832 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2833 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2834 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2835 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2836 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2837 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2838 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2839 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2840 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2841 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2842 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2843 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2845 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2846 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2848 defsymbol (&Qcomposite, "composite");
2852 vars_of_mule_charset (void)
2859 chlook = xnew (struct charset_lookup);
2860 dumpstruct (&chlook, &charset_lookup_description);
2862 /* Table of charsets indexed by leading byte. */
2863 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2864 chlook->charset_by_leading_byte[i] = Qnil;
2867 /* Table of charsets indexed by type/final-byte. */
2868 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2869 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2870 chlook->charset_by_attributes[i][j] = Qnil;
2872 /* Table of charsets indexed by type/final-byte/direction. */
2873 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2874 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2875 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2876 chlook->charset_by_attributes[i][j][k] = Qnil;
2880 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2882 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2883 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2887 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2888 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2889 Leading-code of private TYPE9N charset of column-width 1.
2891 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2895 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2896 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2897 Version number of UTF-2000.
2900 staticpro (&Vcharacter_attribute_table);
2901 Vcharacter_attribute_table = make_char_code_table (Qnil);
2903 staticpro (&Vcharacter_composition_table);
2904 Vcharacter_composition_table = make_char_code_table (Qnil);
2906 staticpro (&Vcharacter_variant_table);
2907 Vcharacter_variant_table = make_char_code_table (Qnil);
2909 Vdefault_coded_charset_priority_list = Qnil;
2910 DEFVAR_LISP ("default-coded-charset-priority-list",
2911 &Vdefault_coded_charset_priority_list /*
2912 Default order of preferred coded-character-sets.
2918 complex_vars_of_mule_charset (void)
2920 staticpro (&Vcharset_hash_table);
2921 Vcharset_hash_table =
2922 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2924 /* Predefined character sets. We store them into variables for
2928 staticpro (&Vcharset_ucs);
2930 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2931 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2932 build_string ("UCS"),
2933 build_string ("UCS"),
2934 build_string ("ISO/IEC 10646"),
2936 Qnil, 0, 0xFFFFFFF, 0, 0);
2937 staticpro (&Vcharset_ucs_bmp);
2939 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2940 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2941 build_string ("BMP"),
2942 build_string ("BMP"),
2943 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2944 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2945 Qnil, 0, 0xFFFF, 0, 0);
2947 # define MIN_CHAR_THAI 0
2948 # define MAX_CHAR_THAI 0
2949 # define MIN_CHAR_HEBREW 0
2950 # define MAX_CHAR_HEBREW 0
2951 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2952 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2954 staticpro (&Vcharset_ascii);
2956 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2957 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2958 build_string ("ASCII"),
2959 build_string ("ASCII)"),
2960 build_string ("ASCII (ISO646 IRV)"),
2961 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2962 Qnil, 0, 0x7F, 0, 0);
2963 staticpro (&Vcharset_control_1);
2964 Vcharset_control_1 =
2965 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2966 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2967 build_string ("C1"),
2968 build_string ("Control characters"),
2969 build_string ("Control characters 128-191"),
2971 Qnil, 0x80, 0x9F, 0, 0);
2972 staticpro (&Vcharset_latin_iso8859_1);
2973 Vcharset_latin_iso8859_1 =
2974 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2975 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2976 build_string ("Latin-1"),
2977 build_string ("ISO8859-1 (Latin-1)"),
2978 build_string ("ISO8859-1 (Latin-1)"),
2979 build_string ("iso8859-1"),
2980 Qnil, 0xA0, 0xFF, 0, 32);
2981 staticpro (&Vcharset_latin_iso8859_2);
2982 Vcharset_latin_iso8859_2 =
2983 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2984 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2985 build_string ("Latin-2"),
2986 build_string ("ISO8859-2 (Latin-2)"),
2987 build_string ("ISO8859-2 (Latin-2)"),
2988 build_string ("iso8859-2"),
2990 staticpro (&Vcharset_latin_iso8859_3);
2991 Vcharset_latin_iso8859_3 =
2992 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2993 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2994 build_string ("Latin-3"),
2995 build_string ("ISO8859-3 (Latin-3)"),
2996 build_string ("ISO8859-3 (Latin-3)"),
2997 build_string ("iso8859-3"),
2999 staticpro (&Vcharset_latin_iso8859_4);
3000 Vcharset_latin_iso8859_4 =
3001 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3002 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3003 build_string ("Latin-4"),
3004 build_string ("ISO8859-4 (Latin-4)"),
3005 build_string ("ISO8859-4 (Latin-4)"),
3006 build_string ("iso8859-4"),
3008 staticpro (&Vcharset_thai_tis620);
3009 Vcharset_thai_tis620 =
3010 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3011 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3012 build_string ("TIS620"),
3013 build_string ("TIS620 (Thai)"),
3014 build_string ("TIS620.2529 (Thai)"),
3015 build_string ("tis620"),
3016 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3017 staticpro (&Vcharset_greek_iso8859_7);
3018 Vcharset_greek_iso8859_7 =
3019 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3020 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3021 build_string ("ISO8859-7"),
3022 build_string ("ISO8859-7 (Greek)"),
3023 build_string ("ISO8859-7 (Greek)"),
3024 build_string ("iso8859-7"),
3026 0 /* MIN_CHAR_GREEK */,
3027 0 /* MAX_CHAR_GREEK */, 0, 32);
3028 staticpro (&Vcharset_arabic_iso8859_6);
3029 Vcharset_arabic_iso8859_6 =
3030 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3031 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3032 build_string ("ISO8859-6"),
3033 build_string ("ISO8859-6 (Arabic)"),
3034 build_string ("ISO8859-6 (Arabic)"),
3035 build_string ("iso8859-6"),
3037 staticpro (&Vcharset_hebrew_iso8859_8);
3038 Vcharset_hebrew_iso8859_8 =
3039 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3040 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3041 build_string ("ISO8859-8"),
3042 build_string ("ISO8859-8 (Hebrew)"),
3043 build_string ("ISO8859-8 (Hebrew)"),
3044 build_string ("iso8859-8"),
3045 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3046 staticpro (&Vcharset_katakana_jisx0201);
3047 Vcharset_katakana_jisx0201 =
3048 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3049 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3050 build_string ("JISX0201 Kana"),
3051 build_string ("JISX0201.1976 (Japanese Kana)"),
3052 build_string ("JISX0201.1976 Japanese Kana"),
3053 build_string ("jisx0201\\.1976"),
3055 staticpro (&Vcharset_latin_jisx0201);
3056 Vcharset_latin_jisx0201 =
3057 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3058 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3059 build_string ("JISX0201 Roman"),
3060 build_string ("JISX0201.1976 (Japanese Roman)"),
3061 build_string ("JISX0201.1976 Japanese Roman"),
3062 build_string ("jisx0201\\.1976"),
3064 staticpro (&Vcharset_cyrillic_iso8859_5);
3065 Vcharset_cyrillic_iso8859_5 =
3066 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3067 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3068 build_string ("ISO8859-5"),
3069 build_string ("ISO8859-5 (Cyrillic)"),
3070 build_string ("ISO8859-5 (Cyrillic)"),
3071 build_string ("iso8859-5"),
3073 0 /* MIN_CHAR_CYRILLIC */,
3074 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3075 staticpro (&Vcharset_latin_iso8859_9);
3076 Vcharset_latin_iso8859_9 =
3077 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3078 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3079 build_string ("Latin-5"),
3080 build_string ("ISO8859-9 (Latin-5)"),
3081 build_string ("ISO8859-9 (Latin-5)"),
3082 build_string ("iso8859-9"),
3084 staticpro (&Vcharset_japanese_jisx0208_1978);
3085 Vcharset_japanese_jisx0208_1978 =
3086 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3087 Qjapanese_jisx0208_1978, 94, 2,
3088 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3089 build_string ("JIS X0208:1978"),
3090 build_string ("JIS X0208:1978 (Japanese)"),
3092 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3093 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3095 staticpro (&Vcharset_chinese_gb2312);
3096 Vcharset_chinese_gb2312 =
3097 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3098 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3099 build_string ("GB2312"),
3100 build_string ("GB2312)"),
3101 build_string ("GB2312 Chinese simplified"),
3102 build_string ("gb2312"),
3104 staticpro (&Vcharset_japanese_jisx0208);
3105 Vcharset_japanese_jisx0208 =
3106 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3107 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3108 build_string ("JISX0208"),
3109 build_string ("JIS X0208:1983 (Japanese)"),
3110 build_string ("JIS X0208:1983 Japanese Kanji"),
3111 build_string ("jisx0208\\.1983"),
3114 staticpro (&Vcharset_japanese_jisx0208_1990);
3115 Vcharset_japanese_jisx0208_1990 =
3116 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3117 Qjapanese_jisx0208_1990, 94, 2,
3118 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3119 build_string ("JISX0208-1990"),
3120 build_string ("JIS X0208:1990 (Japanese)"),
3121 build_string ("JIS X0208:1990 Japanese Kanji"),
3122 build_string ("jisx0208\\.1990"),
3124 MIN_CHAR_JIS_X0208_1990,
3125 MAX_CHAR_JIS_X0208_1990, 0, 33);
3127 staticpro (&Vcharset_korean_ksc5601);
3128 Vcharset_korean_ksc5601 =
3129 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3130 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3131 build_string ("KSC5601"),
3132 build_string ("KSC5601 (Korean"),
3133 build_string ("KSC5601 Korean Hangul and Hanja"),
3134 build_string ("ksc5601"),
3136 staticpro (&Vcharset_japanese_jisx0212);
3137 Vcharset_japanese_jisx0212 =
3138 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3139 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3140 build_string ("JISX0212"),
3141 build_string ("JISX0212 (Japanese)"),
3142 build_string ("JISX0212 Japanese Supplement"),
3143 build_string ("jisx0212"),
3146 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3147 staticpro (&Vcharset_chinese_cns11643_1);
3148 Vcharset_chinese_cns11643_1 =
3149 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3150 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3151 build_string ("CNS11643-1"),
3152 build_string ("CNS11643-1 (Chinese traditional)"),
3154 ("CNS 11643 Plane 1 Chinese traditional"),
3155 build_string (CHINESE_CNS_PLANE_RE("1")),
3157 staticpro (&Vcharset_chinese_cns11643_2);
3158 Vcharset_chinese_cns11643_2 =
3159 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3160 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3161 build_string ("CNS11643-2"),
3162 build_string ("CNS11643-2 (Chinese traditional)"),
3164 ("CNS 11643 Plane 2 Chinese traditional"),
3165 build_string (CHINESE_CNS_PLANE_RE("2")),
3168 staticpro (&Vcharset_latin_viscii_lower);
3169 Vcharset_latin_viscii_lower =
3170 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3171 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3172 build_string ("VISCII lower"),
3173 build_string ("VISCII lower (Vietnamese)"),
3174 build_string ("VISCII lower (Vietnamese)"),
3175 build_string ("MULEVISCII-LOWER"),
3177 staticpro (&Vcharset_latin_viscii_upper);
3178 Vcharset_latin_viscii_upper =
3179 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3180 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("VISCII upper"),
3182 build_string ("VISCII upper (Vietnamese)"),
3183 build_string ("VISCII upper (Vietnamese)"),
3184 build_string ("MULEVISCII-UPPER"),
3186 staticpro (&Vcharset_latin_viscii);
3187 Vcharset_latin_viscii =
3188 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3189 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3190 build_string ("VISCII"),
3191 build_string ("VISCII 1.1 (Vietnamese)"),
3192 build_string ("VISCII 1.1 (Vietnamese)"),
3193 build_string ("VISCII1\\.1"),
3195 staticpro (&Vcharset_ideograph_daikanwa);
3196 Vcharset_ideograph_daikanwa =
3197 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3198 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3199 build_string ("Daikanwa"),
3200 build_string ("Morohashi's Daikanwa"),
3201 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3202 build_string ("Daikanwa"),
3203 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3204 staticpro (&Vcharset_mojikyo);
3206 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3207 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3208 build_string ("Mojikyo"),
3209 build_string ("Mojikyo"),
3210 build_string ("Konjaku-Mojikyo"),
3212 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3213 staticpro (&Vcharset_mojikyo_pj_1);
3214 Vcharset_mojikyo_pj_1 =
3215 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3216 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3217 build_string ("Mojikyo-PJ-1"),
3218 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3220 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3221 build_string ("jisx0208\\.Mojikyo-1$"),
3223 staticpro (&Vcharset_mojikyo_pj_2);
3224 Vcharset_mojikyo_pj_2 =
3225 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3226 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3227 build_string ("Mojikyo-PJ-2"),
3228 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3230 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3231 build_string ("jisx0208\\.Mojikyo-2$"),
3233 staticpro (&Vcharset_mojikyo_pj_3);
3234 Vcharset_mojikyo_pj_3 =
3235 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3236 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3237 build_string ("Mojikyo-PJ-3"),
3238 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3240 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3241 build_string ("jisx0208\\.Mojikyo-3$"),
3243 staticpro (&Vcharset_mojikyo_pj_4);
3244 Vcharset_mojikyo_pj_4 =
3245 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3246 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3247 build_string ("Mojikyo-PJ-4"),
3248 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3250 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3251 build_string ("jisx0208\\.Mojikyo-4$"),
3253 staticpro (&Vcharset_mojikyo_pj_5);
3254 Vcharset_mojikyo_pj_5 =
3255 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3256 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3257 build_string ("Mojikyo-PJ-5"),
3258 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3260 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3261 build_string ("jisx0208\\.Mojikyo-5$"),
3263 staticpro (&Vcharset_mojikyo_pj_6);
3264 Vcharset_mojikyo_pj_6 =
3265 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3266 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3267 build_string ("Mojikyo-PJ-6"),
3268 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3270 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3271 build_string ("jisx0208\\.Mojikyo-6$"),
3273 staticpro (&Vcharset_mojikyo_pj_7);
3274 Vcharset_mojikyo_pj_7 =
3275 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3276 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3277 build_string ("Mojikyo-PJ-7"),
3278 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3280 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3281 build_string ("jisx0208\\.Mojikyo-7$"),
3283 staticpro (&Vcharset_mojikyo_pj_8);
3284 Vcharset_mojikyo_pj_8 =
3285 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3286 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3287 build_string ("Mojikyo-PJ-8"),
3288 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3290 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3291 build_string ("jisx0208\\.Mojikyo-8$"),
3293 staticpro (&Vcharset_mojikyo_pj_9);
3294 Vcharset_mojikyo_pj_9 =
3295 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3296 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3297 build_string ("Mojikyo-PJ-9"),
3298 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3300 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3301 build_string ("jisx0208\\.Mojikyo-9$"),
3303 staticpro (&Vcharset_mojikyo_pj_10);
3304 Vcharset_mojikyo_pj_10 =
3305 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3306 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3307 build_string ("Mojikyo-PJ-10"),
3308 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3310 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3311 build_string ("jisx0208\\.Mojikyo-10$"),
3313 staticpro (&Vcharset_mojikyo_pj_11);
3314 Vcharset_mojikyo_pj_11 =
3315 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3316 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3317 build_string ("Mojikyo-PJ-11"),
3318 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3320 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3321 build_string ("jisx0208\\.Mojikyo-11$"),
3323 staticpro (&Vcharset_mojikyo_pj_12);
3324 Vcharset_mojikyo_pj_12 =
3325 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3326 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3327 build_string ("Mojikyo-PJ-12"),
3328 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3330 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3331 build_string ("jisx0208\\.Mojikyo-12$"),
3333 staticpro (&Vcharset_mojikyo_pj_13);
3334 Vcharset_mojikyo_pj_13 =
3335 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3336 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3337 build_string ("Mojikyo-PJ-13"),
3338 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3340 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3341 build_string ("jisx0208\\.Mojikyo-13$"),
3343 staticpro (&Vcharset_mojikyo_pj_14);
3344 Vcharset_mojikyo_pj_14 =
3345 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3346 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3347 build_string ("Mojikyo-PJ-14"),
3348 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3350 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3351 build_string ("jisx0208\\.Mojikyo-14$"),
3353 staticpro (&Vcharset_mojikyo_pj_15);
3354 Vcharset_mojikyo_pj_15 =
3355 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3356 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3357 build_string ("Mojikyo-PJ-15"),
3358 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3360 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3361 build_string ("jisx0208\\.Mojikyo-15$"),
3363 staticpro (&Vcharset_mojikyo_pj_16);
3364 Vcharset_mojikyo_pj_16 =
3365 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3366 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3367 build_string ("Mojikyo-PJ-16"),
3368 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3370 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3371 build_string ("jisx0208\\.Mojikyo-16$"),
3373 staticpro (&Vcharset_mojikyo_pj_17);
3374 Vcharset_mojikyo_pj_17 =
3375 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3376 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3377 build_string ("Mojikyo-PJ-17"),
3378 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3380 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3381 build_string ("jisx0208\\.Mojikyo-17$"),
3383 staticpro (&Vcharset_mojikyo_pj_18);
3384 Vcharset_mojikyo_pj_18 =
3385 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3386 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3387 build_string ("Mojikyo-PJ-18"),
3388 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3390 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3391 build_string ("jisx0208\\.Mojikyo-18$"),
3393 staticpro (&Vcharset_mojikyo_pj_19);
3394 Vcharset_mojikyo_pj_19 =
3395 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3396 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3397 build_string ("Mojikyo-PJ-19"),
3398 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3400 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3401 build_string ("jisx0208\\.Mojikyo-19$"),
3403 staticpro (&Vcharset_mojikyo_pj_20);
3404 Vcharset_mojikyo_pj_20 =
3405 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3406 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3407 build_string ("Mojikyo-PJ-20"),
3408 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3410 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3411 build_string ("jisx0208\\.Mojikyo-20$"),
3413 staticpro (&Vcharset_mojikyo_pj_21);
3414 Vcharset_mojikyo_pj_21 =
3415 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3416 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3417 build_string ("Mojikyo-PJ-21"),
3418 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3420 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3421 build_string ("jisx0208\\.Mojikyo-21$"),
3423 staticpro (&Vcharset_ethiopic_ucs);
3424 Vcharset_ethiopic_ucs =
3425 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3426 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3427 build_string ("Ethiopic (UCS)"),
3428 build_string ("Ethiopic (UCS)"),
3429 build_string ("Ethiopic of UCS"),
3430 build_string ("Ethiopic-Unicode"),
3431 Qnil, 0x1200, 0x137F, 0x1200, 0);
3433 staticpro (&Vcharset_chinese_big5_1);
3434 Vcharset_chinese_big5_1 =
3435 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3436 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3437 build_string ("Big5"),
3438 build_string ("Big5 (Level-1)"),
3440 ("Big5 Level-1 Chinese traditional"),
3441 build_string ("big5"),
3443 staticpro (&Vcharset_chinese_big5_2);
3444 Vcharset_chinese_big5_2 =
3445 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3446 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3447 build_string ("Big5"),
3448 build_string ("Big5 (Level-2)"),
3450 ("Big5 Level-2 Chinese traditional"),
3451 build_string ("big5"),
3454 #ifdef ENABLE_COMPOSITE_CHARS
3455 /* #### For simplicity, we put composite chars into a 96x96 charset.
3456 This is going to lead to problems because you can run out of
3457 room, esp. as we don't yet recycle numbers. */
3458 staticpro (&Vcharset_composite);
3459 Vcharset_composite =
3460 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3461 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3462 build_string ("Composite"),
3463 build_string ("Composite characters"),
3464 build_string ("Composite characters"),
3467 /* #### not dumped properly */
3468 composite_char_row_next = 32;
3469 composite_char_col_next = 32;
3471 Vcomposite_char_string2char_hash_table =
3472 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3473 Vcomposite_char_char2string_hash_table =
3474 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3475 staticpro (&Vcomposite_char_string2char_hash_table);
3476 staticpro (&Vcomposite_char_char2string_hash_table);
3477 #endif /* ENABLE_COMPOSITE_CHARS */