1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo;
70 Lisp_Object Vcharset_mojikyo_pj_1;
71 Lisp_Object Vcharset_mojikyo_pj_2;
72 Lisp_Object Vcharset_mojikyo_pj_3;
73 Lisp_Object Vcharset_mojikyo_pj_4;
74 Lisp_Object Vcharset_mojikyo_pj_5;
75 Lisp_Object Vcharset_mojikyo_pj_6;
76 Lisp_Object Vcharset_mojikyo_pj_7;
77 Lisp_Object Vcharset_mojikyo_pj_8;
78 Lisp_Object Vcharset_mojikyo_pj_9;
79 Lisp_Object Vcharset_mojikyo_pj_10;
80 Lisp_Object Vcharset_mojikyo_pj_11;
81 Lisp_Object Vcharset_mojikyo_pj_12;
82 Lisp_Object Vcharset_mojikyo_pj_13;
83 Lisp_Object Vcharset_mojikyo_pj_14;
84 Lisp_Object Vcharset_mojikyo_pj_15;
85 Lisp_Object Vcharset_mojikyo_pj_16;
86 Lisp_Object Vcharset_mojikyo_pj_17;
87 Lisp_Object Vcharset_mojikyo_pj_18;
88 Lisp_Object Vcharset_mojikyo_pj_19;
89 Lisp_Object Vcharset_mojikyo_pj_20;
90 Lisp_Object Vcharset_mojikyo_pj_21;
91 Lisp_Object Vcharset_ethiopic_ucs;
93 Lisp_Object Vcharset_chinese_big5_1;
94 Lisp_Object Vcharset_chinese_big5_2;
96 #ifdef ENABLE_COMPOSITE_CHARS
97 Lisp_Object Vcharset_composite;
99 /* Hash tables for composite chars. One maps string representing
100 composed chars to their equivalent chars; one goes the
102 Lisp_Object Vcomposite_char_char2string_hash_table;
103 Lisp_Object Vcomposite_char_string2char_hash_table;
105 static int composite_char_row_next;
106 static int composite_char_col_next;
108 #endif /* ENABLE_COMPOSITE_CHARS */
110 struct charset_lookup *chlook;
112 static const struct lrecord_description charset_lookup_description_1[] = {
113 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
122 static const struct struct_description charset_lookup_description = {
123 sizeof(struct charset_lookup),
124 charset_lookup_description_1
128 /* Table of number of bytes in the string representation of a character
129 indexed by the first byte of that representation.
131 rep_bytes_by_first_byte(c) is more efficient than the equivalent
132 canonical computation:
134 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
136 const Bytecount rep_bytes_by_first_byte[0xA0] =
137 { /* 0x00 - 0x7f are for straight ASCII */
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 /* 0x80 - 0x8f are for Dimension-1 official charsets */
148 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
152 /* 0x90 - 0x9d are for Dimension-2 official charsets */
153 /* 0x9e is for Dimension-1 private charsets */
154 /* 0x9f is for Dimension-2 private charsets */
155 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
162 mark_char_byte_table (Lisp_Object obj)
164 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
167 for (i = 0; i < 256; i++)
169 mark_object (cte->property[i]);
175 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
177 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
178 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
181 for (i = 0; i < 256; i++)
182 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
184 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
186 if (!char_byte_table_equal (cte1->property[i],
187 cte2->property[i], depth + 1))
194 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
200 char_byte_table_hash (Lisp_Object obj, int depth)
202 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
204 return internal_array_hash (cte->property, 256, depth);
207 static const struct lrecord_description char_byte_table_description[] = {
208 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
212 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
213 mark_char_byte_table,
214 internal_object_printer,
215 0, char_byte_table_equal,
216 char_byte_table_hash,
217 char_byte_table_description,
218 struct Lisp_Char_Byte_Table);
221 make_char_byte_table (Lisp_Object initval)
225 struct Lisp_Char_Byte_Table *cte =
226 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
227 &lrecord_char_byte_table);
229 for (i = 0; i < 256; i++)
230 cte->property[i] = initval;
232 XSETCHAR_BYTE_TABLE (obj, cte);
237 copy_char_byte_table (Lisp_Object entry)
239 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
242 struct Lisp_Char_Byte_Table *ctenew =
243 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
244 &lrecord_char_byte_table);
246 for (i = 0; i < 256; i++)
248 Lisp_Object new = cte->property[i];
249 if (CHAR_BYTE_TABLE_P (new))
250 ctenew->property[i] = copy_char_byte_table (new);
252 ctenew->property[i] = new;
255 XSETCHAR_BYTE_TABLE (obj, ctenew);
261 mark_char_code_table (Lisp_Object obj)
263 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
269 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
271 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
272 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
274 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
278 char_code_table_hash (Lisp_Object obj, int depth)
280 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
282 return char_code_table_hash (cte->table, depth + 1);
285 static const struct lrecord_description char_code_table_description[] = {
286 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
290 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
291 mark_char_code_table,
292 internal_object_printer,
293 0, char_code_table_equal,
294 char_code_table_hash,
295 char_code_table_description,
296 struct Lisp_Char_Code_Table);
299 make_char_code_table (Lisp_Object initval)
302 struct Lisp_Char_Code_Table *cte =
303 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
304 &lrecord_char_code_table);
306 cte->table = make_char_byte_table (initval);
308 XSETCHAR_CODE_TABLE (obj, cte);
313 copy_char_code_table (Lisp_Object entry)
315 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
317 struct Lisp_Char_Code_Table *ctenew =
318 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
319 &lrecord_char_code_table);
321 ctenew->table = copy_char_byte_table (cte->table);
322 XSETCHAR_CODE_TABLE (obj, ctenew);
328 get_char_code_table (Emchar ch, Lisp_Object table)
330 unsigned int code = ch;
331 struct Lisp_Char_Byte_Table* cpt
332 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
333 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
335 if (CHAR_BYTE_TABLE_P (ret))
336 cpt = XCHAR_BYTE_TABLE (ret);
340 ret = cpt->property [(unsigned char) (code >> 16)];
341 if (CHAR_BYTE_TABLE_P (ret))
342 cpt = XCHAR_BYTE_TABLE (ret);
346 ret = cpt->property [(unsigned char) (code >> 8)];
347 if (CHAR_BYTE_TABLE_P (ret))
348 cpt = XCHAR_BYTE_TABLE (ret);
352 return cpt->property [(unsigned char) code];
356 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
358 unsigned int code = ch;
359 struct Lisp_Char_Byte_Table* cpt1
360 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
361 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
363 if (CHAR_BYTE_TABLE_P (ret))
365 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
367 ret = cpt2->property[(unsigned char)(code >> 16)];
368 if (CHAR_BYTE_TABLE_P (ret))
370 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
372 ret = cpt3->property[(unsigned char)(code >> 8)];
373 if (CHAR_BYTE_TABLE_P (ret))
375 struct Lisp_Char_Byte_Table* cpt4
376 = XCHAR_BYTE_TABLE (ret);
378 cpt4->property[(unsigned char)code] = value;
380 else if (!EQ (ret, value))
382 Lisp_Object cpt4 = make_char_byte_table (ret);
384 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
385 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
388 else if (!EQ (ret, value))
390 Lisp_Object cpt3 = make_char_byte_table (ret);
391 Lisp_Object cpt4 = make_char_byte_table (ret);
393 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
394 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
396 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
399 else if (!EQ (ret, value))
401 Lisp_Object cpt2 = make_char_byte_table (ret);
402 Lisp_Object cpt3 = make_char_byte_table (ret);
403 Lisp_Object cpt4 = make_char_byte_table (ret);
405 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
406 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
407 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
408 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
413 Lisp_Object Vcharacter_attribute_table;
414 Lisp_Object Vcharacter_composition_table;
415 Lisp_Object Vcharacter_variant_table;
417 Lisp_Object Q_decomposition;
420 Lisp_Object Qisolated;
421 Lisp_Object Qinitial;
424 Lisp_Object Qvertical;
425 Lisp_Object QnoBreak;
426 Lisp_Object Qfraction;
437 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
443 else if (EQ (v, Qcompat))
445 else if (EQ (v, Qisolated))
447 else if (EQ (v, Qinitial))
449 else if (EQ (v, Qmedial))
451 else if (EQ (v, Qfinal))
453 else if (EQ (v, Qvertical))
455 else if (EQ (v, QnoBreak))
457 else if (EQ (v, Qfraction))
459 else if (EQ (v, Qsuper))
461 else if (EQ (v, Qsub))
463 else if (EQ (v, Qcircle))
465 else if (EQ (v, Qsquare))
467 else if (EQ (v, Qwide))
469 else if (EQ (v, Qnarrow))
471 else if (EQ (v, Qsmall))
473 else if (EQ (v, Qfont))
476 signal_simple_error (err_msg, err_arg);
479 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
480 Return character corresponding with list.
484 Lisp_Object table = Vcharacter_composition_table;
485 Lisp_Object rest = list;
489 Lisp_Object v = Fcar (rest);
491 Emchar c = to_char_code (v, "Invalid value for composition", list);
493 ret = get_char_code_table (c, table);
498 if (!CHAR_CODE_TABLE_P (ret))
503 else if (!CONSP (rest))
505 else if (CHAR_CODE_TABLE_P (ret))
508 signal_simple_error ("Invalid table is found with", list);
510 signal_simple_error ("Invalid value for composition", list);
513 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
514 Return variants of CHARACTER.
518 CHECK_CHAR (character);
519 return Fcopy_list (get_char_code_table (XCHAR (character),
520 Vcharacter_variant_table));
523 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
524 Return the alist of attributes of CHARACTER.
528 CHECK_CHAR (character);
529 return Fcopy_alist (get_char_code_table (XCHAR (character),
530 Vcharacter_attribute_table));
533 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
534 Return the value of CHARACTER's ATTRIBUTE.
536 (character, attribute))
541 CHECK_CHAR (character);
542 ret = get_char_code_table (XCHAR (character),
543 Vcharacter_attribute_table);
547 if (!NILP (ccs = Ffind_charset (attribute)))
550 return Fcdr (Fassq (attribute, ret));
554 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
557 Emchar char_code = XCHAR (character);
559 = get_char_code_table (char_code, Vcharacter_attribute_table);
562 cell = Fassq (attribute, ret);
566 ret = Fcons (Fcons (attribute, value), ret);
568 else if (!EQ (Fcdr (cell), value))
570 Fsetcdr (cell, value);
572 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
577 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
579 Emchar char_code = XCHAR (character);
581 = get_char_code_table (char_code, Vcharacter_attribute_table);
583 if (EQ (attribute, Fcar (Fcar (alist))))
585 alist = Fcdr (alist);
589 Lisp_Object pr = alist;
590 Lisp_Object r = Fcdr (alist);
594 if (EQ (attribute, Fcar (Fcar (r))))
596 XCDR (pr) = Fcdr (r);
603 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
609 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
610 Store CHARACTER's ATTRIBUTE with VALUE.
612 (character, attribute, value))
616 CHECK_CHAR (character);
617 ccs = Ffind_charset (attribute);
620 if (!EQ (XCHARSET_NAME (ccs), Qucs)
621 || (XCHAR (character) != XINT (value)))
623 Lisp_Object cpos, rest;
624 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
631 /* ad-hoc method for `ascii' */
632 if ((XCHARSET_CHARS (ccs) == 94) &&
633 (XCHARSET_BYTE_OFFSET (ccs) != 33))
634 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
636 ccs_len = XCHARSET_CHARS (ccs);
640 Lisp_Object ret = Fcar (value);
643 signal_simple_error ("Invalid value for coded-charset", value);
644 code_point = XINT (ret);
645 if (XCHARSET_GRAPHIC (ccs) == 1)
653 signal_simple_error ("Invalid value for coded-charset",
657 signal_simple_error ("Invalid value for coded-charset",
660 if (XCHARSET_GRAPHIC (ccs) == 1)
662 code_point = (code_point << 8) | i;
665 value = make_int (code_point);
667 else if (INTP (value))
669 if (XCHARSET_GRAPHIC (ccs) == 1)
670 value = make_int (XINT (value) & 0x7F7F7F7F);
673 signal_simple_error ("Invalid value for coded-charset", value);
676 cpos = Fget_char_attribute (character, attribute);
681 dim = XCHARSET_DIMENSION (ccs);
682 code_point = XINT (cpos);
686 i = ((code_point >> (8 * dim)) & 255)
687 - XCHARSET_BYTE_OFFSET (ccs);
688 nv = XVECTOR_DATA(v)[i];
694 XVECTOR_DATA(v)[i] = Qnil;
695 v = XCHARSET_DECODING_TABLE (ccs);
700 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
703 dim = XCHARSET_DIMENSION (ccs);
704 code_point = XINT (value);
709 i = ((code_point >> (8 * dim)) & 255)
710 - XCHARSET_BYTE_OFFSET (ccs);
711 nv = XVECTOR_DATA(v)[i];
715 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
721 XVECTOR_DATA(v)[i] = character;
726 else if (EQ (attribute, Q_decomposition))
728 Lisp_Object rest = value;
729 Lisp_Object table = Vcharacter_composition_table;
732 signal_simple_error ("Invalid value for ->decomposition",
737 Lisp_Object v = Fcar (rest);
740 = to_char_code (v, "Invalid value for ->decomposition", value);
745 put_char_code_table (c, character, table);
750 ntable = get_char_code_table (c, table);
751 if (!CHAR_CODE_TABLE_P (ntable))
753 ntable = make_char_code_table (Qnil);
754 put_char_code_table (c, ntable, table);
760 else if (EQ (attribute, Q_ucs))
766 signal_simple_error ("Invalid value for ->ucs", value);
770 ret = get_char_code_table (c, Vcharacter_variant_table);
771 if (NILP (Fmemq (character, ret)))
773 put_char_code_table (c, Fcons (character, ret),
774 Vcharacter_variant_table);
777 return put_char_attribute (character, attribute, value);
780 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
781 Remove CHARACTER's ATTRIBUTE.
783 (character, attribute))
787 CHECK_CHAR (character);
788 ccs = Ffind_charset (attribute);
792 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
799 /* ad-hoc method for `ascii' */
800 if ((XCHARSET_CHARS (ccs) == 94) &&
801 (XCHARSET_BYTE_OFFSET (ccs) != 33))
802 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
804 ccs_len = XCHARSET_CHARS (ccs);
807 cpos = Fget_char_attribute (character, attribute);
812 dim = XCHARSET_DIMENSION (ccs);
813 code_point = XINT (cpos);
817 i = ((code_point >> (8 * dim)) & 255)
818 - XCHARSET_BYTE_OFFSET (ccs);
819 nv = XVECTOR_DATA(v)[i];
825 XVECTOR_DATA(v)[i] = Qnil;
826 v = XCHARSET_DECODING_TABLE (ccs);
830 return remove_char_attribute (character, attribute);
833 EXFUN (Fmake_char, 3);
834 EXFUN (Fdecode_char, 2);
836 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
837 Store character's ATTRIBUTES.
841 Lisp_Object rest = attributes;
842 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
843 Lisp_Object character;
849 Lisp_Object cell = Fcar (rest);
853 signal_simple_error ("Invalid argument", attributes);
854 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
855 && ((XCHARSET_FINAL (ccs) != 0) ||
856 (XCHARSET_UCS_MAX (ccs) > 0)) )
860 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
862 character = Fdecode_char (ccs, cell);
863 goto setup_attributes;
867 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
870 signal_simple_error ("Invalid argument", attributes);
872 character = make_char (XINT (code) + 0x100000);
873 goto setup_attributes;
877 else if (!INTP (code))
878 signal_simple_error ("Invalid argument", attributes);
880 character = make_char (XINT (code));
886 Lisp_Object cell = Fcar (rest);
889 signal_simple_error ("Invalid argument", attributes);
890 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
894 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
897 Lisp_Object Vutf_2000_version;
901 int leading_code_private_11;
904 Lisp_Object Qcharsetp;
906 /* Qdoc_string, Qdimension, Qchars defined in general.c */
907 Lisp_Object Qregistry, Qfinal, Qgraphic;
908 Lisp_Object Qdirection;
909 Lisp_Object Qreverse_direction_charset;
910 Lisp_Object Qleading_byte;
911 Lisp_Object Qshort_name, Qlong_name;
927 Qjapanese_jisx0208_1978,
930 Qjapanese_jisx0208_1990,
940 Qvietnamese_viscii_lower,
941 Qvietnamese_viscii_upper,
971 Lisp_Object Ql2r, Qr2l;
973 Lisp_Object Vcharset_hash_table;
976 static Charset_ID next_allocated_leading_byte;
978 static Charset_ID next_allocated_1_byte_leading_byte;
979 static Charset_ID next_allocated_2_byte_leading_byte;
982 /* Composite characters are characters constructed by overstriking two
983 or more regular characters.
985 1) The old Mule implementation involves storing composite characters
986 in a buffer as a tag followed by all of the actual characters
987 used to make up the composite character. I think this is a bad
988 idea; it greatly complicates code that wants to handle strings
989 one character at a time because it has to deal with the possibility
990 of great big ungainly characters. It's much more reasonable to
991 simply store an index into a table of composite characters.
993 2) The current implementation only allows for 16,384 separate
994 composite characters over the lifetime of the XEmacs process.
995 This could become a potential problem if the user
996 edited lots of different files that use composite characters.
997 Due to FSF bogosity, increasing the number of allowable
998 composite characters under Mule would decrease the number
999 of possible faces that can exist. Mule already has shrunk
1000 this to 2048, and further shrinkage would become uncomfortable.
1001 No such problems exist in XEmacs.
1003 Composite characters could be represented as 0x80 C1 C2 C3,
1004 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1005 for slightly under 2^20 (one million) composite characters
1006 over the XEmacs process lifetime, and you only need to
1007 increase the size of a Mule character from 19 to 21 bits.
1008 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1009 85 million (slightly over 2^26) composite characters. */
1012 /************************************************************************/
1013 /* Basic Emchar functions */
1014 /************************************************************************/
1016 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1017 string in STR. Returns the number of bytes stored.
1018 Do not call this directly. Use the macro set_charptr_emchar() instead.
1022 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1028 Lisp_Object charset;
1037 else if ( c <= 0x7ff )
1039 *p++ = (c >> 6) | 0xc0;
1040 *p++ = (c & 0x3f) | 0x80;
1042 else if ( c <= 0xffff )
1044 *p++ = (c >> 12) | 0xe0;
1045 *p++ = ((c >> 6) & 0x3f) | 0x80;
1046 *p++ = (c & 0x3f) | 0x80;
1048 else if ( c <= 0x1fffff )
1050 *p++ = (c >> 18) | 0xf0;
1051 *p++ = ((c >> 12) & 0x3f) | 0x80;
1052 *p++ = ((c >> 6) & 0x3f) | 0x80;
1053 *p++ = (c & 0x3f) | 0x80;
1055 else if ( c <= 0x3ffffff )
1057 *p++ = (c >> 24) | 0xf8;
1058 *p++ = ((c >> 18) & 0x3f) | 0x80;
1059 *p++ = ((c >> 12) & 0x3f) | 0x80;
1060 *p++ = ((c >> 6) & 0x3f) | 0x80;
1061 *p++ = (c & 0x3f) | 0x80;
1065 *p++ = (c >> 30) | 0xfc;
1066 *p++ = ((c >> 24) & 0x3f) | 0x80;
1067 *p++ = ((c >> 18) & 0x3f) | 0x80;
1068 *p++ = ((c >> 12) & 0x3f) | 0x80;
1069 *p++ = ((c >> 6) & 0x3f) | 0x80;
1070 *p++ = (c & 0x3f) | 0x80;
1073 BREAKUP_CHAR (c, charset, c1, c2);
1074 lb = CHAR_LEADING_BYTE (c);
1075 if (LEADING_BYTE_PRIVATE_P (lb))
1076 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1078 if (EQ (charset, Vcharset_control_1))
1087 /* Return the first character from a Mule-encoded string in STR,
1088 assuming it's non-ASCII. Do not call this directly.
1089 Use the macro charptr_emchar() instead. */
1092 non_ascii_charptr_emchar (CONST Bufbyte *str)
1105 else if ( b >= 0xf8 )
1110 else if ( b >= 0xf0 )
1115 else if ( b >= 0xe0 )
1120 else if ( b >= 0xc0 )
1130 for( ; len > 0; len-- )
1133 ch = ( ch << 6 ) | ( b & 0x3f );
1137 Bufbyte i0 = *str, i1, i2 = 0;
1138 Lisp_Object charset;
1140 if (i0 == LEADING_BYTE_CONTROL_1)
1141 return (Emchar) (*++str - 0x20);
1143 if (LEADING_BYTE_PREFIX_P (i0))
1148 charset = CHARSET_BY_LEADING_BYTE (i0);
1149 if (XCHARSET_DIMENSION (charset) == 2)
1152 return MAKE_CHAR (charset, i1, i2);
1156 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1157 Do not call this directly. Use the macro valid_char_p() instead. */
1161 non_ascii_valid_char_p (Emchar ch)
1165 /* Must have only lowest 19 bits set */
1169 f1 = CHAR_FIELD1 (ch);
1170 f2 = CHAR_FIELD2 (ch);
1171 f3 = CHAR_FIELD3 (ch);
1175 Lisp_Object charset;
1177 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1178 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1179 f2 > MAX_CHAR_FIELD2_PRIVATE)
1184 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1185 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1189 NOTE: This takes advantage of the fact that
1190 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1191 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1193 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1194 if (EQ (charset, Qnil))
1196 return (XCHARSET_CHARS (charset) == 96);
1200 Lisp_Object charset;
1202 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1203 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1204 f1 > MAX_CHAR_FIELD1_PRIVATE)
1206 if (f2 < 0x20 || f3 < 0x20)
1209 #ifdef ENABLE_COMPOSITE_CHARS
1210 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1212 if (UNBOUNDP (Fgethash (make_int (ch),
1213 Vcomposite_char_char2string_hash_table,
1218 #endif /* ENABLE_COMPOSITE_CHARS */
1220 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1221 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1224 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1226 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1229 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1231 if (EQ (charset, Qnil))
1233 return (XCHARSET_CHARS (charset) == 96);
1239 /************************************************************************/
1240 /* Basic string functions */
1241 /************************************************************************/
1243 /* Copy the character pointed to by PTR into STR, assuming it's
1244 non-ASCII. Do not call this directly. Use the macro
1245 charptr_copy_char() instead. */
1248 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1250 Bufbyte *strptr = str;
1252 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1254 /* Notice fallthrough. */
1256 case 6: *++strptr = *ptr++;
1257 case 5: *++strptr = *ptr++;
1259 case 4: *++strptr = *ptr++;
1260 case 3: *++strptr = *ptr++;
1261 case 2: *++strptr = *ptr;
1266 return strptr + 1 - str;
1270 /************************************************************************/
1271 /* streams of Emchars */
1272 /************************************************************************/
1274 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1275 The functions below are not meant to be called directly; use
1276 the macros in insdel.h. */
1279 Lstream_get_emchar_1 (Lstream *stream, int ch)
1281 Bufbyte str[MAX_EMCHAR_LEN];
1282 Bufbyte *strptr = str;
1284 str[0] = (Bufbyte) ch;
1285 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1287 /* Notice fallthrough. */
1290 ch = Lstream_getc (stream);
1292 *++strptr = (Bufbyte) ch;
1294 ch = Lstream_getc (stream);
1296 *++strptr = (Bufbyte) ch;
1299 ch = Lstream_getc (stream);
1301 *++strptr = (Bufbyte) ch;
1303 ch = Lstream_getc (stream);
1305 *++strptr = (Bufbyte) ch;
1307 ch = Lstream_getc (stream);
1309 *++strptr = (Bufbyte) ch;
1314 return charptr_emchar (str);
1318 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1320 Bufbyte str[MAX_EMCHAR_LEN];
1321 Bytecount len = set_charptr_emchar (str, ch);
1322 return Lstream_write (stream, str, len);
1326 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1328 Bufbyte str[MAX_EMCHAR_LEN];
1329 Bytecount len = set_charptr_emchar (str, ch);
1330 Lstream_unread (stream, str, len);
1334 /************************************************************************/
1335 /* charset object */
1336 /************************************************************************/
1339 mark_charset (Lisp_Object obj)
1341 struct Lisp_Charset *cs = XCHARSET (obj);
1343 mark_object (cs->short_name);
1344 mark_object (cs->long_name);
1345 mark_object (cs->doc_string);
1346 mark_object (cs->registry);
1347 mark_object (cs->ccl_program);
1349 mark_object (cs->decoding_table);
1355 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1357 struct Lisp_Charset *cs = XCHARSET (obj);
1361 error ("printing unreadable object #<charset %s 0x%x>",
1362 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1365 write_c_string ("#<charset ", printcharfun);
1366 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1367 write_c_string (" ", printcharfun);
1368 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1369 write_c_string (" ", printcharfun);
1370 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1371 write_c_string (" ", printcharfun);
1372 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1373 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1375 CHARSET_DIMENSION (cs),
1376 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1377 CHARSET_COLUMNS (cs),
1378 CHARSET_GRAPHIC (cs),
1379 CHARSET_FINAL (cs));
1380 write_c_string (buf, printcharfun);
1381 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1382 sprintf (buf, " 0x%x>", cs->header.uid);
1383 write_c_string (buf, printcharfun);
1386 static const struct lrecord_description charset_description[] = {
1387 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1389 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1394 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1395 mark_charset, print_charset, 0, 0, 0,
1396 charset_description,
1397 struct Lisp_Charset);
1398 /* Make a new charset. */
1401 make_charset (Charset_ID id, Lisp_Object name,
1402 unsigned short chars, unsigned char dimension,
1403 unsigned char columns, unsigned char graphic,
1404 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1405 Lisp_Object long_name, Lisp_Object doc,
1407 Lisp_Object decoding_table,
1408 Emchar ucs_min, Emchar ucs_max,
1409 Emchar code_offset, unsigned char byte_offset)
1411 unsigned char type = 0;
1413 struct Lisp_Charset *cs =
1414 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1415 XSETCHARSET (obj, cs);
1417 CHARSET_ID (cs) = id;
1418 CHARSET_NAME (cs) = name;
1419 CHARSET_SHORT_NAME (cs) = short_name;
1420 CHARSET_LONG_NAME (cs) = long_name;
1421 CHARSET_CHARS (cs) = chars;
1422 CHARSET_DIMENSION (cs) = dimension;
1423 CHARSET_DIRECTION (cs) = direction;
1424 CHARSET_COLUMNS (cs) = columns;
1425 CHARSET_GRAPHIC (cs) = graphic;
1426 CHARSET_FINAL (cs) = final;
1427 CHARSET_DOC_STRING (cs) = doc;
1428 CHARSET_REGISTRY (cs) = reg;
1429 CHARSET_CCL_PROGRAM (cs) = Qnil;
1430 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1432 CHARSET_DECODING_TABLE(cs) = Qnil;
1433 CHARSET_UCS_MIN(cs) = ucs_min;
1434 CHARSET_UCS_MAX(cs) = ucs_max;
1435 CHARSET_CODE_OFFSET(cs) = code_offset;
1436 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1439 switch (CHARSET_CHARS (cs))
1442 switch (CHARSET_DIMENSION (cs))
1445 type = CHARSET_TYPE_94;
1448 type = CHARSET_TYPE_94X94;
1453 switch (CHARSET_DIMENSION (cs))
1456 type = CHARSET_TYPE_96;
1459 type = CHARSET_TYPE_96X96;
1465 switch (CHARSET_DIMENSION (cs))
1468 type = CHARSET_TYPE_128;
1471 type = CHARSET_TYPE_128X128;
1476 switch (CHARSET_DIMENSION (cs))
1479 type = CHARSET_TYPE_256;
1482 type = CHARSET_TYPE_256X256;
1489 CHARSET_TYPE (cs) = type;
1493 if (id == LEADING_BYTE_ASCII)
1494 CHARSET_REP_BYTES (cs) = 1;
1496 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1498 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1503 /* some charsets do not have final characters. This includes
1504 ASCII, Control-1, Composite, and the two faux private
1507 if (code_offset == 0)
1509 assert (NILP (chlook->charset_by_attributes[type][final]));
1510 chlook->charset_by_attributes[type][final] = obj;
1513 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1514 chlook->charset_by_attributes[type][final][direction] = obj;
1518 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1519 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1521 /* Some charsets are "faux" and don't have names or really exist at
1522 all except in the leading-byte table. */
1524 Fputhash (name, obj, Vcharset_hash_table);
1529 get_unallocated_leading_byte (int dimension)
1534 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1537 lb = next_allocated_leading_byte++;
1541 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1544 lb = next_allocated_1_byte_leading_byte++;
1548 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1551 lb = next_allocated_2_byte_leading_byte++;
1557 ("No more character sets free for this dimension",
1558 make_int (dimension));
1565 make_builtin_char (Lisp_Object charset, int c1, int c2)
1567 if (XCHARSET_UCS_MAX (charset))
1570 = (XCHARSET_DIMENSION (charset) == 1
1572 c1 - XCHARSET_BYTE_OFFSET (charset)
1574 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1575 + c2 - XCHARSET_BYTE_OFFSET (charset))
1576 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1577 if ((code < XCHARSET_UCS_MIN (charset))
1578 || (XCHARSET_UCS_MAX (charset) < code))
1579 signal_simple_error ("Arguments makes invalid character",
1583 else if (XCHARSET_DIMENSION (charset) == 1)
1585 switch (XCHARSET_CHARS (charset))
1589 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1592 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1599 switch (XCHARSET_CHARS (charset))
1602 return MIN_CHAR_94x94
1603 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1604 + (c1 - 33) * 94 + (c2 - 33);
1606 return MIN_CHAR_96x96
1607 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1608 + (c1 - 32) * 96 + (c2 - 32);
1616 range_charset_code_point (Lisp_Object charset, Emchar ch)
1620 if ((XCHARSET_UCS_MIN (charset) <= ch)
1621 && (ch <= XCHARSET_UCS_MAX (charset)))
1623 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1625 if (XCHARSET_CHARS (charset) == 256)
1627 else if (XCHARSET_DIMENSION (charset) == 1)
1628 return d + XCHARSET_BYTE_OFFSET (charset);
1629 else if (XCHARSET_DIMENSION (charset) == 2)
1631 ((d / XCHARSET_CHARS (charset)
1632 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1633 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1634 else if (XCHARSET_DIMENSION (charset) == 3)
1636 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1637 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1638 | ((d / XCHARSET_CHARS (charset)
1639 % XCHARSET_CHARS (charset)
1640 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1641 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1642 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1644 ((d / (XCHARSET_CHARS (charset)
1645 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1646 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1647 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1648 % XCHARSET_CHARS (charset)
1649 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1650 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1651 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1652 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1654 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1656 if (XCHARSET_DIMENSION (charset) == 1)
1658 if (XCHARSET_CHARS (charset) == 94)
1660 if (((d = ch - (MIN_CHAR_94
1661 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1665 else if (XCHARSET_CHARS (charset) == 96)
1667 if (((d = ch - (MIN_CHAR_96
1668 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1675 else if (XCHARSET_DIMENSION (charset) == 2)
1677 if (XCHARSET_CHARS (charset) == 94)
1679 if (((d = ch - (MIN_CHAR_94x94
1680 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1683 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1685 else if (XCHARSET_CHARS (charset) == 96)
1687 if (((d = ch - (MIN_CHAR_96x96
1688 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1691 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1701 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1703 if (c <= MAX_CHAR_BASIC_LATIN)
1705 *charset = Vcharset_ascii;
1710 *charset = Vcharset_control_1;
1715 *charset = Vcharset_latin_iso8859_1;
1719 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1721 *charset = Vcharset_greek_iso8859_7;
1722 return c - MIN_CHAR_GREEK + 0x20;
1724 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1726 *charset = Vcharset_cyrillic_iso8859_5;
1727 return c - MIN_CHAR_CYRILLIC + 0x20;
1730 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1732 *charset = Vcharset_hebrew_iso8859_8;
1733 return c - MIN_CHAR_HEBREW + 0x20;
1735 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1737 *charset = Vcharset_thai_tis620;
1738 return c - MIN_CHAR_THAI + 0x20;
1741 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1742 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1744 return list2 (Vcharset_katakana_jisx0201,
1745 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1748 else if (c <= MAX_CHAR_BMP)
1750 *charset = Vcharset_ucs_bmp;
1753 else if (c < MIN_CHAR_DAIKANWA)
1755 *charset = Vcharset_ucs;
1758 else if (c <= MAX_CHAR_DAIKANWA)
1760 *charset = Vcharset_ideograph_daikanwa;
1761 return c - MIN_CHAR_DAIKANWA;
1763 else if (c < MIN_CHAR_94)
1765 *charset = Vcharset_ucs;
1768 else if (c <= MAX_CHAR_94)
1770 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1771 ((c - MIN_CHAR_94) / 94) + '0',
1772 CHARSET_LEFT_TO_RIGHT);
1773 if (!NILP (*charset))
1774 return ((c - MIN_CHAR_94) % 94) + 33;
1777 *charset = Vcharset_ucs;
1781 else if (c <= MAX_CHAR_96)
1783 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1784 ((c - MIN_CHAR_96) / 96) + '0',
1785 CHARSET_LEFT_TO_RIGHT);
1786 if (!NILP (*charset))
1787 return ((c - MIN_CHAR_96) % 96) + 32;
1790 *charset = Vcharset_ucs;
1794 else if (c <= MAX_CHAR_94x94)
1797 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1798 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1799 CHARSET_LEFT_TO_RIGHT);
1800 if (!NILP (*charset))
1801 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1802 | (((c - MIN_CHAR_94x94) % 94) + 33);
1805 *charset = Vcharset_ucs;
1809 else if (c <= MAX_CHAR_96x96)
1812 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1813 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1814 CHARSET_LEFT_TO_RIGHT);
1815 if (!NILP (*charset))
1816 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1817 | (((c - MIN_CHAR_96x96) % 96) + 32);
1820 *charset = Vcharset_ucs;
1826 *charset = Vcharset_ucs;
1831 Lisp_Object Vdefault_coded_charset_priority_list;
1835 /************************************************************************/
1836 /* Basic charset Lisp functions */
1837 /************************************************************************/
1839 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1840 Return non-nil if OBJECT is a charset.
1844 return CHARSETP (object) ? Qt : Qnil;
1847 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1848 Retrieve the charset of the given name.
1849 If CHARSET-OR-NAME is a charset object, it is simply returned.
1850 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1851 nil is returned. Otherwise the associated charset object is returned.
1855 if (CHARSETP (charset_or_name))
1856 return charset_or_name;
1858 CHECK_SYMBOL (charset_or_name);
1859 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1862 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1863 Retrieve the charset of the given name.
1864 Same as `find-charset' except an error is signalled if there is no such
1865 charset instead of returning nil.
1869 Lisp_Object charset = Ffind_charset (name);
1872 signal_simple_error ("No such charset", name);
1876 /* We store the charsets in hash tables with the names as the key and the
1877 actual charset object as the value. Occasionally we need to use them
1878 in a list format. These routines provide us with that. */
1879 struct charset_list_closure
1881 Lisp_Object *charset_list;
1885 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1886 void *charset_list_closure)
1888 /* This function can GC */
1889 struct charset_list_closure *chcl =
1890 (struct charset_list_closure*) charset_list_closure;
1891 Lisp_Object *charset_list = chcl->charset_list;
1893 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1897 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1898 Return a list of the names of all defined charsets.
1902 Lisp_Object charset_list = Qnil;
1903 struct gcpro gcpro1;
1904 struct charset_list_closure charset_list_closure;
1906 GCPRO1 (charset_list);
1907 charset_list_closure.charset_list = &charset_list;
1908 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1909 &charset_list_closure);
1912 return charset_list;
1915 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1916 Return the name of the given charset.
1920 return XCHARSET_NAME (Fget_charset (charset));
1923 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1924 Define a new character set.
1925 This function is for use with Mule support.
1926 NAME is a symbol, the name by which the character set is normally referred.
1927 DOC-STRING is a string describing the character set.
1928 PROPS is a property list, describing the specific nature of the
1929 character set. Recognized properties are:
1931 'short-name Short version of the charset name (ex: Latin-1)
1932 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1933 'registry A regular expression matching the font registry field for
1935 'dimension Number of octets used to index a character in this charset.
1936 Either 1 or 2. Defaults to 1.
1937 'columns Number of columns used to display a character in this charset.
1938 Only used in TTY mode. (Under X, the actual width of a
1939 character can be derived from the font used to display the
1940 characters.) If unspecified, defaults to the dimension
1941 (this is almost always the correct value).
1942 'chars Number of characters in each dimension (94 or 96).
1943 Defaults to 94. Note that if the dimension is 2, the
1944 character set thus described is 94x94 or 96x96.
1945 'final Final byte of ISO 2022 escape sequence. Must be
1946 supplied. Each combination of (DIMENSION, CHARS) defines a
1947 separate namespace for final bytes. Note that ISO
1948 2022 restricts the final byte to the range
1949 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1950 dimension == 2. Note also that final bytes in the range
1951 0x30 - 0x3F are reserved for user-defined (not official)
1953 'graphic 0 (use left half of font on output) or 1 (use right half
1954 of font on output). Defaults to 0. For example, for
1955 a font whose registry is ISO8859-1, the left half
1956 (octets 0x20 - 0x7F) is the `ascii' character set, while
1957 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1958 character set. With 'graphic set to 0, the octets
1959 will have their high bit cleared; with it set to 1,
1960 the octets will have their high bit set.
1961 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1963 'ccl-program A compiled CCL program used to convert a character in
1964 this charset into an index into the font. This is in
1965 addition to the 'graphic property. The CCL program
1966 is passed the octets of the character, with the high
1967 bit cleared and set depending upon whether the value
1968 of the 'graphic property is 0 or 1.
1970 (name, doc_string, props))
1972 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1973 int direction = CHARSET_LEFT_TO_RIGHT;
1975 Lisp_Object registry = Qnil;
1976 Lisp_Object charset;
1977 Lisp_Object rest, keyword, value;
1978 Lisp_Object ccl_program = Qnil;
1979 Lisp_Object short_name = Qnil, long_name = Qnil;
1980 int byte_offset = -1;
1982 CHECK_SYMBOL (name);
1983 if (!NILP (doc_string))
1984 CHECK_STRING (doc_string);
1986 charset = Ffind_charset (name);
1987 if (!NILP (charset))
1988 signal_simple_error ("Cannot redefine existing charset", name);
1990 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1992 if (EQ (keyword, Qshort_name))
1994 CHECK_STRING (value);
1998 if (EQ (keyword, Qlong_name))
2000 CHECK_STRING (value);
2004 else if (EQ (keyword, Qdimension))
2007 dimension = XINT (value);
2008 if (dimension < 1 || dimension > 2)
2009 signal_simple_error ("Invalid value for 'dimension", value);
2012 else if (EQ (keyword, Qchars))
2015 chars = XINT (value);
2016 if (chars != 94 && chars != 96)
2017 signal_simple_error ("Invalid value for 'chars", value);
2020 else if (EQ (keyword, Qcolumns))
2023 columns = XINT (value);
2024 if (columns != 1 && columns != 2)
2025 signal_simple_error ("Invalid value for 'columns", value);
2028 else if (EQ (keyword, Qgraphic))
2031 graphic = XINT (value);
2033 if (graphic < 0 || graphic > 2)
2035 if (graphic < 0 || graphic > 1)
2037 signal_simple_error ("Invalid value for 'graphic", value);
2040 else if (EQ (keyword, Qregistry))
2042 CHECK_STRING (value);
2046 else if (EQ (keyword, Qdirection))
2048 if (EQ (value, Ql2r))
2049 direction = CHARSET_LEFT_TO_RIGHT;
2050 else if (EQ (value, Qr2l))
2051 direction = CHARSET_RIGHT_TO_LEFT;
2053 signal_simple_error ("Invalid value for 'direction", value);
2056 else if (EQ (keyword, Qfinal))
2058 CHECK_CHAR_COERCE_INT (value);
2059 final = XCHAR (value);
2060 if (final < '0' || final > '~')
2061 signal_simple_error ("Invalid value for 'final", value);
2064 else if (EQ (keyword, Qccl_program))
2066 CHECK_VECTOR (value);
2067 ccl_program = value;
2071 signal_simple_error ("Unrecognized property", keyword);
2075 error ("'final must be specified");
2076 if (dimension == 2 && final > 0x5F)
2078 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2082 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2084 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2086 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2087 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2089 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2091 id = get_unallocated_leading_byte (dimension);
2093 if (NILP (doc_string))
2094 doc_string = build_string ("");
2096 if (NILP (registry))
2097 registry = build_string ("");
2099 if (NILP (short_name))
2100 XSETSTRING (short_name, XSYMBOL (name)->name);
2102 if (NILP (long_name))
2103 long_name = doc_string;
2106 columns = dimension;
2108 if (byte_offset < 0)
2112 else if (chars == 96)
2118 charset = make_charset (id, name, chars, dimension, columns, graphic,
2119 final, direction, short_name, long_name,
2120 doc_string, registry,
2121 Qnil, 0, 0, 0, byte_offset);
2122 if (!NILP (ccl_program))
2123 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2127 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2129 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2130 NEW-NAME is the name of the new charset. Return the new charset.
2132 (charset, new_name))
2134 Lisp_Object new_charset = Qnil;
2135 int id, chars, dimension, columns, graphic, final;
2137 Lisp_Object registry, doc_string, short_name, long_name;
2138 struct Lisp_Charset *cs;
2140 charset = Fget_charset (charset);
2141 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2142 signal_simple_error ("Charset already has reverse-direction charset",
2145 CHECK_SYMBOL (new_name);
2146 if (!NILP (Ffind_charset (new_name)))
2147 signal_simple_error ("Cannot redefine existing charset", new_name);
2149 cs = XCHARSET (charset);
2151 chars = CHARSET_CHARS (cs);
2152 dimension = CHARSET_DIMENSION (cs);
2153 columns = CHARSET_COLUMNS (cs);
2154 id = get_unallocated_leading_byte (dimension);
2156 graphic = CHARSET_GRAPHIC (cs);
2157 final = CHARSET_FINAL (cs);
2158 direction = CHARSET_RIGHT_TO_LEFT;
2159 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2160 direction = CHARSET_LEFT_TO_RIGHT;
2161 doc_string = CHARSET_DOC_STRING (cs);
2162 short_name = CHARSET_SHORT_NAME (cs);
2163 long_name = CHARSET_LONG_NAME (cs);
2164 registry = CHARSET_REGISTRY (cs);
2166 new_charset = make_charset (id, new_name, chars, dimension, columns,
2167 graphic, final, direction, short_name, long_name,
2168 doc_string, registry,
2170 CHARSET_DECODING_TABLE(cs),
2171 CHARSET_UCS_MIN(cs),
2172 CHARSET_UCS_MAX(cs),
2173 CHARSET_CODE_OFFSET(cs),
2174 CHARSET_BYTE_OFFSET(cs)
2180 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2181 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2186 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2187 Define symbol ALIAS as an alias for CHARSET.
2191 CHECK_SYMBOL (alias);
2192 charset = Fget_charset (charset);
2193 return Fputhash (alias, charset, Vcharset_hash_table);
2196 /* #### Reverse direction charsets not yet implemented. */
2198 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2200 Return the reverse-direction charset parallel to CHARSET, if any.
2201 This is the charset with the same properties (in particular, the same
2202 dimension, number of characters per dimension, and final byte) as
2203 CHARSET but whose characters are displayed in the opposite direction.
2207 charset = Fget_charset (charset);
2208 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2212 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2213 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2214 If DIRECTION is omitted, both directions will be checked (left-to-right
2215 will be returned if character sets exist for both directions).
2217 (dimension, chars, final, direction))
2219 int dm, ch, fi, di = -1;
2221 Lisp_Object obj = Qnil;
2223 CHECK_INT (dimension);
2224 dm = XINT (dimension);
2225 if (dm < 1 || dm > 2)
2226 signal_simple_error ("Invalid value for DIMENSION", dimension);
2230 if (ch != 94 && ch != 96)
2231 signal_simple_error ("Invalid value for CHARS", chars);
2233 CHECK_CHAR_COERCE_INT (final);
2235 if (fi < '0' || fi > '~')
2236 signal_simple_error ("Invalid value for FINAL", final);
2238 if (EQ (direction, Ql2r))
2239 di = CHARSET_LEFT_TO_RIGHT;
2240 else if (EQ (direction, Qr2l))
2241 di = CHARSET_RIGHT_TO_LEFT;
2242 else if (!NILP (direction))
2243 signal_simple_error ("Invalid value for DIRECTION", direction);
2245 if (dm == 2 && fi > 0x5F)
2247 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2250 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2252 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2256 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2258 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2261 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2264 return XCHARSET_NAME (obj);
2268 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2269 Return short name of CHARSET.
2273 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2276 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2277 Return long name of CHARSET.
2281 return XCHARSET_LONG_NAME (Fget_charset (charset));
2284 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2285 Return description of CHARSET.
2289 return XCHARSET_DOC_STRING (Fget_charset (charset));
2292 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2293 Return dimension of CHARSET.
2297 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2300 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2301 Return property PROP of CHARSET.
2302 Recognized properties are those listed in `make-charset', as well as
2303 'name and 'doc-string.
2307 struct Lisp_Charset *cs;
2309 charset = Fget_charset (charset);
2310 cs = XCHARSET (charset);
2312 CHECK_SYMBOL (prop);
2313 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2314 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2315 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2316 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2317 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2318 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2319 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2320 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2321 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2322 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2323 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2324 if (EQ (prop, Qdirection))
2325 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2326 if (EQ (prop, Qreverse_direction_charset))
2328 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2332 return XCHARSET_NAME (obj);
2334 signal_simple_error ("Unrecognized charset property name", prop);
2335 return Qnil; /* not reached */
2338 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2339 Return charset identification number of CHARSET.
2343 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2346 /* #### We need to figure out which properties we really want to
2349 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2350 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2352 (charset, ccl_program))
2354 charset = Fget_charset (charset);
2355 CHECK_VECTOR (ccl_program);
2356 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2361 invalidate_charset_font_caches (Lisp_Object charset)
2363 /* Invalidate font cache entries for charset on all devices. */
2364 Lisp_Object devcons, concons, hash_table;
2365 DEVICE_LOOP_NO_BREAK (devcons, concons)
2367 struct device *d = XDEVICE (XCAR (devcons));
2368 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2369 if (!UNBOUNDP (hash_table))
2370 Fclrhash (hash_table);
2374 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2375 Set the 'registry property of CHARSET to REGISTRY.
2377 (charset, registry))
2379 charset = Fget_charset (charset);
2380 CHECK_STRING (registry);
2381 XCHARSET_REGISTRY (charset) = registry;
2382 invalidate_charset_font_caches (charset);
2383 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2388 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2389 Return mapping-table of CHARSET.
2393 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2396 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2397 Set mapping-table of CHARSET to TABLE.
2401 struct Lisp_Charset *cs;
2402 Lisp_Object old_table;
2405 charset = Fget_charset (charset);
2406 cs = XCHARSET (charset);
2408 if (EQ (table, Qnil))
2410 CHARSET_DECODING_TABLE(cs) = table;
2413 else if (VECTORP (table))
2417 /* ad-hoc method for `ascii' */
2418 if ((CHARSET_CHARS (cs) == 94) &&
2419 (CHARSET_BYTE_OFFSET (cs) != 33))
2420 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2422 ccs_len = CHARSET_CHARS (cs);
2424 if (XVECTOR_LENGTH (table) > ccs_len)
2425 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2426 old_table = CHARSET_DECODING_TABLE(cs);
2427 CHARSET_DECODING_TABLE(cs) = table;
2430 signal_error (Qwrong_type_argument,
2431 list2 (build_translated_string ("vector-or-nil-p"),
2433 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2435 switch (CHARSET_DIMENSION (cs))
2438 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2440 Lisp_Object c = XVECTOR_DATA(table)[i];
2445 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2449 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2451 Lisp_Object v = XVECTOR_DATA(table)[i];
2457 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2459 CHARSET_DECODING_TABLE(cs) = old_table;
2460 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2462 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2464 Lisp_Object c = XVECTOR_DATA(v)[j];
2469 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2470 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2474 put_char_attribute (v, charset,
2475 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2484 /************************************************************************/
2485 /* Lisp primitives for working with characters */
2486 /************************************************************************/
2489 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2490 Make a character from CHARSET and code-point CODE.
2496 charset = Fget_charset (charset);
2499 if (XCHARSET_GRAPHIC (charset) == 1)
2501 return make_char (DECODE_CHAR (charset, c));
2505 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2506 Make a character from CHARSET and octets ARG1 and ARG2.
2507 ARG2 is required only for characters from two-dimensional charsets.
2508 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2509 character s with caron.
2511 (charset, arg1, arg2))
2513 struct Lisp_Charset *cs;
2515 int lowlim, highlim;
2517 charset = Fget_charset (charset);
2518 cs = XCHARSET (charset);
2520 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2521 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2523 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2525 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2526 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2529 /* It is useful (and safe, according to Olivier Galibert) to strip
2530 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2531 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2532 Latin 2 code of the character. */
2540 if (a1 < lowlim || a1 > highlim)
2541 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2543 if (CHARSET_DIMENSION (cs) == 1)
2547 ("Charset is of dimension one; second octet must be nil", arg2);
2548 return make_char (MAKE_CHAR (charset, a1, 0));
2557 a2 = XINT (arg2) & 0x7f;
2559 if (a2 < lowlim || a2 > highlim)
2560 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2562 return make_char (MAKE_CHAR (charset, a1, a2));
2565 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2566 Return the character set of char CH.
2570 CHECK_CHAR_COERCE_INT (ch);
2572 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2575 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2576 Return the octet numbered N (should be 0 or 1) of char CH.
2577 N defaults to 0 if omitted.
2581 Lisp_Object charset;
2584 CHECK_CHAR_COERCE_INT (ch);
2586 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2588 if (NILP (n) || EQ (n, Qzero))
2589 return make_int (octet0);
2590 else if (EQ (n, make_int (1)))
2591 return make_int (octet1);
2593 signal_simple_error ("Octet number must be 0 or 1", n);
2596 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2597 Return list of charset and one or two position-codes of CHAR.
2601 /* This function can GC */
2602 struct gcpro gcpro1, gcpro2;
2603 Lisp_Object charset = Qnil;
2604 Lisp_Object rc = Qnil;
2612 GCPRO2 (charset, rc);
2613 CHECK_CHAR_COERCE_INT (character);
2616 code_point = ENCODE_CHAR (XCHAR (character), charset);
2617 dimension = XCHARSET_DIMENSION (charset);
2618 while (dimension > 0)
2620 rc = Fcons (make_int (code_point & 255), rc);
2624 rc = Fcons (XCHARSET_NAME (charset), rc);
2626 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2628 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2630 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2634 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2643 #ifdef ENABLE_COMPOSITE_CHARS
2644 /************************************************************************/
2645 /* composite character functions */
2646 /************************************************************************/
2649 lookup_composite_char (Bufbyte *str, int len)
2651 Lisp_Object lispstr = make_string (str, len);
2652 Lisp_Object ch = Fgethash (lispstr,
2653 Vcomposite_char_string2char_hash_table,
2659 if (composite_char_row_next >= 128)
2660 signal_simple_error ("No more composite chars available", lispstr);
2661 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2662 composite_char_col_next);
2663 Fputhash (make_char (emch), lispstr,
2664 Vcomposite_char_char2string_hash_table);
2665 Fputhash (lispstr, make_char (emch),
2666 Vcomposite_char_string2char_hash_table);
2667 composite_char_col_next++;
2668 if (composite_char_col_next >= 128)
2670 composite_char_col_next = 32;
2671 composite_char_row_next++;
2680 composite_char_string (Emchar ch)
2682 Lisp_Object str = Fgethash (make_char (ch),
2683 Vcomposite_char_char2string_hash_table,
2685 assert (!UNBOUNDP (str));
2689 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2690 Convert a string into a single composite character.
2691 The character is the result of overstriking all the characters in
2696 CHECK_STRING (string);
2697 return make_char (lookup_composite_char (XSTRING_DATA (string),
2698 XSTRING_LENGTH (string)));
2701 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2702 Return a string of the characters comprising a composite character.
2710 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2711 signal_simple_error ("Must be composite char", ch);
2712 return composite_char_string (emch);
2714 #endif /* ENABLE_COMPOSITE_CHARS */
2717 /************************************************************************/
2718 /* initialization */
2719 /************************************************************************/
2722 syms_of_mule_charset (void)
2724 DEFSUBR (Fcharsetp);
2725 DEFSUBR (Ffind_charset);
2726 DEFSUBR (Fget_charset);
2727 DEFSUBR (Fcharset_list);
2728 DEFSUBR (Fcharset_name);
2729 DEFSUBR (Fmake_charset);
2730 DEFSUBR (Fmake_reverse_direction_charset);
2731 /* DEFSUBR (Freverse_direction_charset); */
2732 DEFSUBR (Fdefine_charset_alias);
2733 DEFSUBR (Fcharset_from_attributes);
2734 DEFSUBR (Fcharset_short_name);
2735 DEFSUBR (Fcharset_long_name);
2736 DEFSUBR (Fcharset_description);
2737 DEFSUBR (Fcharset_dimension);
2738 DEFSUBR (Fcharset_property);
2739 DEFSUBR (Fcharset_id);
2740 DEFSUBR (Fset_charset_ccl_program);
2741 DEFSUBR (Fset_charset_registry);
2743 DEFSUBR (Fchar_attribute_alist);
2744 DEFSUBR (Fget_char_attribute);
2745 DEFSUBR (Fput_char_attribute);
2746 DEFSUBR (Fremove_char_attribute);
2747 DEFSUBR (Fdefine_char);
2748 DEFSUBR (Fchar_variants);
2749 DEFSUBR (Fget_composite_char);
2750 DEFSUBR (Fcharset_mapping_table);
2751 DEFSUBR (Fset_charset_mapping_table);
2755 DEFSUBR (Fdecode_char);
2757 DEFSUBR (Fmake_char);
2758 DEFSUBR (Fchar_charset);
2759 DEFSUBR (Fchar_octet);
2760 DEFSUBR (Fsplit_char);
2762 #ifdef ENABLE_COMPOSITE_CHARS
2763 DEFSUBR (Fmake_composite_char);
2764 DEFSUBR (Fcomposite_char_string);
2767 defsymbol (&Qcharsetp, "charsetp");
2768 defsymbol (&Qregistry, "registry");
2769 defsymbol (&Qfinal, "final");
2770 defsymbol (&Qgraphic, "graphic");
2771 defsymbol (&Qdirection, "direction");
2772 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2773 defsymbol (&Qshort_name, "short-name");
2774 defsymbol (&Qlong_name, "long-name");
2776 defsymbol (&Ql2r, "l2r");
2777 defsymbol (&Qr2l, "r2l");
2779 /* Charsets, compatible with FSF 20.3
2780 Naming convention is Script-Charset[-Edition] */
2781 defsymbol (&Qascii, "ascii");
2782 defsymbol (&Qcontrol_1, "control-1");
2783 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2784 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2785 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2786 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2787 defsymbol (&Qthai_tis620, "thai-tis620");
2788 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2789 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2790 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2791 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2792 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2793 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2794 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2795 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2796 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2797 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2798 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2799 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2800 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2801 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2802 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2804 defsymbol (&Q_ucs, "->ucs");
2805 defsymbol (&Q_decomposition, "->decomposition");
2806 defsymbol (&Qcompat, "compat");
2807 defsymbol (&Qisolated, "isolated");
2808 defsymbol (&Qinitial, "initial");
2809 defsymbol (&Qmedial, "medial");
2810 defsymbol (&Qfinal, "final");
2811 defsymbol (&Qvertical, "vertical");
2812 defsymbol (&QnoBreak, "noBreak");
2813 defsymbol (&Qfraction, "fraction");
2814 defsymbol (&Qsuper, "super");
2815 defsymbol (&Qsub, "sub");
2816 defsymbol (&Qcircle, "circle");
2817 defsymbol (&Qsquare, "square");
2818 defsymbol (&Qwide, "wide");
2819 defsymbol (&Qnarrow, "narrow");
2820 defsymbol (&Qsmall, "small");
2821 defsymbol (&Qfont, "font");
2822 defsymbol (&Qucs, "ucs");
2823 defsymbol (&Qucs_bmp, "ucs-bmp");
2824 defsymbol (&Qlatin_viscii, "latin-viscii");
2825 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2826 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2827 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2828 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2829 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2830 defsymbol (&Qmojikyo, "mojikyo");
2831 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2832 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2833 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2834 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2835 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2836 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2837 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2838 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2839 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2840 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2841 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2842 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2843 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2844 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2845 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2846 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2847 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2848 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2849 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2850 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2851 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2852 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2854 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2855 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2857 defsymbol (&Qcomposite, "composite");
2861 vars_of_mule_charset (void)
2868 chlook = xnew (struct charset_lookup);
2869 dumpstruct (&chlook, &charset_lookup_description);
2871 /* Table of charsets indexed by leading byte. */
2872 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2873 chlook->charset_by_leading_byte[i] = Qnil;
2876 /* Table of charsets indexed by type/final-byte. */
2877 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2878 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2879 chlook->charset_by_attributes[i][j] = Qnil;
2881 /* Table of charsets indexed by type/final-byte/direction. */
2882 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2883 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2884 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2885 chlook->charset_by_attributes[i][j][k] = Qnil;
2889 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2891 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2892 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2896 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2897 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2898 Leading-code of private TYPE9N charset of column-width 1.
2900 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2904 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2905 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2906 Version number of UTF-2000.
2909 staticpro (&Vcharacter_attribute_table);
2910 Vcharacter_attribute_table = make_char_code_table (Qnil);
2912 staticpro (&Vcharacter_composition_table);
2913 Vcharacter_composition_table = make_char_code_table (Qnil);
2915 staticpro (&Vcharacter_variant_table);
2916 Vcharacter_variant_table = make_char_code_table (Qnil);
2918 Vdefault_coded_charset_priority_list = Qnil;
2919 DEFVAR_LISP ("default-coded-charset-priority-list",
2920 &Vdefault_coded_charset_priority_list /*
2921 Default order of preferred coded-character-sets.
2927 complex_vars_of_mule_charset (void)
2929 staticpro (&Vcharset_hash_table);
2930 Vcharset_hash_table =
2931 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2933 /* Predefined character sets. We store them into variables for
2937 staticpro (&Vcharset_ucs);
2939 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2940 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2941 build_string ("UCS"),
2942 build_string ("UCS"),
2943 build_string ("ISO/IEC 10646"),
2945 Qnil, 0, 0xFFFFFFF, 0, 0);
2946 staticpro (&Vcharset_ucs_bmp);
2948 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2949 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2950 build_string ("BMP"),
2951 build_string ("BMP"),
2952 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2953 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2954 Qnil, 0, 0xFFFF, 0, 0);
2956 # define MIN_CHAR_THAI 0
2957 # define MAX_CHAR_THAI 0
2958 # define MIN_CHAR_HEBREW 0
2959 # define MAX_CHAR_HEBREW 0
2960 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2961 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2963 staticpro (&Vcharset_ascii);
2965 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2966 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2967 build_string ("ASCII"),
2968 build_string ("ASCII)"),
2969 build_string ("ASCII (ISO646 IRV)"),
2970 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2971 Qnil, 0, 0x7F, 0, 0);
2972 staticpro (&Vcharset_control_1);
2973 Vcharset_control_1 =
2974 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2975 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2976 build_string ("C1"),
2977 build_string ("Control characters"),
2978 build_string ("Control characters 128-191"),
2980 Qnil, 0x80, 0x9F, 0, 0);
2981 staticpro (&Vcharset_latin_iso8859_1);
2982 Vcharset_latin_iso8859_1 =
2983 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2984 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2985 build_string ("Latin-1"),
2986 build_string ("ISO8859-1 (Latin-1)"),
2987 build_string ("ISO8859-1 (Latin-1)"),
2988 build_string ("iso8859-1"),
2989 Qnil, 0xA0, 0xFF, 0, 32);
2990 staticpro (&Vcharset_latin_iso8859_2);
2991 Vcharset_latin_iso8859_2 =
2992 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2993 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2994 build_string ("Latin-2"),
2995 build_string ("ISO8859-2 (Latin-2)"),
2996 build_string ("ISO8859-2 (Latin-2)"),
2997 build_string ("iso8859-2"),
2999 staticpro (&Vcharset_latin_iso8859_3);
3000 Vcharset_latin_iso8859_3 =
3001 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3002 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3003 build_string ("Latin-3"),
3004 build_string ("ISO8859-3 (Latin-3)"),
3005 build_string ("ISO8859-3 (Latin-3)"),
3006 build_string ("iso8859-3"),
3008 staticpro (&Vcharset_latin_iso8859_4);
3009 Vcharset_latin_iso8859_4 =
3010 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3011 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3012 build_string ("Latin-4"),
3013 build_string ("ISO8859-4 (Latin-4)"),
3014 build_string ("ISO8859-4 (Latin-4)"),
3015 build_string ("iso8859-4"),
3017 staticpro (&Vcharset_thai_tis620);
3018 Vcharset_thai_tis620 =
3019 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3020 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3021 build_string ("TIS620"),
3022 build_string ("TIS620 (Thai)"),
3023 build_string ("TIS620.2529 (Thai)"),
3024 build_string ("tis620"),
3025 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3026 staticpro (&Vcharset_greek_iso8859_7);
3027 Vcharset_greek_iso8859_7 =
3028 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3029 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3030 build_string ("ISO8859-7"),
3031 build_string ("ISO8859-7 (Greek)"),
3032 build_string ("ISO8859-7 (Greek)"),
3033 build_string ("iso8859-7"),
3035 0 /* MIN_CHAR_GREEK */,
3036 0 /* MAX_CHAR_GREEK */, 0, 32);
3037 staticpro (&Vcharset_arabic_iso8859_6);
3038 Vcharset_arabic_iso8859_6 =
3039 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3040 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3041 build_string ("ISO8859-6"),
3042 build_string ("ISO8859-6 (Arabic)"),
3043 build_string ("ISO8859-6 (Arabic)"),
3044 build_string ("iso8859-6"),
3046 staticpro (&Vcharset_hebrew_iso8859_8);
3047 Vcharset_hebrew_iso8859_8 =
3048 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3049 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3050 build_string ("ISO8859-8"),
3051 build_string ("ISO8859-8 (Hebrew)"),
3052 build_string ("ISO8859-8 (Hebrew)"),
3053 build_string ("iso8859-8"),
3054 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3055 staticpro (&Vcharset_katakana_jisx0201);
3056 Vcharset_katakana_jisx0201 =
3057 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3058 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3059 build_string ("JISX0201 Kana"),
3060 build_string ("JISX0201.1976 (Japanese Kana)"),
3061 build_string ("JISX0201.1976 Japanese Kana"),
3062 build_string ("jisx0201\\.1976"),
3064 staticpro (&Vcharset_latin_jisx0201);
3065 Vcharset_latin_jisx0201 =
3066 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3067 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3068 build_string ("JISX0201 Roman"),
3069 build_string ("JISX0201.1976 (Japanese Roman)"),
3070 build_string ("JISX0201.1976 Japanese Roman"),
3071 build_string ("jisx0201\\.1976"),
3073 staticpro (&Vcharset_cyrillic_iso8859_5);
3074 Vcharset_cyrillic_iso8859_5 =
3075 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3076 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3077 build_string ("ISO8859-5"),
3078 build_string ("ISO8859-5 (Cyrillic)"),
3079 build_string ("ISO8859-5 (Cyrillic)"),
3080 build_string ("iso8859-5"),
3082 0 /* MIN_CHAR_CYRILLIC */,
3083 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3084 staticpro (&Vcharset_latin_iso8859_9);
3085 Vcharset_latin_iso8859_9 =
3086 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3087 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3088 build_string ("Latin-5"),
3089 build_string ("ISO8859-9 (Latin-5)"),
3090 build_string ("ISO8859-9 (Latin-5)"),
3091 build_string ("iso8859-9"),
3093 staticpro (&Vcharset_japanese_jisx0208_1978);
3094 Vcharset_japanese_jisx0208_1978 =
3095 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3096 Qjapanese_jisx0208_1978, 94, 2,
3097 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3098 build_string ("JIS X0208:1978"),
3099 build_string ("JIS X0208:1978 (Japanese)"),
3101 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3102 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3104 staticpro (&Vcharset_chinese_gb2312);
3105 Vcharset_chinese_gb2312 =
3106 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3107 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3108 build_string ("GB2312"),
3109 build_string ("GB2312)"),
3110 build_string ("GB2312 Chinese simplified"),
3111 build_string ("gb2312"),
3113 staticpro (&Vcharset_japanese_jisx0208);
3114 Vcharset_japanese_jisx0208 =
3115 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3116 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3117 build_string ("JISX0208"),
3118 build_string ("JIS X0208:1983 (Japanese)"),
3119 build_string ("JIS X0208:1983 Japanese Kanji"),
3120 build_string ("jisx0208\\.1983"),
3123 staticpro (&Vcharset_japanese_jisx0208_1990);
3124 Vcharset_japanese_jisx0208_1990 =
3125 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3126 Qjapanese_jisx0208_1990, 94, 2,
3127 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3128 build_string ("JISX0208-1990"),
3129 build_string ("JIS X0208:1990 (Japanese)"),
3130 build_string ("JIS X0208:1990 Japanese Kanji"),
3131 build_string ("jisx0208\\.1990"),
3133 MIN_CHAR_JIS_X0208_1990,
3134 MAX_CHAR_JIS_X0208_1990, 0, 33);
3136 staticpro (&Vcharset_korean_ksc5601);
3137 Vcharset_korean_ksc5601 =
3138 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3139 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3140 build_string ("KSC5601"),
3141 build_string ("KSC5601 (Korean"),
3142 build_string ("KSC5601 Korean Hangul and Hanja"),
3143 build_string ("ksc5601"),
3145 staticpro (&Vcharset_japanese_jisx0212);
3146 Vcharset_japanese_jisx0212 =
3147 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3148 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3149 build_string ("JISX0212"),
3150 build_string ("JISX0212 (Japanese)"),
3151 build_string ("JISX0212 Japanese Supplement"),
3152 build_string ("jisx0212"),
3155 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3156 staticpro (&Vcharset_chinese_cns11643_1);
3157 Vcharset_chinese_cns11643_1 =
3158 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3159 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3160 build_string ("CNS11643-1"),
3161 build_string ("CNS11643-1 (Chinese traditional)"),
3163 ("CNS 11643 Plane 1 Chinese traditional"),
3164 build_string (CHINESE_CNS_PLANE_RE("1")),
3166 staticpro (&Vcharset_chinese_cns11643_2);
3167 Vcharset_chinese_cns11643_2 =
3168 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3169 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3170 build_string ("CNS11643-2"),
3171 build_string ("CNS11643-2 (Chinese traditional)"),
3173 ("CNS 11643 Plane 2 Chinese traditional"),
3174 build_string (CHINESE_CNS_PLANE_RE("2")),
3177 staticpro (&Vcharset_latin_viscii_lower);
3178 Vcharset_latin_viscii_lower =
3179 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3180 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("VISCII lower"),
3182 build_string ("VISCII lower (Vietnamese)"),
3183 build_string ("VISCII lower (Vietnamese)"),
3184 build_string ("MULEVISCII-LOWER"),
3186 staticpro (&Vcharset_latin_viscii_upper);
3187 Vcharset_latin_viscii_upper =
3188 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3189 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3190 build_string ("VISCII upper"),
3191 build_string ("VISCII upper (Vietnamese)"),
3192 build_string ("VISCII upper (Vietnamese)"),
3193 build_string ("MULEVISCII-UPPER"),
3195 staticpro (&Vcharset_latin_viscii);
3196 Vcharset_latin_viscii =
3197 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3198 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3199 build_string ("VISCII"),
3200 build_string ("VISCII 1.1 (Vietnamese)"),
3201 build_string ("VISCII 1.1 (Vietnamese)"),
3202 build_string ("VISCII1\\.1"),
3204 staticpro (&Vcharset_ideograph_daikanwa);
3205 Vcharset_ideograph_daikanwa =
3206 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3207 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3208 build_string ("Daikanwa"),
3209 build_string ("Morohashi's Daikanwa"),
3210 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3211 build_string ("Daikanwa"),
3212 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3213 staticpro (&Vcharset_mojikyo);
3215 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3216 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3217 build_string ("Mojikyo"),
3218 build_string ("Mojikyo"),
3219 build_string ("Konjaku-Mojikyo"),
3221 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3222 staticpro (&Vcharset_mojikyo_pj_1);
3223 Vcharset_mojikyo_pj_1 =
3224 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3225 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3226 build_string ("Mojikyo-PJ-1"),
3227 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3229 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3230 build_string ("jisx0208\\.Mojikyo-1$"),
3232 staticpro (&Vcharset_mojikyo_pj_2);
3233 Vcharset_mojikyo_pj_2 =
3234 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3235 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3236 build_string ("Mojikyo-PJ-2"),
3237 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3239 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3240 build_string ("jisx0208\\.Mojikyo-2$"),
3242 staticpro (&Vcharset_mojikyo_pj_3);
3243 Vcharset_mojikyo_pj_3 =
3244 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3245 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3246 build_string ("Mojikyo-PJ-3"),
3247 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3249 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3250 build_string ("jisx0208\\.Mojikyo-3$"),
3252 staticpro (&Vcharset_mojikyo_pj_4);
3253 Vcharset_mojikyo_pj_4 =
3254 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3255 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3256 build_string ("Mojikyo-PJ-4"),
3257 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3259 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3260 build_string ("jisx0208\\.Mojikyo-4$"),
3262 staticpro (&Vcharset_mojikyo_pj_5);
3263 Vcharset_mojikyo_pj_5 =
3264 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3265 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3266 build_string ("Mojikyo-PJ-5"),
3267 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3269 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3270 build_string ("jisx0208\\.Mojikyo-5$"),
3272 staticpro (&Vcharset_mojikyo_pj_6);
3273 Vcharset_mojikyo_pj_6 =
3274 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3275 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3276 build_string ("Mojikyo-PJ-6"),
3277 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3279 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3280 build_string ("jisx0208\\.Mojikyo-6$"),
3282 staticpro (&Vcharset_mojikyo_pj_7);
3283 Vcharset_mojikyo_pj_7 =
3284 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3285 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3286 build_string ("Mojikyo-PJ-7"),
3287 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3289 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3290 build_string ("jisx0208\\.Mojikyo-7$"),
3292 staticpro (&Vcharset_mojikyo_pj_8);
3293 Vcharset_mojikyo_pj_8 =
3294 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3295 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3296 build_string ("Mojikyo-PJ-8"),
3297 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3299 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3300 build_string ("jisx0208\\.Mojikyo-8$"),
3302 staticpro (&Vcharset_mojikyo_pj_9);
3303 Vcharset_mojikyo_pj_9 =
3304 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3305 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3306 build_string ("Mojikyo-PJ-9"),
3307 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3309 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3310 build_string ("jisx0208\\.Mojikyo-9$"),
3312 staticpro (&Vcharset_mojikyo_pj_10);
3313 Vcharset_mojikyo_pj_10 =
3314 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3315 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3316 build_string ("Mojikyo-PJ-10"),
3317 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3319 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3320 build_string ("jisx0208\\.Mojikyo-10$"),
3322 staticpro (&Vcharset_mojikyo_pj_11);
3323 Vcharset_mojikyo_pj_11 =
3324 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3325 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3326 build_string ("Mojikyo-PJ-11"),
3327 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3329 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3330 build_string ("jisx0208\\.Mojikyo-11$"),
3332 staticpro (&Vcharset_mojikyo_pj_12);
3333 Vcharset_mojikyo_pj_12 =
3334 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3335 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3336 build_string ("Mojikyo-PJ-12"),
3337 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3339 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3340 build_string ("jisx0208\\.Mojikyo-12$"),
3342 staticpro (&Vcharset_mojikyo_pj_13);
3343 Vcharset_mojikyo_pj_13 =
3344 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3345 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3346 build_string ("Mojikyo-PJ-13"),
3347 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3349 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3350 build_string ("jisx0208\\.Mojikyo-13$"),
3352 staticpro (&Vcharset_mojikyo_pj_14);
3353 Vcharset_mojikyo_pj_14 =
3354 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3355 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3356 build_string ("Mojikyo-PJ-14"),
3357 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3359 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3360 build_string ("jisx0208\\.Mojikyo-14$"),
3362 staticpro (&Vcharset_mojikyo_pj_15);
3363 Vcharset_mojikyo_pj_15 =
3364 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3365 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3366 build_string ("Mojikyo-PJ-15"),
3367 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3369 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3370 build_string ("jisx0208\\.Mojikyo-15$"),
3372 staticpro (&Vcharset_mojikyo_pj_16);
3373 Vcharset_mojikyo_pj_16 =
3374 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3375 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3376 build_string ("Mojikyo-PJ-16"),
3377 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3379 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3380 build_string ("jisx0208\\.Mojikyo-16$"),
3382 staticpro (&Vcharset_mojikyo_pj_17);
3383 Vcharset_mojikyo_pj_17 =
3384 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3385 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3386 build_string ("Mojikyo-PJ-17"),
3387 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3389 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3390 build_string ("jisx0208\\.Mojikyo-17$"),
3392 staticpro (&Vcharset_mojikyo_pj_18);
3393 Vcharset_mojikyo_pj_18 =
3394 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3395 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3396 build_string ("Mojikyo-PJ-18"),
3397 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3399 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3400 build_string ("jisx0208\\.Mojikyo-18$"),
3402 staticpro (&Vcharset_mojikyo_pj_19);
3403 Vcharset_mojikyo_pj_19 =
3404 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3405 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3406 build_string ("Mojikyo-PJ-19"),
3407 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3409 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3410 build_string ("jisx0208\\.Mojikyo-19$"),
3412 staticpro (&Vcharset_mojikyo_pj_20);
3413 Vcharset_mojikyo_pj_20 =
3414 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3415 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3416 build_string ("Mojikyo-PJ-20"),
3417 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3419 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3420 build_string ("jisx0208\\.Mojikyo-20$"),
3422 staticpro (&Vcharset_mojikyo_pj_21);
3423 Vcharset_mojikyo_pj_21 =
3424 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3425 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3426 build_string ("Mojikyo-PJ-21"),
3427 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3429 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3430 build_string ("jisx0208\\.Mojikyo-21$"),
3432 staticpro (&Vcharset_ethiopic_ucs);
3433 Vcharset_ethiopic_ucs =
3434 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3435 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3436 build_string ("Ethiopic (UCS)"),
3437 build_string ("Ethiopic (UCS)"),
3438 build_string ("Ethiopic of UCS"),
3439 build_string ("Ethiopic-Unicode"),
3440 Qnil, 0x1200, 0x137F, 0x1200, 0);
3442 staticpro (&Vcharset_chinese_big5_1);
3443 Vcharset_chinese_big5_1 =
3444 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3445 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3446 build_string ("Big5"),
3447 build_string ("Big5 (Level-1)"),
3449 ("Big5 Level-1 Chinese traditional"),
3450 build_string ("big5"),
3452 staticpro (&Vcharset_chinese_big5_2);
3453 Vcharset_chinese_big5_2 =
3454 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3455 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3456 build_string ("Big5"),
3457 build_string ("Big5 (Level-2)"),
3459 ("Big5 Level-2 Chinese traditional"),
3460 build_string ("big5"),
3463 #ifdef ENABLE_COMPOSITE_CHARS
3464 /* #### For simplicity, we put composite chars into a 96x96 charset.
3465 This is going to lead to problems because you can run out of
3466 room, esp. as we don't yet recycle numbers. */
3467 staticpro (&Vcharset_composite);
3468 Vcharset_composite =
3469 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3470 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3471 build_string ("Composite"),
3472 build_string ("Composite characters"),
3473 build_string ("Composite characters"),
3476 /* #### not dumped properly */
3477 composite_char_row_next = 32;
3478 composite_char_col_next = 32;
3480 Vcomposite_char_string2char_hash_table =
3481 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3482 Vcomposite_char_char2string_hash_table =
3483 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3484 staticpro (&Vcomposite_char_string2char_hash_table);
3485 staticpro (&Vcomposite_char_char2string_hash_table);
3486 #endif /* ENABLE_COMPOSITE_CHARS */