1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
100 /* Hash tables for composite chars. One maps string representing
101 composed chars to their equivalent chars; one goes the
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
163 mark_char_byte_table (Lisp_Object obj)
165 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
176 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
179 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
185 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
187 if (!char_byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
201 char_byte_table_hash (Lisp_Object obj, int depth)
203 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
208 static const struct lrecord_description char_byte_table_description[] = {
209 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
214 mark_char_byte_table,
215 internal_object_printer,
216 0, char_byte_table_equal,
217 char_byte_table_hash,
218 char_byte_table_description,
219 struct Lisp_Char_Byte_Table);
222 make_char_byte_table (Lisp_Object initval)
226 struct Lisp_Char_Byte_Table *cte =
227 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
228 &lrecord_char_byte_table);
230 for (i = 0; i < 256; i++)
231 cte->property[i] = initval;
233 XSETCHAR_BYTE_TABLE (obj, cte);
238 copy_char_byte_table (Lisp_Object entry)
240 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
243 struct Lisp_Char_Byte_Table *ctenew =
244 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
245 &lrecord_char_byte_table);
247 for (i = 0; i < 256; i++)
249 Lisp_Object new = cte->property[i];
250 if (CHAR_BYTE_TABLE_P (new))
251 ctenew->property[i] = copy_char_byte_table (new);
253 ctenew->property[i] = new;
256 XSETCHAR_BYTE_TABLE (obj, ctenew);
262 mark_char_code_table (Lisp_Object obj)
264 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
270 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
272 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
273 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
275 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
279 char_code_table_hash (Lisp_Object obj, int depth)
281 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
283 return char_code_table_hash (cte->table, depth + 1);
286 static const struct lrecord_description char_code_table_description[] = {
287 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
291 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
292 mark_char_code_table,
293 internal_object_printer,
294 0, char_code_table_equal,
295 char_code_table_hash,
296 char_code_table_description,
297 struct Lisp_Char_Code_Table);
300 make_char_code_table (Lisp_Object initval)
303 struct Lisp_Char_Code_Table *cte =
304 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
305 &lrecord_char_code_table);
307 cte->table = make_char_byte_table (initval);
309 XSETCHAR_CODE_TABLE (obj, cte);
314 copy_char_code_table (Lisp_Object entry)
316 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
318 struct Lisp_Char_Code_Table *ctenew =
319 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
320 &lrecord_char_code_table);
322 ctenew->table = copy_char_byte_table (cte->table);
323 XSETCHAR_CODE_TABLE (obj, ctenew);
329 get_char_code_table (Emchar ch, Lisp_Object table)
331 unsigned int code = ch;
332 struct Lisp_Char_Byte_Table* cpt
333 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
334 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
336 if (CHAR_BYTE_TABLE_P (ret))
337 cpt = XCHAR_BYTE_TABLE (ret);
341 ret = cpt->property [(unsigned char) (code >> 16)];
342 if (CHAR_BYTE_TABLE_P (ret))
343 cpt = XCHAR_BYTE_TABLE (ret);
347 ret = cpt->property [(unsigned char) (code >> 8)];
348 if (CHAR_BYTE_TABLE_P (ret))
349 cpt = XCHAR_BYTE_TABLE (ret);
353 return cpt->property [(unsigned char) code];
356 void put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table);
358 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
360 unsigned int code = ch;
361 struct Lisp_Char_Byte_Table* cpt1
362 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
363 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
365 if (CHAR_BYTE_TABLE_P (ret))
367 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
369 ret = cpt2->property[(unsigned char)(code >> 16)];
370 if (CHAR_BYTE_TABLE_P (ret))
372 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
374 ret = cpt3->property[(unsigned char)(code >> 8)];
375 if (CHAR_BYTE_TABLE_P (ret))
377 struct Lisp_Char_Byte_Table* cpt4
378 = XCHAR_BYTE_TABLE (ret);
380 cpt4->property[(unsigned char)code] = value;
382 else if (!EQ (ret, value))
384 Lisp_Object cpt4 = make_char_byte_table (ret);
386 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
387 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
390 else if (!EQ (ret, value))
392 Lisp_Object cpt3 = make_char_byte_table (ret);
393 Lisp_Object cpt4 = make_char_byte_table (ret);
395 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
396 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
398 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
401 else if (!EQ (ret, value))
403 Lisp_Object cpt2 = make_char_byte_table (ret);
404 Lisp_Object cpt3 = make_char_byte_table (ret);
405 Lisp_Object cpt4 = make_char_byte_table (ret);
407 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
408 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
409 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
410 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
415 Lisp_Object Vcharacter_attribute_table;
416 Lisp_Object Vcharacter_composition_table;
417 Lisp_Object Vcharacter_variant_table;
419 Lisp_Object Q_decomposition;
422 Lisp_Object Qisolated;
423 Lisp_Object Qinitial;
426 Lisp_Object Qvertical;
427 Lisp_Object QnoBreak;
428 Lisp_Object Qfraction;
438 Emchar to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
440 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
446 else if (EQ (v, Qcompat))
448 else if (EQ (v, Qisolated))
450 else if (EQ (v, Qinitial))
452 else if (EQ (v, Qmedial))
454 else if (EQ (v, Qfinal))
456 else if (EQ (v, Qvertical))
458 else if (EQ (v, QnoBreak))
460 else if (EQ (v, Qfraction))
462 else if (EQ (v, Qsuper))
464 else if (EQ (v, Qsub))
466 else if (EQ (v, Qcircle))
468 else if (EQ (v, Qsquare))
470 else if (EQ (v, Qwide))
472 else if (EQ (v, Qnarrow))
474 else if (EQ (v, Qsmall))
476 else if (EQ (v, Qfont))
479 signal_simple_error (err_msg, err_arg);
482 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
483 Return character corresponding with list.
487 Lisp_Object table = Vcharacter_composition_table;
488 Lisp_Object rest = list;
492 Lisp_Object v = Fcar (rest);
494 Emchar c = to_char_code (v, "Invalid value for composition", list);
496 ret = get_char_code_table (c, table);
501 if (!CHAR_CODE_TABLE_P (ret))
506 else if (!CONSP (rest))
508 else if (CHAR_CODE_TABLE_P (ret))
511 signal_simple_error ("Invalid table is found with", list);
513 signal_simple_error ("Invalid value for composition", list);
516 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
517 Return variants of CHARACTER.
521 CHECK_CHAR (character);
522 return Fcopy_list (get_char_code_table (XCHAR (character),
523 Vcharacter_variant_table));
526 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
527 Return the alist of attributes of CHARACTER.
531 CHECK_CHAR (character);
532 return Fcopy_alist (get_char_code_table (XCHAR (character),
533 Vcharacter_attribute_table));
536 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
537 Return the value of CHARACTER's ATTRIBUTE.
539 (character, attribute))
544 CHECK_CHAR (character);
545 ret = get_char_code_table (XCHAR (character),
546 Vcharacter_attribute_table);
550 if (!NILP (ccs = Ffind_charset (attribute)))
553 return Fcdr (Fassq (attribute, ret));
556 Lisp_Object put_char_attribute (Lisp_Object character,
557 Lisp_Object attribute, Lisp_Object value);
559 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
562 Emchar char_code = XCHAR (character);
564 = get_char_code_table (char_code, Vcharacter_attribute_table);
567 cell = Fassq (attribute, ret);
571 ret = Fcons (Fcons (attribute, value), ret);
573 else if (!EQ (Fcdr (cell), value))
575 Fsetcdr (cell, value);
577 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
581 Lisp_Object remove_char_attribute (Lisp_Object character,
582 Lisp_Object attribute);
584 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
586 Emchar char_code = XCHAR (character);
588 = get_char_code_table (char_code, Vcharacter_attribute_table);
590 if (EQ (attribute, Fcar (Fcar (alist))))
592 alist = Fcdr (alist);
596 Lisp_Object pr = alist;
597 Lisp_Object r = Fcdr (alist);
601 if (EQ (attribute, Fcar (Fcar (r))))
603 XCDR (pr) = Fcdr (r);
610 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
616 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
617 Store CHARACTER's ATTRIBUTE with VALUE.
619 (character, attribute, value))
623 CHECK_CHAR (character);
624 ccs = Ffind_charset (attribute);
627 if (!EQ (XCHARSET_NAME (ccs), Qucs)
628 || (XCHAR (character) != XINT (value)))
630 Lisp_Object cpos, rest;
631 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
638 /* ad-hoc method for `ascii' */
639 if ((XCHARSET_CHARS (ccs) == 94) &&
640 (XCHARSET_BYTE_OFFSET (ccs) != 33))
641 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
643 ccs_len = XCHARSET_CHARS (ccs);
647 Lisp_Object ret = Fcar (value);
650 signal_simple_error ("Invalid value for coded-charset", value);
651 code_point = XINT (ret);
652 if (XCHARSET_GRAPHIC (ccs) == 1)
660 signal_simple_error ("Invalid value for coded-charset",
664 signal_simple_error ("Invalid value for coded-charset",
667 if (XCHARSET_GRAPHIC (ccs) == 1)
669 code_point = (code_point << 8) | j;
672 value = make_int (code_point);
674 else if (INTP (value))
676 if (XCHARSET_GRAPHIC (ccs) == 1)
677 value = make_int (XINT (value) & 0x7F7F7F7F);
680 signal_simple_error ("Invalid value for coded-charset", value);
683 cpos = Fget_char_attribute (character, attribute);
688 dim = XCHARSET_DIMENSION (ccs);
689 code_point = XINT (cpos);
693 i = ((code_point >> (8 * dim)) & 255)
694 - XCHARSET_BYTE_OFFSET (ccs);
695 nv = XVECTOR_DATA(v)[i];
701 XVECTOR_DATA(v)[i] = Qnil;
702 v = XCHARSET_DECODING_TABLE (ccs);
707 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
710 dim = XCHARSET_DIMENSION (ccs);
711 code_point = XINT (value);
716 i = ((code_point >> (8 * dim)) & 255)
717 - XCHARSET_BYTE_OFFSET (ccs);
718 nv = XVECTOR_DATA(v)[i];
722 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
728 XVECTOR_DATA(v)[i] = character;
733 else if (EQ (attribute, Q_decomposition))
736 signal_simple_error ("Invalid value for ->decomposition",
739 if (CONSP (Fcdr (value)))
741 Lisp_Object rest = value;
742 Lisp_Object table = Vcharacter_composition_table;
746 Lisp_Object v = Fcar (rest);
750 "Invalid value for ->decomposition", value);
755 put_char_code_table (c, character, table);
760 ntable = get_char_code_table (c, table);
761 if (!CHAR_CODE_TABLE_P (ntable))
763 ntable = make_char_code_table (Qnil);
764 put_char_code_table (c, ntable, table);
772 Lisp_Object v = Fcar (value);
778 = get_char_code_table (c, Vcharacter_variant_table);
780 if (NILP (Fmemq (v, ret)))
782 put_char_code_table (c, Fcons (character, ret),
783 Vcharacter_variant_table);
788 else if (EQ (attribute, Q_ucs))
794 signal_simple_error ("Invalid value for ->ucs", value);
798 ret = get_char_code_table (c, Vcharacter_variant_table);
799 if (NILP (Fmemq (character, ret)))
801 put_char_code_table (c, Fcons (character, ret),
802 Vcharacter_variant_table);
805 return put_char_attribute (character, attribute, value);
808 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
809 Remove CHARACTER's ATTRIBUTE.
811 (character, attribute))
815 CHECK_CHAR (character);
816 ccs = Ffind_charset (attribute);
820 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
827 /* ad-hoc method for `ascii' */
828 if ((XCHARSET_CHARS (ccs) == 94) &&
829 (XCHARSET_BYTE_OFFSET (ccs) != 33))
830 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
832 ccs_len = XCHARSET_CHARS (ccs);
835 cpos = Fget_char_attribute (character, attribute);
840 dim = XCHARSET_DIMENSION (ccs);
841 code_point = XINT (cpos);
845 i = ((code_point >> (8 * dim)) & 255)
846 - XCHARSET_BYTE_OFFSET (ccs);
847 nv = XVECTOR_DATA(v)[i];
853 XVECTOR_DATA(v)[i] = Qnil;
854 v = XCHARSET_DECODING_TABLE (ccs);
858 return remove_char_attribute (character, attribute);
861 EXFUN (Fmake_char, 3);
862 EXFUN (Fdecode_char, 2);
864 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
865 Store character's ATTRIBUTES.
869 Lisp_Object rest = attributes;
870 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
871 Lisp_Object character;
877 Lisp_Object cell = Fcar (rest);
881 signal_simple_error ("Invalid argument", attributes);
882 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
883 && ((XCHARSET_FINAL (ccs) != 0) ||
884 (XCHARSET_UCS_MAX (ccs) > 0)) )
888 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
890 character = Fdecode_char (ccs, cell);
891 goto setup_attributes;
895 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
898 signal_simple_error ("Invalid argument", attributes);
900 character = make_char (XINT (code) + 0x100000);
901 goto setup_attributes;
905 else if (!INTP (code))
906 signal_simple_error ("Invalid argument", attributes);
908 character = make_char (XINT (code));
914 Lisp_Object cell = Fcar (rest);
917 signal_simple_error ("Invalid argument", attributes);
918 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
922 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
925 Lisp_Object Vutf_2000_version;
929 int leading_code_private_11;
932 Lisp_Object Qcharsetp;
934 /* Qdoc_string, Qdimension, Qchars defined in general.c */
935 Lisp_Object Qregistry, Qfinal, Qgraphic;
936 Lisp_Object Qdirection;
937 Lisp_Object Qreverse_direction_charset;
938 Lisp_Object Qleading_byte;
939 Lisp_Object Qshort_name, Qlong_name;
955 Qjapanese_jisx0208_1978,
958 Qjapanese_jisx0208_1990,
969 Qvietnamese_viscii_lower,
970 Qvietnamese_viscii_upper,
1000 Lisp_Object Ql2r, Qr2l;
1002 Lisp_Object Vcharset_hash_table;
1004 /* Composite characters are characters constructed by overstriking two
1005 or more regular characters.
1007 1) The old Mule implementation involves storing composite characters
1008 in a buffer as a tag followed by all of the actual characters
1009 used to make up the composite character. I think this is a bad
1010 idea; it greatly complicates code that wants to handle strings
1011 one character at a time because it has to deal with the possibility
1012 of great big ungainly characters. It's much more reasonable to
1013 simply store an index into a table of composite characters.
1015 2) The current implementation only allows for 16,384 separate
1016 composite characters over the lifetime of the XEmacs process.
1017 This could become a potential problem if the user
1018 edited lots of different files that use composite characters.
1019 Due to FSF bogosity, increasing the number of allowable
1020 composite characters under Mule would decrease the number
1021 of possible faces that can exist. Mule already has shrunk
1022 this to 2048, and further shrinkage would become uncomfortable.
1023 No such problems exist in XEmacs.
1025 Composite characters could be represented as 0x80 C1 C2 C3,
1026 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1027 for slightly under 2^20 (one million) composite characters
1028 over the XEmacs process lifetime, and you only need to
1029 increase the size of a Mule character from 19 to 21 bits.
1030 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1031 85 million (slightly over 2^26) composite characters. */
1034 /************************************************************************/
1035 /* Basic Emchar functions */
1036 /************************************************************************/
1038 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1039 string in STR. Returns the number of bytes stored.
1040 Do not call this directly. Use the macro set_charptr_emchar() instead.
1044 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1050 Lisp_Object charset;
1059 else if ( c <= 0x7ff )
1061 *p++ = (c >> 6) | 0xc0;
1062 *p++ = (c & 0x3f) | 0x80;
1064 else if ( c <= 0xffff )
1066 *p++ = (c >> 12) | 0xe0;
1067 *p++ = ((c >> 6) & 0x3f) | 0x80;
1068 *p++ = (c & 0x3f) | 0x80;
1070 else if ( c <= 0x1fffff )
1072 *p++ = (c >> 18) | 0xf0;
1073 *p++ = ((c >> 12) & 0x3f) | 0x80;
1074 *p++ = ((c >> 6) & 0x3f) | 0x80;
1075 *p++ = (c & 0x3f) | 0x80;
1077 else if ( c <= 0x3ffffff )
1079 *p++ = (c >> 24) | 0xf8;
1080 *p++ = ((c >> 18) & 0x3f) | 0x80;
1081 *p++ = ((c >> 12) & 0x3f) | 0x80;
1082 *p++ = ((c >> 6) & 0x3f) | 0x80;
1083 *p++ = (c & 0x3f) | 0x80;
1087 *p++ = (c >> 30) | 0xfc;
1088 *p++ = ((c >> 24) & 0x3f) | 0x80;
1089 *p++ = ((c >> 18) & 0x3f) | 0x80;
1090 *p++ = ((c >> 12) & 0x3f) | 0x80;
1091 *p++ = ((c >> 6) & 0x3f) | 0x80;
1092 *p++ = (c & 0x3f) | 0x80;
1095 BREAKUP_CHAR (c, charset, c1, c2);
1096 lb = CHAR_LEADING_BYTE (c);
1097 if (LEADING_BYTE_PRIVATE_P (lb))
1098 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1100 if (EQ (charset, Vcharset_control_1))
1109 /* Return the first character from a Mule-encoded string in STR,
1110 assuming it's non-ASCII. Do not call this directly.
1111 Use the macro charptr_emchar() instead. */
1114 non_ascii_charptr_emchar (const Bufbyte *str)
1127 else if ( b >= 0xf8 )
1132 else if ( b >= 0xf0 )
1137 else if ( b >= 0xe0 )
1142 else if ( b >= 0xc0 )
1152 for( ; len > 0; len-- )
1155 ch = ( ch << 6 ) | ( b & 0x3f );
1159 Bufbyte i0 = *str, i1, i2 = 0;
1160 Lisp_Object charset;
1162 if (i0 == LEADING_BYTE_CONTROL_1)
1163 return (Emchar) (*++str - 0x20);
1165 if (LEADING_BYTE_PREFIX_P (i0))
1170 charset = CHARSET_BY_LEADING_BYTE (i0);
1171 if (XCHARSET_DIMENSION (charset) == 2)
1174 return MAKE_CHAR (charset, i1, i2);
1178 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1179 Do not call this directly. Use the macro valid_char_p() instead. */
1183 non_ascii_valid_char_p (Emchar ch)
1187 /* Must have only lowest 19 bits set */
1191 f1 = CHAR_FIELD1 (ch);
1192 f2 = CHAR_FIELD2 (ch);
1193 f3 = CHAR_FIELD3 (ch);
1197 Lisp_Object charset;
1199 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1200 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1201 f2 > MAX_CHAR_FIELD2_PRIVATE)
1206 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1207 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1211 NOTE: This takes advantage of the fact that
1212 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1213 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1215 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1216 if (EQ (charset, Qnil))
1218 return (XCHARSET_CHARS (charset) == 96);
1222 Lisp_Object charset;
1224 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1225 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1226 f1 > MAX_CHAR_FIELD1_PRIVATE)
1228 if (f2 < 0x20 || f3 < 0x20)
1231 #ifdef ENABLE_COMPOSITE_CHARS
1232 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1234 if (UNBOUNDP (Fgethash (make_int (ch),
1235 Vcomposite_char_char2string_hash_table,
1240 #endif /* ENABLE_COMPOSITE_CHARS */
1242 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1243 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1246 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1248 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1251 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1253 if (EQ (charset, Qnil))
1255 return (XCHARSET_CHARS (charset) == 96);
1261 /************************************************************************/
1262 /* Basic string functions */
1263 /************************************************************************/
1265 /* Copy the character pointed to by PTR into STR, assuming it's
1266 non-ASCII. Do not call this directly. Use the macro
1267 charptr_copy_char() instead. */
1270 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1272 Bufbyte *strptr = str;
1274 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1276 /* Notice fallthrough. */
1278 case 6: *++strptr = *ptr++;
1279 case 5: *++strptr = *ptr++;
1281 case 4: *++strptr = *ptr++;
1282 case 3: *++strptr = *ptr++;
1283 case 2: *++strptr = *ptr;
1288 return strptr + 1 - str;
1292 /************************************************************************/
1293 /* streams of Emchars */
1294 /************************************************************************/
1296 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1297 The functions below are not meant to be called directly; use
1298 the macros in insdel.h. */
1301 Lstream_get_emchar_1 (Lstream *stream, int ch)
1303 Bufbyte str[MAX_EMCHAR_LEN];
1304 Bufbyte *strptr = str;
1306 str[0] = (Bufbyte) ch;
1307 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1309 /* Notice fallthrough. */
1312 ch = Lstream_getc (stream);
1314 *++strptr = (Bufbyte) ch;
1316 ch = Lstream_getc (stream);
1318 *++strptr = (Bufbyte) ch;
1321 ch = Lstream_getc (stream);
1323 *++strptr = (Bufbyte) ch;
1325 ch = Lstream_getc (stream);
1327 *++strptr = (Bufbyte) ch;
1329 ch = Lstream_getc (stream);
1331 *++strptr = (Bufbyte) ch;
1336 return charptr_emchar (str);
1340 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1342 Bufbyte str[MAX_EMCHAR_LEN];
1343 Bytecount len = set_charptr_emchar (str, ch);
1344 return Lstream_write (stream, str, len);
1348 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1350 Bufbyte str[MAX_EMCHAR_LEN];
1351 Bytecount len = set_charptr_emchar (str, ch);
1352 Lstream_unread (stream, str, len);
1356 /************************************************************************/
1357 /* charset object */
1358 /************************************************************************/
1361 mark_charset (Lisp_Object obj)
1363 Lisp_Charset *cs = XCHARSET (obj);
1365 mark_object (cs->short_name);
1366 mark_object (cs->long_name);
1367 mark_object (cs->doc_string);
1368 mark_object (cs->registry);
1369 mark_object (cs->ccl_program);
1371 mark_object (cs->decoding_table);
1377 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1379 Lisp_Charset *cs = XCHARSET (obj);
1383 error ("printing unreadable object #<charset %s 0x%x>",
1384 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1387 write_c_string ("#<charset ", printcharfun);
1388 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1389 write_c_string (" ", printcharfun);
1390 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1391 write_c_string (" ", printcharfun);
1392 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1393 write_c_string (" ", printcharfun);
1394 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1395 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1397 CHARSET_DIMENSION (cs),
1398 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1399 CHARSET_COLUMNS (cs),
1400 CHARSET_GRAPHIC (cs),
1401 CHARSET_FINAL (cs));
1402 write_c_string (buf, printcharfun);
1403 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1404 sprintf (buf, " 0x%x>", cs->header.uid);
1405 write_c_string (buf, printcharfun);
1408 static const struct lrecord_description charset_description[] = {
1409 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1410 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1411 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1412 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1413 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1414 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1415 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1417 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
1422 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1423 mark_charset, print_charset, 0, 0, 0,
1424 charset_description,
1426 /* Make a new charset. */
1429 make_charset (Charset_ID id, Lisp_Object name,
1430 unsigned short chars, unsigned char dimension,
1431 unsigned char columns, unsigned char graphic,
1432 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1433 Lisp_Object long_name, Lisp_Object doc,
1435 Lisp_Object decoding_table,
1436 Emchar ucs_min, Emchar ucs_max,
1437 Emchar code_offset, unsigned char byte_offset)
1439 unsigned char type = 0;
1441 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1445 XSETCHARSET (obj, cs);
1447 CHARSET_ID (cs) = id;
1448 CHARSET_NAME (cs) = name;
1449 CHARSET_SHORT_NAME (cs) = short_name;
1450 CHARSET_LONG_NAME (cs) = long_name;
1451 CHARSET_CHARS (cs) = chars;
1452 CHARSET_DIMENSION (cs) = dimension;
1453 CHARSET_DIRECTION (cs) = direction;
1454 CHARSET_COLUMNS (cs) = columns;
1455 CHARSET_GRAPHIC (cs) = graphic;
1456 CHARSET_FINAL (cs) = final;
1457 CHARSET_DOC_STRING (cs) = doc;
1458 CHARSET_REGISTRY (cs) = reg;
1459 CHARSET_CCL_PROGRAM (cs) = Qnil;
1460 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1462 CHARSET_DECODING_TABLE(cs) = Qnil;
1463 CHARSET_UCS_MIN(cs) = ucs_min;
1464 CHARSET_UCS_MAX(cs) = ucs_max;
1465 CHARSET_CODE_OFFSET(cs) = code_offset;
1466 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1469 switch (CHARSET_CHARS (cs))
1472 switch (CHARSET_DIMENSION (cs))
1475 type = CHARSET_TYPE_94;
1478 type = CHARSET_TYPE_94X94;
1483 switch (CHARSET_DIMENSION (cs))
1486 type = CHARSET_TYPE_96;
1489 type = CHARSET_TYPE_96X96;
1495 switch (CHARSET_DIMENSION (cs))
1498 type = CHARSET_TYPE_128;
1501 type = CHARSET_TYPE_128X128;
1506 switch (CHARSET_DIMENSION (cs))
1509 type = CHARSET_TYPE_256;
1512 type = CHARSET_TYPE_256X256;
1519 CHARSET_TYPE (cs) = type;
1523 if (id == LEADING_BYTE_ASCII)
1524 CHARSET_REP_BYTES (cs) = 1;
1526 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1528 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1533 /* some charsets do not have final characters. This includes
1534 ASCII, Control-1, Composite, and the two faux private
1537 if (code_offset == 0)
1539 assert (NILP (chlook->charset_by_attributes[type][final]));
1540 chlook->charset_by_attributes[type][final] = obj;
1543 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1544 chlook->charset_by_attributes[type][final][direction] = obj;
1548 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1549 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1551 /* Some charsets are "faux" and don't have names or really exist at
1552 all except in the leading-byte table. */
1554 Fputhash (name, obj, Vcharset_hash_table);
1559 get_unallocated_leading_byte (int dimension)
1564 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1567 lb = chlook->next_allocated_leading_byte++;
1571 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1574 lb = chlook->next_allocated_1_byte_leading_byte++;
1578 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1581 lb = chlook->next_allocated_2_byte_leading_byte++;
1587 ("No more character sets free for this dimension",
1588 make_int (dimension));
1595 make_builtin_char (Lisp_Object charset, int c1, int c2)
1597 if (XCHARSET_UCS_MAX (charset))
1600 = (XCHARSET_DIMENSION (charset) == 1
1602 c1 - XCHARSET_BYTE_OFFSET (charset)
1604 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1605 + c2 - XCHARSET_BYTE_OFFSET (charset))
1606 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1607 if ((code < XCHARSET_UCS_MIN (charset))
1608 || (XCHARSET_UCS_MAX (charset) < code))
1609 signal_simple_error ("Arguments makes invalid character",
1613 else if (XCHARSET_DIMENSION (charset) == 1)
1615 switch (XCHARSET_CHARS (charset))
1619 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1622 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1629 switch (XCHARSET_CHARS (charset))
1632 return MIN_CHAR_94x94
1633 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1634 + (c1 - 33) * 94 + (c2 - 33);
1636 return MIN_CHAR_96x96
1637 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1638 + (c1 - 32) * 96 + (c2 - 32);
1646 range_charset_code_point (Lisp_Object charset, Emchar ch)
1650 if ((XCHARSET_UCS_MIN (charset) <= ch)
1651 && (ch <= XCHARSET_UCS_MAX (charset)))
1653 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1655 if (XCHARSET_CHARS (charset) == 256)
1657 else if (XCHARSET_DIMENSION (charset) == 1)
1658 return d + XCHARSET_BYTE_OFFSET (charset);
1659 else if (XCHARSET_DIMENSION (charset) == 2)
1661 ((d / XCHARSET_CHARS (charset)
1662 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1663 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1664 else if (XCHARSET_DIMENSION (charset) == 3)
1666 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1667 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1668 | ((d / XCHARSET_CHARS (charset)
1669 % XCHARSET_CHARS (charset)
1670 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1671 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1672 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1674 ((d / (XCHARSET_CHARS (charset)
1675 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1676 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1677 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1678 % XCHARSET_CHARS (charset)
1679 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1680 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1681 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1682 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1684 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1686 if (XCHARSET_DIMENSION (charset) == 1)
1688 if (XCHARSET_CHARS (charset) == 94)
1690 if (((d = ch - (MIN_CHAR_94
1691 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1695 else if (XCHARSET_CHARS (charset) == 96)
1697 if (((d = ch - (MIN_CHAR_96
1698 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1705 else if (XCHARSET_DIMENSION (charset) == 2)
1707 if (XCHARSET_CHARS (charset) == 94)
1709 if (((d = ch - (MIN_CHAR_94x94
1710 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1713 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1715 else if (XCHARSET_CHARS (charset) == 96)
1717 if (((d = ch - (MIN_CHAR_96x96
1718 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1721 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1731 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1733 if (c <= MAX_CHAR_BASIC_LATIN)
1735 *charset = Vcharset_ascii;
1740 *charset = Vcharset_control_1;
1745 *charset = Vcharset_latin_iso8859_1;
1749 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1751 *charset = Vcharset_greek_iso8859_7;
1752 return c - MIN_CHAR_GREEK + 0x20;
1754 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1756 *charset = Vcharset_cyrillic_iso8859_5;
1757 return c - MIN_CHAR_CYRILLIC + 0x20;
1760 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1762 *charset = Vcharset_hebrew_iso8859_8;
1763 return c - MIN_CHAR_HEBREW + 0x20;
1765 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1767 *charset = Vcharset_thai_tis620;
1768 return c - MIN_CHAR_THAI + 0x20;
1771 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1772 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1774 return list2 (Vcharset_katakana_jisx0201,
1775 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1778 else if (c <= MAX_CHAR_BMP)
1780 *charset = Vcharset_ucs_bmp;
1783 else if (c < MIN_CHAR_DAIKANWA)
1785 *charset = Vcharset_ucs;
1789 else if (c <= MAX_CHAR_DAIKANWA)
1791 *charset = Vcharset_ideograph_daikanwa;
1792 return c - MIN_CHAR_DAIKANWA;
1795 else if (c <= MAX_CHAR_MOJIKYO)
1797 *charset = Vcharset_mojikyo;
1798 return c - MIN_CHAR_MOJIKYO;
1800 else if (c < MIN_CHAR_94)
1802 *charset = Vcharset_ucs;
1805 else if (c <= MAX_CHAR_94)
1807 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1808 ((c - MIN_CHAR_94) / 94) + '0',
1809 CHARSET_LEFT_TO_RIGHT);
1810 if (!NILP (*charset))
1811 return ((c - MIN_CHAR_94) % 94) + 33;
1814 *charset = Vcharset_ucs;
1818 else if (c <= MAX_CHAR_96)
1820 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1821 ((c - MIN_CHAR_96) / 96) + '0',
1822 CHARSET_LEFT_TO_RIGHT);
1823 if (!NILP (*charset))
1824 return ((c - MIN_CHAR_96) % 96) + 32;
1827 *charset = Vcharset_ucs;
1831 else if (c <= MAX_CHAR_94x94)
1834 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1835 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1836 CHARSET_LEFT_TO_RIGHT);
1837 if (!NILP (*charset))
1838 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1839 | (((c - MIN_CHAR_94x94) % 94) + 33);
1842 *charset = Vcharset_ucs;
1846 else if (c <= MAX_CHAR_96x96)
1849 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1850 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1851 CHARSET_LEFT_TO_RIGHT);
1852 if (!NILP (*charset))
1853 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1854 | (((c - MIN_CHAR_96x96) % 96) + 32);
1857 *charset = Vcharset_ucs;
1863 *charset = Vcharset_ucs;
1868 Lisp_Object Vdefault_coded_charset_priority_list;
1872 /************************************************************************/
1873 /* Basic charset Lisp functions */
1874 /************************************************************************/
1876 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1877 Return non-nil if OBJECT is a charset.
1881 return CHARSETP (object) ? Qt : Qnil;
1884 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1885 Retrieve the charset of the given name.
1886 If CHARSET-OR-NAME is a charset object, it is simply returned.
1887 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1888 nil is returned. Otherwise the associated charset object is returned.
1892 if (CHARSETP (charset_or_name))
1893 return charset_or_name;
1895 CHECK_SYMBOL (charset_or_name);
1896 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1899 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1900 Retrieve the charset of the given name.
1901 Same as `find-charset' except an error is signalled if there is no such
1902 charset instead of returning nil.
1906 Lisp_Object charset = Ffind_charset (name);
1909 signal_simple_error ("No such charset", name);
1913 /* We store the charsets in hash tables with the names as the key and the
1914 actual charset object as the value. Occasionally we need to use them
1915 in a list format. These routines provide us with that. */
1916 struct charset_list_closure
1918 Lisp_Object *charset_list;
1922 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1923 void *charset_list_closure)
1925 /* This function can GC */
1926 struct charset_list_closure *chcl =
1927 (struct charset_list_closure*) charset_list_closure;
1928 Lisp_Object *charset_list = chcl->charset_list;
1930 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1934 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1935 Return a list of the names of all defined charsets.
1939 Lisp_Object charset_list = Qnil;
1940 struct gcpro gcpro1;
1941 struct charset_list_closure charset_list_closure;
1943 GCPRO1 (charset_list);
1944 charset_list_closure.charset_list = &charset_list;
1945 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1946 &charset_list_closure);
1949 return charset_list;
1952 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1953 Return the name of the given charset.
1957 return XCHARSET_NAME (Fget_charset (charset));
1960 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1961 Define a new character set.
1962 This function is for use with Mule support.
1963 NAME is a symbol, the name by which the character set is normally referred.
1964 DOC-STRING is a string describing the character set.
1965 PROPS is a property list, describing the specific nature of the
1966 character set. Recognized properties are:
1968 'short-name Short version of the charset name (ex: Latin-1)
1969 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1970 'registry A regular expression matching the font registry field for
1972 'dimension Number of octets used to index a character in this charset.
1973 Either 1 or 2. Defaults to 1.
1974 'columns Number of columns used to display a character in this charset.
1975 Only used in TTY mode. (Under X, the actual width of a
1976 character can be derived from the font used to display the
1977 characters.) If unspecified, defaults to the dimension
1978 (this is almost always the correct value).
1979 'chars Number of characters in each dimension (94 or 96).
1980 Defaults to 94. Note that if the dimension is 2, the
1981 character set thus described is 94x94 or 96x96.
1982 'final Final byte of ISO 2022 escape sequence. Must be
1983 supplied. Each combination of (DIMENSION, CHARS) defines a
1984 separate namespace for final bytes. Note that ISO
1985 2022 restricts the final byte to the range
1986 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1987 dimension == 2. Note also that final bytes in the range
1988 0x30 - 0x3F are reserved for user-defined (not official)
1990 'graphic 0 (use left half of font on output) or 1 (use right half
1991 of font on output). Defaults to 0. For example, for
1992 a font whose registry is ISO8859-1, the left half
1993 (octets 0x20 - 0x7F) is the `ascii' character set, while
1994 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1995 character set. With 'graphic set to 0, the octets
1996 will have their high bit cleared; with it set to 1,
1997 the octets will have their high bit set.
1998 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2000 'ccl-program A compiled CCL program used to convert a character in
2001 this charset into an index into the font. This is in
2002 addition to the 'graphic property. The CCL program
2003 is passed the octets of the character, with the high
2004 bit cleared and set depending upon whether the value
2005 of the 'graphic property is 0 or 1.
2007 (name, doc_string, props))
2009 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2010 int direction = CHARSET_LEFT_TO_RIGHT;
2012 Lisp_Object registry = Qnil;
2013 Lisp_Object charset;
2014 Lisp_Object rest, keyword, value;
2015 Lisp_Object ccl_program = Qnil;
2016 Lisp_Object short_name = Qnil, long_name = Qnil;
2017 int byte_offset = -1;
2019 CHECK_SYMBOL (name);
2020 if (!NILP (doc_string))
2021 CHECK_STRING (doc_string);
2023 charset = Ffind_charset (name);
2024 if (!NILP (charset))
2025 signal_simple_error ("Cannot redefine existing charset", name);
2027 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2029 if (EQ (keyword, Qshort_name))
2031 CHECK_STRING (value);
2035 if (EQ (keyword, Qlong_name))
2037 CHECK_STRING (value);
2041 else if (EQ (keyword, Qdimension))
2044 dimension = XINT (value);
2045 if (dimension < 1 || dimension > 2)
2046 signal_simple_error ("Invalid value for 'dimension", value);
2049 else if (EQ (keyword, Qchars))
2052 chars = XINT (value);
2053 if (chars != 94 && chars != 96)
2054 signal_simple_error ("Invalid value for 'chars", value);
2057 else if (EQ (keyword, Qcolumns))
2060 columns = XINT (value);
2061 if (columns != 1 && columns != 2)
2062 signal_simple_error ("Invalid value for 'columns", value);
2065 else if (EQ (keyword, Qgraphic))
2068 graphic = XINT (value);
2070 if (graphic < 0 || graphic > 2)
2072 if (graphic < 0 || graphic > 1)
2074 signal_simple_error ("Invalid value for 'graphic", value);
2077 else if (EQ (keyword, Qregistry))
2079 CHECK_STRING (value);
2083 else if (EQ (keyword, Qdirection))
2085 if (EQ (value, Ql2r))
2086 direction = CHARSET_LEFT_TO_RIGHT;
2087 else if (EQ (value, Qr2l))
2088 direction = CHARSET_RIGHT_TO_LEFT;
2090 signal_simple_error ("Invalid value for 'direction", value);
2093 else if (EQ (keyword, Qfinal))
2095 CHECK_CHAR_COERCE_INT (value);
2096 final = XCHAR (value);
2097 if (final < '0' || final > '~')
2098 signal_simple_error ("Invalid value for 'final", value);
2101 else if (EQ (keyword, Qccl_program))
2103 CHECK_VECTOR (value);
2104 ccl_program = value;
2108 signal_simple_error ("Unrecognized property", keyword);
2112 error ("'final must be specified");
2113 if (dimension == 2 && final > 0x5F)
2115 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2119 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2121 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2123 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2124 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2126 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2128 id = get_unallocated_leading_byte (dimension);
2130 if (NILP (doc_string))
2131 doc_string = build_string ("");
2133 if (NILP (registry))
2134 registry = build_string ("");
2136 if (NILP (short_name))
2137 XSETSTRING (short_name, XSYMBOL (name)->name);
2139 if (NILP (long_name))
2140 long_name = doc_string;
2143 columns = dimension;
2145 if (byte_offset < 0)
2149 else if (chars == 96)
2155 charset = make_charset (id, name, chars, dimension, columns, graphic,
2156 final, direction, short_name, long_name,
2157 doc_string, registry,
2158 Qnil, 0, 0, 0, byte_offset);
2159 if (!NILP (ccl_program))
2160 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2164 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2166 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2167 NEW-NAME is the name of the new charset. Return the new charset.
2169 (charset, new_name))
2171 Lisp_Object new_charset = Qnil;
2172 int id, chars, dimension, columns, graphic, final;
2174 Lisp_Object registry, doc_string, short_name, long_name;
2177 charset = Fget_charset (charset);
2178 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2179 signal_simple_error ("Charset already has reverse-direction charset",
2182 CHECK_SYMBOL (new_name);
2183 if (!NILP (Ffind_charset (new_name)))
2184 signal_simple_error ("Cannot redefine existing charset", new_name);
2186 cs = XCHARSET (charset);
2188 chars = CHARSET_CHARS (cs);
2189 dimension = CHARSET_DIMENSION (cs);
2190 columns = CHARSET_COLUMNS (cs);
2191 id = get_unallocated_leading_byte (dimension);
2193 graphic = CHARSET_GRAPHIC (cs);
2194 final = CHARSET_FINAL (cs);
2195 direction = CHARSET_RIGHT_TO_LEFT;
2196 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2197 direction = CHARSET_LEFT_TO_RIGHT;
2198 doc_string = CHARSET_DOC_STRING (cs);
2199 short_name = CHARSET_SHORT_NAME (cs);
2200 long_name = CHARSET_LONG_NAME (cs);
2201 registry = CHARSET_REGISTRY (cs);
2203 new_charset = make_charset (id, new_name, chars, dimension, columns,
2204 graphic, final, direction, short_name, long_name,
2205 doc_string, registry,
2207 CHARSET_DECODING_TABLE(cs),
2208 CHARSET_UCS_MIN(cs),
2209 CHARSET_UCS_MAX(cs),
2210 CHARSET_CODE_OFFSET(cs),
2211 CHARSET_BYTE_OFFSET(cs)
2217 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2218 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2223 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2224 Define symbol ALIAS as an alias for CHARSET.
2228 CHECK_SYMBOL (alias);
2229 charset = Fget_charset (charset);
2230 return Fputhash (alias, charset, Vcharset_hash_table);
2233 /* #### Reverse direction charsets not yet implemented. */
2235 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2237 Return the reverse-direction charset parallel to CHARSET, if any.
2238 This is the charset with the same properties (in particular, the same
2239 dimension, number of characters per dimension, and final byte) as
2240 CHARSET but whose characters are displayed in the opposite direction.
2244 charset = Fget_charset (charset);
2245 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2249 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2250 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2251 If DIRECTION is omitted, both directions will be checked (left-to-right
2252 will be returned if character sets exist for both directions).
2254 (dimension, chars, final, direction))
2256 int dm, ch, fi, di = -1;
2258 Lisp_Object obj = Qnil;
2260 CHECK_INT (dimension);
2261 dm = XINT (dimension);
2262 if (dm < 1 || dm > 2)
2263 signal_simple_error ("Invalid value for DIMENSION", dimension);
2267 if (ch != 94 && ch != 96)
2268 signal_simple_error ("Invalid value for CHARS", chars);
2270 CHECK_CHAR_COERCE_INT (final);
2272 if (fi < '0' || fi > '~')
2273 signal_simple_error ("Invalid value for FINAL", final);
2275 if (EQ (direction, Ql2r))
2276 di = CHARSET_LEFT_TO_RIGHT;
2277 else if (EQ (direction, Qr2l))
2278 di = CHARSET_RIGHT_TO_LEFT;
2279 else if (!NILP (direction))
2280 signal_simple_error ("Invalid value for DIRECTION", direction);
2282 if (dm == 2 && fi > 0x5F)
2284 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2287 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2289 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2293 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2295 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2298 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2301 return XCHARSET_NAME (obj);
2305 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2306 Return short name of CHARSET.
2310 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2313 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2314 Return long name of CHARSET.
2318 return XCHARSET_LONG_NAME (Fget_charset (charset));
2321 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2322 Return description of CHARSET.
2326 return XCHARSET_DOC_STRING (Fget_charset (charset));
2329 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2330 Return dimension of CHARSET.
2334 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2337 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2338 Return property PROP of CHARSET.
2339 Recognized properties are those listed in `make-charset', as well as
2340 'name and 'doc-string.
2346 charset = Fget_charset (charset);
2347 cs = XCHARSET (charset);
2349 CHECK_SYMBOL (prop);
2350 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2351 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2352 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2353 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2354 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2355 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2356 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2357 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2358 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2359 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2360 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2361 if (EQ (prop, Qdirection))
2362 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2363 if (EQ (prop, Qreverse_direction_charset))
2365 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2369 return XCHARSET_NAME (obj);
2371 signal_simple_error ("Unrecognized charset property name", prop);
2372 return Qnil; /* not reached */
2375 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2376 Return charset identification number of CHARSET.
2380 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2383 /* #### We need to figure out which properties we really want to
2386 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2387 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2389 (charset, ccl_program))
2391 charset = Fget_charset (charset);
2392 CHECK_VECTOR (ccl_program);
2393 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2398 invalidate_charset_font_caches (Lisp_Object charset)
2400 /* Invalidate font cache entries for charset on all devices. */
2401 Lisp_Object devcons, concons, hash_table;
2402 DEVICE_LOOP_NO_BREAK (devcons, concons)
2404 struct device *d = XDEVICE (XCAR (devcons));
2405 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2406 if (!UNBOUNDP (hash_table))
2407 Fclrhash (hash_table);
2411 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2412 Set the 'registry property of CHARSET to REGISTRY.
2414 (charset, registry))
2416 charset = Fget_charset (charset);
2417 CHECK_STRING (registry);
2418 XCHARSET_REGISTRY (charset) = registry;
2419 invalidate_charset_font_caches (charset);
2420 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2425 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2426 Return mapping-table of CHARSET.
2430 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2433 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2434 Set mapping-table of CHARSET to TABLE.
2438 struct Lisp_Charset *cs;
2439 Lisp_Object old_table;
2442 charset = Fget_charset (charset);
2443 cs = XCHARSET (charset);
2445 if (EQ (table, Qnil))
2447 CHARSET_DECODING_TABLE(cs) = table;
2450 else if (VECTORP (table))
2454 /* ad-hoc method for `ascii' */
2455 if ((CHARSET_CHARS (cs) == 94) &&
2456 (CHARSET_BYTE_OFFSET (cs) != 33))
2457 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2459 ccs_len = CHARSET_CHARS (cs);
2461 if (XVECTOR_LENGTH (table) > ccs_len)
2462 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2463 old_table = CHARSET_DECODING_TABLE(cs);
2464 CHARSET_DECODING_TABLE(cs) = table;
2467 signal_error (Qwrong_type_argument,
2468 list2 (build_translated_string ("vector-or-nil-p"),
2470 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2472 switch (CHARSET_DIMENSION (cs))
2475 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2477 Lisp_Object c = XVECTOR_DATA(table)[i];
2482 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2486 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2488 Lisp_Object v = XVECTOR_DATA(table)[i];
2494 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2496 CHARSET_DECODING_TABLE(cs) = old_table;
2497 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2499 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2501 Lisp_Object c = XVECTOR_DATA(v)[j];
2506 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2507 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2511 put_char_attribute (v, charset,
2512 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2521 /************************************************************************/
2522 /* Lisp primitives for working with characters */
2523 /************************************************************************/
2526 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2527 Make a character from CHARSET and code-point CODE.
2533 charset = Fget_charset (charset);
2536 if (XCHARSET_GRAPHIC (charset) == 1)
2538 return make_char (DECODE_CHAR (charset, c));
2542 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2543 Make a character from CHARSET and octets ARG1 and ARG2.
2544 ARG2 is required only for characters from two-dimensional charsets.
2545 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2546 character s with caron.
2548 (charset, arg1, arg2))
2552 int lowlim, highlim;
2554 charset = Fget_charset (charset);
2555 cs = XCHARSET (charset);
2557 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2558 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2560 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2562 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2563 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2566 /* It is useful (and safe, according to Olivier Galibert) to strip
2567 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2568 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2569 Latin 2 code of the character. */
2577 if (a1 < lowlim || a1 > highlim)
2578 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2580 if (CHARSET_DIMENSION (cs) == 1)
2584 ("Charset is of dimension one; second octet must be nil", arg2);
2585 return make_char (MAKE_CHAR (charset, a1, 0));
2594 a2 = XINT (arg2) & 0x7f;
2596 if (a2 < lowlim || a2 > highlim)
2597 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2599 return make_char (MAKE_CHAR (charset, a1, a2));
2602 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2603 Return the character set of char CH.
2607 CHECK_CHAR_COERCE_INT (ch);
2609 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2612 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2613 Return the octet numbered N (should be 0 or 1) of char CH.
2614 N defaults to 0 if omitted.
2618 Lisp_Object charset;
2621 CHECK_CHAR_COERCE_INT (ch);
2623 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2625 if (NILP (n) || EQ (n, Qzero))
2626 return make_int (octet0);
2627 else if (EQ (n, make_int (1)))
2628 return make_int (octet1);
2630 signal_simple_error ("Octet number must be 0 or 1", n);
2633 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2634 Return list of charset and one or two position-codes of CHAR.
2638 /* This function can GC */
2639 struct gcpro gcpro1, gcpro2;
2640 Lisp_Object charset = Qnil;
2641 Lisp_Object rc = Qnil;
2649 GCPRO2 (charset, rc);
2650 CHECK_CHAR_COERCE_INT (character);
2653 code_point = ENCODE_CHAR (XCHAR (character), charset);
2654 dimension = XCHARSET_DIMENSION (charset);
2655 while (dimension > 0)
2657 rc = Fcons (make_int (code_point & 255), rc);
2661 rc = Fcons (XCHARSET_NAME (charset), rc);
2663 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2665 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2667 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2671 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2680 #ifdef ENABLE_COMPOSITE_CHARS
2681 /************************************************************************/
2682 /* composite character functions */
2683 /************************************************************************/
2686 lookup_composite_char (Bufbyte *str, int len)
2688 Lisp_Object lispstr = make_string (str, len);
2689 Lisp_Object ch = Fgethash (lispstr,
2690 Vcomposite_char_string2char_hash_table,
2696 if (composite_char_row_next >= 128)
2697 signal_simple_error ("No more composite chars available", lispstr);
2698 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2699 composite_char_col_next);
2700 Fputhash (make_char (emch), lispstr,
2701 Vcomposite_char_char2string_hash_table);
2702 Fputhash (lispstr, make_char (emch),
2703 Vcomposite_char_string2char_hash_table);
2704 composite_char_col_next++;
2705 if (composite_char_col_next >= 128)
2707 composite_char_col_next = 32;
2708 composite_char_row_next++;
2717 composite_char_string (Emchar ch)
2719 Lisp_Object str = Fgethash (make_char (ch),
2720 Vcomposite_char_char2string_hash_table,
2722 assert (!UNBOUNDP (str));
2726 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2727 Convert a string into a single composite character.
2728 The character is the result of overstriking all the characters in
2733 CHECK_STRING (string);
2734 return make_char (lookup_composite_char (XSTRING_DATA (string),
2735 XSTRING_LENGTH (string)));
2738 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2739 Return a string of the characters comprising a composite character.
2747 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2748 signal_simple_error ("Must be composite char", ch);
2749 return composite_char_string (emch);
2751 #endif /* ENABLE_COMPOSITE_CHARS */
2754 /************************************************************************/
2755 /* initialization */
2756 /************************************************************************/
2759 syms_of_mule_charset (void)
2762 INIT_LRECORD_IMPLEMENTATION (char_byte_table);
2763 INIT_LRECORD_IMPLEMENTATION (char_code_table);
2765 INIT_LRECORD_IMPLEMENTATION (charset);
2767 DEFSUBR (Fcharsetp);
2768 DEFSUBR (Ffind_charset);
2769 DEFSUBR (Fget_charset);
2770 DEFSUBR (Fcharset_list);
2771 DEFSUBR (Fcharset_name);
2772 DEFSUBR (Fmake_charset);
2773 DEFSUBR (Fmake_reverse_direction_charset);
2774 /* DEFSUBR (Freverse_direction_charset); */
2775 DEFSUBR (Fdefine_charset_alias);
2776 DEFSUBR (Fcharset_from_attributes);
2777 DEFSUBR (Fcharset_short_name);
2778 DEFSUBR (Fcharset_long_name);
2779 DEFSUBR (Fcharset_description);
2780 DEFSUBR (Fcharset_dimension);
2781 DEFSUBR (Fcharset_property);
2782 DEFSUBR (Fcharset_id);
2783 DEFSUBR (Fset_charset_ccl_program);
2784 DEFSUBR (Fset_charset_registry);
2786 DEFSUBR (Fchar_attribute_alist);
2787 DEFSUBR (Fget_char_attribute);
2788 DEFSUBR (Fput_char_attribute);
2789 DEFSUBR (Fremove_char_attribute);
2790 DEFSUBR (Fdefine_char);
2791 DEFSUBR (Fchar_variants);
2792 DEFSUBR (Fget_composite_char);
2793 DEFSUBR (Fcharset_mapping_table);
2794 DEFSUBR (Fset_charset_mapping_table);
2798 DEFSUBR (Fdecode_char);
2800 DEFSUBR (Fmake_char);
2801 DEFSUBR (Fchar_charset);
2802 DEFSUBR (Fchar_octet);
2803 DEFSUBR (Fsplit_char);
2805 #ifdef ENABLE_COMPOSITE_CHARS
2806 DEFSUBR (Fmake_composite_char);
2807 DEFSUBR (Fcomposite_char_string);
2810 defsymbol (&Qcharsetp, "charsetp");
2811 defsymbol (&Qregistry, "registry");
2812 defsymbol (&Qfinal, "final");
2813 defsymbol (&Qgraphic, "graphic");
2814 defsymbol (&Qdirection, "direction");
2815 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2816 defsymbol (&Qshort_name, "short-name");
2817 defsymbol (&Qlong_name, "long-name");
2819 defsymbol (&Ql2r, "l2r");
2820 defsymbol (&Qr2l, "r2l");
2822 /* Charsets, compatible with FSF 20.3
2823 Naming convention is Script-Charset[-Edition] */
2824 defsymbol (&Qascii, "ascii");
2825 defsymbol (&Qcontrol_1, "control-1");
2826 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2827 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2828 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2829 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2830 defsymbol (&Qthai_tis620, "thai-tis620");
2831 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2832 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2833 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2834 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2835 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2836 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2837 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2838 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2839 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2840 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2841 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2842 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2843 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2844 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2845 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2847 defsymbol (&Q_ucs, "->ucs");
2848 defsymbol (&Q_decomposition, "->decomposition");
2849 defsymbol (&Qcompat, "compat");
2850 defsymbol (&Qisolated, "isolated");
2851 defsymbol (&Qinitial, "initial");
2852 defsymbol (&Qmedial, "medial");
2853 defsymbol (&Qfinal, "final");
2854 defsymbol (&Qvertical, "vertical");
2855 defsymbol (&QnoBreak, "noBreak");
2856 defsymbol (&Qfraction, "fraction");
2857 defsymbol (&Qsuper, "super");
2858 defsymbol (&Qsub, "sub");
2859 defsymbol (&Qcircle, "circle");
2860 defsymbol (&Qsquare, "square");
2861 defsymbol (&Qwide, "wide");
2862 defsymbol (&Qnarrow, "narrow");
2863 defsymbol (&Qsmall, "small");
2864 defsymbol (&Qfont, "font");
2865 defsymbol (&Qucs, "ucs");
2866 defsymbol (&Qucs_bmp, "ucs-bmp");
2867 defsymbol (&Qlatin_viscii, "latin-viscii");
2868 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2869 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2870 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2871 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2872 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2873 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2874 defsymbol (&Qmojikyo, "mojikyo");
2875 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2876 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2877 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2878 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2879 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2880 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2881 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2882 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2883 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2884 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2885 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2886 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2887 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2888 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2889 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2890 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2891 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2892 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2893 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2894 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2895 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2896 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2898 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2899 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2901 defsymbol (&Qcomposite, "composite");
2905 vars_of_mule_charset (void)
2912 chlook = xnew (struct charset_lookup);
2913 dumpstruct (&chlook, &charset_lookup_description);
2915 /* Table of charsets indexed by leading byte. */
2916 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2917 chlook->charset_by_leading_byte[i] = Qnil;
2920 /* Table of charsets indexed by type/final-byte. */
2921 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2922 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2923 chlook->charset_by_attributes[i][j] = Qnil;
2925 /* Table of charsets indexed by type/final-byte/direction. */
2926 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2927 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2928 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2929 chlook->charset_by_attributes[i][j][k] = Qnil;
2933 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2935 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2936 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2940 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2941 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2942 Leading-code of private TYPE9N charset of column-width 1.
2944 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2948 Vutf_2000_version = build_string("0.15 (Sangō)");
2949 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2950 Version number of UTF-2000.
2953 staticpro (&Vcharacter_attribute_table);
2954 Vcharacter_attribute_table = make_char_code_table (Qnil);
2956 staticpro (&Vcharacter_composition_table);
2957 Vcharacter_composition_table = make_char_code_table (Qnil);
2959 staticpro (&Vcharacter_variant_table);
2960 Vcharacter_variant_table = make_char_code_table (Qnil);
2962 Vdefault_coded_charset_priority_list = Qnil;
2963 DEFVAR_LISP ("default-coded-charset-priority-list",
2964 &Vdefault_coded_charset_priority_list /*
2965 Default order of preferred coded-character-sets.
2971 complex_vars_of_mule_charset (void)
2973 staticpro (&Vcharset_hash_table);
2974 Vcharset_hash_table =
2975 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2977 /* Predefined character sets. We store them into variables for
2981 staticpro (&Vcharset_ucs);
2983 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2984 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2985 build_string ("UCS"),
2986 build_string ("UCS"),
2987 build_string ("ISO/IEC 10646"),
2989 Qnil, 0, 0xFFFFFFF, 0, 0);
2990 staticpro (&Vcharset_ucs_bmp);
2992 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2993 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2994 build_string ("BMP"),
2995 build_string ("BMP"),
2996 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2997 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2998 Qnil, 0, 0xFFFF, 0, 0);
3000 # define MIN_CHAR_THAI 0
3001 # define MAX_CHAR_THAI 0
3002 # define MIN_CHAR_HEBREW 0
3003 # define MAX_CHAR_HEBREW 0
3004 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3005 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3007 staticpro (&Vcharset_ascii);
3009 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3010 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3011 build_string ("ASCII"),
3012 build_string ("ASCII)"),
3013 build_string ("ASCII (ISO646 IRV)"),
3014 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3015 Qnil, 0, 0x7F, 0, 0);
3016 staticpro (&Vcharset_control_1);
3017 Vcharset_control_1 =
3018 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3019 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3020 build_string ("C1"),
3021 build_string ("Control characters"),
3022 build_string ("Control characters 128-191"),
3024 Qnil, 0x80, 0x9F, 0, 0);
3025 staticpro (&Vcharset_latin_iso8859_1);
3026 Vcharset_latin_iso8859_1 =
3027 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3028 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3029 build_string ("Latin-1"),
3030 build_string ("ISO8859-1 (Latin-1)"),
3031 build_string ("ISO8859-1 (Latin-1)"),
3032 build_string ("iso8859-1"),
3033 Qnil, 0xA0, 0xFF, 0, 32);
3034 staticpro (&Vcharset_latin_iso8859_2);
3035 Vcharset_latin_iso8859_2 =
3036 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3037 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("Latin-2"),
3039 build_string ("ISO8859-2 (Latin-2)"),
3040 build_string ("ISO8859-2 (Latin-2)"),
3041 build_string ("iso8859-2"),
3043 staticpro (&Vcharset_latin_iso8859_3);
3044 Vcharset_latin_iso8859_3 =
3045 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3046 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3047 build_string ("Latin-3"),
3048 build_string ("ISO8859-3 (Latin-3)"),
3049 build_string ("ISO8859-3 (Latin-3)"),
3050 build_string ("iso8859-3"),
3052 staticpro (&Vcharset_latin_iso8859_4);
3053 Vcharset_latin_iso8859_4 =
3054 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3055 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3056 build_string ("Latin-4"),
3057 build_string ("ISO8859-4 (Latin-4)"),
3058 build_string ("ISO8859-4 (Latin-4)"),
3059 build_string ("iso8859-4"),
3061 staticpro (&Vcharset_thai_tis620);
3062 Vcharset_thai_tis620 =
3063 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3064 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("TIS620"),
3066 build_string ("TIS620 (Thai)"),
3067 build_string ("TIS620.2529 (Thai)"),
3068 build_string ("tis620"),
3069 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3070 staticpro (&Vcharset_greek_iso8859_7);
3071 Vcharset_greek_iso8859_7 =
3072 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3073 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3074 build_string ("ISO8859-7"),
3075 build_string ("ISO8859-7 (Greek)"),
3076 build_string ("ISO8859-7 (Greek)"),
3077 build_string ("iso8859-7"),
3079 0 /* MIN_CHAR_GREEK */,
3080 0 /* MAX_CHAR_GREEK */, 0, 32);
3081 staticpro (&Vcharset_arabic_iso8859_6);
3082 Vcharset_arabic_iso8859_6 =
3083 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3084 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3085 build_string ("ISO8859-6"),
3086 build_string ("ISO8859-6 (Arabic)"),
3087 build_string ("ISO8859-6 (Arabic)"),
3088 build_string ("iso8859-6"),
3090 staticpro (&Vcharset_hebrew_iso8859_8);
3091 Vcharset_hebrew_iso8859_8 =
3092 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3093 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3094 build_string ("ISO8859-8"),
3095 build_string ("ISO8859-8 (Hebrew)"),
3096 build_string ("ISO8859-8 (Hebrew)"),
3097 build_string ("iso8859-8"),
3098 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3099 staticpro (&Vcharset_katakana_jisx0201);
3100 Vcharset_katakana_jisx0201 =
3101 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3102 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3103 build_string ("JISX0201 Kana"),
3104 build_string ("JISX0201.1976 (Japanese Kana)"),
3105 build_string ("JISX0201.1976 Japanese Kana"),
3106 build_string ("jisx0201\\.1976"),
3108 staticpro (&Vcharset_latin_jisx0201);
3109 Vcharset_latin_jisx0201 =
3110 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3111 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3112 build_string ("JISX0201 Roman"),
3113 build_string ("JISX0201.1976 (Japanese Roman)"),
3114 build_string ("JISX0201.1976 Japanese Roman"),
3115 build_string ("jisx0201\\.1976"),
3117 staticpro (&Vcharset_cyrillic_iso8859_5);
3118 Vcharset_cyrillic_iso8859_5 =
3119 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3120 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3121 build_string ("ISO8859-5"),
3122 build_string ("ISO8859-5 (Cyrillic)"),
3123 build_string ("ISO8859-5 (Cyrillic)"),
3124 build_string ("iso8859-5"),
3126 0 /* MIN_CHAR_CYRILLIC */,
3127 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3128 staticpro (&Vcharset_latin_iso8859_9);
3129 Vcharset_latin_iso8859_9 =
3130 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3131 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3132 build_string ("Latin-5"),
3133 build_string ("ISO8859-9 (Latin-5)"),
3134 build_string ("ISO8859-9 (Latin-5)"),
3135 build_string ("iso8859-9"),
3137 staticpro (&Vcharset_japanese_jisx0208_1978);
3138 Vcharset_japanese_jisx0208_1978 =
3139 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3140 Qjapanese_jisx0208_1978, 94, 2,
3141 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3142 build_string ("JIS X0208:1978"),
3143 build_string ("JIS X0208:1978 (Japanese)"),
3145 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3146 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3148 staticpro (&Vcharset_chinese_gb2312);
3149 Vcharset_chinese_gb2312 =
3150 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3151 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3152 build_string ("GB2312"),
3153 build_string ("GB2312)"),
3154 build_string ("GB2312 Chinese simplified"),
3155 build_string ("gb2312"),
3157 staticpro (&Vcharset_japanese_jisx0208);
3158 Vcharset_japanese_jisx0208 =
3159 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3160 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3161 build_string ("JISX0208"),
3162 build_string ("JIS X0208:1983 (Japanese)"),
3163 build_string ("JIS X0208:1983 Japanese Kanji"),
3164 build_string ("jisx0208\\.1983"),
3167 staticpro (&Vcharset_japanese_jisx0208_1990);
3168 Vcharset_japanese_jisx0208_1990 =
3169 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3170 Qjapanese_jisx0208_1990, 94, 2,
3171 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3172 build_string ("JISX0208-1990"),
3173 build_string ("JIS X0208:1990 (Japanese)"),
3174 build_string ("JIS X0208:1990 Japanese Kanji"),
3175 build_string ("jisx0208\\.1990"),
3177 MIN_CHAR_JIS_X0208_1990,
3178 MAX_CHAR_JIS_X0208_1990, 0, 33);
3180 staticpro (&Vcharset_korean_ksc5601);
3181 Vcharset_korean_ksc5601 =
3182 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3183 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3184 build_string ("KSC5601"),
3185 build_string ("KSC5601 (Korean"),
3186 build_string ("KSC5601 Korean Hangul and Hanja"),
3187 build_string ("ksc5601"),
3189 staticpro (&Vcharset_japanese_jisx0212);
3190 Vcharset_japanese_jisx0212 =
3191 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3192 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3193 build_string ("JISX0212"),
3194 build_string ("JISX0212 (Japanese)"),
3195 build_string ("JISX0212 Japanese Supplement"),
3196 build_string ("jisx0212"),
3199 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3200 staticpro (&Vcharset_chinese_cns11643_1);
3201 Vcharset_chinese_cns11643_1 =
3202 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3203 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3204 build_string ("CNS11643-1"),
3205 build_string ("CNS11643-1 (Chinese traditional)"),
3207 ("CNS 11643 Plane 1 Chinese traditional"),
3208 build_string (CHINESE_CNS_PLANE_RE("1")),
3210 staticpro (&Vcharset_chinese_cns11643_2);
3211 Vcharset_chinese_cns11643_2 =
3212 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3213 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3214 build_string ("CNS11643-2"),
3215 build_string ("CNS11643-2 (Chinese traditional)"),
3217 ("CNS 11643 Plane 2 Chinese traditional"),
3218 build_string (CHINESE_CNS_PLANE_RE("2")),
3221 staticpro (&Vcharset_latin_tcvn5712);
3222 Vcharset_latin_tcvn5712 =
3223 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3224 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3225 build_string ("TCVN 5712"),
3226 build_string ("TCVN 5712 (VSCII-2)"),
3227 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3228 build_string ("tcvn5712-1"),
3230 staticpro (&Vcharset_latin_viscii_lower);
3231 Vcharset_latin_viscii_lower =
3232 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3233 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3234 build_string ("VISCII lower"),
3235 build_string ("VISCII lower (Vietnamese)"),
3236 build_string ("VISCII lower (Vietnamese)"),
3237 build_string ("MULEVISCII-LOWER"),
3239 staticpro (&Vcharset_latin_viscii_upper);
3240 Vcharset_latin_viscii_upper =
3241 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3242 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3243 build_string ("VISCII upper"),
3244 build_string ("VISCII upper (Vietnamese)"),
3245 build_string ("VISCII upper (Vietnamese)"),
3246 build_string ("MULEVISCII-UPPER"),
3248 staticpro (&Vcharset_latin_viscii);
3249 Vcharset_latin_viscii =
3250 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3251 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3252 build_string ("VISCII"),
3253 build_string ("VISCII 1.1 (Vietnamese)"),
3254 build_string ("VISCII 1.1 (Vietnamese)"),
3255 build_string ("VISCII1\\.1"),
3257 staticpro (&Vcharset_ideograph_daikanwa);
3258 Vcharset_ideograph_daikanwa =
3259 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3260 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3261 build_string ("Daikanwa"),
3262 build_string ("Morohashi's Daikanwa"),
3263 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3264 build_string ("Daikanwa"),
3265 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3266 staticpro (&Vcharset_mojikyo);
3268 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3269 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3270 build_string ("Mojikyo"),
3271 build_string ("Mojikyo"),
3272 build_string ("Konjaku-Mojikyo"),
3274 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3275 staticpro (&Vcharset_mojikyo_pj_1);
3276 Vcharset_mojikyo_pj_1 =
3277 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3278 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Mojikyo-PJ-1"),
3280 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3282 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3283 build_string ("jisx0208\\.Mojikyo-1$"),
3285 staticpro (&Vcharset_mojikyo_pj_2);
3286 Vcharset_mojikyo_pj_2 =
3287 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3288 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("Mojikyo-PJ-2"),
3290 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3292 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3293 build_string ("jisx0208\\.Mojikyo-2$"),
3295 staticpro (&Vcharset_mojikyo_pj_3);
3296 Vcharset_mojikyo_pj_3 =
3297 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3298 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3299 build_string ("Mojikyo-PJ-3"),
3300 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3302 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3303 build_string ("jisx0208\\.Mojikyo-3$"),
3305 staticpro (&Vcharset_mojikyo_pj_4);
3306 Vcharset_mojikyo_pj_4 =
3307 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3308 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Mojikyo-PJ-4"),
3310 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3312 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3313 build_string ("jisx0208\\.Mojikyo-4$"),
3315 staticpro (&Vcharset_mojikyo_pj_5);
3316 Vcharset_mojikyo_pj_5 =
3317 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3318 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3319 build_string ("Mojikyo-PJ-5"),
3320 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3322 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3323 build_string ("jisx0208\\.Mojikyo-5$"),
3325 staticpro (&Vcharset_mojikyo_pj_6);
3326 Vcharset_mojikyo_pj_6 =
3327 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3328 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3329 build_string ("Mojikyo-PJ-6"),
3330 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3332 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3333 build_string ("jisx0208\\.Mojikyo-6$"),
3335 staticpro (&Vcharset_mojikyo_pj_7);
3336 Vcharset_mojikyo_pj_7 =
3337 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3338 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Mojikyo-PJ-7"),
3340 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3342 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3343 build_string ("jisx0208\\.Mojikyo-7$"),
3345 staticpro (&Vcharset_mojikyo_pj_8);
3346 Vcharset_mojikyo_pj_8 =
3347 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3348 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3349 build_string ("Mojikyo-PJ-8"),
3350 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3352 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3353 build_string ("jisx0208\\.Mojikyo-8$"),
3355 staticpro (&Vcharset_mojikyo_pj_9);
3356 Vcharset_mojikyo_pj_9 =
3357 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3358 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3359 build_string ("Mojikyo-PJ-9"),
3360 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3362 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3363 build_string ("jisx0208\\.Mojikyo-9$"),
3365 staticpro (&Vcharset_mojikyo_pj_10);
3366 Vcharset_mojikyo_pj_10 =
3367 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3368 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3369 build_string ("Mojikyo-PJ-10"),
3370 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3372 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3373 build_string ("jisx0208\\.Mojikyo-10$"),
3375 staticpro (&Vcharset_mojikyo_pj_11);
3376 Vcharset_mojikyo_pj_11 =
3377 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3378 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3379 build_string ("Mojikyo-PJ-11"),
3380 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3382 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3383 build_string ("jisx0208\\.Mojikyo-11$"),
3385 staticpro (&Vcharset_mojikyo_pj_12);
3386 Vcharset_mojikyo_pj_12 =
3387 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3388 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3389 build_string ("Mojikyo-PJ-12"),
3390 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3392 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3393 build_string ("jisx0208\\.Mojikyo-12$"),
3395 staticpro (&Vcharset_mojikyo_pj_13);
3396 Vcharset_mojikyo_pj_13 =
3397 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3398 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3399 build_string ("Mojikyo-PJ-13"),
3400 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3402 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3403 build_string ("jisx0208\\.Mojikyo-13$"),
3405 staticpro (&Vcharset_mojikyo_pj_14);
3406 Vcharset_mojikyo_pj_14 =
3407 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3408 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3409 build_string ("Mojikyo-PJ-14"),
3410 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3412 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3413 build_string ("jisx0208\\.Mojikyo-14$"),
3415 staticpro (&Vcharset_mojikyo_pj_15);
3416 Vcharset_mojikyo_pj_15 =
3417 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3418 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3419 build_string ("Mojikyo-PJ-15"),
3420 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3422 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3423 build_string ("jisx0208\\.Mojikyo-15$"),
3425 staticpro (&Vcharset_mojikyo_pj_16);
3426 Vcharset_mojikyo_pj_16 =
3427 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3428 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3429 build_string ("Mojikyo-PJ-16"),
3430 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3432 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3433 build_string ("jisx0208\\.Mojikyo-16$"),
3435 staticpro (&Vcharset_mojikyo_pj_17);
3436 Vcharset_mojikyo_pj_17 =
3437 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3438 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3439 build_string ("Mojikyo-PJ-17"),
3440 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3442 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3443 build_string ("jisx0208\\.Mojikyo-17$"),
3445 staticpro (&Vcharset_mojikyo_pj_18);
3446 Vcharset_mojikyo_pj_18 =
3447 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3448 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3449 build_string ("Mojikyo-PJ-18"),
3450 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3452 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3453 build_string ("jisx0208\\.Mojikyo-18$"),
3455 staticpro (&Vcharset_mojikyo_pj_19);
3456 Vcharset_mojikyo_pj_19 =
3457 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3458 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3459 build_string ("Mojikyo-PJ-19"),
3460 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3462 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3463 build_string ("jisx0208\\.Mojikyo-19$"),
3465 staticpro (&Vcharset_mojikyo_pj_20);
3466 Vcharset_mojikyo_pj_20 =
3467 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3468 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3469 build_string ("Mojikyo-PJ-20"),
3470 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3472 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3473 build_string ("jisx0208\\.Mojikyo-20$"),
3475 staticpro (&Vcharset_mojikyo_pj_21);
3476 Vcharset_mojikyo_pj_21 =
3477 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3478 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3479 build_string ("Mojikyo-PJ-21"),
3480 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3482 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3483 build_string ("jisx0208\\.Mojikyo-21$"),
3485 staticpro (&Vcharset_ethiopic_ucs);
3486 Vcharset_ethiopic_ucs =
3487 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3488 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3489 build_string ("Ethiopic (UCS)"),
3490 build_string ("Ethiopic (UCS)"),
3491 build_string ("Ethiopic of UCS"),
3492 build_string ("Ethiopic-Unicode"),
3493 Qnil, 0x1200, 0x137F, 0x1200, 0);
3495 staticpro (&Vcharset_chinese_big5_1);
3496 Vcharset_chinese_big5_1 =
3497 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3498 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3499 build_string ("Big5"),
3500 build_string ("Big5 (Level-1)"),
3502 ("Big5 Level-1 Chinese traditional"),
3503 build_string ("big5"),
3505 staticpro (&Vcharset_chinese_big5_2);
3506 Vcharset_chinese_big5_2 =
3507 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3508 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3509 build_string ("Big5"),
3510 build_string ("Big5 (Level-2)"),
3512 ("Big5 Level-2 Chinese traditional"),
3513 build_string ("big5"),
3516 #ifdef ENABLE_COMPOSITE_CHARS
3517 /* #### For simplicity, we put composite chars into a 96x96 charset.
3518 This is going to lead to problems because you can run out of
3519 room, esp. as we don't yet recycle numbers. */
3520 staticpro (&Vcharset_composite);
3521 Vcharset_composite =
3522 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3523 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3524 build_string ("Composite"),
3525 build_string ("Composite characters"),
3526 build_string ("Composite characters"),
3529 /* #### not dumped properly */
3530 composite_char_row_next = 32;
3531 composite_char_col_next = 32;
3533 Vcomposite_char_string2char_hash_table =
3534 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3535 Vcomposite_char_char2string_hash_table =
3536 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3537 staticpro (&Vcomposite_char_string2char_hash_table);
3538 staticpro (&Vcomposite_char_char2string_hash_table);
3539 #endif /* ENABLE_COMPOSITE_CHARS */