1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
/* One global Lisp_Object per built-in charset.  These lines only define
   the variables; NOTE(review): the code that creates the charset objects
   and stores them here is not visible in this part of the file. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
100 /* Hash tables for composite chars. One maps string representing
101 composed chars to their equivalent chars; one goes the
102 other way. */
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at the charset lookup structure.  make_charset() in this
   file records every new charset in its charset_by_leading_byte and
   charset_by_attributes members. */
111 struct charset_lookup *chlook;
/* Field description table for struct charset_lookup.  The visible entry
   describes the charset_by_leading_byte member as an array of Lisp
   objects.  NOTE(review): the rest of this initializer is not visible
   here. */
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Struct description for struct charset_lookup: pairs the structure size
   with the field table charset_lookup_description_1. */
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The array is declared with 0xA0 entries, i.e. one per possible first
   byte in the range 0x00 - 0x9f. */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for this range (the first ends
   in 3, the second is all 2).  Presumably they are alternatives chosen by
   a preprocessor conditional that is not visible here -- confirm. */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method for char-byte-table objects (it is the mark entry passed to
   DEFINE_LRECORD_IMPLEMENTATION for "char-byte-table"): calls
   mark_object() on each of the 256 slots of the table's property array.
   NOTE(review): the return statement is not visible here. */
163 mark_char_byte_table (Lisp_Object obj)
165 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
/* Equality method for char-byte-table objects.  Walks the 256 property
   slots of OBJ1 and OBJ2 in parallel.  When a slot of OBJ1 holds a nested
   char-byte-table, the matching slot of OBJ2 is tested for being one as
   well and the two are compared recursively; the other visible comparison
   uses internal_equal().  Both calls pass DEPTH + 1.
   NOTE(review): the return statements and the else branches are not
   visible here. */
176 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
179 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
185 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
187 if (!char_byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-table objects: returns the hash that
   internal_array_hash() computes over the 256-element property array.
   DEPTH is passed through unchanged. */
201 char_byte_table_hash (Lisp_Object obj, int depth)
203 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
/* Field description for struct Lisp_Char_Byte_Table: the property member,
   described as XD_LISP_OBJECT with a count of 256. */
208 static const struct lrecord_description char_byte_table_description[] = {
209 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Defines the "char-byte-table" lrecord type from the methods in this
   file: mark_char_byte_table, the generic internal_object_printer,
   char_byte_table_equal, char_byte_table_hash and the field description
   char_byte_table_description.  NOTE(review): the 0 argument is presumably
   the finalizer slot -- confirm against DEFINE_LRECORD_IMPLEMENTATION. */
213 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
214 mark_char_byte_table,
215 internal_object_printer,
216 0, char_byte_table_equal,
217 char_byte_table_hash,
218 char_byte_table_description,
219 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table lrecord and set all 256 property slots
   to INITVAL.  The record is then wrapped into the Lisp_Object `obj' with
   XSETCHAR_BYTE_TABLE.  NOTE(review): the return statement is not visible
   here; callers in this file use the result as a Lisp_Object. */
222 make_char_byte_table (Lisp_Object initval)
226 struct Lisp_Char_Byte_Table *cte =
227 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
228 &lrecord_char_byte_table);
230 for (i = 0; i < 256; i++)
231 cte->property[i] = initval;
233 XSETCHAR_BYTE_TABLE (obj, cte);
/* Make a copy of the char-byte-table ENTRY.  A new record is allocated and
   each of the 256 slots is filled from the source: a slot holding a nested
   char-byte-table is copied recursively, any other slot value is stored
   as-is (the same Lisp_Object).  The new record is wrapped into `obj' with
   XSETCHAR_BYTE_TABLE.  NOTE(review): the return statement is not visible
   here. */
238 copy_char_byte_table (Lisp_Object entry)
240 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
243 struct Lisp_Char_Byte_Table *ctenew =
244 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
245 &lrecord_char_byte_table);
247 for (i = 0; i < 256; i++)
249 Lisp_Object new = cte->property[i];
250 if (CHAR_BYTE_TABLE_P (new))
251 ctenew->property[i] = copy_char_byte_table (new);
253 ctenew->property[i] = new;
256 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark method for char-code-table objects (it is the mark entry passed to
   DEFINE_LRECORD_IMPLEMENTATION for "char-code-table").
   NOTE(review): only the unwrapping of OBJ is visible here; presumably the
   method hands cte->table on to the marker -- confirm. */
262 mark_char_code_table (Lisp_Object obj)
264 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality method for char-code-table objects: returns the result of
   comparing the two wrapped char-byte-tables with char_byte_table_equal(),
   called with DEPTH + 1. */
270 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
272 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
273 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
275 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash method for char-code-table objects: a char-code-table hashes like
   the char-byte-table it wraps, with DEPTH + 1.
   cte->table is a char-byte-table (make_char_code_table stores the result
   of make_char_byte_table there), so it has to be hashed with
   char_byte_table_hash().  Passing it back to char_code_table_hash() would
   unwrap it as the wrong record type and recurse. */
279 char_code_table_hash (Lisp_Object obj, int depth)
281 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
283 return char_byte_table_hash (cte->table, depth + 1);
/* Field description for struct Lisp_Char_Code_Table: the single table
   member, described as XD_LISP_OBJECT with a count of 1. */
286 static const struct lrecord_description char_code_table_description[] = {
287 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
/* Defines the "char-code-table" lrecord type from the methods in this
   file: mark_char_code_table, the generic internal_object_printer,
   char_code_table_equal, char_code_table_hash and the field description
   char_code_table_description.  NOTE(review): the 0 argument is presumably
   the finalizer slot -- confirm against DEFINE_LRECORD_IMPLEMENTATION. */
291 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
292 mark_char_code_table,
293 internal_object_printer,
294 0, char_code_table_equal,
295 char_code_table_hash,
296 char_code_table_description,
297 struct Lisp_Char_Code_Table);
/* Allocate a new char-code-table whose table member is a fresh
   char-byte-table with every slot set to INITVAL, and wrap it into `obj'
   with XSETCHAR_CODE_TABLE.  NOTE(review): the return statement is not
   visible here; callers in this file use the result as a Lisp_Object. */
300 make_char_code_table (Lisp_Object initval)
303 struct Lisp_Char_Code_Table *cte =
304 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
305 &lrecord_char_code_table);
307 cte->table = make_char_byte_table (initval);
309 XSETCHAR_CODE_TABLE (obj, cte);
/* Make a copy of the char-code-table ENTRY: a new record is allocated and
   its table member is set to copy_char_byte_table() of the source's table,
   so nested byte tables are duplicated rather than shared.  The new record
   is wrapped into `obj' with XSETCHAR_CODE_TABLE.  NOTE(review): the
   return statement is not visible here. */
314 copy_char_code_table (Lisp_Object entry)
316 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
318 struct Lisp_Char_Code_Table *ctenew =
319 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
320 &lrecord_char_code_table);
322 ctenew->table = copy_char_byte_table (cte->table);
323 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-code-table TABLE.
   The code is treated as four bytes, most significant first, and each byte
   indexes one level of nested char-byte-tables.  At each of the first
   three levels, a slot that itself holds a char-byte-table is descended
   into; the last level returns the slot selected by the low byte.
   NOTE(review): the branches taken when a slot is NOT a char-byte-table
   are not visible here.  Presumably the slot value is returned directly,
   which would match put_char_code_table() only creating lower levels when
   a value differs -- confirm. */
329 get_char_code_table (Emchar ch, Lisp_Object table)
331 unsigned int code = ch;
332 struct Lisp_Char_Byte_Table* cpt
333 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
334 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
336 if (CHAR_BYTE_TABLE_P (ret))
337 cpt = XCHAR_BYTE_TABLE (ret);
341 ret = cpt->property [(unsigned char) (code >> 16)];
342 if (CHAR_BYTE_TABLE_P (ret))
343 cpt = XCHAR_BYTE_TABLE (ret);
347 ret = cpt->property [(unsigned char) (code >> 8)];
348 if (CHAR_BYTE_TABLE_P (ret))
349 cpt = XCHAR_BYTE_TABLE (ret);
353 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-code-table TABLE.
   The code is treated as four bytes, most significant first, each indexing
   one level of nested char-byte-tables (the same layout that
   get_char_code_table() reads).  At every level:
     - if the slot already holds a char-byte-table, descend into it;
     - otherwise, if the slot's current value is not EQ to VALUE, create
       the missing lower-level tables with make_char_byte_table (ret),
       i.e. pre-filled with the old value so the other codes sharing that
       slot keep it, then store VALUE in the leaf slot and link the new
       tables into the parent;
     - if the slot's value is already EQ to VALUE, nothing is stored.
   At the deepest level VALUE is written directly into the leaf slot. */
356 void put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table);
358 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
360 unsigned int code = ch;
361 struct Lisp_Char_Byte_Table* cpt1
362 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
363 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
365 if (CHAR_BYTE_TABLE_P (ret))
367 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
369 ret = cpt2->property[(unsigned char)(code >> 16)];
370 if (CHAR_BYTE_TABLE_P (ret))
372 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
374 ret = cpt3->property[(unsigned char)(code >> 8)];
375 if (CHAR_BYTE_TABLE_P (ret))
377 struct Lisp_Char_Byte_Table* cpt4
378 = XCHAR_BYTE_TABLE (ret);
380 cpt4->property[(unsigned char)code] = value;
382 else if (!EQ (ret, value))
384 Lisp_Object cpt4 = make_char_byte_table (ret);
386 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
387 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
390 else if (!EQ (ret, value))
392 Lisp_Object cpt3 = make_char_byte_table (ret);
393 Lisp_Object cpt4 = make_char_byte_table (ret);
395 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
396 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
398 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
401 else if (!EQ (ret, value))
403 Lisp_Object cpt2 = make_char_byte_table (ret);
404 Lisp_Object cpt3 = make_char_byte_table (ret);
405 Lisp_Object cpt4 = make_char_byte_table (ret);
407 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
408 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
409 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
410 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
/* The three char-code-tables used by the character-attribute primitives in
   this file: per-character attribute alists, the composition lookup table
   and per-character variant lists.  They are followed by symbols that
   to_char_code() and Fput_char_attribute() compare against with EQ.
   NOTE(review): more symbol definitions belong to this group than are
   visible here. */
415 Lisp_Object Vcharacter_attribute_table;
416 Lisp_Object Vcharacter_composition_table;
417 Lisp_Object Vcharacter_variant_table;
419 Lisp_Object Q_decomposition;
422 Lisp_Object Qisolated;
423 Lisp_Object Qinitial;
426 Lisp_Object Qvertical;
427 Lisp_Object QnoBreak;
428 Lisp_Object Qfraction;
/* Convert the Lisp value V into an Emchar.
   The visible part is a chain of EQ tests of V against the symbols
   Qcompat, Qisolated, Qinitial, Qmedial, Qfinal, Qvertical, QnoBreak,
   Qfraction, Qsuper, Qsub, Qcircle, Qsquare, Qwide, Qnarrow, Qsmall and
   Qfont; the chain is followed by signal_simple_error (ERR_MSG, ERR_ARG).
   NOTE(review): the first test of the chain and the value returned for
   each case are not visible here. */
438 Emchar to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
440 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
446 else if (EQ (v, Qcompat))
448 else if (EQ (v, Qisolated))
450 else if (EQ (v, Qinitial))
452 else if (EQ (v, Qmedial))
454 else if (EQ (v, Qfinal))
456 else if (EQ (v, Qvertical))
458 else if (EQ (v, QnoBreak))
460 else if (EQ (v, Qfraction))
462 else if (EQ (v, Qsuper))
464 else if (EQ (v, Qsub))
466 else if (EQ (v, Qcircle))
468 else if (EQ (v, Qsquare))
470 else if (EQ (v, Qwide))
472 else if (EQ (v, Qnarrow))
474 else if (EQ (v, Qsmall))
476 else if (EQ (v, Qfont))
479 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char' (exactly one argument, LIST).
   Starts at Vcharacter_composition_table and takes elements of LIST one
   at a time: each element is converted with to_char_code() and looked up
   in the current table with get_char_code_table().  The visible branches
   test whether the lookup result is itself a char-code-table and whether
   more of LIST remains.  Malformed data is reported with
   signal_simple_error ("Invalid table is found with" / "Invalid value for
   composition").  NOTE(review): the loop structure and return statements
   are not visible here. */
482 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
483 Return character corresponding with list.
487 Lisp_Object table = Vcharacter_composition_table;
488 Lisp_Object rest = list;
492 Lisp_Object v = Fcar (rest);
494 Emchar c = to_char_code (v, "Invalid value for composition", list);
496 ret = get_char_code_table (c, table);
501 if (!CHAR_CODE_TABLE_P (ret))
506 else if (!CONSP (rest))
508 else if (CHAR_CODE_TABLE_P (ret))
511 signal_simple_error ("Invalid table is found with", list);
513 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants' (exactly one argument, CHARACTER).
   Validates the argument with CHECK_CHAR, then returns a fresh copy
   (Fcopy_list) of CHARACTER's entry in Vcharacter_variant_table, so the
   caller cannot modify the stored list structure. */
516 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
517 Return variants of CHARACTER.
521 CHECK_CHAR (character);
522 return Fcopy_list (get_char_code_table (XCHAR (character),
523 Vcharacter_variant_table));
/* Lisp primitive `char-attribute-alist' (exactly one argument,
   CHARACTER).  Validates the argument with CHECK_CHAR, then returns a copy
   made with Fcopy_alist of CHARACTER's entry in
   Vcharacter_attribute_table. */
526 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
527 Return the alist of attributes of CHARACTER.
531 CHECK_CHAR (character);
532 return Fcopy_alist (get_char_code_table (XCHAR (character),
533 Vcharacter_attribute_table));
/* Lisp primitive `get-char-attribute' (two arguments, CHARACTER and
   ATTRIBUTE).  Validates CHARACTER with CHECK_CHAR and fetches its
   attribute alist from Vcharacter_attribute_table.  ATTRIBUTE is first
   tried as a charset designator via Ffind_charset; the final visible
   statement returns the cdr of the alist cell found by Fassq.
   NOTE(review): what happens between the Ffind_charset test and the final
   return is not visible here. */
536 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
537 Return the value of CHARACTER's ATTRIBUTE.
539 (character, attribute))
544 CHECK_CHAR (character);
545 ret = get_char_code_table (XCHAR (character),
546 Vcharacter_attribute_table);
550 if (!NILP (ccs = Ffind_charset (attribute)))
553 return Fcdr (Fassq (attribute, ret));
/* Set ATTRIBUTE to VALUE in CHARACTER's attribute alist.
   The current alist is read from Vcharacter_attribute_table and searched
   with Fassq.  The visible branches either cons a new (ATTRIBUTE . VALUE)
   pair onto the front of the alist, or, when a cell exists whose cdr is
   not EQ to VALUE, overwrite that cdr with Fsetcdr.  The alist is written
   back with put_char_code_table().  NOTE(review): the condition guarding
   the cons branch and the return value are not visible here. */
556 Lisp_Object put_char_attribute (Lisp_Object character,
557 Lisp_Object attribute, Lisp_Object value);
559 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
562 Emchar char_code = XCHAR (character);
564 = get_char_code_table (char_code, Vcharacter_attribute_table);
567 cell = Fassq (attribute, ret);
571 ret = Fcons (Fcons (attribute, value), ret);
573 else if (!EQ (Fcdr (cell), value))
575 Fsetcdr (cell, value);
577 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Remove ATTRIBUTE from CHARACTER's attribute alist.
   The alist is read from Vcharacter_attribute_table.  If its first cell is
   keyed by ATTRIBUTE (EQ on the car), the alist is replaced by its cdr.
   Otherwise the list is walked with a trailing pointer `pr' and a cell
   keyed by ATTRIBUTE is spliced out by redirecting XCDR (pr).  The alist
   is written back with put_char_code_table().  NOTE(review): the loop
   header, loop exits and return value are not visible here. */
581 Lisp_Object remove_char_attribute (Lisp_Object character,
582 Lisp_Object attribute);
584 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
586 Emchar char_code = XCHAR (character);
588 = get_char_code_table (char_code, Vcharacter_attribute_table);
590 if (EQ (attribute, Fcar (Fcar (alist))))
592 alist = Fcdr (alist);
596 Lisp_Object pr = alist;
597 Lisp_Object r = Fcdr (alist);
601 if (EQ (attribute, Fcar (Fcar (r))))
603 XCDR (pr) = Fcdr (r);
610 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute' (three arguments: CHARACTER,
   ATTRIBUTE, VALUE).  After CHECK_CHAR the visible code handles three
   kinds of ATTRIBUTE before storing the pair with put_char_attribute():

   - ATTRIBUTE names a charset (Ffind_charset): VALUE is normalised to an
     integer code point (a list of bytes is packed 8 bits at a time, and
     for charsets with XCHARSET_GRAPHIC == 1 the high bits are masked off),
     the character's previous position is cleared from the charset's
     decoding table, and CHARACTER is stored at the new position, creating
     nested vectors of length ccs_len as needed.

   - ATTRIBUTE is Q_decomposition: a multi-element VALUE is entered into
     Vcharacter_composition_table, creating intermediate char-code-tables;
     a single-element VALUE registers CHARACTER as a variant of that
     element in Vcharacter_variant_table.

   - ATTRIBUTE is Q_ucs: CHARACTER is registered as a variant of the given
     code in Vcharacter_variant_table.

   Invalid VALUEs are reported with signal_simple_error().
   NOTE(review): several lines of this definition (braces, loop headers,
   short statements) are not visible here. */
616 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
617 Store CHARACTER's ATTRIBUTE with VALUE.
619 (character, attribute, value))
623 CHECK_CHAR (character);
624 ccs = Ffind_charset (attribute);
627 if (!EQ (XCHARSET_NAME (ccs), Qucs)
628 || (XCHAR (character) != XINT (value)))
630 Lisp_Object cpos, rest;
631 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
638 /* ad-hoc method for `ascii' */
639 if ((XCHARSET_CHARS (ccs) == 94) &&
640 (XCHARSET_BYTE_OFFSET (ccs) != 33))
641 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
643 ccs_len = XCHARSET_CHARS (ccs);
647 Lisp_Object ret = Fcar (value);
650 signal_simple_error ("Invalid value for coded-charset", value);
651 code_point = XINT (ret);
652 if (XCHARSET_GRAPHIC (ccs) == 1)
660 signal_simple_error ("Invalid value for coded-charset",
664 signal_simple_error ("Invalid value for coded-charset",
667 if (XCHARSET_GRAPHIC (ccs) == 1)
669 code_point = (code_point << 8) | j;
672 value = make_int (code_point);
674 else if (INTP (value))
676 if (XCHARSET_GRAPHIC (ccs) == 1)
677 value = make_int (XINT (value) & 0x7F7F7F7F);
680 signal_simple_error ("Invalid value for coded-charset", value);
683 cpos = Fget_char_attribute (character, attribute);
688 dim = XCHARSET_DIMENSION (ccs);
689 code_point = XINT (cpos);
693 i = ((code_point >> (8 * dim)) & 255)
694 - XCHARSET_BYTE_OFFSET (ccs);
695 nv = XVECTOR_DATA(v)[i];
701 XVECTOR_DATA(v)[i] = Qnil;
702 v = XCHARSET_DECODING_TABLE (ccs);
707 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
710 dim = XCHARSET_DIMENSION (ccs);
711 code_point = XINT (value);
716 i = ((code_point >> (8 * dim)) & 255)
717 - XCHARSET_BYTE_OFFSET (ccs);
718 nv = XVECTOR_DATA(v)[i];
722 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
728 XVECTOR_DATA(v)[i] = character;
733 else if (EQ (attribute, Q_decomposition))
736 signal_simple_error ("Invalid value for ->decomposition",
739 if (CONSP (Fcdr (value)))
741 Lisp_Object rest = value;
742 Lisp_Object table = Vcharacter_composition_table;
746 Lisp_Object v = Fcar (rest);
750 "Invalid value for ->decomposition", value);
755 put_char_code_table (c, character, table);
760 ntable = get_char_code_table (c, table);
761 if (!CHAR_CODE_TABLE_P (ntable))
763 ntable = make_char_code_table (Qnil);
764 put_char_code_table (c, ntable, table);
772 Lisp_Object v = Fcar (value);
778 = get_char_code_table (c, Vcharacter_variant_table);
/* The variant list holds characters, and CHARACTER is what gets pushed
   below, so the membership test must look for CHARACTER (as the ->ucs
   branch does).  Testing for the integer V never matches and would add a
   duplicate on every call. */
780 if (NILP (Fmemq (character, ret)))
782 put_char_code_table (c, Fcons (character, ret),
783 Vcharacter_variant_table);
788 else if (EQ (attribute, Q_ucs))
794 signal_simple_error ("Invalid value for ->ucs", value);
798 ret = get_char_code_table (c, Vcharacter_variant_table);
799 if (NILP (Fmemq (character, ret)))
801 put_char_code_table (c, Fcons (character, ret),
802 Vcharacter_variant_table);
805 return put_char_attribute (character, attribute, value);
/* Lisp primitive `remove-char-attribute' (two arguments, CHARACTER and
   ATTRIBUTE).  After CHECK_CHAR, ATTRIBUTE is tried as a charset
   designator via Ffind_charset.  In the visible charset handling the
   character's current code point (from Fget_char_attribute) is used to
   index the charset's decoding table, one byte per dimension, and an
   entry is reset to Qnil.  The attribute is then removed from the
   character's alist via remove_char_attribute(), whose result is
   returned.  NOTE(review): the conditions and loops around the decoding
   table update are not visible here. */
808 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
809 Remove CHARACTER's ATTRIBUTE.
811 (character, attribute))
815 CHECK_CHAR (character);
816 ccs = Ffind_charset (attribute);
820 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
827 /* ad-hoc method for `ascii' */
828 if ((XCHARSET_CHARS (ccs) == 94) &&
829 (XCHARSET_BYTE_OFFSET (ccs) != 33))
830 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
832 ccs_len = XCHARSET_CHARS (ccs);
835 cpos = Fget_char_attribute (character, attribute);
840 dim = XCHARSET_DIMENSION (ccs);
841 code_point = XINT (cpos);
845 i = ((code_point >> (8 * dim)) & 255)
846 - XCHARSET_BYTE_OFFSET (ccs);
847 nv = XVECTOR_DATA(v)[i];
853 XVECTOR_DATA(v)[i] = Qnil;
854 v = XCHARSET_DECODING_TABLE (ccs);
858 return remove_char_attribute (character, attribute);
861 EXFUN (Fmake_char, 3);
862 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char' (exactly one argument, ATTRIBUTES, an
   alist).  The visible code first decides which character the alist
   describes:
     - the value under Qucs, if present, is used as the code (it must be
       an integer, otherwise "Invalid argument" is signalled);
     - otherwise the cells are scanned for one keyed by a charset that has
       a non-zero final byte or a positive UCS maximum, and the character
       is built with Fmake_char or Fdecode_char;
     - otherwise the value under Q_ucs is used, offset by 0x100000.
   It then stores every cell of ATTRIBUTES on that character through
   Fput_char_attribute and finishes with a lookup of the character's entry
   in Vcharacter_attribute_table.  NOTE(review): the loop headers and the
   conditions that order these cases are not visible here. */
864 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
865 Store character's ATTRIBUTES.
869 Lisp_Object rest = attributes;
870 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
871 Lisp_Object character;
877 Lisp_Object cell = Fcar (rest);
881 signal_simple_error ("Invalid argument", attributes);
882 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
883 && ((XCHARSET_FINAL (ccs) != 0) ||
884 (XCHARSET_UCS_MAX (ccs) > 0)) )
888 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
890 character = Fdecode_char (ccs, cell);
891 goto setup_attributes;
895 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
898 signal_simple_error ("Invalid argument", attributes);
900 character = make_char (XINT (code) + 0x100000);
901 goto setup_attributes;
905 else if (!INTP (code))
906 signal_simple_error ("Invalid argument", attributes);
908 character = make_char (XINT (code));
914 Lisp_Object cell = Fcar (rest);
917 signal_simple_error ("Invalid argument", attributes);
918 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
922 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Miscellaneous file-level state: symbols naming charset properties,
   Vcharset_hash_table (make_charset() enters each named charset into it
   with Fputhash), and the counters from which
   get_unallocated_leading_byte() hands out leading bytes.
   NOTE(review): the bare Q... names with trailing commas are fragments of
   a longer declaration list that is not fully visible here. */
925 Lisp_Object Vutf_2000_version;
929 int leading_code_private_11;
932 Lisp_Object Qcharsetp;
934 /* Qdoc_string, Qdimension, Qchars defined in general.c */
935 Lisp_Object Qregistry, Qfinal, Qgraphic;
936 Lisp_Object Qdirection;
937 Lisp_Object Qreverse_direction_charset;
938 Lisp_Object Qleading_byte;
939 Lisp_Object Qshort_name, Qlong_name;
955 Qjapanese_jisx0208_1978,
958 Qjapanese_jisx0208_1990,
969 Qvietnamese_viscii_lower,
970 Qvietnamese_viscii_upper,
1000 Lisp_Object Ql2r, Qr2l;
1002 Lisp_Object Vcharset_hash_table;
1005 static Charset_ID next_allocated_leading_byte;
1007 static Charset_ID next_allocated_1_byte_leading_byte;
1008 static Charset_ID next_allocated_2_byte_leading_byte;
1011 /* Composite characters are characters constructed by overstriking two
1012 or more regular characters.
1014 1) The old Mule implementation involves storing composite characters
1015 in a buffer as a tag followed by all of the actual characters
1016 used to make up the composite character. I think this is a bad
1017 idea; it greatly complicates code that wants to handle strings
1018 one character at a time because it has to deal with the possibility
1019 of great big ungainly characters. It's much more reasonable to
1020 simply store an index into a table of composite characters.
1022 2) The current implementation only allows for 16,384 separate
1023 composite characters over the lifetime of the XEmacs process.
1024 This could become a potential problem if the user
1025 edited lots of different files that use composite characters.
1026 Due to FSF bogosity, increasing the number of allowable
1027 composite characters under Mule would decrease the number
1028 of possible faces that can exist. Mule already has shrunk
1029 this to 2048, and further shrinkage would become uncomfortable.
1030 No such problems exist in XEmacs.
1032 Composite characters could be represented as 0x80 C1 C2 C3,
1033 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1034 for slightly under 2^20 (one million) composite characters
1035 over the XEmacs process lifetime, and you only need to
1036 increase the size of a Mule character from 19 to 21 bits.
1037 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1038 85 million (slightly over 2^26) composite characters. */
1041 /************************************************************************/
1042 /* Basic Emchar functions */
1043 /************************************************************************/
1045 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1046 string in STR. Returns the number of bytes stored.
1047 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encode character C into the byte buffer STR.
   Two encodings are visible in this body.  NOTE(review): presumably they
   are alternatives selected by a preprocessor conditional that is not
   visible here -- confirm.

   1. A multi-byte form chosen by magnitude of C:
        C <= 0x7ff      2 bytes, lead byte 0xc0 | top bits
        C <= 0xffff     3 bytes, lead byte 0xe0 | top bits
        C <= 0x1fffff   4 bytes, lead byte 0xf0 | top bits
        C <= 0x3ffffff  5 bytes, lead byte 0xf8 | top bits
        otherwise       6 bytes, lead byte 0xfc | top bits
      Every following byte carries 6 bits of C as 0x80 | bits.

   2. A leading-byte form: C is split with BREAKUP_CHAR into charset and
      position codes; a private leading byte is preceded by
      PRIVATE_LEADING_BYTE_PREFIX, and Vcharset_control_1 is tested for
      separately. */
1051 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1057 Lisp_Object charset;
1066 else if ( c <= 0x7ff )
1068 *p++ = (c >> 6) | 0xc0;
1069 *p++ = (c & 0x3f) | 0x80;
1071 else if ( c <= 0xffff )
1073 *p++ = (c >> 12) | 0xe0;
1074 *p++ = ((c >> 6) & 0x3f) | 0x80;
1075 *p++ = (c & 0x3f) | 0x80;
1077 else if ( c <= 0x1fffff )
1079 *p++ = (c >> 18) | 0xf0;
1080 *p++ = ((c >> 12) & 0x3f) | 0x80;
1081 *p++ = ((c >> 6) & 0x3f) | 0x80;
1082 *p++ = (c & 0x3f) | 0x80;
1084 else if ( c <= 0x3ffffff )
1086 *p++ = (c >> 24) | 0xf8;
1087 *p++ = ((c >> 18) & 0x3f) | 0x80;
1088 *p++ = ((c >> 12) & 0x3f) | 0x80;
1089 *p++ = ((c >> 6) & 0x3f) | 0x80;
1090 *p++ = (c & 0x3f) | 0x80;
1094 *p++ = (c >> 30) | 0xfc;
1095 *p++ = ((c >> 24) & 0x3f) | 0x80;
1096 *p++ = ((c >> 18) & 0x3f) | 0x80;
1097 *p++ = ((c >> 12) & 0x3f) | 0x80;
1098 *p++ = ((c >> 6) & 0x3f) | 0x80;
1099 *p++ = (c & 0x3f) | 0x80;
1102 BREAKUP_CHAR (c, charset, c1, c2);
1103 lb = CHAR_LEADING_BYTE (c);
1104 if (LEADING_BYTE_PRIVATE_P (lb))
1105 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1107 if (EQ (charset, Vcharset_control_1))
1116 /* Return the first character from a Mule-encoded string in STR,
1117 assuming it's non-ASCII. Do not call this directly.
1118 Use the macro charptr_emchar() instead. */
/* Two decoders are visible in this body.  NOTE(review): presumably they
   are alternatives selected by a preprocessor conditional that is not
   visible here -- confirm.

   1. The first byte is classified by thresholds 0xf8, 0xf0, 0xe0 and
      0xc0, after which a loop runs `len' times shifting the accumulated
      value left by 6 and OR-ing in the low 6 bits of a byte.

   2. The leading-byte form: LEADING_BYTE_CONTROL_1 yields the next byte
      minus 0x20; a prefix byte is recognised with LEADING_BYTE_PREFIX_P;
      the charset is found with CHARSET_BY_LEADING_BYTE, a second position
      byte is used only when the charset's dimension is 2, and the result
      is built with MAKE_CHAR. */
1121 non_ascii_charptr_emchar (CONST Bufbyte *str)
1134 else if ( b >= 0xf8 )
1139 else if ( b >= 0xf0 )
1144 else if ( b >= 0xe0 )
1149 else if ( b >= 0xc0 )
1159 for( ; len > 0; len-- )
1162 ch = ( ch << 6 ) | ( b & 0x3f );
1166 Bufbyte i0 = *str, i1, i2 = 0;
1167 Lisp_Object charset;
1169 if (i0 == LEADING_BYTE_CONTROL_1)
1170 return (Emchar) (*++str - 0x20);
1172 if (LEADING_BYTE_PREFIX_P (i0))
1177 charset = CHARSET_BY_LEADING_BYTE (i0);
1178 if (XCHARSET_DIMENSION (charset) == 2)
1181 return MAKE_CHAR (charset, i1, i2);
1185 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1186 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3 and two cases are
   visible:
     - one validates f2 against the official and private FIELD2 ranges,
       tests f3 against 0x20 and 0x7F, and finds the charset from
       f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE;
     - the other validates f1 against the official and private FIELD1
       ranges, tests f2 and f3 against 0x20 and 0x7F, optionally (under
       ENABLE_COMPOSITE_CHARS) consults the composite-char hash table, and
       finds the charset from f1 plus the official or private FIELD1
       offset.
   In both cases the lookup result is compared with Qnil, and the last
   visible return is whether the charset has 96 characters.
   NOTE(review): the condition that selects between the two cases and most
   of the return statements are not visible here. */
1190 non_ascii_valid_char_p (Emchar ch)
1194 /* Must have only lowest 19 bits set */
1198 f1 = CHAR_FIELD1 (ch);
1199 f2 = CHAR_FIELD2 (ch);
1200 f3 = CHAR_FIELD3 (ch);
1204 Lisp_Object charset;
1206 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1207 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1208 f2 > MAX_CHAR_FIELD2_PRIVATE)
1213 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1214 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1218 NOTE: This takes advantage of the fact that
1219 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1220 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1222 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1223 if (EQ (charset, Qnil))
1225 return (XCHARSET_CHARS (charset) == 96);
1229 Lisp_Object charset;
1231 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1232 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1233 f1 > MAX_CHAR_FIELD1_PRIVATE)
1235 if (f2 < 0x20 || f3 < 0x20)
1238 #ifdef ENABLE_COMPOSITE_CHARS
1239 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1241 if (UNBOUNDP (Fgethash (make_int (ch),
1242 Vcomposite_char_char2string_hash_table,
1247 #endif /* ENABLE_COMPOSITE_CHARS */
1249 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1250 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1253 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1255 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1258 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1260 if (EQ (charset, Qnil))
1262 return (XCHARSET_CHARS (charset) == 96);
1268 /************************************************************************/
1269 /* Basic string functions */
1270 /************************************************************************/
1272 /* Copy the character pointed to by PTR into STR, assuming it's
1273 non-ASCII. Do not call this directly. Use the macro
1274 charptr_copy_char() instead. */
/* The switch dispatches on the representation length of the first byte
   and deliberately falls through, so a character of N bytes executes the
   last N-1 cases, each copying one more byte from PTR to STR.  The return
   value, strptr + 1 - str, is the number of bytes written to STR.
   NOTE(review): the switch reads *strptr, so the first byte must already
   have been stored there; that statement is not visible here -- confirm. */
1277 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1279 Bufbyte *strptr = str;
1281 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1283 /* Notice fallthrough. */
1285 case 6: *++strptr = *ptr++;
1286 case 5: *++strptr = *ptr++;
1288 case 4: *++strptr = *ptr++;
1289 case 3: *++strptr = *ptr++;
1290 case 2: *++strptr = *ptr;
1295 return strptr + 1 - str;
1299 /************************************************************************/
1300 /* streams of Emchars */
1301 /************************************************************************/
1303 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1304 The functions below are not meant to be called directly; use
1305 the macros in insdel.h. */
/* Finish reading one multi-byte character from STREAM, given that its
   first byte CH has already been read.  CH is stored as str[0]; the switch
   on the representation length of CH then falls through its cases, each
   of which reads one further byte with Lstream_getc() and appends it to
   the local buffer.  The assembled bytes are decoded with
   charptr_emchar() and the resulting character is returned.
   NOTE(review): the case labels and any checks on the Lstream_getc()
   results are not visible here. */
1308 Lstream_get_emchar_1 (Lstream *stream, int ch)
1310 Bufbyte str[MAX_EMCHAR_LEN];
1311 Bufbyte *strptr = str;
1313 str[0] = (Bufbyte) ch;
1314 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1316 /* Notice fallthrough. */
1319 ch = Lstream_getc (stream);
1321 *++strptr = (Bufbyte) ch;
1323 ch = Lstream_getc (stream);
1325 *++strptr = (Bufbyte) ch;
1328 ch = Lstream_getc (stream);
1330 *++strptr = (Bufbyte) ch;
1332 ch = Lstream_getc (stream);
1334 *++strptr = (Bufbyte) ch;
1336 ch = Lstream_getc (stream);
1338 *++strptr = (Bufbyte) ch;
1343 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar(), and the resulting bytes
   are passed to Lstream_write(), whose result is returned. */
1347 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1349 Bufbyte str[MAX_EMCHAR_LEN];
1350 Bytecount len = set_charptr_emchar (str, ch);
1351 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local buffer
   of MAX_EMCHAR_LEN bytes with set_charptr_emchar(), and the resulting
   bytes are handed to Lstream_unread(). */
1355 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1357 Bufbyte str[MAX_EMCHAR_LEN];
1358 Bytecount len = set_charptr_emchar (str, ch);
1359 Lstream_unread (stream, str, len);
1363 /************************************************************************/
1364 /* charset object */
1365 /************************************************************************/
/* Mark method for charset objects: calls mark_object() on the short name,
   long name, doc string, registry, CCL program and decoding table of the
   charset.  NOTE(review): the return statement is not visible here;
   charset_description also lists the name and reverse_direction_charset
   members, which are not marked in the visible lines -- presumably one of
   them is the return value; confirm the other is covered. */
1368 mark_charset (Lisp_Object obj)
1370 Lisp_Charset *cs = XCHARSET (obj);
1372 mark_object (cs->short_name);
1373 mark_object (cs->long_name);
1374 mark_object (cs->doc_string);
1375 mark_object (cs->registry);
1376 mark_object (cs->ccl_program);
1378 mark_object (cs->decoding_table);
/* Print method for charset objects.  Writes to PRINTCHARFUN, in order:
   "#<charset ", the charset name, the short name, the long name and the
   doc string (the three strings are printed with escaping enabled), then a
   formatted summary of size/dimension, direction ("l2r" or "r2l"),
   columns, graphic register and final byte, then the registry, and last
   the object's uid in hex followed by ">".
   The error() call reports that the object has no readable printed form.
   NOTE(review): the condition guarding that error (presumably a
   print-readably check), the declaration of `buf' and its size are not
   visible here. */
1384 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1386 Lisp_Charset *cs = XCHARSET (obj);
1390 error ("printing unreadable object #<charset %s 0x%x>",
1391 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1394 write_c_string ("#<charset ", printcharfun);
1395 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1396 write_c_string (" ", printcharfun);
1397 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1398 write_c_string (" ", printcharfun);
1399 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1400 write_c_string (" ", printcharfun);
1401 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1402 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1404 CHARSET_DIMENSION (cs),
1405 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1406 CHARSET_COLUMNS (cs),
1407 CHARSET_GRAPHIC (cs),
1408 CHARSET_FINAL (cs));
1409 write_c_string (buf, printcharfun);
1410 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1411 sprintf (buf, " 0x%x>", cs->header.uid);
1412 write_c_string (buf, printcharfun);
/* Field description for Lisp_Charset: lists the members that hold Lisp
   objects (name, doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program and decoding_table).
   NOTE(review): the end of the initializer is not visible here. */
1415 static const struct lrecord_description charset_description[] = {
1416 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1417 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1418 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1419 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1420 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1421 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1422 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1424 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
/* Defines the "charset" lrecord type with mark_charset, print_charset and
   charset_description.  Zeros are passed in the argument positions where
   the char-byte-table definition in this file passes its equal and hash
   methods, so charsets have neither.  NOTE(review): the final argument of
   this macro call is not visible here. */
1429 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1430 mark_charset, print_charset, 0, 0, 0,
1431 charset_description,
1433 /* Make a new charset. */
/* Allocates a Lisp_Charset record, wraps it into `obj' and fills it in:
     - identity and naming fields, size (CHARS, DIMENSION), DIRECTION,
       COLUMNS, GRAPHIC, FINAL, doc string and registry come straight from
       the arguments;
     - the CCL program and reverse-direction charset start out as Qnil;
     - the UCS range, code offset and byte offset come from the arguments;
     - CHARSET_TYPE is derived from the (CHARS, DIMENSION) pair by the
       nested switches, giving one of the CHARSET_TYPE_94 ... 256X256
       constants;
     - CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII, otherwise
       DIMENSION + 1 or DIMENSION + 2.
   The new charset is then recorded in chlook->charset_by_attributes and
   chlook->charset_by_leading_byte (each slot is asserted to be empty
   first) and entered under NAME in Vcharset_hash_table.
   NOTE(review): the DECODING_TABLE argument is accepted, but the visible
   assignment stores Qnil in CHARSET_DECODING_TABLE -- confirm that is
   intended.  The case labels, the conditions choosing between the two
   REP_BYTES formulas and between the two charset_by_attributes layouts,
   and the return statement are not visible here. */
1436 make_charset (Charset_ID id, Lisp_Object name,
1437 unsigned short chars, unsigned char dimension,
1438 unsigned char columns, unsigned char graphic,
1439 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1440 Lisp_Object long_name, Lisp_Object doc,
1442 Lisp_Object decoding_table,
1443 Emchar ucs_min, Emchar ucs_max,
1444 Emchar code_offset, unsigned char byte_offset)
1446 unsigned char type = 0;
1448 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1452 XSETCHARSET (obj, cs);
1454 CHARSET_ID (cs) = id;
1455 CHARSET_NAME (cs) = name;
1456 CHARSET_SHORT_NAME (cs) = short_name;
1457 CHARSET_LONG_NAME (cs) = long_name;
1458 CHARSET_CHARS (cs) = chars;
1459 CHARSET_DIMENSION (cs) = dimension;
1460 CHARSET_DIRECTION (cs) = direction;
1461 CHARSET_COLUMNS (cs) = columns;
1462 CHARSET_GRAPHIC (cs) = graphic;
1463 CHARSET_FINAL (cs) = final;
1464 CHARSET_DOC_STRING (cs) = doc;
1465 CHARSET_REGISTRY (cs) = reg;
1466 CHARSET_CCL_PROGRAM (cs) = Qnil;
1467 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1469 CHARSET_DECODING_TABLE(cs) = Qnil;
1470 CHARSET_UCS_MIN(cs) = ucs_min;
1471 CHARSET_UCS_MAX(cs) = ucs_max;
1472 CHARSET_CODE_OFFSET(cs) = code_offset;
1473 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1476 switch (CHARSET_CHARS (cs))
1479 switch (CHARSET_DIMENSION (cs))
1482 type = CHARSET_TYPE_94;
1485 type = CHARSET_TYPE_94X94;
1490 switch (CHARSET_DIMENSION (cs))
1493 type = CHARSET_TYPE_96;
1496 type = CHARSET_TYPE_96X96;
1502 switch (CHARSET_DIMENSION (cs))
1505 type = CHARSET_TYPE_128;
1508 type = CHARSET_TYPE_128X128;
1513 switch (CHARSET_DIMENSION (cs))
1516 type = CHARSET_TYPE_256;
1519 type = CHARSET_TYPE_256X256;
1526 CHARSET_TYPE (cs) = type;
1530 if (id == LEADING_BYTE_ASCII)
1531 CHARSET_REP_BYTES (cs) = 1;
1533 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1535 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1540 /* some charsets do not have final characters. This includes
1541 ASCII, Control-1, Composite, and the two faux private
1544 if (code_offset == 0)
1546 assert (NILP (chlook->charset_by_attributes[type][final]));
1547 chlook->charset_by_attributes[type][final] = obj;
1550 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1551 chlook->charset_by_attributes[type][final][direction] = obj;
1555 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1556 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1558 /* Some charsets are "faux" and don't have names or really exist at
1559 all except in the leading-byte table. */
1561 Fputhash (name, obj, Vcharset_hash_table);
/* Reserve and return the next unused private leading byte for a new
   charset of DIMENSION.  Three allocation counters appear below: a
   shared one (next_allocated_leading_byte) and a per-size pair
   (next_allocated_1_byte_leading_byte and
   next_allocated_2_byte_leading_byte).  Each counter is compared with
   its own MAX_LEADING_BYTE_PRIVATE[_1|_2] limit before being
   post-incremented into lb.  The tests that select between the
   counters are on lines not visible in this chunk (presumably a
   build-time switch plus a test on DIMENSION -- TODO confirm), as is
   the call that raises "No more character sets free for this
   dimension", which reports DIMENSION as its datum.  */
1566 get_unallocated_leading_byte (int dimension)
1571 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1574 lb = next_allocated_leading_byte++;
1578 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1581 lb = next_allocated_1_byte_leading_byte++;
1585 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1588 lb = next_allocated_2_byte_leading_byte++;
1594 ("No more character sets free for this dimension",
1595 make_int (dimension));
/* Compute arithmetically the character that position (C1, C2) of
   CHARSET denotes.

   When CHARSET has a non-zero UCS_MAX, its positions map linearly
   onto [UCS_MIN, UCS_MAX]: BYTE_OFFSET is removed from each octet, a
   two-octet position is flattened with CHARS as the radix, and the
   result is shifted by -CODE_OFFSET + UCS_MIN.  A result outside
   [UCS_MIN, UCS_MAX] signals an error.

   Otherwise the character is derived from the final byte: each final
   byte, counted from '0', owns a slot of 94 or 96 codes (dimension 1)
   or 94*94 / 96*96 codes (dimension 2) above a base constant, and the
   octets are rebased by 33 (94-sets) or 32 (96-sets).  The base
   constants for the one-octet case and the handling of other CHARS
   values are on lines not visible in this chunk.  */
1602 make_builtin_char (Lisp_Object charset, int c1, int c2)
/* Range-mapped charsets: a zero UCS_MAX marks "no linear range".  */
1604 if (XCHARSET_UCS_MAX (charset))
1607 = (XCHARSET_DIMENSION (charset) == 1
1609 c1 - XCHARSET_BYTE_OFFSET (charset)
1611 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1612 + c2 - XCHARSET_BYTE_OFFSET (charset))
1613 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1614 if ((code < XCHARSET_UCS_MIN (charset))
1615 || (XCHARSET_UCS_MAX (charset) < code))
1616 signal_simple_error ("Arguments makes invalid character",
/* Final-byte addressed charsets, one octet per character.  */
1620 else if (XCHARSET_DIMENSION (charset) == 1)
1622 switch (XCHARSET_CHARS (charset))
1626 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1629 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1636 switch (XCHARSET_CHARS (charset))
1639 return MIN_CHAR_94x94
1640 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1641 + (c1 - 33) * 94 + (c2 - 33);
1643 return MIN_CHAR_96x96
1644 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1645 + (c1 - 32) * 96 + (c2 - 32);
/* Compute arithmetically the code point of character CH in CHARSET,
   i.e. the inverse of the arithmetic done by make_builtin_char.

   If CH lies in [UCS_MIN, UCS_MAX], d is its zero-based index in the
   charset (shifted by CODE_OFFSET).  For dimensions 1 to 4 d is split
   into digits with CHARS as the radix; BYTE_OFFSET is added to every
   digit and the digits are packed eight bits apiece, most significant
   first.  The value returned for 256-character sets is on a line not
   visible here.

   Otherwise, when CODE_OFFSET is zero, CH is measured against the
   slot its final byte owns above MIN_CHAR_94 / MIN_CHAR_96 /
   MIN_CHAR_94x94 / MIN_CHAR_96x96; two-octet results are rebased by
   33 (94-sets) or 32 (96-sets) and packed the same way.  The upper
   bound of each slot test and the value returned when CH does not
   belong to CHARSET are on lines not visible in this chunk.  */
1653 range_charset_code_point (Lisp_Object charset, Emchar ch)
/* Case 1: CH falls inside the charset's linear UCS range.  */
1657 if ((XCHARSET_UCS_MIN (charset) <= ch)
1658 && (ch <= XCHARSET_UCS_MAX (charset)))
1660 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1662 if (XCHARSET_CHARS (charset) == 256)
1664 else if (XCHARSET_DIMENSION (charset) == 1)
1665 return d + XCHARSET_BYTE_OFFSET (charset);
1666 else if (XCHARSET_DIMENSION (charset) == 2)
1668 ((d / XCHARSET_CHARS (charset)
1669 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1670 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1671 else if (XCHARSET_DIMENSION (charset) == 3)
1673 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1674 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1675 | ((d / XCHARSET_CHARS (charset)
1676 % XCHARSET_CHARS (charset)
1677 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1678 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1679 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1681 ((d / (XCHARSET_CHARS (charset)
1682 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1683 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1684 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1685 % XCHARSET_CHARS (charset)
1686 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1687 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1688 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1689 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* Case 2: charsets addressed through their final byte.  */
1691 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1693 if (XCHARSET_DIMENSION (charset) == 1)
1695 if (XCHARSET_CHARS (charset) == 94)
1697 if (((d = ch - (MIN_CHAR_94
1698 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1702 else if (XCHARSET_CHARS (charset) == 96)
1704 if (((d = ch - (MIN_CHAR_96
1705 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1712 else if (XCHARSET_DIMENSION (charset) == 2)
1714 if (XCHARSET_CHARS (charset) == 94)
1716 if (((d = ch - (MIN_CHAR_94x94
1717 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1720 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1722 else if (XCHARSET_CHARS (charset) == 96)
1724 if (((d = ch - (MIN_CHAR_96x96
1725 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1728 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C purely from the code
   range C falls in, store that charset in *CHARSET and return C's
   code point within it.

   The ranges are tested in ascending order: Basic Latin (ascii), then
   control-1 and latin-iso8859-1 (their range tests are on lines not
   visible here), Greek, Cyrillic, Hebrew and Thai (code point is the
   offset into the range plus 0x20), the BMP (ucs-bmp), Daikanwa and
   Mojikyo (code point is the offset into the range), and finally the
   four final-byte areas ending at MAX_CHAR_94, MAX_CHAR_96,
   MAX_CHAR_94x94 and MAX_CHAR_96x96.  In those areas the final byte
   is recovered from C, the left-to-right charset is looked up with
   CHARSET_BY_ATTRIBUTES, and if none is registered *CHARSET falls
   back to Vcharset_ucs.  Everything else is assigned to Vcharset_ucs.
   Several return statements are on lines not visible in this chunk.

   NOTE(review): the half-width katakana branch returns a list2 (...)
   value instead of setting *CHARSET like every other visible branch;
   it may be disabled by lines not visible here -- confirm.  */
1738 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1740 if (c <= MAX_CHAR_BASIC_LATIN)
1742 *charset = Vcharset_ascii;
1747 *charset = Vcharset_control_1;
1752 *charset = Vcharset_latin_iso8859_1;
1756 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1758 *charset = Vcharset_greek_iso8859_7;
1759 return c - MIN_CHAR_GREEK + 0x20;
1761 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1763 *charset = Vcharset_cyrillic_iso8859_5;
1764 return c - MIN_CHAR_CYRILLIC + 0x20;
1767 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1769 *charset = Vcharset_hebrew_iso8859_8;
1770 return c - MIN_CHAR_HEBREW + 0x20;
1772 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1774 *charset = Vcharset_thai_tis620;
1775 return c - MIN_CHAR_THAI + 0x20;
1778 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1779 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1781 return list2 (Vcharset_katakana_jisx0201,
1782 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1785 else if (c <= MAX_CHAR_BMP)
1787 *charset = Vcharset_ucs_bmp;
1790 else if (c < MIN_CHAR_DAIKANWA)
1792 *charset = Vcharset_ucs;
1796 else if (c <= MAX_CHAR_DAIKANWA)
1798 *charset = Vcharset_ideograph_daikanwa;
1799 return c - MIN_CHAR_DAIKANWA;
1802 else if (c <= MAX_CHAR_MOJIKYO)
1804 *charset = Vcharset_mojikyo;
1805 return c - MIN_CHAR_MOJIKYO;
1807 else if (c < MIN_CHAR_94)
1809 *charset = Vcharset_ucs;
1812 else if (c <= MAX_CHAR_94)
1814 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1815 ((c - MIN_CHAR_94) / 94) + '0',
1816 CHARSET_LEFT_TO_RIGHT);
1817 if (!NILP (*charset))
1818 return ((c - MIN_CHAR_94) % 94) + 33;
1821 *charset = Vcharset_ucs;
1825 else if (c <= MAX_CHAR_96)
1827 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1828 ((c - MIN_CHAR_96) / 96) + '0',
1829 CHARSET_LEFT_TO_RIGHT);
1830 if (!NILP (*charset))
1831 return ((c - MIN_CHAR_96) % 96) + 32;
1834 *charset = Vcharset_ucs;
1838 else if (c <= MAX_CHAR_94x94)
1841 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1842 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1843 CHARSET_LEFT_TO_RIGHT);
1844 if (!NILP (*charset))
1845 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1846 | (((c - MIN_CHAR_94x94) % 94) + 33);
1849 *charset = Vcharset_ucs;
1853 else if (c <= MAX_CHAR_96x96)
1856 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1857 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1858 CHARSET_LEFT_TO_RIGHT);
1859 if (!NILP (*charset))
1860 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1861 | (((c - MIN_CHAR_96x96) % 96) + 32);
1864 *charset = Vcharset_ucs;
1870 *charset = Vcharset_ucs;
/* Global Lisp value; judging by its name it holds the default
   priority order of coded charsets.  Nothing in this chunk
   initializes or reads it -- TODO confirm where it is used.  */
1875 Lisp_Object Vdefault_coded_charset_priority_list;
1879 /************************************************************************/
1880 /* Basic charset Lisp functions */
1881 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when CHARSETP accepts OBJECT
   and Qnil for anything else.  */
1883 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1884 Return non-nil if OBJECT is a charset.
1888 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is handed back
   unchanged; any other argument must pass CHECK_SYMBOL and is then
   looked up in Vcharset_hash_table, with Qnil as the value for a
   missing key.  */
1891 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1892 Retrieve the charset of the given name.
1893 If CHARSET-OR-NAME is a charset object, it is simply returned.
1894 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1895 nil is returned. Otherwise the associated charset object is returned.
1899 if (CHARSETP (charset_or_name))
1900 return charset_or_name;
1902 CHECK_SYMBOL (charset_or_name);
1903 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': the strict variant of `find-charset'.
   It delegates the lookup to Ffind_charset and signals "No such
   charset" with NAME as the datum.  The test guarding the signal and
   the return statement are on lines not visible in this chunk; per
   the doc string the signal fires when no charset was found.  */
1906 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1907 Retrieve the charset of the given name.
1908 Same as `find-charset' except an error is signalled if there is no such
1909 charset instead of returning nil.
1913 Lisp_Object charset = Ffind_charset (name);
1916 signal_simple_error ("No such charset", name);
1920 /* We store the charsets in hash tables with the names as the key and the
1921 actual charset object as the value. Occasionally we need to use them
1922 in a list format. These routines provide us with that. */
/* Closure passed through the opaque pointer argument of the hash-table
   mapper below; its only visible member points at the Lisp list that
   is being accumulated.  */
1923 struct charset_list_closure
1925 Lisp_Object *charset_list;
/* Hash-table mapping callback used by `charset-list'.  It recovers
   the struct charset_list_closure from the opaque pointer and conses
   the NAME slot of VALUE (a charset) onto the front of the list the
   closure points to.  KEY is not used in the visible lines; the
   return statement is on a line not visible in this chunk.  */
1929 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1930 void *charset_list_closure)
1932 /* This function can GC */
1933 struct charset_list_closure *chcl =
1934 (struct charset_list_closure*) charset_list_closure;
1935 Lisp_Object *charset_list = chcl->charset_list;
1937 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty list that is
   protected with GCPRO1, then lets elisp_maphash apply
   add_charset_to_list_mapper to Vcharset_hash_table with a closure
   pointing at that list, and returns the list.
   NOTE(review): the mapper pushes the charset's own name for every
   table entry, so a charset that is also registered under an alias
   key (see `define-charset-alias') would presumably appear more than
   once -- confirm.  */
1941 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1942 Return a list of the names of all defined charsets.
1946 Lisp_Object charset_list = Qnil;
1947 struct gcpro gcpro1;
1948 struct charset_list_closure charset_list_closure;
1950 GCPRO1 (charset_list);
1951 charset_list_closure.charset_list = &charset_list;
1952 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1953 &charset_list_closure);
1956 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET through Fget_charset
   (so a charset object or a name is accepted) and returns the
   charset's NAME slot.  */
1959 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1960 Return the name of the given charset.
1964 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.  NAME must be a symbol that does not
   already name a charset, and DOC-STRING a string or nil.  PROPS is
   walked with EXTERNAL_PROPERTY_LIST_LOOP and every keyword is
   dispatched through a single if / else-if chain whose last branch
   signals "Unrecognized property".  The 'long-name test belongs to
   that chain (`else if'): written as a free-standing `if' it let a
   'short-name entry, already accepted by its own branch, run through
   the remaining tests and be rejected as unrecognized.

   After the loop the final byte is validated (at most 0x5F when the
   dimension is 2), the DIMENSION/CHARS/FINAL combination is checked
   against existing charsets in both directions, a leading byte is
   obtained from get_unallocated_leading_byte, and unset values get
   defaults: empty doc string and registry, the symbol's name as short
   name, the doc string as long name, the dimension as column count.
   make_charset then builds the object, and a supplied 'ccl-program
   vector is stored in it afterwards.  */
1967 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1968 Define a new character set.
1969 This function is for use with Mule support.
1970 NAME is a symbol, the name by which the character set is normally referred.
1971 DOC-STRING is a string describing the character set.
1972 PROPS is a property list, describing the specific nature of the
1973 character set. Recognized properties are:
1975 'short-name Short version of the charset name (ex: Latin-1)
1976 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1977 'registry A regular expression matching the font registry field for
1979 'dimension Number of octets used to index a character in this charset.
1980 Either 1 or 2. Defaults to 1.
1981 'columns Number of columns used to display a character in this charset.
1982 Only used in TTY mode. (Under X, the actual width of a
1983 character can be derived from the font used to display the
1984 characters.) If unspecified, defaults to the dimension
1985 (this is almost always the correct value).
1986 'chars Number of characters in each dimension (94 or 96).
1987 Defaults to 94. Note that if the dimension is 2, the
1988 character set thus described is 94x94 or 96x96.
1989 'final Final byte of ISO 2022 escape sequence. Must be
1990 supplied. Each combination of (DIMENSION, CHARS) defines a
1991 separate namespace for final bytes. Note that ISO
1992 2022 restricts the final byte to the range
1993 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1994 dimension == 2. Note also that final bytes in the range
1995 0x30 - 0x3F are reserved for user-defined (not official)
1997 'graphic 0 (use left half of font on output) or 1 (use right half
1998 of font on output). Defaults to 0. For example, for
1999 a font whose registry is ISO8859-1, the left half
2000 (octets 0x20 - 0x7F) is the `ascii' character set, while
2001 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2002 character set. With 'graphic set to 0, the octets
2003 will have their high bit cleared; with it set to 1,
2004 the octets will have their high bit set.
2005 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2007 'ccl-program A compiled CCL program used to convert a character in
2008 this charset into an index into the font. This is in
2009 addition to the 'graphic property. The CCL program
2010 is passed the octets of the character, with the high
2011 bit cleared and set depending upon whether the value
2012 of the 'graphic property is 0 or 1.
2014 (name, doc_string, props))
2016 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2017 int direction = CHARSET_LEFT_TO_RIGHT;
2019 Lisp_Object registry = Qnil;
2020 Lisp_Object charset;
2021 Lisp_Object rest, keyword, value;
2022 Lisp_Object ccl_program = Qnil;
2023 Lisp_Object short_name = Qnil, long_name = Qnil;
2024 int byte_offset = -1;
2026 CHECK_SYMBOL (name);
2027 if (!NILP (doc_string))
2028 CHECK_STRING (doc_string);
2030 charset = Ffind_charset (name);
2031 if (!NILP (charset))
2032 signal_simple_error ("Cannot redefine existing charset", name);
2034 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2036 if (EQ (keyword, Qshort_name))
2038 CHECK_STRING (value);
2042 else if (EQ (keyword, Qlong_name))
2044 CHECK_STRING (value);
2048 else if (EQ (keyword, Qdimension))
2051 dimension = XINT (value);
2052 if (dimension < 1 || dimension > 2)
2053 signal_simple_error ("Invalid value for 'dimension", value);
2056 else if (EQ (keyword, Qchars))
2059 chars = XINT (value);
2060 if (chars != 94 && chars != 96)
2061 signal_simple_error ("Invalid value for 'chars", value);
2064 else if (EQ (keyword, Qcolumns))
2067 columns = XINT (value);
2068 if (columns != 1 && columns != 2)
2069 signal_simple_error ("Invalid value for 'columns", value);
2072 else if (EQ (keyword, Qgraphic))
2075 graphic = XINT (value);
2077 if (graphic < 0 || graphic > 2)
2079 if (graphic < 0 || graphic > 1)
2081 signal_simple_error ("Invalid value for 'graphic", value);
2084 else if (EQ (keyword, Qregistry))
2086 CHECK_STRING (value);
2090 else if (EQ (keyword, Qdirection))
2092 if (EQ (value, Ql2r))
2093 direction = CHARSET_LEFT_TO_RIGHT;
2094 else if (EQ (value, Qr2l))
2095 direction = CHARSET_RIGHT_TO_LEFT;
2097 signal_simple_error ("Invalid value for 'direction", value);
2100 else if (EQ (keyword, Qfinal))
2102 CHECK_CHAR_COERCE_INT (value);
2103 final = XCHAR (value);
2104 if (final < '0' || final > '~')
2105 signal_simple_error ("Invalid value for 'final", value);
2108 else if (EQ (keyword, Qccl_program))
2110 CHECK_VECTOR (value);
2111 ccl_program = value;
2115 signal_simple_error ("Unrecognized property", keyword);
2119 error ("'final must be specified");
2120 if (dimension == 2 && final > 0x5F)
2122 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2126 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2128 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2130 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2131 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2133 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2135 id = get_unallocated_leading_byte (dimension);
2137 if (NILP (doc_string))
2138 doc_string = build_string ("");
2140 if (NILP (registry))
2141 registry = build_string ("");
2143 if (NILP (short_name))
2144 XSETSTRING (short_name, XSYMBOL (name)->name);
2146 if (NILP (long_name))
2147 long_name = doc_string;
2150 columns = dimension;
2152 if (byte_offset < 0)
2156 else if (chars == 96)
2162 charset = make_charset (id, name, chars, dimension, columns, graphic,
2163 final, direction, short_name, long_name,
2164 doc_string, registry,
2165 Qnil, 0, 0, 0, byte_offset);
2166 if (!NILP (ccl_program))
2167 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Rejects a CHARSET
   that already has a reverse-direction partner and a NEW-NAME that
   already names a charset.  It then copies chars, dimension, columns,
   graphic, final byte, doc string, short and long names and registry
   from CHARSET, allocates a fresh leading byte for the same dimension,
   flips the direction, and builds the new charset with make_charset.
   Finally the two charsets are linked to each other through their
   REVERSE_DIRECTION_CHARSET slots.
   NOTE(review): the decoding table of CHARSET is passed on to
   make_charset, but make_charset as shown in this file stores Qnil in
   the decoding-table slot regardless of that argument, so the table
   appears not to be inherited -- confirm whether that is intended.  */
2171 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2173 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2174 NEW-NAME is the name of the new charset. Return the new charset.
2176 (charset, new_name))
2178 Lisp_Object new_charset = Qnil;
2179 int id, chars, dimension, columns, graphic, final;
2181 Lisp_Object registry, doc_string, short_name, long_name;
2184 charset = Fget_charset (charset);
2185 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2186 signal_simple_error ("Charset already has reverse-direction charset",
2189 CHECK_SYMBOL (new_name);
2190 if (!NILP (Ffind_charset (new_name)))
2191 signal_simple_error ("Cannot redefine existing charset", new_name);
2193 cs = XCHARSET (charset);
2195 chars = CHARSET_CHARS (cs);
2196 dimension = CHARSET_DIMENSION (cs);
2197 columns = CHARSET_COLUMNS (cs);
2198 id = get_unallocated_leading_byte (dimension);
2200 graphic = CHARSET_GRAPHIC (cs);
2201 final = CHARSET_FINAL (cs);
2202 direction = CHARSET_RIGHT_TO_LEFT;
2203 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2204 direction = CHARSET_LEFT_TO_RIGHT;
2205 doc_string = CHARSET_DOC_STRING (cs);
2206 short_name = CHARSET_SHORT_NAME (cs);
2207 long_name = CHARSET_LONG_NAME (cs);
2208 registry = CHARSET_REGISTRY (cs);
2210 new_charset = make_charset (id, new_name, chars, dimension, columns,
2211 graphic, final, direction, short_name, long_name,
2212 doc_string, registry,
2214 CHARSET_DECODING_TABLE(cs),
2215 CHARSET_UCS_MIN(cs),
2216 CHARSET_UCS_MAX(cs),
2217 CHARSET_CODE_OFFSET(cs),
2218 CHARSET_BYTE_OFFSET(cs)
2224 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2225 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol;
   CHARSET is resolved with Fget_charset and the resulting object is
   stored in Vcharset_hash_table under ALIAS, so name lookups of ALIAS
   find the same charset object.  The value returned is whatever
   Fputhash returns.  No check is made here that ALIAS is unused; an
   existing entry is presumably replaced -- confirm against Fputhash.  */
2230 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2231 Define symbol ALIAS as an alias for CHARSET.
2235 CHECK_SYMBOL (alias);
2236 charset = Fget_charset (charset);
2237 return Fputhash (alias, charset, Vcharset_hash_table);
2240 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   through Fget_charset and returns the content of its
   REVERSE_DIRECTION_CHARSET slot unchanged.  make_charset initializes
   that slot to Qnil, and `make-reverse-direction-charset' fills it
   in, so nil means no partner has been created.  */
2242 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2244 Return the reverse-direction charset parallel to CHARSET, if any.
2245 This is the charset with the same properties (in particular, the same
2246 dimension, number of characters per dimension, and final byte) as
2247 CHARSET but whose characters are displayed in the opposite direction.
2251 charset = Fget_charset (charset);
2252 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION
   (1 or 2), CHARS (94 or 96), FINAL (between '0' and '~', and at most
   0x5F when DIMENSION is 2) and DIRECTION (l2r, r2l or nil; di stays
   -1 for nil).  DIMENSION and CHARS select a CHARSET_TYPE_* value,
   which together with the final byte is looked up through
   CHARSET_BY_ATTRIBUTES: for a given direction only that direction,
   otherwise left-to-right and right-to-left (the conditions joining
   these lookups are on lines not visible in this chunk).
   On success the value returned is the charset's name
   (XCHARSET_NAME), not the charset object; the value returned when
   nothing matches is on a line not visible here.  */
2256 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2257 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2258 If DIRECTION is omitted, both directions will be checked (left-to-right
2259 will be returned if character sets exist for both directions).
2261 (dimension, chars, final, direction))
2263 int dm, ch, fi, di = -1;
2265 Lisp_Object obj = Qnil;
2267 CHECK_INT (dimension);
2268 dm = XINT (dimension);
2269 if (dm < 1 || dm > 2)
2270 signal_simple_error ("Invalid value for DIMENSION", dimension);
2274 if (ch != 94 && ch != 96)
2275 signal_simple_error ("Invalid value for CHARS", chars);
2277 CHECK_CHAR_COERCE_INT (final);
2279 if (fi < '0' || fi > '~')
2280 signal_simple_error ("Invalid value for FINAL", final);
2282 if (EQ (direction, Ql2r))
2283 di = CHARSET_LEFT_TO_RIGHT;
2284 else if (EQ (direction, Qr2l))
2285 di = CHARSET_RIGHT_TO_LEFT;
2286 else if (!NILP (direction))
2287 signal_simple_error ("Invalid value for DIRECTION", direction);
2289 if (dm == 2 && fi > 0x5F)
2291 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2294 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2296 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2300 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2302 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2305 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2308 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET through
   Fget_charset and returns its SHORT_NAME slot.  */
2312 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2313 Return short name of CHARSET.
2317 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET through
   Fget_charset and returns its LONG_NAME slot.  */
2320 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2321 Return long name of CHARSET.
2325 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET through
   Fget_charset and returns its DOC_STRING slot, i.e. the description
   is the doc string the charset was created with.  */
2328 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2329 Return description of CHARSET.
2333 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET through
   Fget_charset and returns its DIMENSION slot boxed with make_int.  */
2336 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2337 Return dimension of CHARSET.
2341 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  CHARSET is resolved through
   Fget_charset and PROP must be a symbol.  Each recognized property
   maps onto one Lisp_Charset slot: name, short-name, long-name,
   doc-string, registry and ccl-program are returned as stored;
   dimension, columns, graphic and chars are boxed with make_int;
   final is boxed with make_char; direction is translated to the
   symbol l2r or r2l; reverse-direction-charset yields the partner's
   name (the handling of a missing partner is on lines not visible in
   this chunk).  Any other PROP signals "Unrecognized charset property
   name".  */
2344 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2345 Return property PROP of CHARSET.
2346 Recognized properties are those listed in `make-charset', as well as
2347 'name and 'doc-string.
2353 charset = Fget_charset (charset);
2354 cs = XCHARSET (charset);
2356 CHECK_SYMBOL (prop);
2357 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2358 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2359 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2360 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2361 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2362 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2363 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2364 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2365 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2366 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2367 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2368 if (EQ (prop, Qdirection))
2369 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2370 if (EQ (prop, Qreverse_direction_charset))
2372 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2376 return XCHARSET_NAME (obj);
2378 signal_simple_error ("Unrecognized charset property name", prop);
2379 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET through Fget_charset
   and returns its leading byte (XCHARSET_LEADING_BYTE) boxed with
   make_int; the identification number is therefore the leading
   byte.  */
2382 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2383 Return charset identification number of CHARSET.
2387 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2390 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is resolved
   through Fget_charset, CCL-PROGRAM must pass CHECK_VECTOR, and the
   vector is stored in the charset's CCL_PROGRAM slot.  The return
   statement is on a line not visible in this chunk.  */
2393 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2394 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2396 (charset, ccl_program))
2398 charset = Fget_charset (charset);
2399 CHECK_VECTOR (ccl_program);
2400 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop every cached font lookup made for CHARSET.  For each device
   visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked up in that
   device's charset_font_cache; when an entry exists (the lookup did
   not yield Qunbound) the hash table found there is emptied with
   Fclrhash.  The entry itself stays in charset_font_cache.  */
2405 invalidate_charset_font_caches (Lisp_Object charset)
2407 /* Invalidate font cache entries for charset on all devices. */
2408 Lisp_Object devcons, concons, hash_table;
2409 DEVICE_LOOP_NO_BREAK (devcons, concons)
2411 struct device *d = XDEVICE (XCAR (devcons));
2412 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2413 if (!UNBOUNDP (hash_table))
2414 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  CHARSET is resolved through
   Fget_charset, REGISTRY must pass CHECK_STRING, and the string is
   stored in the charset's REGISTRY slot.  Because cached fonts were
   chosen under the old registry, the per-device font caches for this
   charset are then cleared and a global change of the default face's
   font property is reported through face_property_was_changed.  The
   return statement is on a line not visible in this chunk.  */
2418 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2419 Set the 'registry property of CHARSET to REGISTRY.
2421 (charset, registry))
2423 charset = Fget_charset (charset);
2424 CHECK_STRING (registry);
2425 XCHARSET_REGISTRY (charset) = registry;
2426 invalidate_charset_font_caches (charset);
2427 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET through
   Fget_charset and returns its DECODING_TABLE slot, which is Qnil or
   a vector as installed by `set-charset-mapping-table'.  */
2432 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2433 Return mapping-table of CHARSET.
2437 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  TABLE must be nil or a
   vector; anything else signals a wrong-type-argument error built
   with "vector-or-nil-p".  A nil TABLE is stored directly.  A vector
   may hold at most ccs_len elements, where ccs_len is CHARS except
   for a 94-character set whose BYTE_OFFSET is not 33 (the `ascii'
   special case), for which it is 128 - BYTE_OFFSET.  The previous
   table is remembered in old_table before TABLE is installed.

   The vector is then walked according to the charset's dimension.
   For dimension 1 each element is visited with the code point
   i + BYTE_OFFSET.  For dimension 2 each element may itself be a
   vector of at most CHARS elements (an over-long row restores
   old_table and signals args-out-of-range); its elements are visited
   with the two-octet code point
   ((i + BYTE_OFFSET) << 8) | (j + BYTE_OFFSET), while a non-vector
   row element is recorded with put_char_attribute under
   i + BYTE_OFFSET.  The calls applied to the visited elements in the
   other cases are on lines not visible in this chunk.

   NOTE(review): the range error for the outer vector reports
   CHARSET_CHARS (cs) although the limit actually tested is ccs_len,
   which differs in the `ascii' special case -- confirm.  */
2440 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2441 Set mapping-table of CHARSET to TABLE.
2445 struct Lisp_Charset *cs;
2446 Lisp_Object old_table;
2449 charset = Fget_charset (charset);
2450 cs = XCHARSET (charset);
2452 if (EQ (table, Qnil))
2454 CHARSET_DECODING_TABLE(cs) = table;
2457 else if (VECTORP (table))
2461 /* ad-hoc method for `ascii' */
2462 if ((CHARSET_CHARS (cs) == 94) &&
2463 (CHARSET_BYTE_OFFSET (cs) != 33))
2464 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2466 ccs_len = CHARSET_CHARS (cs);
2468 if (XVECTOR_LENGTH (table) > ccs_len)
2469 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2470 old_table = CHARSET_DECODING_TABLE(cs);
2471 CHARSET_DECODING_TABLE(cs) = table;
2474 signal_error (Qwrong_type_argument,
2475 list2 (build_translated_string ("vector-or-nil-p"),
2477 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2479 switch (CHARSET_DIMENSION (cs))
2482 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2484 Lisp_Object c = XVECTOR_DATA(table)[i];
2489 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2493 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2495 Lisp_Object v = XVECTOR_DATA(table)[i];
2501 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2503 CHARSET_DECODING_TABLE(cs) = old_table;
2504 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2506 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2508 Lisp_Object c = XVECTOR_DATA(v)[j];
2513 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2514 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2518 put_char_attribute (v, charset,
2519 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2528 /************************************************************************/
2529 /* Lisp primitives for working with characters */
2530 /************************************************************************/
/* Lisp primitive `decode-char'.  CHARSET is resolved through
   Fget_charset and the result is make_char (DECODE_CHAR (charset, c)).
   A test for XCHARSET_GRAPHIC (charset) == 1 precedes the decoding;
   the adjustment it applies, and the extraction of c from CODE, are
   on lines not visible in this chunk.  */
2533 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2534 Make a character from CHARSET and code-point CODE.
2540 charset = Fget_charset (charset);
2543 if (XCHARSET_GRAPHIC (charset) == 1)
2545 return make_char (DECODE_CHAR (charset, c));
/* Lisp primitive `make-char'.  CHARSET is resolved through
   Fget_charset and fixes the valid octet range: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  ARG1 (and, for
   two-dimensional charsets, ARG2 with its 8th bit stripped) must fall
   inside that range or args_out_of_range_3 is signalled.  A
   one-dimensional charset yields MAKE_CHAR (charset, a1, 0) and
   reports an error mentioning ARG2 ("second octet must be nil"; the
   guarding test is on a line not visible here); otherwise the result
   is MAKE_CHAR (charset, a1, a2).  The statements computing a1 are on
   lines not visible in this chunk.  */
2549 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2550 Make a character from CHARSET and octets ARG1 and ARG2.
2551 ARG2 is required only for characters from two-dimensional charsets.
2552 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2553 character s with caron.
2555 (charset, arg1, arg2))
2559 int lowlim, highlim;
2561 charset = Fget_charset (charset);
2562 cs = XCHARSET (charset);
2564 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2565 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2567 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2569 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2570 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2573 /* It is useful (and safe, according to Olivier Galibert) to strip
2574 the 8th bit off ARG1 and ARG2 because it allows programmers to
2575 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2576 Latin 2 code of the character. */
2584 if (a1 < lowlim || a1 > highlim)
2585 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2587 if (CHARSET_DIMENSION (cs) == 1)
2591 ("Charset is of dimension one; second octet must be nil", arg2);
2592 return make_char (MAKE_CHAR (charset, a1, 0));
2601 a2 = XINT (arg2) & 0x7f;
2603 if (a2 < lowlim || a2 > highlim)
2604 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2606 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH must pass CHECK_CHAR_COERCE_INT;
   the charset is obtained with CHAR_CHARSET and the value returned is
   that charset's name (XCHARSET_NAME), not the charset object.  */
2609 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2610 Return the character set of char CH.
2614 CHECK_CHAR_COERCE_INT (ch);
2616 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `char-octet'.  CH must pass CHECK_CHAR_COERCE_INT
   and is split with BREAKUP_CHAR into its charset and two octets.
   N of nil or 0 selects the first octet and N of 1 the second, each
   boxed with make_int; any other N signals "Octet number must be 0
   or 1".  */
2619 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2620 Return the octet numbered N (should be 0 or 1) of char CH.
2621 N defaults to 0 if omitted.
2625 Lisp_Object charset;
2628 CHECK_CHAR_COERCE_INT (ch);
2630 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2632 if (NILP (n) || EQ (n, Qzero))
2633 return make_int (octet0);
2634 else if (EQ (n, make_int (1)))
2635 return make_int (octet1);
2637 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  CHARACTER must pass
   CHECK_CHAR_COERCE_INT; charset and rc are GC-protected with GCPRO2.
   Two implementations are visible, presumably alternatives selected
   by a preprocessor conditional on lines not visible here -- confirm.

   The first obtains the charset and a packed code point from
   ENCODE_CHAR, pushes the low eight bits of the code point onto rc
   once per dimension (the statements advancing code_point and
   dimension between iterations are not visible), and finally conses
   the charset's name in front.

   The second splits the character with BREAKUP_CHAR and builds
   (NAME C1 C2) for a charset of dimension 2 and (NAME C1)
   otherwise.  */
2640 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2641 Return list of charset and one or two position-codes of CHAR.
2645 /* This function can GC */
2646 struct gcpro gcpro1, gcpro2;
2647 Lisp_Object charset = Qnil;
2648 Lisp_Object rc = Qnil;
2656 GCPRO2 (charset, rc);
2657 CHECK_CHAR_COERCE_INT (character);
2660 code_point = ENCODE_CHAR (XCHAR (character), charset);
2661 dimension = XCHARSET_DIMENSION (charset);
2662 while (dimension > 0)
2664 rc = Fcons (make_int (code_point & 255), rc);
2668 rc = Fcons (XCHARSET_NAME (charset), rc);
2670 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2672 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2674 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2678 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2687 #ifdef ENABLE_COMPOSITE_CHARS
2688 /************************************************************************/
2689 /* composite character functions */
2690 /************************************************************************/
/* Return the composite character registered for the LEN bytes at STR,
   registering a new one on first use.  The bytes are turned into a
   Lisp string that serves as the key into
   Vcomposite_char_string2char_hash_table.  For a string not seen
   before (the test is on lines not visible in this chunk) a new
   character is made in Vcharset_composite from the current row and
   column counters and recorded in both directions (char to string and
   string to char).  The column counter then advances; when it reaches
   128 it restarts at 32 and the row counter advances.  Once the row
   counter has reached 128, "No more composite chars available" is
   signalled instead.  */
2693 lookup_composite_char (Bufbyte *str, int len)
2695 Lisp_Object lispstr = make_string (str, len);
2696 Lisp_Object ch = Fgethash (lispstr,
2697 Vcomposite_char_string2char_hash_table,
2703 if (composite_char_row_next >= 128)
2704 signal_simple_error ("No more composite chars available", lispstr);
2705 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2706 composite_char_col_next);
2707 Fputhash (make_char (emch), lispstr,
2708 Vcomposite_char_char2string_hash_table);
2709 Fputhash (lispstr, make_char (emch),
2710 Vcomposite_char_string2char_hash_table);
2711 composite_char_col_next++;
2712 if (composite_char_col_next >= 128)
2714 composite_char_col_next = 32;
2715 composite_char_row_next++;
/* Look up the string registered for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert documents that
   callers may only pass characters that were registered before (an
   unbound lookup result aborts).  The return statement is on a line
   not visible in this chunk.  */
2724 composite_char_string (Emchar ch)
2726 Lisp_Object str = Fgethash (make_char (ch),
2727 Vcomposite_char_char2string_hash_table,
2729 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN).  STRING must pass
   CHECK_STRING; its data and length are handed to
   lookup_composite_char and the resulting character is boxed with
   make_char.  */
2733 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2734 Convert a string into a single composite character.
2735 The character is the result of overstriking all the characters in
2740 CHECK_STRING (string);
2741 return make_char (lookup_composite_char (XSTRING_DATA (string),
2742 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN).  Signals "Must be
   composite char" unless the leading byte of the character is
   LEADING_BYTE_COMPOSITE; otherwise returns the string found by
   composite_char_string.  The statements that validate CH and extract
   emch from it are on lines not visible in this chunk.  */
2745 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2746 Return a string of the characters comprising a composite character.
2754 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2755 signal_simple_error ("Must be composite char", ch);
2756 return composite_char_string (emch);
2758 #endif /* ENABLE_COMPOSITE_CHARS */
2761 /************************************************************************/
2762 /* initialization */
2763 /************************************************************************/
2766 syms_of_mule_charset (void)
2768 DEFSUBR (Fcharsetp);
2769 DEFSUBR (Ffind_charset);
2770 DEFSUBR (Fget_charset);
2771 DEFSUBR (Fcharset_list);
2772 DEFSUBR (Fcharset_name);
2773 DEFSUBR (Fmake_charset);
2774 DEFSUBR (Fmake_reverse_direction_charset);
2775 /* DEFSUBR (Freverse_direction_charset); */
2776 DEFSUBR (Fdefine_charset_alias);
2777 DEFSUBR (Fcharset_from_attributes);
2778 DEFSUBR (Fcharset_short_name);
2779 DEFSUBR (Fcharset_long_name);
2780 DEFSUBR (Fcharset_description);
2781 DEFSUBR (Fcharset_dimension);
2782 DEFSUBR (Fcharset_property);
2783 DEFSUBR (Fcharset_id);
2784 DEFSUBR (Fset_charset_ccl_program);
2785 DEFSUBR (Fset_charset_registry);
2787 DEFSUBR (Fchar_attribute_alist);
2788 DEFSUBR (Fget_char_attribute);
2789 DEFSUBR (Fput_char_attribute);
2790 DEFSUBR (Fremove_char_attribute);
2791 DEFSUBR (Fdefine_char);
2792 DEFSUBR (Fchar_variants);
2793 DEFSUBR (Fget_composite_char);
2794 DEFSUBR (Fcharset_mapping_table);
2795 DEFSUBR (Fset_charset_mapping_table);
2799 DEFSUBR (Fdecode_char);
2801 DEFSUBR (Fmake_char);
2802 DEFSUBR (Fchar_charset);
2803 DEFSUBR (Fchar_octet);
2804 DEFSUBR (Fsplit_char);
2806 #ifdef ENABLE_COMPOSITE_CHARS
2807 DEFSUBR (Fmake_composite_char);
2808 DEFSUBR (Fcomposite_char_string);
2811 defsymbol (&Qcharsetp, "charsetp");
2812 defsymbol (&Qregistry, "registry");
2813 defsymbol (&Qfinal, "final");
2814 defsymbol (&Qgraphic, "graphic");
2815 defsymbol (&Qdirection, "direction");
2816 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2817 defsymbol (&Qshort_name, "short-name");
2818 defsymbol (&Qlong_name, "long-name");
2820 defsymbol (&Ql2r, "l2r");
2821 defsymbol (&Qr2l, "r2l");
2823 /* Charsets, compatible with FSF 20.3
2824 Naming convention is Script-Charset[-Edition] */
2825 defsymbol (&Qascii, "ascii");
2826 defsymbol (&Qcontrol_1, "control-1");
2827 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2828 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2829 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2830 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2831 defsymbol (&Qthai_tis620, "thai-tis620");
2832 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2833 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2834 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2835 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2836 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2837 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2838 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2839 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2840 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2841 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2842 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2843 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2844 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2845 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2846 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2848 defsymbol (&Q_ucs, "->ucs");
2849 defsymbol (&Q_decomposition, "->decomposition");
2850 defsymbol (&Qcompat, "compat");
2851 defsymbol (&Qisolated, "isolated");
2852 defsymbol (&Qinitial, "initial");
2853 defsymbol (&Qmedial, "medial");
2854 defsymbol (&Qfinal, "final");
2855 defsymbol (&Qvertical, "vertical");
2856 defsymbol (&QnoBreak, "noBreak");
2857 defsymbol (&Qfraction, "fraction");
2858 defsymbol (&Qsuper, "super");
2859 defsymbol (&Qsub, "sub");
2860 defsymbol (&Qcircle, "circle");
2861 defsymbol (&Qsquare, "square");
2862 defsymbol (&Qwide, "wide");
2863 defsymbol (&Qnarrow, "narrow");
2864 defsymbol (&Qsmall, "small");
2865 defsymbol (&Qfont, "font");
2866 defsymbol (&Qucs, "ucs");
2867 defsymbol (&Qucs_bmp, "ucs-bmp");
2868 defsymbol (&Qlatin_viscii, "latin-viscii");
2869 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2870 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2871 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2872 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2873 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2874 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2875 defsymbol (&Qmojikyo, "mojikyo");
2876 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2877 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2878 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2879 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2880 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2881 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2882 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2883 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2884 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2885 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2886 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2887 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2888 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2889 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2890 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2891 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2892 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2893 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2894 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2895 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2896 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2897 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2899 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2900 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2902 defsymbol (&Qcomposite, "composite");
/* Set up the charset lookup state and the Lisp-visible variables
   defined in this part of the file.

   Visible steps, in order:
   - allocate chlook with xnew and pass its address to dumpstruct
     together with charset_lookup_description;
   - store Qnil in every slot of chlook->charset_by_leading_byte and
     chlook->charset_by_attributes;
   - set the next_allocated leading-byte counters to the corresponding
     MIN_LEADING_BYTE_PRIVATE constants;
   - define the Lisp variables leading-code-private-11,
     utf-2000-version and default-coded-charset-priority-list and give
     them their initial values;
   - staticpro the character attribute, composition and variant tables
     and create each one with make_char_code_table (Qnil).

   leading_code_private_11 is assigned the same value both before and
   after its DEFVAR_INT.

   NOTE(review): charset_by_attributes is cleared once with two
   subscripts and once with three, so the two loop nests presumably
   belong to alternative preprocessor branches that are not visible
   in this view; the same may apply to the two groups of leading-byte
   counters.  Confirm against the full file before changing either.  */
2906 vars_of_mule_charset (void)
/* Allocate the lookup structure and hand it to dumpstruct.  */
2913 chlook = xnew (struct charset_lookup);
2914 dumpstruct (&chlook, &charset_lookup_description);
2916 /* Table of charsets indexed by leading byte. */
2917 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2918 chlook->charset_by_leading_byte[i] = Qnil;
2921 /* Table of charsets indexed by type/final-byte. */
2922 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2923 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2924 chlook->charset_by_attributes[i][j] = Qnil;
2926 /* Table of charsets indexed by type/final-byte/direction. */
2927 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2928 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2929 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2930 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Start each next-allocated leading-byte counter at its private
   minimum.  */
2934 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2936 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2937 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* Give the variable its initial value, then expose it to Lisp.  */
2941 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2942 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2943 Leading-code of private TYPE9N charset of column-width 1.
2945 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2949 Vutf_2000_version = build_string("0.15 (Sangō)");
2950 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2951 Version number of UTF-2000.
2954 staticpro (&Vcharacter_attribute_table);
2955 Vcharacter_attribute_table = make_char_code_table (Qnil);
2957 staticpro (&Vcharacter_composition_table);
2958 Vcharacter_composition_table = make_char_code_table (Qnil);
2960 staticpro (&Vcharacter_variant_table);
2961 Vcharacter_variant_table = make_char_code_table (Qnil);
2963 Vdefault_coded_charset_priority_list = Qnil;
2964 DEFVAR_LISP ("default-coded-charset-priority-list",
2965 &Vdefault_coded_charset_priority_list /*
2966 Default order of preferred coded-character-sets.
2972 complex_vars_of_mule_charset (void)
2974 staticpro (&Vcharset_hash_table);
2975 Vcharset_hash_table =
2976 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2978 /* Predefined character sets. We store them into variables for
2982 staticpro (&Vcharset_ucs);
2984 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2985 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2986 build_string ("UCS"),
2987 build_string ("UCS"),
2988 build_string ("ISO/IEC 10646"),
2990 Qnil, 0, 0xFFFFFFF, 0, 0);
2991 staticpro (&Vcharset_ucs_bmp);
2993 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2994 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2995 build_string ("BMP"),
2996 build_string ("BMP"),
2997 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2998 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2999 Qnil, 0, 0xFFFF, 0, 0);
3001 # define MIN_CHAR_THAI 0
3002 # define MAX_CHAR_THAI 0
3003 # define MIN_CHAR_HEBREW 0
3004 # define MAX_CHAR_HEBREW 0
3005 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3006 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3008 staticpro (&Vcharset_ascii);
3010 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3011 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3012 build_string ("ASCII"),
3013 build_string ("ASCII)"),
3014 build_string ("ASCII (ISO646 IRV)"),
3015 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3016 Qnil, 0, 0x7F, 0, 0);
3017 staticpro (&Vcharset_control_1);
3018 Vcharset_control_1 =
3019 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3020 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3021 build_string ("C1"),
3022 build_string ("Control characters"),
3023 build_string ("Control characters 128-191"),
3025 Qnil, 0x80, 0x9F, 0, 0);
3026 staticpro (&Vcharset_latin_iso8859_1);
3027 Vcharset_latin_iso8859_1 =
3028 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3029 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3030 build_string ("Latin-1"),
3031 build_string ("ISO8859-1 (Latin-1)"),
3032 build_string ("ISO8859-1 (Latin-1)"),
3033 build_string ("iso8859-1"),
3034 Qnil, 0xA0, 0xFF, 0, 32);
3035 staticpro (&Vcharset_latin_iso8859_2);
3036 Vcharset_latin_iso8859_2 =
3037 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3038 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3039 build_string ("Latin-2"),
3040 build_string ("ISO8859-2 (Latin-2)"),
3041 build_string ("ISO8859-2 (Latin-2)"),
3042 build_string ("iso8859-2"),
3044 staticpro (&Vcharset_latin_iso8859_3);
3045 Vcharset_latin_iso8859_3 =
3046 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3047 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3048 build_string ("Latin-3"),
3049 build_string ("ISO8859-3 (Latin-3)"),
3050 build_string ("ISO8859-3 (Latin-3)"),
3051 build_string ("iso8859-3"),
3053 staticpro (&Vcharset_latin_iso8859_4);
3054 Vcharset_latin_iso8859_4 =
3055 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3056 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3057 build_string ("Latin-4"),
3058 build_string ("ISO8859-4 (Latin-4)"),
3059 build_string ("ISO8859-4 (Latin-4)"),
3060 build_string ("iso8859-4"),
3062 staticpro (&Vcharset_thai_tis620);
3063 Vcharset_thai_tis620 =
3064 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3065 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3066 build_string ("TIS620"),
3067 build_string ("TIS620 (Thai)"),
3068 build_string ("TIS620.2529 (Thai)"),
3069 build_string ("tis620"),
3070 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3071 staticpro (&Vcharset_greek_iso8859_7);
3072 Vcharset_greek_iso8859_7 =
3073 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3074 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3075 build_string ("ISO8859-7"),
3076 build_string ("ISO8859-7 (Greek)"),
3077 build_string ("ISO8859-7 (Greek)"),
3078 build_string ("iso8859-7"),
3080 0 /* MIN_CHAR_GREEK */,
3081 0 /* MAX_CHAR_GREEK */, 0, 32);
3082 staticpro (&Vcharset_arabic_iso8859_6);
3083 Vcharset_arabic_iso8859_6 =
3084 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3085 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3086 build_string ("ISO8859-6"),
3087 build_string ("ISO8859-6 (Arabic)"),
3088 build_string ("ISO8859-6 (Arabic)"),
3089 build_string ("iso8859-6"),
3091 staticpro (&Vcharset_hebrew_iso8859_8);
3092 Vcharset_hebrew_iso8859_8 =
3093 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3094 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3095 build_string ("ISO8859-8"),
3096 build_string ("ISO8859-8 (Hebrew)"),
3097 build_string ("ISO8859-8 (Hebrew)"),
3098 build_string ("iso8859-8"),
3099 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3100 staticpro (&Vcharset_katakana_jisx0201);
3101 Vcharset_katakana_jisx0201 =
3102 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3103 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3104 build_string ("JISX0201 Kana"),
3105 build_string ("JISX0201.1976 (Japanese Kana)"),
3106 build_string ("JISX0201.1976 Japanese Kana"),
3107 build_string ("jisx0201\\.1976"),
3109 staticpro (&Vcharset_latin_jisx0201);
3110 Vcharset_latin_jisx0201 =
3111 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3112 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3113 build_string ("JISX0201 Roman"),
3114 build_string ("JISX0201.1976 (Japanese Roman)"),
3115 build_string ("JISX0201.1976 Japanese Roman"),
3116 build_string ("jisx0201\\.1976"),
3118 staticpro (&Vcharset_cyrillic_iso8859_5);
3119 Vcharset_cyrillic_iso8859_5 =
3120 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3121 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3122 build_string ("ISO8859-5"),
3123 build_string ("ISO8859-5 (Cyrillic)"),
3124 build_string ("ISO8859-5 (Cyrillic)"),
3125 build_string ("iso8859-5"),
3127 0 /* MIN_CHAR_CYRILLIC */,
3128 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3129 staticpro (&Vcharset_latin_iso8859_9);
3130 Vcharset_latin_iso8859_9 =
3131 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3132 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3133 build_string ("Latin-5"),
3134 build_string ("ISO8859-9 (Latin-5)"),
3135 build_string ("ISO8859-9 (Latin-5)"),
3136 build_string ("iso8859-9"),
3138 staticpro (&Vcharset_japanese_jisx0208_1978);
3139 Vcharset_japanese_jisx0208_1978 =
3140 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3141 Qjapanese_jisx0208_1978, 94, 2,
3142 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3143 build_string ("JIS X0208:1978"),
3144 build_string ("JIS X0208:1978 (Japanese)"),
3146 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3147 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3149 staticpro (&Vcharset_chinese_gb2312);
3150 Vcharset_chinese_gb2312 =
3151 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3152 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3153 build_string ("GB2312"),
3154 build_string ("GB2312)"),
3155 build_string ("GB2312 Chinese simplified"),
3156 build_string ("gb2312"),
3158 staticpro (&Vcharset_japanese_jisx0208);
3159 Vcharset_japanese_jisx0208 =
3160 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3161 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3162 build_string ("JISX0208"),
3163 build_string ("JIS X0208:1983 (Japanese)"),
3164 build_string ("JIS X0208:1983 Japanese Kanji"),
3165 build_string ("jisx0208\\.1983"),
3168 staticpro (&Vcharset_japanese_jisx0208_1990);
3169 Vcharset_japanese_jisx0208_1990 =
3170 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3171 Qjapanese_jisx0208_1990, 94, 2,
3172 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3173 build_string ("JISX0208-1990"),
3174 build_string ("JIS X0208:1990 (Japanese)"),
3175 build_string ("JIS X0208:1990 Japanese Kanji"),
3176 build_string ("jisx0208\\.1990"),
3178 MIN_CHAR_JIS_X0208_1990,
3179 MAX_CHAR_JIS_X0208_1990, 0, 33);
3181 staticpro (&Vcharset_korean_ksc5601);
3182 Vcharset_korean_ksc5601 =
3183 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3184 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3185 build_string ("KSC5601"),
3186 build_string ("KSC5601 (Korean"),
3187 build_string ("KSC5601 Korean Hangul and Hanja"),
3188 build_string ("ksc5601"),
3190 staticpro (&Vcharset_japanese_jisx0212);
3191 Vcharset_japanese_jisx0212 =
3192 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3193 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3194 build_string ("JISX0212"),
3195 build_string ("JISX0212 (Japanese)"),
3196 build_string ("JISX0212 Japanese Supplement"),
3197 build_string ("jisx0212"),
3200 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3201 staticpro (&Vcharset_chinese_cns11643_1);
3202 Vcharset_chinese_cns11643_1 =
3203 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3204 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3205 build_string ("CNS11643-1"),
3206 build_string ("CNS11643-1 (Chinese traditional)"),
3208 ("CNS 11643 Plane 1 Chinese traditional"),
3209 build_string (CHINESE_CNS_PLANE_RE("1")),
3211 staticpro (&Vcharset_chinese_cns11643_2);
3212 Vcharset_chinese_cns11643_2 =
3213 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3214 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3215 build_string ("CNS11643-2"),
3216 build_string ("CNS11643-2 (Chinese traditional)"),
3218 ("CNS 11643 Plane 2 Chinese traditional"),
3219 build_string (CHINESE_CNS_PLANE_RE("2")),
3222 staticpro (&Vcharset_latin_tcvn5712);
3223 Vcharset_latin_tcvn5712 =
3224 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3225 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3226 build_string ("TCVN 5712"),
3227 build_string ("TCVN 5712 (VSCII-2)"),
3228 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3229 build_string ("tcvn5712-1"),
3231 staticpro (&Vcharset_latin_viscii_lower);
3232 Vcharset_latin_viscii_lower =
3233 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3234 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3235 build_string ("VISCII lower"),
3236 build_string ("VISCII lower (Vietnamese)"),
3237 build_string ("VISCII lower (Vietnamese)"),
3238 build_string ("MULEVISCII-LOWER"),
3240 staticpro (&Vcharset_latin_viscii_upper);
3241 Vcharset_latin_viscii_upper =
3242 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3243 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3244 build_string ("VISCII upper"),
3245 build_string ("VISCII upper (Vietnamese)"),
3246 build_string ("VISCII upper (Vietnamese)"),
3247 build_string ("MULEVISCII-UPPER"),
3249 staticpro (&Vcharset_latin_viscii);
3250 Vcharset_latin_viscii =
3251 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3252 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3253 build_string ("VISCII"),
3254 build_string ("VISCII 1.1 (Vietnamese)"),
3255 build_string ("VISCII 1.1 (Vietnamese)"),
3256 build_string ("VISCII1\\.1"),
3258 staticpro (&Vcharset_ideograph_daikanwa);
3259 Vcharset_ideograph_daikanwa =
3260 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3261 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3262 build_string ("Daikanwa"),
3263 build_string ("Morohashi's Daikanwa"),
3264 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3265 build_string ("Daikanwa"),
3266 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3267 staticpro (&Vcharset_mojikyo);
3269 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3270 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3271 build_string ("Mojikyo"),
3272 build_string ("Mojikyo"),
3273 build_string ("Konjaku-Mojikyo"),
3275 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3276 staticpro (&Vcharset_mojikyo_pj_1);
3277 Vcharset_mojikyo_pj_1 =
3278 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3279 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3280 build_string ("Mojikyo-PJ-1"),
3281 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3283 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3284 build_string ("jisx0208\\.Mojikyo-1$"),
3286 staticpro (&Vcharset_mojikyo_pj_2);
3287 Vcharset_mojikyo_pj_2 =
3288 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3289 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3290 build_string ("Mojikyo-PJ-2"),
3291 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3293 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3294 build_string ("jisx0208\\.Mojikyo-2$"),
3296 staticpro (&Vcharset_mojikyo_pj_3);
3297 Vcharset_mojikyo_pj_3 =
3298 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3299 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3300 build_string ("Mojikyo-PJ-3"),
3301 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3303 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3304 build_string ("jisx0208\\.Mojikyo-3$"),
3306 staticpro (&Vcharset_mojikyo_pj_4);
3307 Vcharset_mojikyo_pj_4 =
3308 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3309 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3310 build_string ("Mojikyo-PJ-4"),
3311 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3313 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3314 build_string ("jisx0208\\.Mojikyo-4$"),
3316 staticpro (&Vcharset_mojikyo_pj_5);
3317 Vcharset_mojikyo_pj_5 =
3318 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3319 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3320 build_string ("Mojikyo-PJ-5"),
3321 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3323 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3324 build_string ("jisx0208\\.Mojikyo-5$"),
3326 staticpro (&Vcharset_mojikyo_pj_6);
3327 Vcharset_mojikyo_pj_6 =
3328 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3329 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3330 build_string ("Mojikyo-PJ-6"),
3331 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3333 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3334 build_string ("jisx0208\\.Mojikyo-6$"),
3336 staticpro (&Vcharset_mojikyo_pj_7);
3337 Vcharset_mojikyo_pj_7 =
3338 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3339 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3340 build_string ("Mojikyo-PJ-7"),
3341 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3343 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3344 build_string ("jisx0208\\.Mojikyo-7$"),
3346 staticpro (&Vcharset_mojikyo_pj_8);
3347 Vcharset_mojikyo_pj_8 =
3348 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3349 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3350 build_string ("Mojikyo-PJ-8"),
3351 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3353 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3354 build_string ("jisx0208\\.Mojikyo-8$"),
3356 staticpro (&Vcharset_mojikyo_pj_9);
3357 Vcharset_mojikyo_pj_9 =
3358 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3359 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3360 build_string ("Mojikyo-PJ-9"),
3361 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3363 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3364 build_string ("jisx0208\\.Mojikyo-9$"),
3366 staticpro (&Vcharset_mojikyo_pj_10);
3367 Vcharset_mojikyo_pj_10 =
3368 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3369 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3370 build_string ("Mojikyo-PJ-10"),
3371 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3373 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3374 build_string ("jisx0208\\.Mojikyo-10$"),
3376 staticpro (&Vcharset_mojikyo_pj_11);
3377 Vcharset_mojikyo_pj_11 =
3378 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3379 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3380 build_string ("Mojikyo-PJ-11"),
3381 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3383 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3384 build_string ("jisx0208\\.Mojikyo-11$"),
3386 staticpro (&Vcharset_mojikyo_pj_12);
3387 Vcharset_mojikyo_pj_12 =
3388 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3389 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3390 build_string ("Mojikyo-PJ-12"),
3391 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3393 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3394 build_string ("jisx0208\\.Mojikyo-12$"),
3396 staticpro (&Vcharset_mojikyo_pj_13);
3397 Vcharset_mojikyo_pj_13 =
3398 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3399 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3400 build_string ("Mojikyo-PJ-13"),
3401 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3403 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3404 build_string ("jisx0208\\.Mojikyo-13$"),
3406 staticpro (&Vcharset_mojikyo_pj_14);
3407 Vcharset_mojikyo_pj_14 =
3408 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3409 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3410 build_string ("Mojikyo-PJ-14"),
3411 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3413 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3414 build_string ("jisx0208\\.Mojikyo-14$"),
3416 staticpro (&Vcharset_mojikyo_pj_15);
3417 Vcharset_mojikyo_pj_15 =
3418 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3419 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3420 build_string ("Mojikyo-PJ-15"),
3421 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3423 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3424 build_string ("jisx0208\\.Mojikyo-15$"),
3426 staticpro (&Vcharset_mojikyo_pj_16);
3427 Vcharset_mojikyo_pj_16 =
3428 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3429 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3430 build_string ("Mojikyo-PJ-16"),
3431 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3433 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3434 build_string ("jisx0208\\.Mojikyo-16$"),
3436 staticpro (&Vcharset_mojikyo_pj_17);
3437 Vcharset_mojikyo_pj_17 =
3438 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3439 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3440 build_string ("Mojikyo-PJ-17"),
3441 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3443 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3444 build_string ("jisx0208\\.Mojikyo-17$"),
3446 staticpro (&Vcharset_mojikyo_pj_18);
3447 Vcharset_mojikyo_pj_18 =
3448 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3449 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3450 build_string ("Mojikyo-PJ-18"),
3451 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3453 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3454 build_string ("jisx0208\\.Mojikyo-18$"),
3456 staticpro (&Vcharset_mojikyo_pj_19);
3457 Vcharset_mojikyo_pj_19 =
3458 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3459 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3460 build_string ("Mojikyo-PJ-19"),
3461 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3463 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3464 build_string ("jisx0208\\.Mojikyo-19$"),
3466 staticpro (&Vcharset_mojikyo_pj_20);
3467 Vcharset_mojikyo_pj_20 =
3468 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3469 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3470 build_string ("Mojikyo-PJ-20"),
3471 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3473 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3474 build_string ("jisx0208\\.Mojikyo-20$"),
3476 staticpro (&Vcharset_mojikyo_pj_21);
3477 Vcharset_mojikyo_pj_21 =
3478 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3479 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3480 build_string ("Mojikyo-PJ-21"),
3481 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3483 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3484 build_string ("jisx0208\\.Mojikyo-21$"),
3486 staticpro (&Vcharset_ethiopic_ucs);
3487 Vcharset_ethiopic_ucs =
3488 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3489 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3490 build_string ("Ethiopic (UCS)"),
3491 build_string ("Ethiopic (UCS)"),
3492 build_string ("Ethiopic of UCS"),
3493 build_string ("Ethiopic-Unicode"),
3494 Qnil, 0x1200, 0x137F, 0x1200, 0);
3496 staticpro (&Vcharset_chinese_big5_1);
3497 Vcharset_chinese_big5_1 =
3498 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3499 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3500 build_string ("Big5"),
3501 build_string ("Big5 (Level-1)"),
3503 ("Big5 Level-1 Chinese traditional"),
3504 build_string ("big5"),
3506 staticpro (&Vcharset_chinese_big5_2);
3507 Vcharset_chinese_big5_2 =
3508 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3509 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3510 build_string ("Big5"),
3511 build_string ("Big5 (Level-2)"),
3513 ("Big5 Level-2 Chinese traditional"),
3514 build_string ("big5"),
3517 #ifdef ENABLE_COMPOSITE_CHARS
3518 /* #### For simplicity, we put composite chars into a 96x96 charset.
3519 This is going to lead to problems because you can run out of
3520 room, esp. as we don't yet recycle numbers. */
3521 staticpro (&Vcharset_composite);
3522 Vcharset_composite =
3523 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3524 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3525 build_string ("Composite"),
3526 build_string ("Composite characters"),
3527 build_string ("Composite characters"),
3530 /* #### not dumped properly */
3531 composite_char_row_next = 32;
3532 composite_char_col_next = 32;
3534 Vcomposite_char_string2char_hash_table =
3535 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3536 Vcomposite_char_char2string_hash_table =
3537 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3538 staticpro (&Vcomposite_char_string2char_hash_table);
3539 staticpro (&Vcomposite_char_char2string_hash_table);
3540 #endif /* ENABLE_COMPOSITE_CHARS */