1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
100 /* Hash tables for composite chars. One maps string representing
101 composed chars to their equivalent chars; one goes the
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
163 mark_byte_table (Lisp_Object obj)
165 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
176 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
179 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (BYTE_TABLE_P (cte1->property[i]))
185 if (BYTE_TABLE_P (cte2->property[i]))
187 if (!byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
201 byte_table_hash (Lisp_Object obj, int depth)
203 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
208 static const struct lrecord_description byte_table_description[] = {
209 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
215 internal_object_printer,
218 byte_table_description,
222 make_byte_table (Lisp_Object initval)
227 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
229 for (i = 0; i < 256; i++)
230 cte->property[i] = initval;
232 XSETBYTE_TABLE (obj, cte);
237 copy_byte_table (Lisp_Object entry)
239 Lisp_Byte_Table *cte = XBYTE_TABLE (entry);
242 Lisp_Byte_Table *ctenew
243 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_byte_table (new);
251 ctenew->property[i] = new;
254 XSETBYTE_TABLE (obj, ctenew);
260 mark_char_id_table (Lisp_Object obj)
262 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
268 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 Lisp_Char_ID_Table *cte1 = XCHAR_ID_TABLE (obj1);
271 Lisp_Char_ID_Table *cte2 = XCHAR_ID_TABLE (obj2);
273 return byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_id_table_hash (Lisp_Object obj, int depth)
279 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
281 return char_id_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_id_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
291 internal_object_printer,
292 0, char_id_table_equal,
294 char_id_table_description,
298 make_char_id_table (Lisp_Object initval)
301 Lisp_Char_ID_Table *cte
302 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
304 cte->table = make_byte_table (initval);
306 XSETCHAR_ID_TABLE (obj, cte);
311 copy_char_id_table (Lisp_Object entry)
313 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (entry);
315 Lisp_Char_ID_Table *ctenew
316 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
318 ctenew->table = copy_byte_table (cte->table);
319 XSETCHAR_ID_TABLE (obj, ctenew);
325 get_char_id_table (Emchar ch, Lisp_Object table)
327 unsigned int code = ch;
329 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
330 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
332 if (BYTE_TABLE_P (ret))
333 cpt = XBYTE_TABLE (ret);
337 ret = cpt->property [(unsigned char) (code >> 16)];
338 if (BYTE_TABLE_P (ret))
339 cpt = XBYTE_TABLE (ret);
343 ret = cpt->property [(unsigned char) (code >> 8)];
344 if (BYTE_TABLE_P (ret))
345 cpt = XBYTE_TABLE (ret);
349 return cpt->property [(unsigned char) code];
352 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
354 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
356 unsigned int code = ch;
357 Lisp_Byte_Table* cpt1 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
358 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
360 if (BYTE_TABLE_P (ret))
362 Lisp_Byte_Table* cpt2 = XBYTE_TABLE (ret);
364 ret = cpt2->property[(unsigned char)(code >> 16)];
365 if (BYTE_TABLE_P (ret))
367 Lisp_Byte_Table* cpt3 = XBYTE_TABLE (ret);
369 ret = cpt3->property[(unsigned char)(code >> 8)];
370 if (BYTE_TABLE_P (ret))
372 Lisp_Byte_Table* cpt4 = XBYTE_TABLE (ret);
374 cpt4->property[(unsigned char)code] = value;
376 else if (!EQ (ret, value))
378 Lisp_Object cpt4 = make_byte_table (ret);
380 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
381 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
384 else if (!EQ (ret, value))
386 Lisp_Object cpt3 = make_byte_table (ret);
387 Lisp_Object cpt4 = make_byte_table (ret);
389 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
390 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
392 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
395 else if (!EQ (ret, value))
397 Lisp_Object cpt2 = make_byte_table (ret);
398 Lisp_Object cpt3 = make_byte_table (ret);
399 Lisp_Object cpt4 = make_byte_table (ret);
401 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
402 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
403 XBYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
404 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
409 Lisp_Object Vcharacter_attribute_table;
410 Lisp_Object Vcharacter_composition_table;
411 Lisp_Object Vcharacter_variant_table;
413 Lisp_Object Q_decomposition;
416 Lisp_Object Qisolated;
417 Lisp_Object Qinitial;
420 Lisp_Object Qvertical;
421 Lisp_Object QnoBreak;
422 Lisp_Object Qfraction;
432 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
434 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
440 else if (EQ (v, Qcompat))
442 else if (EQ (v, Qisolated))
444 else if (EQ (v, Qinitial))
446 else if (EQ (v, Qmedial))
448 else if (EQ (v, Qfinal))
450 else if (EQ (v, Qvertical))
452 else if (EQ (v, QnoBreak))
454 else if (EQ (v, Qfraction))
456 else if (EQ (v, Qsuper))
458 else if (EQ (v, Qsub))
460 else if (EQ (v, Qcircle))
462 else if (EQ (v, Qsquare))
464 else if (EQ (v, Qwide))
466 else if (EQ (v, Qnarrow))
468 else if (EQ (v, Qsmall))
470 else if (EQ (v, Qfont))
473 signal_simple_error (err_msg, err_arg);
476 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
477 Return character corresponding with list.
481 Lisp_Object table = Vcharacter_composition_table;
482 Lisp_Object rest = list;
486 Lisp_Object v = Fcar (rest);
488 Emchar c = to_char_id (v, "Invalid value for composition", list);
490 ret = get_char_id_table (c, table);
495 if (!CHAR_ID_TABLE_P (ret))
500 else if (!CONSP (rest))
502 else if (CHAR_ID_TABLE_P (ret))
505 signal_simple_error ("Invalid table is found with", list);
507 signal_simple_error ("Invalid value for composition", list);
510 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
511 Return variants of CHARACTER.
515 CHECK_CHAR (character);
516 return Fcopy_list (get_char_id_table (XCHAR (character),
517 Vcharacter_variant_table));
520 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
521 Return the alist of attributes of CHARACTER.
525 CHECK_CHAR (character);
526 return Fcopy_alist (get_char_id_table (XCHAR (character),
527 Vcharacter_attribute_table));
530 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
531 Return the value of CHARACTER's ATTRIBUTE.
533 (character, attribute))
537 CHECK_CHAR (character);
538 if (!NILP (ccs = Ffind_charset (attribute)))
540 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
542 if (CHAR_ID_TABLE_P (encoding_table))
543 return get_char_id_table (XCHAR (character), encoding_table);
550 = get_char_id_table (XCHAR (character), Vcharacter_attribute_table);
555 return Fcdr (Fassq (attribute, ret));
559 Lisp_Object put_char_attribute (Lisp_Object character,
560 Lisp_Object attribute, Lisp_Object value);
562 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
565 Emchar char_id = XCHAR (character);
566 Lisp_Object ret = get_char_id_table (char_id, Vcharacter_attribute_table);
569 cell = Fassq (attribute, ret);
573 ret = Fcons (Fcons (attribute, value), ret);
575 else if (!EQ (Fcdr (cell), value))
577 Fsetcdr (cell, value);
579 put_char_id_table (char_id, ret, Vcharacter_attribute_table);
583 Lisp_Object remove_char_attribute (Lisp_Object character,
584 Lisp_Object attribute);
586 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
588 Emchar char_id = XCHAR (character);
589 Lisp_Object alist = get_char_id_table (char_id, Vcharacter_attribute_table);
591 if (EQ (attribute, Fcar (Fcar (alist))))
593 alist = Fcdr (alist);
597 Lisp_Object pr = alist;
598 Lisp_Object r = Fcdr (alist);
602 if (EQ (attribute, Fcar (Fcar (r))))
604 XCDR (pr) = Fcdr (r);
611 put_char_id_table (char_id, alist, Vcharacter_attribute_table);
617 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
618 Store CHARACTER's ATTRIBUTE with VALUE.
620 (character, attribute, value))
624 CHECK_CHAR (character);
625 ccs = Ffind_charset (attribute);
628 Lisp_Object encoding_table;
630 if (!EQ (XCHARSET_NAME (ccs), Qucs)
631 || (XCHAR (character) != XINT (value)))
633 Lisp_Object cpos, rest;
634 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
641 /* ad-hoc method for `ascii' */
642 if ((XCHARSET_CHARS (ccs) == 94) &&
643 (XCHARSET_BYTE_OFFSET (ccs) != 33))
644 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
646 ccs_len = XCHARSET_CHARS (ccs);
650 Lisp_Object ret = Fcar (value);
653 signal_simple_error ("Invalid value for coded-charset", value);
654 code_point = XINT (ret);
655 if (XCHARSET_GRAPHIC (ccs) == 1)
663 signal_simple_error ("Invalid value for coded-charset",
667 signal_simple_error ("Invalid value for coded-charset",
670 if (XCHARSET_GRAPHIC (ccs) == 1)
672 code_point = (code_point << 8) | j;
675 value = make_int (code_point);
677 else if (INTP (value))
679 if (XCHARSET_GRAPHIC (ccs) == 1)
680 value = make_int (XINT (value) & 0x7F7F7F7F);
683 signal_simple_error ("Invalid value for coded-charset", value);
686 cpos = Fget_char_attribute (character, attribute);
691 dim = XCHARSET_DIMENSION (ccs);
692 code_point = XINT (cpos);
696 i = ((code_point >> (8 * dim)) & 255)
697 - XCHARSET_BYTE_OFFSET (ccs);
698 nv = XVECTOR_DATA(v)[i];
704 XVECTOR_DATA(v)[i] = Qnil;
705 v = XCHARSET_DECODING_TABLE (ccs);
710 XCHARSET_DECODING_TABLE (ccs) = v
711 = make_older_vector (ccs_len, Qnil);
714 dim = XCHARSET_DIMENSION (ccs);
715 code_point = XINT (value);
720 i = ((code_point >> (8 * dim)) & 255)
721 - XCHARSET_BYTE_OFFSET (ccs);
722 nv = XVECTOR_DATA(v)[i];
726 nv = (XVECTOR_DATA(v)[i]
727 = make_older_vector (ccs_len, Qnil));
733 XVECTOR_DATA(v)[i] = character;
737 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
739 XCHARSET_ENCODING_TABLE (ccs) = encoding_table
740 = make_char_id_table (Qnil);
742 put_char_id_table (XCHAR (character), value, encoding_table);
745 else if (EQ (attribute, Q_decomposition))
748 signal_simple_error ("Invalid value for ->decomposition",
751 if (CONSP (Fcdr (value)))
753 Lisp_Object rest = value;
754 Lisp_Object table = Vcharacter_composition_table;
758 Lisp_Object v = Fcar (rest);
761 = to_char_id (v, "Invalid value for ->decomposition", value);
766 put_char_id_table (c, character, table);
771 ntable = get_char_id_table (c, table);
772 if (!CHAR_ID_TABLE_P (ntable))
774 ntable = make_char_id_table (Qnil);
775 put_char_id_table (c, ntable, table);
783 Lisp_Object v = Fcar (value);
789 = get_char_id_table (c, Vcharacter_variant_table);
791 if (NILP (Fmemq (v, ret)))
793 put_char_id_table (c, Fcons (character, ret),
794 Vcharacter_variant_table);
799 else if (EQ (attribute, Q_ucs))
805 signal_simple_error ("Invalid value for ->ucs", value);
809 ret = get_char_id_table (c, Vcharacter_variant_table);
810 if (NILP (Fmemq (character, ret)))
812 put_char_id_table (c, Fcons (character, ret),
813 Vcharacter_variant_table);
816 return put_char_attribute (character, attribute, value);
819 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
820 Remove CHARACTER's ATTRIBUTE.
822 (character, attribute))
826 CHECK_CHAR (character);
827 ccs = Ffind_charset (attribute);
831 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
837 Lisp_Object encoding_table;
839 /* ad-hoc method for `ascii' */
840 if ((XCHARSET_CHARS (ccs) == 94) &&
841 (XCHARSET_BYTE_OFFSET (ccs) != 33))
842 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
844 ccs_len = XCHARSET_CHARS (ccs);
847 cpos = Fget_char_attribute (character, attribute);
852 dim = XCHARSET_DIMENSION (ccs);
853 code_point = XINT (cpos);
857 i = ((code_point >> (8 * dim)) & 255)
858 - XCHARSET_BYTE_OFFSET (ccs);
859 nv = XVECTOR_DATA(v)[i];
865 XVECTOR_DATA(v)[i] = Qnil;
866 v = XCHARSET_DECODING_TABLE (ccs);
869 if (!NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
871 put_char_id_table (XCHAR (character), Qnil, encoding_table);
875 return remove_char_attribute (character, attribute);
878 EXFUN (Fmake_char, 3);
879 EXFUN (Fdecode_char, 2);
881 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
882 Store character's ATTRIBUTES.
886 Lisp_Object rest = attributes;
887 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
888 Lisp_Object character;
894 Lisp_Object cell = Fcar (rest);
898 signal_simple_error ("Invalid argument", attributes);
899 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
900 && ((XCHARSET_FINAL (ccs) != 0) ||
901 (XCHARSET_UCS_MAX (ccs) > 0)) )
905 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
907 character = Fdecode_char (ccs, cell);
908 goto setup_attributes;
912 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
915 signal_simple_error ("Invalid argument", attributes);
917 character = make_char (XINT (code) + 0x100000);
918 goto setup_attributes;
922 else if (!INTP (code))
923 signal_simple_error ("Invalid argument", attributes);
925 character = make_char (XINT (code));
931 Lisp_Object cell = Fcar (rest);
934 signal_simple_error ("Invalid argument", attributes);
935 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
939 get_char_id_table (XCHAR (character), Vcharacter_attribute_table);
942 Lisp_Object Vutf_2000_version;
946 int leading_code_private_11;
949 Lisp_Object Qcharsetp;
951 /* Qdoc_string, Qdimension, Qchars defined in general.c */
952 Lisp_Object Qregistry, Qfinal, Qgraphic;
953 Lisp_Object Qdirection;
954 Lisp_Object Qreverse_direction_charset;
955 Lisp_Object Qleading_byte;
956 Lisp_Object Qshort_name, Qlong_name;
972 Qjapanese_jisx0208_1978,
975 Qjapanese_jisx0208_1990,
986 Qvietnamese_viscii_lower,
987 Qvietnamese_viscii_upper,
1017 Lisp_Object Ql2r, Qr2l;
1019 Lisp_Object Vcharset_hash_table;
1021 /* Composite characters are characters constructed by overstriking two
1022 or more regular characters.
1024 1) The old Mule implementation involves storing composite characters
1025 in a buffer as a tag followed by all of the actual characters
1026 used to make up the composite character. I think this is a bad
1027 idea; it greatly complicates code that wants to handle strings
1028 one character at a time because it has to deal with the possibility
1029 of great big ungainly characters. It's much more reasonable to
1030 simply store an index into a table of composite characters.
1032 2) The current implementation only allows for 16,384 separate
1033 composite characters over the lifetime of the XEmacs process.
1034 This could become a potential problem if the user
1035 edited lots of different files that use composite characters.
1036 Due to FSF bogosity, increasing the number of allowable
1037 composite characters under Mule would decrease the number
1038 of possible faces that can exist. Mule already has shrunk
1039 this to 2048, and further shrinkage would become uncomfortable.
1040 No such problems exist in XEmacs.
1042 Composite characters could be represented as 0x80 C1 C2 C3,
1043 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1044 for slightly under 2^20 (one million) composite characters
1045 over the XEmacs process lifetime, and you only need to
1046 increase the size of a Mule character from 19 to 21 bits.
1047 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1048 85 million (slightly over 2^26) composite characters. */
1051 /************************************************************************/
1052 /* Basic Emchar functions */
1053 /************************************************************************/
1055 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1056 string in STR. Returns the number of bytes stored.
1057 Do not call this directly. Use the macro set_charptr_emchar() instead.
1061 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1067 Lisp_Object charset;
1076 else if ( c <= 0x7ff )
1078 *p++ = (c >> 6) | 0xc0;
1079 *p++ = (c & 0x3f) | 0x80;
1081 else if ( c <= 0xffff )
1083 *p++ = (c >> 12) | 0xe0;
1084 *p++ = ((c >> 6) & 0x3f) | 0x80;
1085 *p++ = (c & 0x3f) | 0x80;
1087 else if ( c <= 0x1fffff )
1089 *p++ = (c >> 18) | 0xf0;
1090 *p++ = ((c >> 12) & 0x3f) | 0x80;
1091 *p++ = ((c >> 6) & 0x3f) | 0x80;
1092 *p++ = (c & 0x3f) | 0x80;
1094 else if ( c <= 0x3ffffff )
1096 *p++ = (c >> 24) | 0xf8;
1097 *p++ = ((c >> 18) & 0x3f) | 0x80;
1098 *p++ = ((c >> 12) & 0x3f) | 0x80;
1099 *p++ = ((c >> 6) & 0x3f) | 0x80;
1100 *p++ = (c & 0x3f) | 0x80;
1104 *p++ = (c >> 30) | 0xfc;
1105 *p++ = ((c >> 24) & 0x3f) | 0x80;
1106 *p++ = ((c >> 18) & 0x3f) | 0x80;
1107 *p++ = ((c >> 12) & 0x3f) | 0x80;
1108 *p++ = ((c >> 6) & 0x3f) | 0x80;
1109 *p++ = (c & 0x3f) | 0x80;
1112 BREAKUP_CHAR (c, charset, c1, c2);
1113 lb = CHAR_LEADING_BYTE (c);
1114 if (LEADING_BYTE_PRIVATE_P (lb))
1115 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1117 if (EQ (charset, Vcharset_control_1))
1126 /* Return the first character from a Mule-encoded string in STR,
1127 assuming it's non-ASCII. Do not call this directly.
1128 Use the macro charptr_emchar() instead. */
1131 non_ascii_charptr_emchar (const Bufbyte *str)
1144 else if ( b >= 0xf8 )
1149 else if ( b >= 0xf0 )
1154 else if ( b >= 0xe0 )
1159 else if ( b >= 0xc0 )
1169 for( ; len > 0; len-- )
1172 ch = ( ch << 6 ) | ( b & 0x3f );
1176 Bufbyte i0 = *str, i1, i2 = 0;
1177 Lisp_Object charset;
1179 if (i0 == LEADING_BYTE_CONTROL_1)
1180 return (Emchar) (*++str - 0x20);
1182 if (LEADING_BYTE_PREFIX_P (i0))
1187 charset = CHARSET_BY_LEADING_BYTE (i0);
1188 if (XCHARSET_DIMENSION (charset) == 2)
1191 return MAKE_CHAR (charset, i1, i2);
1195 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1196 Do not call this directly. Use the macro valid_char_p() instead. */
1200 non_ascii_valid_char_p (Emchar ch)
1204 /* Must have only lowest 19 bits set */
1208 f1 = CHAR_FIELD1 (ch);
1209 f2 = CHAR_FIELD2 (ch);
1210 f3 = CHAR_FIELD3 (ch);
1214 Lisp_Object charset;
1216 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1217 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1218 f2 > MAX_CHAR_FIELD2_PRIVATE)
1223 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1224 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1228 NOTE: This takes advantage of the fact that
1229 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1230 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1232 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1233 if (EQ (charset, Qnil))
1235 return (XCHARSET_CHARS (charset) == 96);
1239 Lisp_Object charset;
1241 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1242 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1243 f1 > MAX_CHAR_FIELD1_PRIVATE)
1245 if (f2 < 0x20 || f3 < 0x20)
1248 #ifdef ENABLE_COMPOSITE_CHARS
1249 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1251 if (UNBOUNDP (Fgethash (make_int (ch),
1252 Vcomposite_char_char2string_hash_table,
1257 #endif /* ENABLE_COMPOSITE_CHARS */
1259 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1260 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1263 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1265 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1268 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1270 if (EQ (charset, Qnil))
1272 return (XCHARSET_CHARS (charset) == 96);
1278 /************************************************************************/
1279 /* Basic string functions */
1280 /************************************************************************/
1282 /* Copy the character pointed to by PTR into STR, assuming it's
1283 non-ASCII. Do not call this directly. Use the macro
1284 charptr_copy_char() instead. */
1287 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1289 Bufbyte *strptr = str;
1291 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1293 /* Notice fallthrough. */
1295 case 6: *++strptr = *ptr++;
1296 case 5: *++strptr = *ptr++;
1298 case 4: *++strptr = *ptr++;
1299 case 3: *++strptr = *ptr++;
1300 case 2: *++strptr = *ptr;
1305 return strptr + 1 - str;
1309 /************************************************************************/
1310 /* streams of Emchars */
1311 /************************************************************************/
1313 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1314 The functions below are not meant to be called directly; use
1315 the macros in insdel.h. */
1318 Lstream_get_emchar_1 (Lstream *stream, int ch)
1320 Bufbyte str[MAX_EMCHAR_LEN];
1321 Bufbyte *strptr = str;
1323 str[0] = (Bufbyte) ch;
1324 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1326 /* Notice fallthrough. */
1329 ch = Lstream_getc (stream);
1331 *++strptr = (Bufbyte) ch;
1333 ch = Lstream_getc (stream);
1335 *++strptr = (Bufbyte) ch;
1338 ch = Lstream_getc (stream);
1340 *++strptr = (Bufbyte) ch;
1342 ch = Lstream_getc (stream);
1344 *++strptr = (Bufbyte) ch;
1346 ch = Lstream_getc (stream);
1348 *++strptr = (Bufbyte) ch;
1353 return charptr_emchar (str);
1357 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1359 Bufbyte str[MAX_EMCHAR_LEN];
1360 Bytecount len = set_charptr_emchar (str, ch);
1361 return Lstream_write (stream, str, len);
1365 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1367 Bufbyte str[MAX_EMCHAR_LEN];
1368 Bytecount len = set_charptr_emchar (str, ch);
1369 Lstream_unread (stream, str, len);
1373 /************************************************************************/
1374 /* charset object */
1375 /************************************************************************/
1378 mark_charset (Lisp_Object obj)
1380 Lisp_Charset *cs = XCHARSET (obj);
1382 mark_object (cs->short_name);
1383 mark_object (cs->long_name);
1384 mark_object (cs->doc_string);
1385 mark_object (cs->registry);
1386 mark_object (cs->ccl_program);
1388 mark_object (cs->encoding_table);
1389 /* mark_object (cs->decoding_table); */
1395 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1397 Lisp_Charset *cs = XCHARSET (obj);
1401 error ("printing unreadable object #<charset %s 0x%x>",
1402 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1405 write_c_string ("#<charset ", printcharfun);
1406 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1407 write_c_string (" ", printcharfun);
1408 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1409 write_c_string (" ", printcharfun);
1410 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1411 write_c_string (" ", printcharfun);
1412 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1413 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1415 CHARSET_DIMENSION (cs),
1416 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1417 CHARSET_COLUMNS (cs),
1418 CHARSET_GRAPHIC (cs),
1419 CHARSET_FINAL (cs));
1420 write_c_string (buf, printcharfun);
1421 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1422 sprintf (buf, " 0x%x>", cs->header.uid);
1423 write_c_string (buf, printcharfun);
1426 static const struct lrecord_description charset_description[] = {
1427 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1428 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1429 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1430 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1431 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1432 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1433 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1435 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
1436 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
1441 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1442 mark_charset, print_charset, 0, 0, 0,
1443 charset_description,
1445 /* Make a new charset. */
1448 make_charset (Charset_ID id, Lisp_Object name,
1449 unsigned short chars, unsigned char dimension,
1450 unsigned char columns, unsigned char graphic,
1451 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1452 Lisp_Object long_name, Lisp_Object doc,
1454 Lisp_Object decoding_table,
1455 Emchar ucs_min, Emchar ucs_max,
1456 Emchar code_offset, unsigned char byte_offset)
1458 unsigned char type = 0;
1460 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1464 XSETCHARSET (obj, cs);
1466 CHARSET_ID (cs) = id;
1467 CHARSET_NAME (cs) = name;
1468 CHARSET_SHORT_NAME (cs) = short_name;
1469 CHARSET_LONG_NAME (cs) = long_name;
1470 CHARSET_CHARS (cs) = chars;
1471 CHARSET_DIMENSION (cs) = dimension;
1472 CHARSET_DIRECTION (cs) = direction;
1473 CHARSET_COLUMNS (cs) = columns;
1474 CHARSET_GRAPHIC (cs) = graphic;
1475 CHARSET_FINAL (cs) = final;
1476 CHARSET_DOC_STRING (cs) = doc;
1477 CHARSET_REGISTRY (cs) = reg;
1478 CHARSET_CCL_PROGRAM (cs) = Qnil;
1479 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1481 CHARSET_DECODING_TABLE(cs) = Qnil;
1482 CHARSET_ENCODING_TABLE(cs) = Qnil;
1483 CHARSET_UCS_MIN(cs) = ucs_min;
1484 CHARSET_UCS_MAX(cs) = ucs_max;
1485 CHARSET_CODE_OFFSET(cs) = code_offset;
1486 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1489 switch (CHARSET_CHARS (cs))
1492 switch (CHARSET_DIMENSION (cs))
1495 type = CHARSET_TYPE_94;
1498 type = CHARSET_TYPE_94X94;
1503 switch (CHARSET_DIMENSION (cs))
1506 type = CHARSET_TYPE_96;
1509 type = CHARSET_TYPE_96X96;
1515 switch (CHARSET_DIMENSION (cs))
1518 type = CHARSET_TYPE_128;
1521 type = CHARSET_TYPE_128X128;
1526 switch (CHARSET_DIMENSION (cs))
1529 type = CHARSET_TYPE_256;
1532 type = CHARSET_TYPE_256X256;
1539 CHARSET_TYPE (cs) = type;
1543 if (id == LEADING_BYTE_ASCII)
1544 CHARSET_REP_BYTES (cs) = 1;
1546 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1548 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1553 /* some charsets do not have final characters. This includes
1554 ASCII, Control-1, Composite, and the two faux private
1557 if (code_offset == 0)
1559 assert (NILP (chlook->charset_by_attributes[type][final]));
1560 chlook->charset_by_attributes[type][final] = obj;
1563 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1564 chlook->charset_by_attributes[type][final][direction] = obj;
1568 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1569 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1571 /* Some charsets are "faux" and don't have names or really exist at
1572 all except in the leading-byte table. */
1574 Fputhash (name, obj, Vcharset_hash_table);
1579 get_unallocated_leading_byte (int dimension)
1584 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1587 lb = chlook->next_allocated_leading_byte++;
1591 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1594 lb = chlook->next_allocated_1_byte_leading_byte++;
1598 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1601 lb = chlook->next_allocated_2_byte_leading_byte++;
1607 ("No more character sets free for this dimension",
1608 make_int (dimension));
1615 make_builtin_char (Lisp_Object charset, int c1, int c2)
1617 if (XCHARSET_UCS_MAX (charset))
1620 = (XCHARSET_DIMENSION (charset) == 1
1622 c1 - XCHARSET_BYTE_OFFSET (charset)
1624 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1625 + c2 - XCHARSET_BYTE_OFFSET (charset))
1626 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1627 if ((code < XCHARSET_UCS_MIN (charset))
1628 || (XCHARSET_UCS_MAX (charset) < code))
1629 signal_simple_error ("Arguments makes invalid character",
1633 else if (XCHARSET_DIMENSION (charset) == 1)
1635 switch (XCHARSET_CHARS (charset))
1639 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1642 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1649 switch (XCHARSET_CHARS (charset))
1652 return MIN_CHAR_94x94
1653 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1654 + (c1 - 33) * 94 + (c2 - 33);
1656 return MIN_CHAR_96x96
1657 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1658 + (c1 - 32) * 96 + (c2 - 32);
1666 range_charset_code_point (Lisp_Object charset, Emchar ch)
1670 if ((XCHARSET_UCS_MIN (charset) <= ch)
1671 && (ch <= XCHARSET_UCS_MAX (charset)))
1673 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1675 if (XCHARSET_CHARS (charset) == 256)
1677 else if (XCHARSET_DIMENSION (charset) == 1)
1678 return d + XCHARSET_BYTE_OFFSET (charset);
1679 else if (XCHARSET_DIMENSION (charset) == 2)
1681 ((d / XCHARSET_CHARS (charset)
1682 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1683 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1684 else if (XCHARSET_DIMENSION (charset) == 3)
1686 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1687 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1688 | ((d / XCHARSET_CHARS (charset)
1689 % XCHARSET_CHARS (charset)
1690 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1691 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1692 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1694 ((d / (XCHARSET_CHARS (charset)
1695 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1696 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1697 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1698 % XCHARSET_CHARS (charset)
1699 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1700 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1701 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1702 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1704 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1706 if (XCHARSET_DIMENSION (charset) == 1)
1708 if (XCHARSET_CHARS (charset) == 94)
1710 if (((d = ch - (MIN_CHAR_94
1711 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1715 else if (XCHARSET_CHARS (charset) == 96)
1717 if (((d = ch - (MIN_CHAR_96
1718 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1725 else if (XCHARSET_DIMENSION (charset) == 2)
1727 if (XCHARSET_CHARS (charset) == 94)
1729 if (((d = ch - (MIN_CHAR_94x94
1730 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1733 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1735 else if (XCHARSET_CHARS (charset) == 96)
1737 if (((d = ch - (MIN_CHAR_96x96
1738 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1741 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1751 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1753 if (c <= MAX_CHAR_BASIC_LATIN)
1755 *charset = Vcharset_ascii;
1760 *charset = Vcharset_control_1;
1765 *charset = Vcharset_latin_iso8859_1;
1769 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1771 *charset = Vcharset_greek_iso8859_7;
1772 return c - MIN_CHAR_GREEK + 0x20;
1774 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1776 *charset = Vcharset_cyrillic_iso8859_5;
1777 return c - MIN_CHAR_CYRILLIC + 0x20;
1780 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1782 *charset = Vcharset_hebrew_iso8859_8;
1783 return c - MIN_CHAR_HEBREW + 0x20;
1785 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1787 *charset = Vcharset_thai_tis620;
1788 return c - MIN_CHAR_THAI + 0x20;
1791 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1792 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1794 return list2 (Vcharset_katakana_jisx0201,
1795 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1798 else if (c <= MAX_CHAR_BMP)
1800 *charset = Vcharset_ucs_bmp;
1803 else if (c < MIN_CHAR_DAIKANWA)
1805 *charset = Vcharset_ucs;
1809 else if (c <= MAX_CHAR_DAIKANWA)
1811 *charset = Vcharset_ideograph_daikanwa;
1812 return c - MIN_CHAR_DAIKANWA;
1815 else if (c <= MAX_CHAR_MOJIKYO)
1817 *charset = Vcharset_mojikyo;
1818 return c - MIN_CHAR_MOJIKYO;
1820 else if (c < MIN_CHAR_94)
1822 *charset = Vcharset_ucs;
1825 else if (c <= MAX_CHAR_94)
1827 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1828 ((c - MIN_CHAR_94) / 94) + '0',
1829 CHARSET_LEFT_TO_RIGHT);
1830 if (!NILP (*charset))
1831 return ((c - MIN_CHAR_94) % 94) + 33;
1834 *charset = Vcharset_ucs;
1838 else if (c <= MAX_CHAR_96)
1840 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1841 ((c - MIN_CHAR_96) / 96) + '0',
1842 CHARSET_LEFT_TO_RIGHT);
1843 if (!NILP (*charset))
1844 return ((c - MIN_CHAR_96) % 96) + 32;
1847 *charset = Vcharset_ucs;
1851 else if (c <= MAX_CHAR_94x94)
1854 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1855 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1856 CHARSET_LEFT_TO_RIGHT);
1857 if (!NILP (*charset))
1858 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1859 | (((c - MIN_CHAR_94x94) % 94) + 33);
1862 *charset = Vcharset_ucs;
1866 else if (c <= MAX_CHAR_96x96)
1869 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1870 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1871 CHARSET_LEFT_TO_RIGHT);
1872 if (!NILP (*charset))
1873 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1874 | (((c - MIN_CHAR_96x96) % 96) + 32);
1877 *charset = Vcharset_ucs;
1883 *charset = Vcharset_ucs;
1888 Lisp_Object Vdefault_coded_charset_priority_list;
1892 /************************************************************************/
1893 /* Basic charset Lisp functions */
1894 /************************************************************************/
1896 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1897 Return non-nil if OBJECT is a charset.
1901 return CHARSETP (object) ? Qt : Qnil;
1904 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1905 Retrieve the charset of the given name.
1906 If CHARSET-OR-NAME is a charset object, it is simply returned.
1907 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1908 nil is returned. Otherwise the associated charset object is returned.
1912 if (CHARSETP (charset_or_name))
1913 return charset_or_name;
1915 CHECK_SYMBOL (charset_or_name);
1916 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1919 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1920 Retrieve the charset of the given name.
1921 Same as `find-charset' except an error is signalled if there is no such
1922 charset instead of returning nil.
1926 Lisp_Object charset = Ffind_charset (name);
1929 signal_simple_error ("No such charset", name);
1933 /* We store the charsets in hash tables with the names as the key and the
1934 actual charset object as the value. Occasionally we need to use them
1935 in a list format. These routines provide us with that. */
1936 struct charset_list_closure
1938 Lisp_Object *charset_list;
1942 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1943 void *charset_list_closure)
1945 /* This function can GC */
1946 struct charset_list_closure *chcl =
1947 (struct charset_list_closure*) charset_list_closure;
1948 Lisp_Object *charset_list = chcl->charset_list;
1950 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1954 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1955 Return a list of the names of all defined charsets.
1959 Lisp_Object charset_list = Qnil;
1960 struct gcpro gcpro1;
1961 struct charset_list_closure charset_list_closure;
1963 GCPRO1 (charset_list);
1964 charset_list_closure.charset_list = &charset_list;
1965 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1966 &charset_list_closure);
1969 return charset_list;
1972 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1973 Return the name of the given charset.
1977 return XCHARSET_NAME (Fget_charset (charset));
1980 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1981 Define a new character set.
1982 This function is for use with Mule support.
1983 NAME is a symbol, the name by which the character set is normally referred.
1984 DOC-STRING is a string describing the character set.
1985 PROPS is a property list, describing the specific nature of the
1986 character set. Recognized properties are:
1988 'short-name Short version of the charset name (ex: Latin-1)
1989 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1990 'registry A regular expression matching the font registry field for
1992 'dimension Number of octets used to index a character in this charset.
1993 Either 1 or 2. Defaults to 1.
1994 'columns Number of columns used to display a character in this charset.
1995 Only used in TTY mode. (Under X, the actual width of a
1996 character can be derived from the font used to display the
1997 characters.) If unspecified, defaults to the dimension
1998 (this is almost always the correct value).
1999 'chars Number of characters in each dimension (94 or 96).
2000 Defaults to 94. Note that if the dimension is 2, the
2001 character set thus described is 94x94 or 96x96.
2002 'final Final byte of ISO 2022 escape sequence. Must be
2003 supplied. Each combination of (DIMENSION, CHARS) defines a
2004 separate namespace for final bytes. Note that ISO
2005 2022 restricts the final byte to the range
2006 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2007 dimension == 2. Note also that final bytes in the range
2008 0x30 - 0x3F are reserved for user-defined (not official)
2010 'graphic 0 (use left half of font on output) or 1 (use right half
2011 of font on output). Defaults to 0. For example, for
2012 a font whose registry is ISO8859-1, the left half
2013 (octets 0x20 - 0x7F) is the `ascii' character set, while
2014 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2015 character set. With 'graphic set to 0, the octets
2016 will have their high bit cleared; with it set to 1,
2017 the octets will have their high bit set.
2018 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2020 'ccl-program A compiled CCL program used to convert a character in
2021 this charset into an index into the font. This is in
2022 addition to the 'graphic property. The CCL program
2023 is passed the octets of the character, with the high
2024 bit cleared and set depending upon whether the value
2025 of the 'graphic property is 0 or 1.
2027 (name, doc_string, props))
2029 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2030 int direction = CHARSET_LEFT_TO_RIGHT;
2032 Lisp_Object registry = Qnil;
2033 Lisp_Object charset;
2034 Lisp_Object rest, keyword, value;
2035 Lisp_Object ccl_program = Qnil;
2036 Lisp_Object short_name = Qnil, long_name = Qnil;
2037 int byte_offset = -1;
2039 CHECK_SYMBOL (name);
2040 if (!NILP (doc_string))
2041 CHECK_STRING (doc_string);
2043 charset = Ffind_charset (name);
2044 if (!NILP (charset))
2045 signal_simple_error ("Cannot redefine existing charset", name);
2047 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2049 if (EQ (keyword, Qshort_name))
2051 CHECK_STRING (value);
2055 if (EQ (keyword, Qlong_name))
2057 CHECK_STRING (value);
2061 else if (EQ (keyword, Qdimension))
2064 dimension = XINT (value);
2065 if (dimension < 1 || dimension > 2)
2066 signal_simple_error ("Invalid value for 'dimension", value);
2069 else if (EQ (keyword, Qchars))
2072 chars = XINT (value);
2073 if (chars != 94 && chars != 96)
2074 signal_simple_error ("Invalid value for 'chars", value);
2077 else if (EQ (keyword, Qcolumns))
2080 columns = XINT (value);
2081 if (columns != 1 && columns != 2)
2082 signal_simple_error ("Invalid value for 'columns", value);
2085 else if (EQ (keyword, Qgraphic))
2088 graphic = XINT (value);
2090 if (graphic < 0 || graphic > 2)
2092 if (graphic < 0 || graphic > 1)
2094 signal_simple_error ("Invalid value for 'graphic", value);
2097 else if (EQ (keyword, Qregistry))
2099 CHECK_STRING (value);
2103 else if (EQ (keyword, Qdirection))
2105 if (EQ (value, Ql2r))
2106 direction = CHARSET_LEFT_TO_RIGHT;
2107 else if (EQ (value, Qr2l))
2108 direction = CHARSET_RIGHT_TO_LEFT;
2110 signal_simple_error ("Invalid value for 'direction", value);
2113 else if (EQ (keyword, Qfinal))
2115 CHECK_CHAR_COERCE_INT (value);
2116 final = XCHAR (value);
2117 if (final < '0' || final > '~')
2118 signal_simple_error ("Invalid value for 'final", value);
2121 else if (EQ (keyword, Qccl_program))
2123 CHECK_VECTOR (value);
2124 ccl_program = value;
2128 signal_simple_error ("Unrecognized property", keyword);
2132 error ("'final must be specified");
2133 if (dimension == 2 && final > 0x5F)
2135 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2139 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2141 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2143 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2144 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2146 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2148 id = get_unallocated_leading_byte (dimension);
2150 if (NILP (doc_string))
2151 doc_string = build_string ("");
2153 if (NILP (registry))
2154 registry = build_string ("");
2156 if (NILP (short_name))
2157 XSETSTRING (short_name, XSYMBOL (name)->name);
2159 if (NILP (long_name))
2160 long_name = doc_string;
2163 columns = dimension;
2165 if (byte_offset < 0)
2169 else if (chars == 96)
2175 charset = make_charset (id, name, chars, dimension, columns, graphic,
2176 final, direction, short_name, long_name,
2177 doc_string, registry,
2178 Qnil, 0, 0, 0, byte_offset);
2179 if (!NILP (ccl_program))
2180 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2184 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2186 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2187 NEW-NAME is the name of the new charset. Return the new charset.
2189 (charset, new_name))
2191 Lisp_Object new_charset = Qnil;
2192 int id, chars, dimension, columns, graphic, final;
2194 Lisp_Object registry, doc_string, short_name, long_name;
2197 charset = Fget_charset (charset);
2198 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2199 signal_simple_error ("Charset already has reverse-direction charset",
2202 CHECK_SYMBOL (new_name);
2203 if (!NILP (Ffind_charset (new_name)))
2204 signal_simple_error ("Cannot redefine existing charset", new_name);
2206 cs = XCHARSET (charset);
2208 chars = CHARSET_CHARS (cs);
2209 dimension = CHARSET_DIMENSION (cs);
2210 columns = CHARSET_COLUMNS (cs);
2211 id = get_unallocated_leading_byte (dimension);
2213 graphic = CHARSET_GRAPHIC (cs);
2214 final = CHARSET_FINAL (cs);
2215 direction = CHARSET_RIGHT_TO_LEFT;
2216 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2217 direction = CHARSET_LEFT_TO_RIGHT;
2218 doc_string = CHARSET_DOC_STRING (cs);
2219 short_name = CHARSET_SHORT_NAME (cs);
2220 long_name = CHARSET_LONG_NAME (cs);
2221 registry = CHARSET_REGISTRY (cs);
2223 new_charset = make_charset (id, new_name, chars, dimension, columns,
2224 graphic, final, direction, short_name, long_name,
2225 doc_string, registry,
2227 CHARSET_DECODING_TABLE(cs),
2228 CHARSET_UCS_MIN(cs),
2229 CHARSET_UCS_MAX(cs),
2230 CHARSET_CODE_OFFSET(cs),
2231 CHARSET_BYTE_OFFSET(cs)
2237 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2238 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2243 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2244 Define symbol ALIAS as an alias for CHARSET.
2248 CHECK_SYMBOL (alias);
2249 charset = Fget_charset (charset);
2250 return Fputhash (alias, charset, Vcharset_hash_table);
2253 /* #### Reverse direction charsets not yet implemented. */
2255 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2257 Return the reverse-direction charset parallel to CHARSET, if any.
2258 This is the charset with the same properties (in particular, the same
2259 dimension, number of characters per dimension, and final byte) as
2260 CHARSET but whose characters are displayed in the opposite direction.
2264 charset = Fget_charset (charset);
2265 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2269 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2270 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2271 If DIRECTION is omitted, both directions will be checked (left-to-right
2272 will be returned if character sets exist for both directions).
2274 (dimension, chars, final, direction))
2276 int dm, ch, fi, di = -1;
2278 Lisp_Object obj = Qnil;
2280 CHECK_INT (dimension);
2281 dm = XINT (dimension);
2282 if (dm < 1 || dm > 2)
2283 signal_simple_error ("Invalid value for DIMENSION", dimension);
2287 if (ch != 94 && ch != 96)
2288 signal_simple_error ("Invalid value for CHARS", chars);
2290 CHECK_CHAR_COERCE_INT (final);
2292 if (fi < '0' || fi > '~')
2293 signal_simple_error ("Invalid value for FINAL", final);
2295 if (EQ (direction, Ql2r))
2296 di = CHARSET_LEFT_TO_RIGHT;
2297 else if (EQ (direction, Qr2l))
2298 di = CHARSET_RIGHT_TO_LEFT;
2299 else if (!NILP (direction))
2300 signal_simple_error ("Invalid value for DIRECTION", direction);
2302 if (dm == 2 && fi > 0x5F)
2304 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2307 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2309 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2313 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2315 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2318 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2321 return XCHARSET_NAME (obj);
2325 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2326 Return short name of CHARSET.
2330 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2333 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2334 Return long name of CHARSET.
2338 return XCHARSET_LONG_NAME (Fget_charset (charset));
2341 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2342 Return description of CHARSET.
2346 return XCHARSET_DOC_STRING (Fget_charset (charset));
2349 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2350 Return dimension of CHARSET.
2354 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2357 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2358 Return property PROP of CHARSET.
2359 Recognized properties are those listed in `make-charset', as well as
2360 'name and 'doc-string.
2366 charset = Fget_charset (charset);
2367 cs = XCHARSET (charset);
2369 CHECK_SYMBOL (prop);
2370 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2371 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2372 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2373 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2374 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2375 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2376 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2377 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2378 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2379 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2380 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2381 if (EQ (prop, Qdirection))
2382 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2383 if (EQ (prop, Qreverse_direction_charset))
2385 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2389 return XCHARSET_NAME (obj);
2391 signal_simple_error ("Unrecognized charset property name", prop);
2392 return Qnil; /* not reached */
2395 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2396 Return charset identification number of CHARSET.
2400 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2403 /* #### We need to figure out which properties we really want to
2406 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2407 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2409 (charset, ccl_program))
2411 charset = Fget_charset (charset);
2412 CHECK_VECTOR (ccl_program);
2413 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2418 invalidate_charset_font_caches (Lisp_Object charset)
2420 /* Invalidate font cache entries for charset on all devices. */
2421 Lisp_Object devcons, concons, hash_table;
2422 DEVICE_LOOP_NO_BREAK (devcons, concons)
2424 struct device *d = XDEVICE (XCAR (devcons));
2425 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2426 if (!UNBOUNDP (hash_table))
2427 Fclrhash (hash_table);
2431 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2432 Set the 'registry property of CHARSET to REGISTRY.
2434 (charset, registry))
2436 charset = Fget_charset (charset);
2437 CHECK_STRING (registry);
2438 XCHARSET_REGISTRY (charset) = registry;
2439 invalidate_charset_font_caches (charset);
2440 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2445 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2446 Return mapping-table of CHARSET.
2450 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2453 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2454 Set mapping-table of CHARSET to TABLE.
2458 struct Lisp_Charset *cs;
2459 Lisp_Object old_table;
2462 charset = Fget_charset (charset);
2463 cs = XCHARSET (charset);
2465 if (EQ (table, Qnil))
2467 CHARSET_DECODING_TABLE(cs) = table;
2470 else if (VECTORP (table))
2474 /* ad-hoc method for `ascii' */
2475 if ((CHARSET_CHARS (cs) == 94) &&
2476 (CHARSET_BYTE_OFFSET (cs) != 33))
2477 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2479 ccs_len = CHARSET_CHARS (cs);
2481 if (XVECTOR_LENGTH (table) > ccs_len)
2482 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2483 old_table = CHARSET_DECODING_TABLE(cs);
2484 CHARSET_DECODING_TABLE(cs) = table;
2487 signal_error (Qwrong_type_argument,
2488 list2 (build_translated_string ("vector-or-nil-p"),
2490 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2492 switch (CHARSET_DIMENSION (cs))
2495 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2497 Lisp_Object c = XVECTOR_DATA(table)[i];
2502 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2506 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2508 Lisp_Object v = XVECTOR_DATA(table)[i];
2514 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2516 CHARSET_DECODING_TABLE(cs) = old_table;
2517 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2519 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2521 Lisp_Object c = XVECTOR_DATA(v)[j];
2526 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2527 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2531 put_char_attribute (v, charset,
2532 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2541 /************************************************************************/
2542 /* Lisp primitives for working with characters */
2543 /************************************************************************/
2546 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2547 Make a character from CHARSET and code-point CODE.
2553 charset = Fget_charset (charset);
2556 if (XCHARSET_GRAPHIC (charset) == 1)
2558 return make_char (DECODE_CHAR (charset, c));
2562 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2563 Make a character from CHARSET and octets ARG1 and ARG2.
2564 ARG2 is required only for characters from two-dimensional charsets.
2565 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2566 character s with caron.
2568 (charset, arg1, arg2))
2572 int lowlim, highlim;
2574 charset = Fget_charset (charset);
2575 cs = XCHARSET (charset);
2577 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2578 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2580 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2582 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2583 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2586 /* It is useful (and safe, according to Olivier Galibert) to strip
2587 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2588 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2589 Latin 2 code of the character. */
2597 if (a1 < lowlim || a1 > highlim)
2598 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2600 if (CHARSET_DIMENSION (cs) == 1)
2604 ("Charset is of dimension one; second octet must be nil", arg2);
2605 return make_char (MAKE_CHAR (charset, a1, 0));
2614 a2 = XINT (arg2) & 0x7f;
2616 if (a2 < lowlim || a2 > highlim)
2617 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2619 return make_char (MAKE_CHAR (charset, a1, a2));
2622 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2623 Return the character set of char CH.
2627 CHECK_CHAR_COERCE_INT (ch);
2629 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2632 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2633 Return the octet numbered N (should be 0 or 1) of char CH.
2634 N defaults to 0 if omitted.
2638 Lisp_Object charset;
2641 CHECK_CHAR_COERCE_INT (ch);
2643 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2645 if (NILP (n) || EQ (n, Qzero))
2646 return make_int (octet0);
2647 else if (EQ (n, make_int (1)))
2648 return make_int (octet1);
2650 signal_simple_error ("Octet number must be 0 or 1", n);
2653 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2654 Return list of charset and one or two position-codes of CHAR.
2658 /* This function can GC */
2659 struct gcpro gcpro1, gcpro2;
2660 Lisp_Object charset = Qnil;
2661 Lisp_Object rc = Qnil;
2669 GCPRO2 (charset, rc);
2670 CHECK_CHAR_COERCE_INT (character);
2673 code_point = ENCODE_CHAR (XCHAR (character), charset);
2674 dimension = XCHARSET_DIMENSION (charset);
2675 while (dimension > 0)
2677 rc = Fcons (make_int (code_point & 255), rc);
2681 rc = Fcons (XCHARSET_NAME (charset), rc);
2683 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2685 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2687 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2691 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2700 #ifdef ENABLE_COMPOSITE_CHARS
2701 /************************************************************************/
2702 /* composite character functions */
2703 /************************************************************************/
2706 lookup_composite_char (Bufbyte *str, int len)
2708 Lisp_Object lispstr = make_string (str, len);
2709 Lisp_Object ch = Fgethash (lispstr,
2710 Vcomposite_char_string2char_hash_table,
2716 if (composite_char_row_next >= 128)
2717 signal_simple_error ("No more composite chars available", lispstr);
2718 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2719 composite_char_col_next);
2720 Fputhash (make_char (emch), lispstr,
2721 Vcomposite_char_char2string_hash_table);
2722 Fputhash (lispstr, make_char (emch),
2723 Vcomposite_char_string2char_hash_table);
2724 composite_char_col_next++;
2725 if (composite_char_col_next >= 128)
2727 composite_char_col_next = 32;
2728 composite_char_row_next++;
2737 composite_char_string (Emchar ch)
2739 Lisp_Object str = Fgethash (make_char (ch),
2740 Vcomposite_char_char2string_hash_table,
2742 assert (!UNBOUNDP (str));
2746 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2747 Convert a string into a single composite character.
2748 The character is the result of overstriking all the characters in
2753 CHECK_STRING (string);
2754 return make_char (lookup_composite_char (XSTRING_DATA (string),
2755 XSTRING_LENGTH (string)));
2758 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2759 Return a string of the characters comprising a composite character.
2767 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2768 signal_simple_error ("Must be composite char", ch);
2769 return composite_char_string (emch);
2771 #endif /* ENABLE_COMPOSITE_CHARS */
2774 /************************************************************************/
2775 /* initialization */
2776 /************************************************************************/
2779 syms_of_mule_charset (void)
2782 INIT_LRECORD_IMPLEMENTATION (byte_table);
2783 INIT_LRECORD_IMPLEMENTATION (char_id_table);
2785 INIT_LRECORD_IMPLEMENTATION (charset);
2787 DEFSUBR (Fcharsetp);
2788 DEFSUBR (Ffind_charset);
2789 DEFSUBR (Fget_charset);
2790 DEFSUBR (Fcharset_list);
2791 DEFSUBR (Fcharset_name);
2792 DEFSUBR (Fmake_charset);
2793 DEFSUBR (Fmake_reverse_direction_charset);
2794 /* DEFSUBR (Freverse_direction_charset); */
2795 DEFSUBR (Fdefine_charset_alias);
2796 DEFSUBR (Fcharset_from_attributes);
2797 DEFSUBR (Fcharset_short_name);
2798 DEFSUBR (Fcharset_long_name);
2799 DEFSUBR (Fcharset_description);
2800 DEFSUBR (Fcharset_dimension);
2801 DEFSUBR (Fcharset_property);
2802 DEFSUBR (Fcharset_id);
2803 DEFSUBR (Fset_charset_ccl_program);
2804 DEFSUBR (Fset_charset_registry);
2806 DEFSUBR (Fchar_attribute_alist);
2807 DEFSUBR (Fget_char_attribute);
2808 DEFSUBR (Fput_char_attribute);
2809 DEFSUBR (Fremove_char_attribute);
2810 DEFSUBR (Fdefine_char);
2811 DEFSUBR (Fchar_variants);
2812 DEFSUBR (Fget_composite_char);
2813 DEFSUBR (Fcharset_mapping_table);
2814 DEFSUBR (Fset_charset_mapping_table);
2818 DEFSUBR (Fdecode_char);
2820 DEFSUBR (Fmake_char);
2821 DEFSUBR (Fchar_charset);
2822 DEFSUBR (Fchar_octet);
2823 DEFSUBR (Fsplit_char);
2825 #ifdef ENABLE_COMPOSITE_CHARS
2826 DEFSUBR (Fmake_composite_char);
2827 DEFSUBR (Fcomposite_char_string);
2830 defsymbol (&Qcharsetp, "charsetp");
2831 defsymbol (&Qregistry, "registry");
2832 defsymbol (&Qfinal, "final");
2833 defsymbol (&Qgraphic, "graphic");
2834 defsymbol (&Qdirection, "direction");
2835 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2836 defsymbol (&Qshort_name, "short-name");
2837 defsymbol (&Qlong_name, "long-name");
2839 defsymbol (&Ql2r, "l2r");
2840 defsymbol (&Qr2l, "r2l");
2842 /* Charsets, compatible with FSF 20.3
2843 Naming convention is Script-Charset[-Edition] */
2844 defsymbol (&Qascii, "ascii");
2845 defsymbol (&Qcontrol_1, "control-1");
2846 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2847 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2848 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2849 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2850 defsymbol (&Qthai_tis620, "thai-tis620");
2851 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2852 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2853 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2854 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2855 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2856 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2857 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2858 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2859 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2860 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2861 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2862 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2863 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2864 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2865 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2867 defsymbol (&Q_ucs, "->ucs");
2868 defsymbol (&Q_decomposition, "->decomposition");
2869 defsymbol (&Qcompat, "compat");
2870 defsymbol (&Qisolated, "isolated");
2871 defsymbol (&Qinitial, "initial");
2872 defsymbol (&Qmedial, "medial");
2873 defsymbol (&Qfinal, "final");
2874 defsymbol (&Qvertical, "vertical");
2875 defsymbol (&QnoBreak, "noBreak");
2876 defsymbol (&Qfraction, "fraction");
2877 defsymbol (&Qsuper, "super");
2878 defsymbol (&Qsub, "sub");
2879 defsymbol (&Qcircle, "circle");
2880 defsymbol (&Qsquare, "square");
2881 defsymbol (&Qwide, "wide");
2882 defsymbol (&Qnarrow, "narrow");
2883 defsymbol (&Qsmall, "small");
2884 defsymbol (&Qfont, "font");
2885 defsymbol (&Qucs, "ucs");
2886 defsymbol (&Qucs_bmp, "ucs-bmp");
2887 defsymbol (&Qlatin_viscii, "latin-viscii");
2888 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2889 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2890 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2891 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2892 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2893 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2894 defsymbol (&Qmojikyo, "mojikyo");
2895 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2896 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2897 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2898 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2899 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2900 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2901 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2902 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2903 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2904 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2905 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2906 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2907 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2908 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2909 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2910 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2911 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2912 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2913 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2914 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2915 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2916 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2918 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2919 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2921 defsymbol (&Qcomposite, "composite");
2925 vars_of_mule_charset (void)
2932 chlook = xnew (struct charset_lookup);
2933 dumpstruct (&chlook, &charset_lookup_description);
2935 /* Table of charsets indexed by leading byte. */
2936 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2937 chlook->charset_by_leading_byte[i] = Qnil;
2940 /* Table of charsets indexed by type/final-byte. */
2941 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2942 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2943 chlook->charset_by_attributes[i][j] = Qnil;
2945 /* Table of charsets indexed by type/final-byte/direction. */
2946 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2947 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2948 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2949 chlook->charset_by_attributes[i][j][k] = Qnil;
2953 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2955 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2956 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2960 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2961 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2962 Leading-code of private TYPE9N charset of column-width 1.
2964 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2968 Vutf_2000_version = build_string("0.15 (Sangō)");
2969 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2970 Version number of UTF-2000.
2973 staticpro (&Vcharacter_attribute_table);
2974 Vcharacter_attribute_table = make_char_id_table (Qnil);
2976 staticpro (&Vcharacter_composition_table);
2977 Vcharacter_composition_table = make_char_id_table (Qnil);
2979 staticpro (&Vcharacter_variant_table);
2980 Vcharacter_variant_table = make_char_id_table (Qnil);
2982 Vdefault_coded_charset_priority_list = Qnil;
2983 DEFVAR_LISP ("default-coded-charset-priority-list",
2984 &Vdefault_coded_charset_priority_list /*
2985 Default order of preferred coded-character-sets.
2991 complex_vars_of_mule_charset (void)
2993 staticpro (&Vcharset_hash_table);
2994 Vcharset_hash_table =
2995 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2997 /* Predefined character sets. We store them into variables for
3001 staticpro (&Vcharset_ucs);
3003 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3004 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3005 build_string ("UCS"),
3006 build_string ("UCS"),
3007 build_string ("ISO/IEC 10646"),
3009 Qnil, 0, 0xFFFFFFF, 0, 0);
3010 staticpro (&Vcharset_ucs_bmp);
3012 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3013 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3014 build_string ("BMP"),
3015 build_string ("BMP"),
3016 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3017 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3018 Qnil, 0, 0xFFFF, 0, 0);
3020 # define MIN_CHAR_THAI 0
3021 # define MAX_CHAR_THAI 0
3022 # define MIN_CHAR_HEBREW 0
3023 # define MAX_CHAR_HEBREW 0
3024 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3025 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3027 staticpro (&Vcharset_ascii);
3029 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3030 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3031 build_string ("ASCII"),
3032 build_string ("ASCII)"),
3033 build_string ("ASCII (ISO646 IRV)"),
3034 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3035 Qnil, 0, 0x7F, 0, 0);
3036 staticpro (&Vcharset_control_1);
3037 Vcharset_control_1 =
3038 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3039 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3040 build_string ("C1"),
3041 build_string ("Control characters"),
3042 build_string ("Control characters 128-191"),
3044 Qnil, 0x80, 0x9F, 0, 0);
3045 staticpro (&Vcharset_latin_iso8859_1);
3046 Vcharset_latin_iso8859_1 =
3047 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3048 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3049 build_string ("Latin-1"),
3050 build_string ("ISO8859-1 (Latin-1)"),
3051 build_string ("ISO8859-1 (Latin-1)"),
3052 build_string ("iso8859-1"),
3053 Qnil, 0xA0, 0xFF, 0, 32);
3054 staticpro (&Vcharset_latin_iso8859_2);
3055 Vcharset_latin_iso8859_2 =
3056 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3057 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3058 build_string ("Latin-2"),
3059 build_string ("ISO8859-2 (Latin-2)"),
3060 build_string ("ISO8859-2 (Latin-2)"),
3061 build_string ("iso8859-2"),
3063 staticpro (&Vcharset_latin_iso8859_3);
3064 Vcharset_latin_iso8859_3 =
3065 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3066 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3067 build_string ("Latin-3"),
3068 build_string ("ISO8859-3 (Latin-3)"),
3069 build_string ("ISO8859-3 (Latin-3)"),
3070 build_string ("iso8859-3"),
3072 staticpro (&Vcharset_latin_iso8859_4);
3073 Vcharset_latin_iso8859_4 =
3074 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3075 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3076 build_string ("Latin-4"),
3077 build_string ("ISO8859-4 (Latin-4)"),
3078 build_string ("ISO8859-4 (Latin-4)"),
3079 build_string ("iso8859-4"),
3081 staticpro (&Vcharset_thai_tis620);
3082 Vcharset_thai_tis620 =
3083 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3084 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3085 build_string ("TIS620"),
3086 build_string ("TIS620 (Thai)"),
3087 build_string ("TIS620.2529 (Thai)"),
3088 build_string ("tis620"),
3089 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3090 staticpro (&Vcharset_greek_iso8859_7);
3091 Vcharset_greek_iso8859_7 =
3092 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3093 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3094 build_string ("ISO8859-7"),
3095 build_string ("ISO8859-7 (Greek)"),
3096 build_string ("ISO8859-7 (Greek)"),
3097 build_string ("iso8859-7"),
3099 0 /* MIN_CHAR_GREEK */,
3100 0 /* MAX_CHAR_GREEK */, 0, 32);
3101 staticpro (&Vcharset_arabic_iso8859_6);
3102 Vcharset_arabic_iso8859_6 =
3103 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3104 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3105 build_string ("ISO8859-6"),
3106 build_string ("ISO8859-6 (Arabic)"),
3107 build_string ("ISO8859-6 (Arabic)"),
3108 build_string ("iso8859-6"),
3110 staticpro (&Vcharset_hebrew_iso8859_8);
3111 Vcharset_hebrew_iso8859_8 =
3112 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3113 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3114 build_string ("ISO8859-8"),
3115 build_string ("ISO8859-8 (Hebrew)"),
3116 build_string ("ISO8859-8 (Hebrew)"),
3117 build_string ("iso8859-8"),
3118 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3119 staticpro (&Vcharset_katakana_jisx0201);
3120 Vcharset_katakana_jisx0201 =
3121 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3122 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3123 build_string ("JISX0201 Kana"),
3124 build_string ("JISX0201.1976 (Japanese Kana)"),
3125 build_string ("JISX0201.1976 Japanese Kana"),
3126 build_string ("jisx0201\\.1976"),
3128 staticpro (&Vcharset_latin_jisx0201);
3129 Vcharset_latin_jisx0201 =
3130 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3131 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3132 build_string ("JISX0201 Roman"),
3133 build_string ("JISX0201.1976 (Japanese Roman)"),
3134 build_string ("JISX0201.1976 Japanese Roman"),
3135 build_string ("jisx0201\\.1976"),
3137 staticpro (&Vcharset_cyrillic_iso8859_5);
3138 Vcharset_cyrillic_iso8859_5 =
3139 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3140 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3141 build_string ("ISO8859-5"),
3142 build_string ("ISO8859-5 (Cyrillic)"),
3143 build_string ("ISO8859-5 (Cyrillic)"),
3144 build_string ("iso8859-5"),
3146 0 /* MIN_CHAR_CYRILLIC */,
3147 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3148 staticpro (&Vcharset_latin_iso8859_9);
3149 Vcharset_latin_iso8859_9 =
3150 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3151 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3152 build_string ("Latin-5"),
3153 build_string ("ISO8859-9 (Latin-5)"),
3154 build_string ("ISO8859-9 (Latin-5)"),
3155 build_string ("iso8859-9"),
3157 staticpro (&Vcharset_japanese_jisx0208_1978);
3158 Vcharset_japanese_jisx0208_1978 =
3159 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3160 Qjapanese_jisx0208_1978, 94, 2,
3161 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3162 build_string ("JIS X0208:1978"),
3163 build_string ("JIS X0208:1978 (Japanese)"),
3165 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3166 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3168 staticpro (&Vcharset_chinese_gb2312);
3169 Vcharset_chinese_gb2312 =
3170 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3171 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3172 build_string ("GB2312"),
3173 build_string ("GB2312)"),
3174 build_string ("GB2312 Chinese simplified"),
3175 build_string ("gb2312"),
3177 staticpro (&Vcharset_japanese_jisx0208);
3178 Vcharset_japanese_jisx0208 =
3179 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3180 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("JISX0208"),
3182 build_string ("JIS X0208:1983 (Japanese)"),
3183 build_string ("JIS X0208:1983 Japanese Kanji"),
3184 build_string ("jisx0208\\.1983"),
3187 staticpro (&Vcharset_japanese_jisx0208_1990);
3188 Vcharset_japanese_jisx0208_1990 =
3189 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3190 Qjapanese_jisx0208_1990, 94, 2,
3191 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3192 build_string ("JISX0208-1990"),
3193 build_string ("JIS X0208:1990 (Japanese)"),
3194 build_string ("JIS X0208:1990 Japanese Kanji"),
3195 build_string ("jisx0208\\.1990"),
3197 MIN_CHAR_JIS_X0208_1990,
3198 MAX_CHAR_JIS_X0208_1990, 0, 33);
3200 staticpro (&Vcharset_korean_ksc5601);
3201 Vcharset_korean_ksc5601 =
3202 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3203 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3204 build_string ("KSC5601"),
3205 build_string ("KSC5601 (Korean"),
3206 build_string ("KSC5601 Korean Hangul and Hanja"),
3207 build_string ("ksc5601"),
3209 staticpro (&Vcharset_japanese_jisx0212);
3210 Vcharset_japanese_jisx0212 =
3211 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3212 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3213 build_string ("JISX0212"),
3214 build_string ("JISX0212 (Japanese)"),
3215 build_string ("JISX0212 Japanese Supplement"),
3216 build_string ("jisx0212"),
3219 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3220 staticpro (&Vcharset_chinese_cns11643_1);
3221 Vcharset_chinese_cns11643_1 =
3222 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3223 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3224 build_string ("CNS11643-1"),
3225 build_string ("CNS11643-1 (Chinese traditional)"),
3227 ("CNS 11643 Plane 1 Chinese traditional"),
3228 build_string (CHINESE_CNS_PLANE_RE("1")),
3230 staticpro (&Vcharset_chinese_cns11643_2);
3231 Vcharset_chinese_cns11643_2 =
3232 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3233 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3234 build_string ("CNS11643-2"),
3235 build_string ("CNS11643-2 (Chinese traditional)"),
3237 ("CNS 11643 Plane 2 Chinese traditional"),
3238 build_string (CHINESE_CNS_PLANE_RE("2")),
3241 staticpro (&Vcharset_latin_tcvn5712);
3242 Vcharset_latin_tcvn5712 =
3243 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3244 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3245 build_string ("TCVN 5712"),
3246 build_string ("TCVN 5712 (VSCII-2)"),
3247 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3248 build_string ("tcvn5712-1"),
3250 staticpro (&Vcharset_latin_viscii_lower);
3251 Vcharset_latin_viscii_lower =
3252 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3253 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3254 build_string ("VISCII lower"),
3255 build_string ("VISCII lower (Vietnamese)"),
3256 build_string ("VISCII lower (Vietnamese)"),
3257 build_string ("MULEVISCII-LOWER"),
3259 staticpro (&Vcharset_latin_viscii_upper);
3260 Vcharset_latin_viscii_upper =
3261 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3262 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3263 build_string ("VISCII upper"),
3264 build_string ("VISCII upper (Vietnamese)"),
3265 build_string ("VISCII upper (Vietnamese)"),
3266 build_string ("MULEVISCII-UPPER"),
3268 staticpro (&Vcharset_latin_viscii);
3269 Vcharset_latin_viscii =
3270 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3271 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3272 build_string ("VISCII"),
3273 build_string ("VISCII 1.1 (Vietnamese)"),
3274 build_string ("VISCII 1.1 (Vietnamese)"),
3275 build_string ("VISCII1\\.1"),
3277 staticpro (&Vcharset_ideograph_daikanwa);
3278 Vcharset_ideograph_daikanwa =
3279 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3280 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3281 build_string ("Daikanwa"),
3282 build_string ("Morohashi's Daikanwa"),
3283 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3284 build_string ("Daikanwa"),
3285 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3286 staticpro (&Vcharset_mojikyo);
3288 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3289 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3290 build_string ("Mojikyo"),
3291 build_string ("Mojikyo"),
3292 build_string ("Konjaku-Mojikyo"),
3294 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3295 staticpro (&Vcharset_mojikyo_pj_1);
3296 Vcharset_mojikyo_pj_1 =
3297 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3298 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3299 build_string ("Mojikyo-PJ-1"),
3300 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3302 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3303 build_string ("jisx0208\\.Mojikyo-1$"),
3305 staticpro (&Vcharset_mojikyo_pj_2);
3306 Vcharset_mojikyo_pj_2 =
3307 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3308 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Mojikyo-PJ-2"),
3310 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3312 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3313 build_string ("jisx0208\\.Mojikyo-2$"),
3315 staticpro (&Vcharset_mojikyo_pj_3);
3316 Vcharset_mojikyo_pj_3 =
3317 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3318 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3319 build_string ("Mojikyo-PJ-3"),
3320 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3322 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3323 build_string ("jisx0208\\.Mojikyo-3$"),
3325 staticpro (&Vcharset_mojikyo_pj_4);
3326 Vcharset_mojikyo_pj_4 =
3327 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3328 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3329 build_string ("Mojikyo-PJ-4"),
3330 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3332 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3333 build_string ("jisx0208\\.Mojikyo-4$"),
3335 staticpro (&Vcharset_mojikyo_pj_5);
3336 Vcharset_mojikyo_pj_5 =
3337 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3338 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Mojikyo-PJ-5"),
3340 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3342 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3343 build_string ("jisx0208\\.Mojikyo-5$"),
3345 staticpro (&Vcharset_mojikyo_pj_6);
3346 Vcharset_mojikyo_pj_6 =
3347 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3348 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3349 build_string ("Mojikyo-PJ-6"),
3350 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3352 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3353 build_string ("jisx0208\\.Mojikyo-6$"),
3355 staticpro (&Vcharset_mojikyo_pj_7);
3356 Vcharset_mojikyo_pj_7 =
3357 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3358 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3359 build_string ("Mojikyo-PJ-7"),
3360 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3362 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3363 build_string ("jisx0208\\.Mojikyo-7$"),
3365 staticpro (&Vcharset_mojikyo_pj_8);
3366 Vcharset_mojikyo_pj_8 =
3367 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3368 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3369 build_string ("Mojikyo-PJ-8"),
3370 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3372 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3373 build_string ("jisx0208\\.Mojikyo-8$"),
3375 staticpro (&Vcharset_mojikyo_pj_9);
3376 Vcharset_mojikyo_pj_9 =
3377 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3378 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3379 build_string ("Mojikyo-PJ-9"),
3380 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3382 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3383 build_string ("jisx0208\\.Mojikyo-9$"),
3385 staticpro (&Vcharset_mojikyo_pj_10);
3386 Vcharset_mojikyo_pj_10 =
3387 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3388 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3389 build_string ("Mojikyo-PJ-10"),
3390 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3392 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3393 build_string ("jisx0208\\.Mojikyo-10$"),
3395 staticpro (&Vcharset_mojikyo_pj_11);
3396 Vcharset_mojikyo_pj_11 =
3397 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3398 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3399 build_string ("Mojikyo-PJ-11"),
3400 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3402 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3403 build_string ("jisx0208\\.Mojikyo-11$"),
3405 staticpro (&Vcharset_mojikyo_pj_12);
3406 Vcharset_mojikyo_pj_12 =
3407 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3408 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3409 build_string ("Mojikyo-PJ-12"),
3410 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3412 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3413 build_string ("jisx0208\\.Mojikyo-12$"),
3415 staticpro (&Vcharset_mojikyo_pj_13);
3416 Vcharset_mojikyo_pj_13 =
3417 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3418 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3419 build_string ("Mojikyo-PJ-13"),
3420 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3422 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3423 build_string ("jisx0208\\.Mojikyo-13$"),
3425 staticpro (&Vcharset_mojikyo_pj_14);
3426 Vcharset_mojikyo_pj_14 =
3427 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3428 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3429 build_string ("Mojikyo-PJ-14"),
3430 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3432 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3433 build_string ("jisx0208\\.Mojikyo-14$"),
3435 staticpro (&Vcharset_mojikyo_pj_15);
3436 Vcharset_mojikyo_pj_15 =
3437 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3438 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3439 build_string ("Mojikyo-PJ-15"),
3440 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3442 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3443 build_string ("jisx0208\\.Mojikyo-15$"),
3445 staticpro (&Vcharset_mojikyo_pj_16);
3446 Vcharset_mojikyo_pj_16 =
3447 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3448 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3449 build_string ("Mojikyo-PJ-16"),
3450 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3452 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3453 build_string ("jisx0208\\.Mojikyo-16$"),
3455 staticpro (&Vcharset_mojikyo_pj_17);
3456 Vcharset_mojikyo_pj_17 =
3457 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3458 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3459 build_string ("Mojikyo-PJ-17"),
3460 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3462 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3463 build_string ("jisx0208\\.Mojikyo-17$"),
3465 staticpro (&Vcharset_mojikyo_pj_18);
3466 Vcharset_mojikyo_pj_18 =
3467 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3468 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3469 build_string ("Mojikyo-PJ-18"),
3470 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3472 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3473 build_string ("jisx0208\\.Mojikyo-18$"),
3475 staticpro (&Vcharset_mojikyo_pj_19);
3476 Vcharset_mojikyo_pj_19 =
3477 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3478 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3479 build_string ("Mojikyo-PJ-19"),
3480 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3482 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3483 build_string ("jisx0208\\.Mojikyo-19$"),
3485 staticpro (&Vcharset_mojikyo_pj_20);
3486 Vcharset_mojikyo_pj_20 =
3487 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3488 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3489 build_string ("Mojikyo-PJ-20"),
3490 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3492 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3493 build_string ("jisx0208\\.Mojikyo-20$"),
3495 staticpro (&Vcharset_mojikyo_pj_21);
3496 Vcharset_mojikyo_pj_21 =
3497 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3498 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3499 build_string ("Mojikyo-PJ-21"),
3500 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3502 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3503 build_string ("jisx0208\\.Mojikyo-21$"),
3505 staticpro (&Vcharset_ethiopic_ucs);
3506 Vcharset_ethiopic_ucs =
3507 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3508 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3509 build_string ("Ethiopic (UCS)"),
3510 build_string ("Ethiopic (UCS)"),
3511 build_string ("Ethiopic of UCS"),
3512 build_string ("Ethiopic-Unicode"),
3513 Qnil, 0x1200, 0x137F, 0x1200, 0);
3515 staticpro (&Vcharset_chinese_big5_1);
3516 Vcharset_chinese_big5_1 =
3517 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3518 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3519 build_string ("Big5"),
3520 build_string ("Big5 (Level-1)"),
3522 ("Big5 Level-1 Chinese traditional"),
3523 build_string ("big5"),
3525 staticpro (&Vcharset_chinese_big5_2);
3526 Vcharset_chinese_big5_2 =
3527 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3528 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3529 build_string ("Big5"),
3530 build_string ("Big5 (Level-2)"),
3532 ("Big5 Level-2 Chinese traditional"),
3533 build_string ("big5"),
3536 #ifdef ENABLE_COMPOSITE_CHARS
3537 /* #### For simplicity, we put composite chars into a 96x96 charset.
3538 This is going to lead to problems because you can run out of
3539 room, esp. as we don't yet recycle numbers. */
3540 staticpro (&Vcharset_composite);
3541 Vcharset_composite =
3542 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3543 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3544 build_string ("Composite"),
3545 build_string ("Composite characters"),
3546 build_string ("Composite characters"),
3549 /* #### not dumped properly */
3550 composite_char_row_next = 32;
3551 composite_char_col_next = 32;
3553 Vcomposite_char_string2char_hash_table =
3554 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3555 Vcomposite_char_char2string_hash_table =
3556 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3557 staticpro (&Vcomposite_char_string2char_hash_table);
3558 staticpro (&Vcomposite_char_char2string_hash_table);
3559 #endif /* ENABLE_COMPOSITE_CHARS */