/* Functions to handle multilingual characters.
   Copyright (C) 1992, 1995 Free Software Foundation, Inc.
   Copyright (C) 1995 Sun Microsystems, Inc.
   Copyright (C) 1999,2000 MORIOKA Tomohiko

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
/* Hash tables for composite chars.  One maps string representing
   composed chars to their equivalent chars; one goes the
   other way. */
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
/* Field description table for struct charset_lookup.  The entry visible
   here covers the charset_by_leading_byte member, described as an array
   of Lisp objects.  */
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Struct description for struct charset_lookup: its size paired with
   the field description table charset_lookup_description_1.  */
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table with 0xA0 entries, indexed by the first byte of a
   character's string representation; each entry is the total number of
   bytes in that representation.  */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for the 0x80 - 0x8f range,
   differing only in the last entry (3 vs. 2).  Presumably they are
   alternatives selected by a preprocessor conditional that is not
   visible in this view -- confirm; taken together the rows would
   exceed the declared 0xA0 entries.  */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark function for byte tables: calls mark_object() on each of the
   256 slots of the table's property array.  */
163 mark_byte_table (Lisp_Object obj)
165 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
/* Compare two byte tables slot by slot over all 256 slots.  When a
   slot of OBJ1 holds a byte table and the matching slot of OBJ2 does
   too, the pair is compared recursively with byte_table_equal();
   slots of OBJ1 that are not byte tables are compared with
   internal_equal().  Nested comparisons receive DEPTH + 1.  */
176 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
179 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (BYTE_TABLE_P (cte1->property[i]))
185 if (BYTE_TABLE_P (cte2->property[i]))
187 if (!byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash a byte table: the result is internal_array_hash() applied to
   the 256-element property array at the given DEPTH.  */
201 byte_table_hash (Lisp_Object obj, int depth)
203 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
/* Description of Lisp_Byte_Table: the property member, with a count
   of 256.  */
208 static const struct lrecord_description byte_table_description[] = {
209 { XD_LISP_OBJECT, offsetof(Lisp_Byte_Table, property), 256 },
/* Define the lrecord implementation named "byte-table"; the arguments
   visible here are internal_object_printer and the description
   above.  */
213 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
215 internal_object_printer,
218 byte_table_description,
/* Allocate a new Lisp_Byte_Table record, set all 256 property slots to
   INITVAL, and wrap the record into OBJ with XSETBYTE_TABLE.  */
222 make_byte_table (Lisp_Object initval)
227 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
229 for (i = 0; i < 256; i++)
230 cte->property[i] = initval;
232 XSETBYTE_TABLE (obj, cte);
/* Copy byte table ENTRY into a newly allocated byte table.  Slots that
   hold a byte table are copied recursively with copy_byte_table(); the
   other visible assignment stores the slot value itself, so non-table
   values end up shared between the original and the copy.  */
237 copy_byte_table (Lisp_Object entry)
239 Lisp_Byte_Table *cte = XBYTE_TABLE (entry);
242 Lisp_Byte_Table *ctenew
243 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_byte_table (new);
251 ctenew->property[i] = new;
254 XSETBYTE_TABLE (obj, ctenew);
/* Mark function for char-id tables.  Only the unwrapping of OBJ into a
   Lisp_Char_ID_Table is visible in this view; the rest of the body is
   not shown.  */
260 mark_char_id_table (Lisp_Object obj)
262 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
/* Two char-id tables are equal when the byte tables they wrap (the
   table member) compare equal under byte_table_equal(), which is
   called with DEPTH + 1.  */
268 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 Lisp_Char_ID_Table *cte1 = XCHAR_ID_TABLE (obj1);
271 Lisp_Char_ID_Table *cte2 = XCHAR_ID_TABLE (obj2);
273 return byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_id_table_hash (Lisp_Object obj, int depth)
279 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
281 return char_id_table_hash (cte->table, depth + 1);
/* Description of Lisp_Char_ID_Table: the table member, with a count
   of 1.  */
284 static const struct lrecord_description char_id_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table), 1 },
/* Define the lrecord implementation named "char-id-table"; the
   arguments visible here are internal_object_printer,
   char_id_table_equal and the description above.  */
289 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
291 internal_object_printer,
292 0, char_id_table_equal,
294 char_id_table_description,
/* Allocate a new Lisp_Char_ID_Table record whose table member is a
   fresh byte table filled with INITVAL (see make_byte_table), and wrap
   the record into OBJ with XSETCHAR_ID_TABLE.  */
298 make_char_id_table (Lisp_Object initval)
301 Lisp_Char_ID_Table *cte
302 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
304 cte->table = make_byte_table (initval);
306 XSETCHAR_ID_TABLE (obj, cte);
/* Copy char-id table ENTRY: allocate a new Lisp_Char_ID_Table record
   whose table member is a copy_byte_table() copy of ENTRY's byte
   table, and wrap it into OBJ with XSETCHAR_ID_TABLE.  */
311 copy_char_id_table (Lisp_Object entry)
313 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (entry);
315 Lisp_Char_ID_Table *ctenew
316 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
318 ctenew->table = copy_byte_table (cte->table);
319 XSETCHAR_ID_TABLE (obj, ctenew);
/* Look up character CH in char-id table TABLE.  CH's code is consumed
   one byte at a time, most significant byte first.  Each byte indexes
   the current byte table; when the entry found is itself a byte table,
   the lookup continues inside it with the next byte.  The final return
   yields the entry selected by the lowest byte.  */
325 get_char_id_table (Emchar ch, Lisp_Object table)
327 unsigned int code = ch;
329 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
/* Top level: bits 24-31 of the code.  */
330 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
332 if (BYTE_TABLE_P (ret))
333 cpt = XBYTE_TABLE (ret);
/* Second level: bits 16-23.  */
337 ret = cpt->property [(unsigned char) (code >> 16)];
338 if (BYTE_TABLE_P (ret))
339 cpt = XBYTE_TABLE (ret);
/* Third level: bits 8-15.  */
343 ret = cpt->property [(unsigned char) (code >> 8)];
344 if (BYTE_TABLE_P (ret))
345 cpt = XBYTE_TABLE (ret);
/* Last level: bits 0-7.  */
349 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in char-id table TABLE.

   The table is walked like in get_char_id_table(): the four bytes of
   CH's code, most significant first, index successive byte tables
   (cpt1 .. cpt4).  When a level holds a byte table the walk descends.
   When a level instead holds a plain value that differs from VALUE,
   the missing lower-level byte tables are created with
   make_byte_table(), each pre-filled with that old value, VALUE is
   stored in the lowest one, and the new chain is hooked into the slot
   of the level where the walk stopped.  */
352 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
354 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
356 unsigned int code = ch;
357 Lisp_Byte_Table* cpt1 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
358 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
360 if (BYTE_TABLE_P (ret))
362 Lisp_Byte_Table* cpt2 = XBYTE_TABLE (ret);
364 ret = cpt2->property[(unsigned char)(code >> 16)];
365 if (BYTE_TABLE_P (ret))
367 Lisp_Byte_Table* cpt3 = XBYTE_TABLE (ret);
369 ret = cpt3->property[(unsigned char)(code >> 8)];
370 if (BYTE_TABLE_P (ret))
372 Lisp_Byte_Table* cpt4 = XBYTE_TABLE (ret);
/* All four levels exist: store directly.  */
374 cpt4->property[(unsigned char)code] = value;
/* Third level held a plain value: create the fourth-level table.  */
376 else if (!EQ (ret, value))
378 Lisp_Object cpt4 = make_byte_table (ret);
380 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
381 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
/* Second level held a plain value: create levels three and four.  */
384 else if (!EQ (ret, value))
386 Lisp_Object cpt3 = make_byte_table (ret);
387 Lisp_Object cpt4 = make_byte_table (ret);
389 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
390 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
392 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
/* Top level held a plain value: create levels two, three and four.  */
395 else if (!EQ (ret, value))
397 Lisp_Object cpt2 = make_byte_table (ret);
398 Lisp_Object cpt3 = make_byte_table (ret);
399 Lisp_Object cpt4 = make_byte_table (ret);
401 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
402 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
403 XBYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
404 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
409 Lisp_Object Vcharacter_attribute_table;
410 Lisp_Object Vcharacter_composition_table;
411 Lisp_Object Vcharacter_variant_table;
413 Lisp_Object Q_decomposition;
416 Lisp_Object Qisolated;
417 Lisp_Object Qinitial;
420 Lisp_Object Qvertical;
421 Lisp_Object QnoBreak;
422 Lisp_Object Qfraction;
/* Convert V to an Emchar.  The branches visible here compare V against
   a series of symbols (Qcompat, Qisolated, Qinitial, ... Qfont); the
   value produced for each symbol is not visible in this view.
   ERR_MSG and ERR_ARG are handed to signal_simple_error(), presumably
   when V matches none of the accepted forms -- confirm.  */
432 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
434 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
440 else if (EQ (v, Qcompat))
442 else if (EQ (v, Qisolated))
444 else if (EQ (v, Qinitial))
446 else if (EQ (v, Qmedial))
448 else if (EQ (v, Qfinal))
450 else if (EQ (v, Qvertical))
452 else if (EQ (v, QnoBreak))
454 else if (EQ (v, Qfraction))
456 else if (EQ (v, Qsuper))
458 else if (EQ (v, Qsub))
460 else if (EQ (v, Qcircle))
462 else if (EQ (v, Qsquare))
464 else if (EQ (v, Qwide))
466 else if (EQ (v, Qnarrow))
468 else if (EQ (v, Qsmall))
470 else if (EQ (v, Qfont))
473 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char' (exactly one argument).
   Starting from Vcharacter_composition_table, each element of LIST is
   converted with to_char_id() and looked up with get_char_id_table();
   the CHAR_ID_TABLE_P tests on the lookup result suggest that nested
   char-id tables are followed for the remaining elements (confirm).
   Errors are signalled with "Invalid table is found with" and
   "Invalid value for composition".  */
476 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
477 Return character corresponding with list.
481 Lisp_Object table = Vcharacter_composition_table;
482 Lisp_Object rest = list;
486 Lisp_Object v = Fcar (rest);
488 Emchar c = to_char_id (v, "Invalid value for composition", list);
490 ret = get_char_id_table (c, table);
495 if (!CHAR_ID_TABLE_P (ret))
500 else if (!CONSP (rest))
502 else if (CHAR_ID_TABLE_P (ret))
505 signal_simple_error ("Invalid table is found with", list);
507 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants' (exactly one argument).  Checks that
   CHARACTER is a character, then returns an Fcopy_list() copy of its
   entry in Vcharacter_variant_table.  */
510 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
511 Return variants of CHARACTER.
515 CHECK_CHAR (character);
516 return Fcopy_list (get_char_id_table (XCHAR (character),
517 Vcharacter_variant_table));
/* Lisp primitive `char-attribute-alist' (exactly one argument).
   Checks that CHARACTER is a character, then returns an Fcopy_alist()
   copy of its entry in Vcharacter_attribute_table.  */
520 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
521 Return the alist of attributes of CHARACTER.
525 CHECK_CHAR (character);
526 return Fcopy_alist (get_char_id_table (XCHAR (character),
527 Vcharacter_attribute_table));
/* Lisp primitive `get-char-attribute' (exactly two arguments).
   Fetches CHARACTER's entry from Vcharacter_attribute_table.
   ATTRIBUTE is also resolved with Ffind_charset(); what is done when
   that succeeds is not visible in this view.  The visible return
   yields the cdr of the Fassq() match for ATTRIBUTE in the entry.  */
530 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
531 Return the value of CHARACTER's ATTRIBUTE.
533 (character, attribute))
538 CHECK_CHAR (character);
539 ret = get_char_id_table (XCHAR (character), Vcharacter_attribute_table);
543 if (!NILP (ccs = Ffind_charset (attribute)))
546 return Fcdr (Fassq (attribute, ret));
/* Set ATTRIBUTE to VALUE in CHARACTER's attribute alist.  The alist is
   fetched from Vcharacter_attribute_table and searched with Fassq().
   The visible code either conses a new (ATTRIBUTE . VALUE) pair onto
   the front of the alist, or replaces the cdr of the existing cell
   when it is not already EQ to VALUE.  The alist is written back with
   put_char_id_table().  */
549 Lisp_Object put_char_attribute (Lisp_Object character,
550 Lisp_Object attribute, Lisp_Object value);
552 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
555 Emchar char_id = XCHAR (character);
556 Lisp_Object ret = get_char_id_table (char_id, Vcharacter_attribute_table);
559 cell = Fassq (attribute, ret);
563 ret = Fcons (Fcons (attribute, value), ret);
565 else if (!EQ (Fcdr (cell), value))
567 Fsetcdr (cell, value);
569 put_char_id_table (char_id, ret, Vcharacter_attribute_table);
/* Remove ATTRIBUTE from CHARACTER's attribute alist.  When the first
   pair's key is ATTRIBUTE the head of the alist is dropped.  Otherwise
   the list is walked with R and a trailing pointer PR, and a matching
   cell is unlinked by redirecting XCDR (pr) past it.  The alist is
   written back with put_char_id_table().  */
573 Lisp_Object remove_char_attribute (Lisp_Object character,
574 Lisp_Object attribute);
576 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
578 Emchar char_id = XCHAR (character);
579 Lisp_Object alist = get_char_id_table (char_id, Vcharacter_attribute_table);
581 if (EQ (attribute, Fcar (Fcar (alist))))
583 alist = Fcdr (alist);
587 Lisp_Object pr = alist;
588 Lisp_Object r = Fcdr (alist);
592 if (EQ (attribute, Fcar (Fcar (r))))
594 XCDR (pr) = Fcdr (r);
601 put_char_id_table (char_id, alist, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute' (exactly three arguments).

   The visible code handles three kinds of ATTRIBUTE before storing the
   pair with put_char_attribute():
   - a charset (found by Ffind_charset): VALUE is normalized to an
     integer code point, any previous position of CHARACTER in the
     charset's decoding table is cleared, and CHARACTER is stored at
     the new position (vectors are created with make_vector as needed);
   - Q_decomposition: for a multi-element VALUE the composition table
     is extended so the element sequence leads to CHARACTER; otherwise
     CHARACTER is added to a variant list;
   - Q_ucs: CHARACTER is added to the variant list of the given code
     unless already present.
   Much of the control flow is not visible in this view, so the summary
   above is limited to what the remaining lines show.  */
607 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
608 Store CHARACTER's ATTRIBUTE with VALUE.
610 (character, attribute, value))
614 CHECK_CHAR (character);
615 ccs = Ffind_charset (attribute);
618 if (!EQ (XCHARSET_NAME (ccs), Qucs)
619 || (XCHAR (character) != XINT (value)))
621 Lisp_Object cpos, rest;
622 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
629 /* ad-hoc method for `ascii' */
630 if ((XCHARSET_CHARS (ccs) == 94) &&
631 (XCHARSET_BYTE_OFFSET (ccs) != 33))
632 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
634 ccs_len = XCHARSET_CHARS (ccs);
638 Lisp_Object ret = Fcar (value);
641 signal_simple_error ("Invalid value for coded-charset", value);
642 code_point = XINT (ret);
643 if (XCHARSET_GRAPHIC (ccs) == 1)
651 signal_simple_error ("Invalid value for coded-charset",
655 signal_simple_error ("Invalid value for coded-charset",
658 if (XCHARSET_GRAPHIC (ccs) == 1)
660 code_point = (code_point << 8) | j;
663 value = make_int (code_point);
665 else if (INTP (value))
667 if (XCHARSET_GRAPHIC (ccs) == 1)
668 value = make_int (XINT (value) & 0x7F7F7F7F);
671 signal_simple_error ("Invalid value for coded-charset", value);
674 cpos = Fget_char_attribute (character, attribute);
679 dim = XCHARSET_DIMENSION (ccs);
680 code_point = XINT (cpos);
684 i = ((code_point >> (8 * dim)) & 255)
685 - XCHARSET_BYTE_OFFSET (ccs);
686 nv = XVECTOR_DATA(v)[i];
692 XVECTOR_DATA(v)[i] = Qnil;
693 v = XCHARSET_DECODING_TABLE (ccs);
698 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
701 dim = XCHARSET_DIMENSION (ccs);
702 code_point = XINT (value);
707 i = ((code_point >> (8 * dim)) & 255)
708 - XCHARSET_BYTE_OFFSET (ccs);
709 nv = XVECTOR_DATA(v)[i];
713 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
719 XVECTOR_DATA(v)[i] = character;
724 else if (EQ (attribute, Q_decomposition))
727 signal_simple_error ("Invalid value for ->decomposition",
730 if (CONSP (Fcdr (value)))
732 Lisp_Object rest = value;
733 Lisp_Object table = Vcharacter_composition_table;
737 Lisp_Object v = Fcar (rest);
740 = to_char_id (v, "Invalid value for ->decomposition", value);
745 put_char_id_table (c, character, table);
750 ntable = get_char_id_table (c, table);
751 if (!CHAR_ID_TABLE_P (ntable))
753 ntable = make_char_id_table (Qnil);
754 put_char_id_table (c, ntable, table);
762 Lisp_Object v = Fcar (value);
768 = get_char_id_table (c, Vcharacter_variant_table);
/* NOTE(review): this membership test looks for V in the variant list,
   but the element consed on below is CHARACTER, and the ->ucs branch
   further down tests CHARACTER.  As written the test looks unable to
   prevent duplicate entries -- confirm whether CHARACTER was intended
   here.  */
770 if (NILP (Fmemq (v, ret)))
772 put_char_id_table (c, Fcons (character, ret),
773 Vcharacter_variant_table);
778 else if (EQ (attribute, Q_ucs))
784 signal_simple_error ("Invalid value for ->ucs", value);
788 ret = get_char_id_table (c, Vcharacter_variant_table);
789 if (NILP (Fmemq (character, ret)))
791 put_char_id_table (c, Fcons (character, ret),
792 Vcharacter_variant_table);
795 return put_char_attribute (character, attribute, value);
/* Lisp primitive `remove-char-attribute' (exactly two arguments).
   ATTRIBUTE is resolved with Ffind_charset().  The visible code reads
   CHARACTER's current code point for it with Fget_char_attribute(),
   walks the charset's decoding table by the bytes of that code point,
   and clears the slot (sets it to Qnil).  The attribute pair itself is
   then removed with remove_char_attribute().  */
798 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
799 Remove CHARACTER's ATTRIBUTE.
801 (character, attribute))
805 CHECK_CHAR (character);
806 ccs = Ffind_charset (attribute);
810 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
817 /* ad-hoc method for `ascii' */
818 if ((XCHARSET_CHARS (ccs) == 94) &&
819 (XCHARSET_BYTE_OFFSET (ccs) != 33))
820 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
822 ccs_len = XCHARSET_CHARS (ccs);
825 cpos = Fget_char_attribute (character, attribute);
830 dim = XCHARSET_DIMENSION (ccs);
831 code_point = XINT (cpos);
835 i = ((code_point >> (8 * dim)) & 255)
836 - XCHARSET_BYTE_OFFSET (ccs);
837 nv = XVECTOR_DATA(v)[i];
843 XVECTOR_DATA(v)[i] = Qnil;
844 v = XCHARSET_DECODING_TABLE (ccs);
848 return remove_char_attribute (character, attribute);
851 EXFUN (Fmake_char, 3);
852 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char' (exactly one argument, ATTRIBUTES).

   The character is determined from ATTRIBUTES in one of the ways
   visible here: from an entry whose key names a charset with a
   non-zero final byte or a positive UCS_MAX (via Fmake_char or
   Fdecode_char); from the Q_ucs entry (make_char of the code plus
   0x100000); or from the Qucs entry (make_char of the code).
   Malformed input signals "Invalid argument".  Each (KEY . VALUE) pair
   of ATTRIBUTES is then applied with Fput_char_attribute(), and the
   last visible statement looks up the character's entry in
   Vcharacter_attribute_table.  */
854 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
855 Store character's ATTRIBUTES.
859 Lisp_Object rest = attributes;
860 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
861 Lisp_Object character;
867 Lisp_Object cell = Fcar (rest);
871 signal_simple_error ("Invalid argument", attributes);
872 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
873 && ((XCHARSET_FINAL (ccs) != 0) ||
874 (XCHARSET_UCS_MAX (ccs) > 0)) )
878 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
880 character = Fdecode_char (ccs, cell);
881 goto setup_attributes;
885 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
888 signal_simple_error ("Invalid argument", attributes);
890 character = make_char (XINT (code) + 0x100000);
891 goto setup_attributes;
895 else if (!INTP (code))
896 signal_simple_error ("Invalid argument", attributes);
898 character = make_char (XINT (code));
904 Lisp_Object cell = Fcar (rest);
907 signal_simple_error ("Invalid argument", attributes);
908 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
912 get_char_id_table (XCHAR (character), Vcharacter_attribute_table);
915 Lisp_Object Vutf_2000_version;
919 int leading_code_private_11;
922 Lisp_Object Qcharsetp;
924 /* Qdoc_string, Qdimension, Qchars defined in general.c */
925 Lisp_Object Qregistry, Qfinal, Qgraphic;
926 Lisp_Object Qdirection;
927 Lisp_Object Qreverse_direction_charset;
928 Lisp_Object Qleading_byte;
929 Lisp_Object Qshort_name, Qlong_name;
945 Qjapanese_jisx0208_1978,
948 Qjapanese_jisx0208_1990,
959 Qvietnamese_viscii_lower,
960 Qvietnamese_viscii_upper,
990 Lisp_Object Ql2r, Qr2l;
992 Lisp_Object Vcharset_hash_table;
994 /* Composite characters are characters constructed by overstriking two
995 or more regular characters.
997 1) The old Mule implementation involves storing composite characters
998 in a buffer as a tag followed by all of the actual characters
999 used to make up the composite character. I think this is a bad
1000 idea; it greatly complicates code that wants to handle strings
1001 one character at a time because it has to deal with the possibility
1002 of great big ungainly characters. It's much more reasonable to
1003 simply store an index into a table of composite characters.
1005 2) The current implementation only allows for 16,384 separate
1006 composite characters over the lifetime of the XEmacs process.
1007 This could become a potential problem if the user
1008 edited lots of different files that use composite characters.
1009 Due to FSF bogosity, increasing the number of allowable
1010 composite characters under Mule would decrease the number
1011 of possible faces that can exist. Mule already has shrunk
1012 this to 2048, and further shrinkage would become uncomfortable.
1013 No such problems exist in XEmacs.
1015 Composite characters could be represented as 0x80 C1 C2 C3,
1016 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1017 for slightly under 2^20 (one million) composite characters
1018 over the XEmacs process lifetime, and you only need to
1019 increase the size of a Mule character from 19 to 21 bits.
1020 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1021 85 million (slightly over 2^26) composite characters. */
1024 /************************************************************************/
1025 /* Basic Emchar functions */
1026 /************************************************************************/
/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
   string in STR.  Returns the number of bytes stored.
   Do not call this directly.  Use the macro set_charptr_emchar() instead.
 */
/* Write the encoded form of character C through P into STR.

   Two encoders are visible.  The first emits a UTF-8 style sequence
   whose length depends on the magnitude of C: a lead byte carrying the
   high bits under a length marker (0xc0, 0xe0, 0xf0, 0xf8, 0xfc),
   followed by continuation bytes of the form 0x80 | 6 bits.  The
   second uses the leading-byte representation (BREAKUP_CHAR,
   CHAR_LEADING_BYTE).
   NOTE(review): the two are presumably alternatives selected by a
   preprocessor conditional not visible in this view -- confirm.  */
1034 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1040 Lisp_Object charset;
/* Up to 11 bits: two bytes.  */
1049 else if ( c <= 0x7ff )
1051 *p++ = (c >> 6) | 0xc0;
1052 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: three bytes.  */
1054 else if ( c <= 0xffff )
1056 *p++ = (c >> 12) | 0xe0;
1057 *p++ = ((c >> 6) & 0x3f) | 0x80;
1058 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: four bytes.  */
1060 else if ( c <= 0x1fffff )
1062 *p++ = (c >> 18) | 0xf0;
1063 *p++ = ((c >> 12) & 0x3f) | 0x80;
1064 *p++ = ((c >> 6) & 0x3f) | 0x80;
1065 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: five bytes.  */
1067 else if ( c <= 0x3ffffff )
1069 *p++ = (c >> 24) | 0xf8;
1070 *p++ = ((c >> 18) & 0x3f) | 0x80;
1071 *p++ = ((c >> 12) & 0x3f) | 0x80;
1072 *p++ = ((c >> 6) & 0x3f) | 0x80;
1073 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: six bytes.  */
1077 *p++ = (c >> 30) | 0xfc;
1078 *p++ = ((c >> 24) & 0x3f) | 0x80;
1079 *p++ = ((c >> 18) & 0x3f) | 0x80;
1080 *p++ = ((c >> 12) & 0x3f) | 0x80;
1081 *p++ = ((c >> 6) & 0x3f) | 0x80;
1082 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte representation: split C into its charset and position
   codes; a private leading byte is preceded by its prefix byte.  */
1085 BREAKUP_CHAR (c, charset, c1, c2);
1086 lb = CHAR_LEADING_BYTE (c);
1087 if (LEADING_BYTE_PRIVATE_P (lb))
1088 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1090 if (EQ (charset, Vcharset_control_1))
1099 /* Return the first character from a Mule-encoded string in STR,
1100 assuming it's non-ASCII. Do not call this directly.
1101 Use the macro charptr_emchar() instead. */
/* Decode the character at the start of STR.

   Two decoders are visible.  The first classifies the first byte B by
   threshold (>= 0xf8, 0xf0, 0xe0, 0xc0), then loops LEN times folding
   the low 6 bits of a byte into CH (ch = ch << 6 | b & 0x3f).  The
   second handles the leading-byte representation: after
   LEADING_BYTE_CONTROL_1 the result is the next byte minus 0x20;
   otherwise the charset is found with CHARSET_BY_LEADING_BYTE and the
   character is built with MAKE_CHAR from position bytes I1 and, for
   dimension-2 charsets, I2.
   NOTE(review): the two are presumably alternatives selected by a
   preprocessor conditional not visible in this view -- confirm.  */
1104 non_ascii_charptr_emchar (const Bufbyte *str)
1117 else if ( b >= 0xf8 )
1122 else if ( b >= 0xf0 )
1127 else if ( b >= 0xe0 )
1132 else if ( b >= 0xc0 )
1142 for( ; len > 0; len-- )
1145 ch = ( ch << 6 ) | ( b & 0x3f );
1149 Bufbyte i0 = *str, i1, i2 = 0;
1150 Lisp_Object charset;
1152 if (i0 == LEADING_BYTE_CONTROL_1)
1153 return (Emchar) (*++str - 0x20);
1155 if (LEADING_BYTE_PREFIX_P (i0))
1160 charset = CHARSET_BY_LEADING_BYTE (i0);
1161 if (XCHARSET_DIMENSION (charset) == 2)
1164 return MAKE_CHAR (charset, i1, i2);
1168 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1169 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check on CH, based on its three fields F1, F2 and F3
   (CHAR_FIELD1..3).  Two groups of checks are visible, each with its
   own local CHARSET: one range-checks F2 against the official and
   private FIELD2 bounds, the other range-checks F1 against the FIELD1
   bounds and tests F2 and F3 against 0x20.  In both groups the charset
   is looked up with CHARSET_BY_LEADING_BYTE, compared against Qnil,
   and the visible return tests whether the charset has 96 chars.
   With ENABLE_COMPOSITE_CHARS, a composite leading byte is checked
   against Vcomposite_char_char2string_hash_table.  The results of the
   individual tests are not visible in this view.  */
1173 non_ascii_valid_char_p (Emchar ch)
1177 /* Must have only lowest 19 bits set */
1181 f1 = CHAR_FIELD1 (ch);
1182 f2 = CHAR_FIELD2 (ch);
1183 f3 = CHAR_FIELD3 (ch);
1187 Lisp_Object charset;
1189 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1190 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1191 f2 > MAX_CHAR_FIELD2_PRIVATE)
1196 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1197 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1201 NOTE: This takes advantage of the fact that
1202 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1203 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1205 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1206 if (EQ (charset, Qnil))
1208 return (XCHARSET_CHARS (charset) == 96);
1212 Lisp_Object charset;
1214 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1215 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1216 f1 > MAX_CHAR_FIELD1_PRIVATE)
1218 if (f2 < 0x20 || f3 < 0x20)
1221 #ifdef ENABLE_COMPOSITE_CHARS
1222 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1224 if (UNBOUNDP (Fgethash (make_int (ch),
1225 Vcomposite_char_char2string_hash_table,
1230 #endif /* ENABLE_COMPOSITE_CHARS */
1232 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1233 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1236 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1238 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1241 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1243 if (EQ (charset, Qnil))
1245 return (XCHARSET_CHARS (charset) == 96);
1251 /************************************************************************/
1252 /* Basic string functions */
1253 /************************************************************************/
1255 /* Copy the character pointed to by PTR into STR, assuming it's
1256 non-ASCII. Do not call this directly. Use the macro
1257 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR.  The switch dispatches
   on REP_BYTES_BY_FIRST_BYTE of the byte at STRPTR, and the cases fall
   through so that each one copies one more trailing byte.  The return
   value is the number of bytes now occupied in STR
   (strptr + 1 - str).  */
1260 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1262 Bufbyte *strptr = str;
1264 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1266 /* Notice fallthrough. */
1268 case 6: *++strptr = *ptr++;
1269 case 5: *++strptr = *ptr++;
1271 case 4: *++strptr = *ptr++;
1272 case 3: *++strptr = *ptr++;
1273 case 2: *++strptr = *ptr;
1278 return strptr + 1 - str;
1282 /************************************************************************/
1283 /* streams of Emchars */
1284 /************************************************************************/
1286 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1287 The functions below are not meant to be called directly; use
1288 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is stored as
   the first byte of the local buffer STR.  The switch on
   REP_BYTES_BY_FIRST_BYTE (ch) falls through so that the remaining
   bytes are fetched one at a time with Lstream_getc() and appended to
   STR.  The assembled bytes are decoded with charptr_emchar().  */
1291 Lstream_get_emchar_1 (Lstream *stream, int ch)
1293 Bufbyte str[MAX_EMCHAR_LEN];
1294 Bufbyte *strptr = str;
1296 str[0] = (Bufbyte) ch;
1297 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1299 /* Notice fallthrough. */
1302 ch = Lstream_getc (stream);
1304 *++strptr = (Bufbyte) ch;
1306 ch = Lstream_getc (stream);
1308 *++strptr = (Bufbyte) ch;
1311 ch = Lstream_getc (stream);
1313 *++strptr = (Bufbyte) ch;
1315 ch = Lstream_getc (stream);
1317 *++strptr = (Bufbyte) ch;
1319 ch = Lstream_getc (stream);
1321 *++strptr = (Bufbyte) ch;
1326 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write
   the resulting LEN bytes to STREAM; returns what Lstream_write()
   returns.  */
1330 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1332 Bufbyte str[MAX_EMCHAR_LEN];
1333 Bytecount len = set_charptr_emchar (str, ch);
1334 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and hand the
   resulting LEN bytes to Lstream_unread() on STREAM.  */
1338 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1340 Bufbyte str[MAX_EMCHAR_LEN];
1341 Bytecount len = set_charptr_emchar (str, ch);
1342 Lstream_unread (stream, str, len);
1346 /************************************************************************/
1347 /* charset object */
1348 /************************************************************************/
/* Mark function for charset objects: calls mark_object() on the
   short_name, long_name, doc_string, registry, ccl_program and
   decoding_table members.  */
1351 mark_charset (Lisp_Object obj)
1353 Lisp_Charset *cs = XCHARSET (obj);
1355 mark_object (cs->short_name);
1356 mark_object (cs->long_name);
1357 mark_object (cs->doc_string);
1358 mark_object (cs->registry);
1359 mark_object (cs->ccl_program);
1361 mark_object (cs->decoding_table);
/* Print function for charset objects.  Writes "#<charset ", then the
   name, short name, long name and doc string separated by spaces, then
   a formatted summary (dimension, direction as "l2r" or "r2l",
   columns, graphic, final byte) ending in "reg=", then the registry,
   and finally the object's uid in hex followed by ">".  The error()
   call reports an attempt to print an unreadable object; the condition
   guarding it is not visible in this view.  */
1367 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1369 Lisp_Charset *cs = XCHARSET (obj);
1373 error ("printing unreadable object #<charset %s 0x%x>",
1374 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1377 write_c_string ("#<charset ", printcharfun);
1378 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1379 write_c_string (" ", printcharfun);
1380 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1381 write_c_string (" ", printcharfun);
1382 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1383 write_c_string (" ", printcharfun);
1384 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1385 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1387 CHARSET_DIMENSION (cs),
1388 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1389 CHARSET_COLUMNS (cs),
1390 CHARSET_GRAPHIC (cs),
1391 CHARSET_FINAL (cs));
1392 write_c_string (buf, printcharfun);
1393 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1394 sprintf (buf, " 0x%x>", cs->header.uid);
1395 write_c_string (buf, printcharfun);
/* Description of Lisp_Charset: lists its Lisp object members (name,
   doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table).  */
1398 static const struct lrecord_description charset_description[] = {
1399 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1400 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1401 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1402 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1403 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1404 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1405 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1407 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
/* Define the lrecord implementation named "charset", with mark_charset
   and print_charset as its mark and print functions.  */
1412 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1413 mark_charset, print_charset, 0, 0, 0,
1414 charset_description,
1416 /* Make a new charset. */
/* Allocate a Lisp_Charset record, fill in its fields from the
   arguments, derive its type and representation length, and enter it
   in the lookup tables held in chlook (by attributes and by leading
   byte) and in Vcharset_hash_table under NAME.

   Note that the decoding table field is set to Qnil here regardless of
   the DECODING_TABLE argument, and that the ccl program and
   reverse-direction charset fields also start out as Qnil.  */
1419 make_charset (Charset_ID id, Lisp_Object name,
1420 unsigned short chars, unsigned char dimension,
1421 unsigned char columns, unsigned char graphic,
1422 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1423 Lisp_Object long_name, Lisp_Object doc,
1425 Lisp_Object decoding_table,
1426 Emchar ucs_min, Emchar ucs_max,
1427 Emchar code_offset, unsigned char byte_offset)
1429 unsigned char type = 0;
1431 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1435 XSETCHARSET (obj, cs);
1437 CHARSET_ID (cs) = id;
1438 CHARSET_NAME (cs) = name;
1439 CHARSET_SHORT_NAME (cs) = short_name;
1440 CHARSET_LONG_NAME (cs) = long_name;
1441 CHARSET_CHARS (cs) = chars;
1442 CHARSET_DIMENSION (cs) = dimension;
1443 CHARSET_DIRECTION (cs) = direction;
1444 CHARSET_COLUMNS (cs) = columns;
1445 CHARSET_GRAPHIC (cs) = graphic;
1446 CHARSET_FINAL (cs) = final;
1447 CHARSET_DOC_STRING (cs) = doc;
1448 CHARSET_REGISTRY (cs) = reg;
1449 CHARSET_CCL_PROGRAM (cs) = Qnil;
1450 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1452 CHARSET_DECODING_TABLE(cs) = Qnil;
1453 CHARSET_UCS_MIN(cs) = ucs_min;
1454 CHARSET_UCS_MAX(cs) = ucs_max;
1455 CHARSET_CODE_OFFSET(cs) = code_offset;
1456 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive the charset type from the number of chars per dimension and
   the dimension (94, 94x94, 96, 96x96, 128, 128x128, 256, 256x256).  */
1459 switch (CHARSET_CHARS (cs))
1462 switch (CHARSET_DIMENSION (cs))
1465 type = CHARSET_TYPE_94;
1468 type = CHARSET_TYPE_94X94;
1473 switch (CHARSET_DIMENSION (cs))
1476 type = CHARSET_TYPE_96;
1479 type = CHARSET_TYPE_96X96;
1485 switch (CHARSET_DIMENSION (cs))
1488 type = CHARSET_TYPE_128;
1491 type = CHARSET_TYPE_128X128;
1496 switch (CHARSET_DIMENSION (cs))
1499 type = CHARSET_TYPE_256;
1502 type = CHARSET_TYPE_256X256;
1509 CHARSET_TYPE (cs) = type;
/* Representation length: 1 byte for ASCII; otherwise the dimension
   plus 1 or plus 2 (the condition choosing between the latter two is
   not visible in this view).  */
1513 if (id == LEADING_BYTE_ASCII)
1514 CHARSET_REP_BYTES (cs) = 1;
1516 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1518 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1523 /* some charsets do not have final characters. This includes
1524 ASCII, Control-1, Composite, and the two faux private
1527 if (code_offset == 0)
1529 assert (NILP (chlook->charset_by_attributes[type][final]));
1530 chlook->charset_by_attributes[type][final] = obj;
1533 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1534 chlook->charset_by_attributes[type][final][direction] = obj;
1538 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1539 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1541 /* Some charsets are "faux" and don't have names or really exist at
1542 all except in the leading-byte table. */
1544 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte.  Three counters in
   chlook are visible (next_allocated_leading_byte and the 1-byte and
   2-byte variants).  Each is compared against its
   MAX_LEADING_BYTE_PRIVATE* limit before being post-incremented into
   LB.  The error "No more character sets free for this dimension" is
   signalled with DIMENSION, presumably when the relevant counter is
   exhausted -- confirm.  */
1549 get_unallocated_leading_byte (int dimension)
1554 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1557 lb = chlook->next_allocated_leading_byte++;
1561 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1564 lb = chlook->next_allocated_1_byte_leading_byte++;
1568 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1571 lb = chlook->next_allocated_2_byte_leading_byte++;
1577 ("No more character sets free for this dimension",
1578 make_int (dimension));
/* Compute the character for position codes C1 (and C2 for dimension-2
   charsets) in CHARSET.

   When the charset has a non-zero UCS_MAX, the code is computed
   linearly: the position codes minus BYTE_OFFSET (combined through
   CHARS for dimension 2), minus CODE_OFFSET, plus UCS_MIN.  A result
   outside [UCS_MIN, UCS_MAX] signals "Arguments makes invalid
   character".  Otherwise the code is computed from the charset's final
   byte relative to '0' and the position within a 94- or 96-wide grid
   (94x94 or 96x96, based at MIN_CHAR_94x94 / MIN_CHAR_96x96, for
   dimension 2).  */
1585 make_builtin_char (Lisp_Object charset, int c1, int c2)
1587 if (XCHARSET_UCS_MAX (charset))
1590 = (XCHARSET_DIMENSION (charset) == 1
1592 c1 - XCHARSET_BYTE_OFFSET (charset)
1594 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1595 + c2 - XCHARSET_BYTE_OFFSET (charset))
1596 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1597 if ((code < XCHARSET_UCS_MIN (charset))
1598 || (XCHARSET_UCS_MAX (charset) < code))
1599 signal_simple_error ("Arguments makes invalid character",
1603 else if (XCHARSET_DIMENSION (charset) == 1)
1605 switch (XCHARSET_CHARS (charset))
1609 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1612 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1619 switch (XCHARSET_CHARS (charset))
1622 return MIN_CHAR_94x94
1623 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1624 + (c1 - 33) * 94 + (c2 - 33);
1626 return MIN_CHAR_96x96
1627 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1628 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of character CH within CHARSET from the
   charset's range attributes (the counterpart of building a character
   from position codes).  For multi-dimensional charsets the position
   codes are packed one per byte, most significant first.
   Two cases are handled:
   - CH lies inside [UCS_MIN, UCS_MAX] of the charset: the linear index
     d = CH - UCS_MIN + CODE_OFFSET is split into 1..4 position codes,
     each offset by the charset's BYTE_OFFSET.
   - The charset has CODE_OFFSET == 0: CH is located relative to the
     94 / 96 / 94x94 / 96x96 area slot selected by the final byte.
   NOTE(review): the upper-bound halves of the slot tests and the
   fall-through result for "not in this charset" are on lines not
   visible in this view. */
1636 range_charset_code_point (Lisp_Object charset, Emchar ch)
/* Case 1: CH is inside the charset's UCS range. */
1640 if ((XCHARSET_UCS_MIN (charset) <= ch)
1641 && (ch <= XCHARSET_UCS_MAX (charset)))
1643 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1645 if (XCHARSET_CHARS (charset) == 256)
1647 else if (XCHARSET_DIMENSION (charset) == 1)
1648 return d + XCHARSET_BYTE_OFFSET (charset);
1649 else if (XCHARSET_DIMENSION (charset) == 2)
1651 ((d / XCHARSET_CHARS (charset)
1652 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1653 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1654 else if (XCHARSET_DIMENSION (charset) == 3)
1656 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1657 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1658 | ((d / XCHARSET_CHARS (charset)
1659 % XCHARSET_CHARS (charset)
1660 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1661 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1662 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1664 ((d / (XCHARSET_CHARS (charset)
1665 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1666 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1667 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1668 % XCHARSET_CHARS (charset)
1669 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1670 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1671 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1672 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* Case 2: no code offset -- look in the final-byte slot of the matching
   94 / 96 / 94x94 / 96x96 area; d is CH's offset from the slot start. */
1674 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1676 if (XCHARSET_DIMENSION (charset) == 1)
1678 if (XCHARSET_CHARS (charset) == 94)
1680 if (((d = ch - (MIN_CHAR_94
1681 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1685 else if (XCHARSET_CHARS (charset) == 96)
1687 if (((d = ch - (MIN_CHAR_96
1688 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1695 else if (XCHARSET_DIMENSION (charset) == 2)
1697 if (XCHARSET_CHARS (charset) == 94)
1699 if (((d = ch - (MIN_CHAR_94x94
1700 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1703 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1705 else if (XCHARSET_CHARS (charset) == 96)
1707 if (((d = ch - (MIN_CHAR_96x96
1708 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1711 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map character C to a builtin charset and a code point in it.  The
   charset is stored through *CHARSET; the code point is the return
   value.  The range tests below are tried in order, from Basic Latin
   upward.  For the 94 / 96 / 94x94 / 96x96 areas the charset is found
   with CHARSET_BY_ATTRIBUTES from the slot index (converted back to a
   final byte by adding '0') and left-to-right direction; when that
   lookup yields nil, and for ranges with no more specific charset,
   *CHARSET is set to Vcharset_ucs.
   NOTE(review): the return statements that accompany most of the
   "*charset = ..." assignments are on lines not visible in this view. */
1721 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1723 if (c <= MAX_CHAR_BASIC_LATIN)
1725 *charset = Vcharset_ascii;
1730 *charset = Vcharset_control_1;
1735 *charset = Vcharset_latin_iso8859_1;
1739 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1741 *charset = Vcharset_greek_iso8859_7;
1742 return c - MIN_CHAR_GREEK + 0x20;
1744 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1746 *charset = Vcharset_cyrillic_iso8859_5;
1747 return c - MIN_CHAR_CYRILLIC + 0x20;
1750 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1752 *charset = Vcharset_hebrew_iso8859_8;
1753 return c - MIN_CHAR_HEBREW + 0x20;
1755 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1757 *charset = Vcharset_thai_tis620;
1758 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike every other branch, the half-width katakana
   branch that follows returns a list2 () value and never stores through
   the charset pointer.  Lines are missing from this view on both sides
   of it, so it may be disabled (commented out or conditionally
   compiled) -- confirm before relying on it. */
1761 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1762 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1764 return list2 (Vcharset_katakana_jisx0201,
1765 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1768 else if (c <= MAX_CHAR_BMP)
1770 *charset = Vcharset_ucs_bmp;
1773 else if (c < MIN_CHAR_DAIKANWA)
1775 *charset = Vcharset_ucs;
1779 else if (c <= MAX_CHAR_DAIKANWA)
1781 *charset = Vcharset_ideograph_daikanwa;
1782 return c - MIN_CHAR_DAIKANWA;
1785 else if (c <= MAX_CHAR_MOJIKYO)
1787 *charset = Vcharset_mojikyo;
1788 return c - MIN_CHAR_MOJIKYO;
1790 else if (c < MIN_CHAR_94)
1792 *charset = Vcharset_ucs;
1795 else if (c <= MAX_CHAR_94)
1797 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1798 ((c - MIN_CHAR_94) / 94) + '0',
1799 CHARSET_LEFT_TO_RIGHT);
1800 if (!NILP (*charset))
1801 return ((c - MIN_CHAR_94) % 94) + 33;
1804 *charset = Vcharset_ucs;
1808 else if (c <= MAX_CHAR_96)
1810 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1811 ((c - MIN_CHAR_96) / 96) + '0',
1812 CHARSET_LEFT_TO_RIGHT);
1813 if (!NILP (*charset))
1814 return ((c - MIN_CHAR_96) % 96) + 32;
1817 *charset = Vcharset_ucs;
1821 else if (c <= MAX_CHAR_94x94)
1824 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1825 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1826 CHARSET_LEFT_TO_RIGHT);
1827 if (!NILP (*charset))
1828 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1829 | (((c - MIN_CHAR_94x94) % 94) + 33);
1832 *charset = Vcharset_ucs;
1836 else if (c <= MAX_CHAR_96x96)
1839 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1840 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1841 CHARSET_LEFT_TO_RIGHT);
1842 if (!NILP (*charset))
1843 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1844 | (((c - MIN_CHAR_96x96) % 96) + 32);
1847 *charset = Vcharset_ucs;
1853 *charset = Vcharset_ucs;
/* NOTE(review): judging by its name, the default priority order of coded
   charsets.  It is only declared here; its initialisation and users are
   outside this view -- confirm. */
1858 Lisp_Object Vdefault_coded_charset_priority_list;
1862 /************************************************************************/
1863 /* Basic charset Lisp functions */
1864 /************************************************************************/
/* Lisp predicate `charsetp': tests OBJECT with CHARSETP and maps the
   result to Qt or Qnil. */
1866 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1867 Return non-nil if OBJECT is a charset.
1871 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is handed back
   unchanged; any other argument must pass CHECK_SYMBOL and is then
   looked up in Vcharset_hash_table, with Qnil as the not-found value. */
1874 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1875 Retrieve the charset of the given name.
1876 If CHARSET-OR-NAME is a charset object, it is simply returned.
1877 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1878 nil is returned. Otherwise the associated charset object is returned.
1882 if (CHARSETP (charset_or_name))
1883 return charset_or_name;
1885 CHECK_SYMBOL (charset_or_name);
1886 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': strict variant of `find-charset'.
   Delegates the lookup to Ffind_charset and signals "No such charset"
   with NAME as datum (the test guarding that error is on a line not
   visible in this view; presumably it fires when the lookup is nil). */
1889 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1890 Retrieve the charset of the given name.
1891 Same as `find-charset' except an error is signalled if there is no such
1892 charset instead of returning nil.
1896 Lisp_Object charset = Ffind_charset (name);
1899 signal_simple_error ("No such charset", name);
1903 /* We store the charsets in hash tables with the names as the key and the
1904 actual charset object as the value. Occasionally we need to use them
1905 in a list format. These routines provide us with that. */
1906 struct charset_list_closure
/* Address of the Lisp list being accumulated; the hash-table mapper
   conses each charset name onto the front of the list stored there. */
1908 Lisp_Object *charset_list;
/* elisp_maphash callback used when building the charset list.  VALUE is
   a charset object; its name is consed onto the front of the list whose
   address is stored in the struct charset_list_closure passed as
   CHARSET_LIST_CLOSURE.  KEY is not referenced by the visible code. */
1912 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1913 void *charset_list_closure)
1915 /* This function can GC */
1916 struct charset_list_closure *chcl =
1917 (struct charset_list_closure*) charset_list_closure;
1918 Lisp_Object *charset_list = chcl->charset_list;
1920 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Walks Vcharset_hash_table with
   elisp_maphash; add_charset_to_list_mapper conses the name of each
   entry's charset onto CHARSET_LIST, which is protected with GCPRO1
   while the traversal runs.
   NOTE(review): `define-charset-alias' stores alias keys in the same
   hash table, so the name of an aliased charset looks like it is
   collected once per key -- confirm whether duplicates are intended. */
1924 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1925 Return a list of the names of all defined charsets.
1929 Lisp_Object charset_list = Qnil;
1930 struct gcpro gcpro1;
1931 struct charset_list_closure charset_list_closure;
1933 GCPRO1 (charset_list);
1934 charset_list_closure.charset_list = &charset_list;
1935 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1936 &charset_list_closure);
1939 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset
   (which signals for an unknown charset) and returns its name slot. */
1942 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1943 Return the name of the given charset.
1947 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.  Checks NAME and DOC-STRING, refuses to
   redefine an existing charset, parses the PROPS property list, then
   allocates a private leading byte with get_unallocated_leading_byte
   and builds the charset with make_charset.
   Errors are signalled for an out-of-range or unrecognized property,
   for a missing 'final, for a final byte above 0x5F with dimension 2,
   and for a (dimension, chars, final) combination that already has a
   charset in either direction.
   Defaults applied after parsing: empty doc string and registry,
   short-name taken from NAME's symbol name, long-name taken from the
   doc string, columns taken from the dimension.
   The 'short-name and 'long-name tests now belong to one if / else-if
   chain, so a 'short-name entry is accepted instead of falling through
   to the "Unrecognized property" error. */
1950 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1951 Define a new character set.
1952 This function is for use with Mule support.
1953 NAME is a symbol, the name by which the character set is normally referred.
1954 DOC-STRING is a string describing the character set.
1955 PROPS is a property list, describing the specific nature of the
1956 character set. Recognized properties are:
1958 'short-name Short version of the charset name (ex: Latin-1)
1959 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1960 'registry A regular expression matching the font registry field for
1962 'dimension Number of octets used to index a character in this charset.
1963 Either 1 or 2. Defaults to 1.
1964 'columns Number of columns used to display a character in this charset.
1965 Only used in TTY mode. (Under X, the actual width of a
1966 character can be derived from the font used to display the
1967 characters.) If unspecified, defaults to the dimension
1968 (this is almost always the correct value).
1969 'chars Number of characters in each dimension (94 or 96).
1970 Defaults to 94. Note that if the dimension is 2, the
1971 character set thus described is 94x94 or 96x96.
1972 'final Final byte of ISO 2022 escape sequence. Must be
1973 supplied. Each combination of (DIMENSION, CHARS) defines a
1974 separate namespace for final bytes. Note that ISO
1975 2022 restricts the final byte to the range
1976 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1977 dimension == 2. Note also that final bytes in the range
1978 0x30 - 0x3F are reserved for user-defined (not official)
1980 'graphic 0 (use left half of font on output) or 1 (use right half
1981 of font on output). Defaults to 0. For example, for
1982 a font whose registry is ISO8859-1, the left half
1983 (octets 0x20 - 0x7F) is the `ascii' character set, while
1984 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1985 character set. With 'graphic set to 0, the octets
1986 will have their high bit cleared; with it set to 1,
1987 the octets will have their high bit set.
1988 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1990 'ccl-program A compiled CCL program used to convert a character in
1991 this charset into an index into the font. This is in
1992 addition to the 'graphic property. The CCL program
1993 is passed the octets of the character, with the high
1994 bit cleared and set depending upon whether the value
1995 of the 'graphic property is 0 or 1.
1997 (name, doc_string, props))
1999 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2000 int direction = CHARSET_LEFT_TO_RIGHT;
2002 Lisp_Object registry = Qnil;
2003 Lisp_Object charset;
2004 Lisp_Object rest, keyword, value;
2005 Lisp_Object ccl_program = Qnil;
2006 Lisp_Object short_name = Qnil, long_name = Qnil;
2007 int byte_offset = -1;
2009 CHECK_SYMBOL (name);
2010 if (!NILP (doc_string))
2011 CHECK_STRING (doc_string);
2013 charset = Ffind_charset (name);
2014 if (!NILP (charset))
2015 signal_simple_error ("Cannot redefine existing charset", name);
2017 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2019 if (EQ (keyword, Qshort_name))
2021 CHECK_STRING (value);
2025 else if (EQ (keyword, Qlong_name))
2027 CHECK_STRING (value);
2031 else if (EQ (keyword, Qdimension))
2034 dimension = XINT (value);
2035 if (dimension < 1 || dimension > 2)
2036 signal_simple_error ("Invalid value for 'dimension", value);
2039 else if (EQ (keyword, Qchars))
2042 chars = XINT (value);
2043 if (chars != 94 && chars != 96)
2044 signal_simple_error ("Invalid value for 'chars", value);
2047 else if (EQ (keyword, Qcolumns))
2050 columns = XINT (value);
2051 if (columns != 1 && columns != 2)
2052 signal_simple_error ("Invalid value for 'columns", value);
2055 else if (EQ (keyword, Qgraphic))
2058 graphic = XINT (value);
2060 if (graphic < 0 || graphic > 2)
2062 if (graphic < 0 || graphic > 1)
2064 signal_simple_error ("Invalid value for 'graphic", value);
2067 else if (EQ (keyword, Qregistry))
2069 CHECK_STRING (value);
2073 else if (EQ (keyword, Qdirection))
2075 if (EQ (value, Ql2r))
2076 direction = CHARSET_LEFT_TO_RIGHT;
2077 else if (EQ (value, Qr2l))
2078 direction = CHARSET_RIGHT_TO_LEFT;
2080 signal_simple_error ("Invalid value for 'direction", value);
2083 else if (EQ (keyword, Qfinal))
2085 CHECK_CHAR_COERCE_INT (value);
2086 final = XCHAR (value);
2087 if (final < '0' || final > '~')
2088 signal_simple_error ("Invalid value for 'final", value);
2091 else if (EQ (keyword, Qccl_program))
2093 CHECK_VECTOR (value);
2094 ccl_program = value;
2098 signal_simple_error ("Unrecognized property", keyword);
2102 error ("'final must be specified");
2103 if (dimension == 2 && final > 0x5F)
2105 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2109 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2111 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2113 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2114 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2116 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2118 id = get_unallocated_leading_byte (dimension);
2120 if (NILP (doc_string))
2121 doc_string = build_string ("");
2123 if (NILP (registry))
2124 registry = build_string ("");
2126 if (NILP (short_name))
2127 XSETSTRING (short_name, XSYMBOL (name)->name);
2129 if (NILP (long_name))
2130 long_name = doc_string;
2133 columns = dimension;
2135 if (byte_offset < 0)
2139 else if (chars == 96)
2145 charset = make_charset (id, name, chars, dimension, columns, graphic,
2146 final, direction, short_name, long_name,
2147 doc_string, registry,
2148 Qnil, 0, 0, 0, byte_offset);
2149 if (!NILP (ccl_program))
2150 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Resolves CHARSET,
   rejects it if it already has a reverse-direction charset, and rejects
   NEW-NAME if a charset of that name exists.  The new charset gets a
   freshly allocated leading byte and the opposite direction; chars,
   dimension, columns, graphic, final, doc string, short and long names
   and registry are copied from CHARSET, as are the decoding table, UCS
   range, code offset and byte offset passed to make_charset below.
   Finally the two charsets are linked through their
   reverse-direction-charset slots. */
2154 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2156 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2157 NEW-NAME is the name of the new charset. Return the new charset.
2159 (charset, new_name))
2161 Lisp_Object new_charset = Qnil;
2162 int id, chars, dimension, columns, graphic, final;
2164 Lisp_Object registry, doc_string, short_name, long_name;
2167 charset = Fget_charset (charset);
2168 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2169 signal_simple_error ("Charset already has reverse-direction charset",
2172 CHECK_SYMBOL (new_name);
2173 if (!NILP (Ffind_charset (new_name)))
2174 signal_simple_error ("Cannot redefine existing charset", new_name);
2176 cs = XCHARSET (charset);
2178 chars = CHARSET_CHARS (cs);
2179 dimension = CHARSET_DIMENSION (cs);
2180 columns = CHARSET_COLUMNS (cs);
2181 id = get_unallocated_leading_byte (dimension);
2183 graphic = CHARSET_GRAPHIC (cs);
2184 final = CHARSET_FINAL (cs);
2185 direction = CHARSET_RIGHT_TO_LEFT;
2186 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2187 direction = CHARSET_LEFT_TO_RIGHT;
2188 doc_string = CHARSET_DOC_STRING (cs);
2189 short_name = CHARSET_SHORT_NAME (cs);
2190 long_name = CHARSET_LONG_NAME (cs);
2191 registry = CHARSET_REGISTRY (cs);
2193 new_charset = make_charset (id, new_name, chars, dimension, columns,
2194 graphic, final, direction, short_name, long_name,
2195 doc_string, registry,
2197 CHARSET_DECODING_TABLE(cs),
2198 CHARSET_UCS_MIN(cs),
2199 CHARSET_UCS_MAX(cs),
2200 CHARSET_CODE_OFFSET(cs),
2201 CHARSET_BYTE_OFFSET(cs)
2207 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2208 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol and
   CHARSET must resolve through Fget_charset; the alias is then stored
   as an additional key for that charset object in Vcharset_hash_table.
   The return value is whatever Fputhash returns. */
2213 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2214 Define symbol ALIAS as an alias for CHARSET.
2218 CHECK_SYMBOL (alias);
2219 charset = Fget_charset (charset);
2220 return Fputhash (alias, charset, Vcharset_hash_table);
2223 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   with Fget_charset and returns the contents of its
   reverse-direction-charset slot unchanged. */
2225 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2227 Return the reverse-direction charset parallel to CHARSET, if any.
2228 This is the charset with the same properties (in particular, the same
2229 dimension, number of characters per dimension, and final byte) as
2230 CHARSET but whose characters are displayed in the opposite direction.
2234 charset = Fget_charset (charset);
2235 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION (1 or
   2), CHARS (94 or 96), FINAL ('0' .. '~', and at most 0x5F when the
   dimension is 2) and DIRECTION ('l2r, 'r2l or nil), derives the
   charset type from dimension and chars, and looks the charset up with
   CHARSET_BY_ATTRIBUTES -- left-to-right first and then right-to-left
   when no direction was given, otherwise only the requested direction.
   Note that the visible return statement yields the NAME of the charset
   found rather than the charset object.
   NOTE(review): the handling of a failed lookup is on lines not visible
   in this view. */
2239 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2240 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2241 If DIRECTION is omitted, both directions will be checked (left-to-right
2242 will be returned if character sets exist for both directions).
2244 (dimension, chars, final, direction))
2246 int dm, ch, fi, di = -1;
2248 Lisp_Object obj = Qnil;
2250 CHECK_INT (dimension);
2251 dm = XINT (dimension);
2252 if (dm < 1 || dm > 2)
2253 signal_simple_error ("Invalid value for DIMENSION", dimension);
2257 if (ch != 94 && ch != 96)
2258 signal_simple_error ("Invalid value for CHARS", chars);
2260 CHECK_CHAR_COERCE_INT (final);
2262 if (fi < '0' || fi > '~')
2263 signal_simple_error ("Invalid value for FINAL", final);
2265 if (EQ (direction, Ql2r))
2266 di = CHARSET_LEFT_TO_RIGHT;
2267 else if (EQ (direction, Qr2l))
2268 di = CHARSET_RIGHT_TO_LEFT;
2269 else if (!NILP (direction))
2270 signal_simple_error ("Invalid value for DIRECTION", direction);
2272 if (dm == 2 && fi > 0x5F)
2274 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2277 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2279 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2283 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2285 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2288 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2291 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its short-name slot. */
2295 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2296 Return short name of CHARSET.
2300 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its long-name slot. */
2303 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2304 Return long name of CHARSET.
2308 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its doc-string slot. */
2311 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2312 Return description of CHARSET.
2316 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension as a Lisp integer. */
2319 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2320 Return dimension of CHARSET.
2324 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  Resolves CHARSET with
   Fget_charset, requires PROP to be a symbol, and dispatches on it:
   object-valued slots are returned as stored, integer slots are wrapped
   with make_int, the final byte with make_char, and the direction is
   reported as 'l2r or 'r2l.  For 'reverse-direction-charset the visible
   return statement yields the NAME of the linked charset.  Any other
   symbol signals "Unrecognized charset property name". */
2327 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2328 Return property PROP of CHARSET.
2329 Recognized properties are those listed in `make-charset', as well as
2330 'name and 'doc-string.
2336 charset = Fget_charset (charset);
2337 cs = XCHARSET (charset);
2339 CHECK_SYMBOL (prop);
2340 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2341 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2342 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2343 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2344 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2345 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2346 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2347 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2348 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2349 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2350 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2351 if (EQ (prop, Qdirection))
2352 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2353 if (EQ (prop, Qreverse_direction_charset))
2355 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2359 return XCHARSET_NAME (obj);
2361 signal_simple_error ("Unrecognized charset property name", prop);
2362 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its leading byte as a Lisp integer. */
2365 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2366 Return charset identification number of CHARSET.
2370 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2373 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  Resolves CHARSET with
   Fget_charset, requires CCL-PROGRAM to be a vector (CHECK_VECTOR) and
   stores it in the charset's ccl-program slot. */
2376 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2377 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2379 (charset, ccl_program))
2381 charset = Fget_charset (charset);
2382 CHECK_VECTOR (ccl_program);
2383 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop cached font lookups for CHARSET.  For every device, CHARSET is
   looked up in that device's charset_font_cache; when an entry exists
   (the lookup does not return Qunbound) it is a hash table, which is
   emptied with Fclrhash. */
2388 invalidate_charset_font_caches (Lisp_Object charset)
2390 /* Invalidate font cache entries for charset on all devices. */
2391 Lisp_Object devcons, concons, hash_table;
2392 DEVICE_LOOP_NO_BREAK (devcons, concons)
2394 struct device *d = XDEVICE (XCAR (devcons));
2395 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2396 if (!UNBOUNDP (hash_table))
2397 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  Resolves CHARSET with
   Fget_charset, requires REGISTRY to be a string, stores it in the
   charset's registry slot, clears the per-device font caches for the
   charset, and reports a change of the default face's font property so
   that dependent state is recomputed. */
2401 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2402 Set the 'registry property of CHARSET to REGISTRY.
2404 (charset, registry))
2406 charset = Fget_charset (charset);
2407 CHECK_STRING (registry);
2408 XCHARSET_REGISTRY (charset) = registry;
2409 invalidate_charset_font_caches (charset);
2410 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its decoding-table slot. */
2415 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2416 Return mapping-table of CHARSET.
2420 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  Resolves CHARSET with
   Fget_charset.  TABLE must be nil or a vector; anything else signals
   wrong-type-argument with "vector-or-nil-p".
   - nil is stored directly as the decoding table.
   - A vector may have at most ccs_len elements, where ccs_len is the
     charset's chars-per-dimension, except for 94-charsets whose byte
     offset is not 33 (the ad-hoc `ascii' case), where it is
     128 - byte offset.  A longer vector signals args-out-of-range; the
     bound reported is ccs_len, i.e. the limit that was actually tested.
     The previous table is remembered and the new one installed.
   The table is then walked according to the charset's dimension and
   code points are registered (the put_char_attribute call below; part
   of that code is on lines not visible in this view).  For dimension 2
   each row must itself be at most chars-per-dimension long; an
   over-long row restores the previous table before signalling
   args-out-of-range. */
2423 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2424 Set mapping-table of CHARSET to TABLE.
2428 struct Lisp_Charset *cs;
2429 Lisp_Object old_table;
2432 charset = Fget_charset (charset);
2433 cs = XCHARSET (charset);
2435 if (EQ (table, Qnil))
2437 CHARSET_DECODING_TABLE(cs) = table;
2440 else if (VECTORP (table))
2444 /* ad-hoc method for `ascii' */
2445 if ((CHARSET_CHARS (cs) == 94) &&
2446 (CHARSET_BYTE_OFFSET (cs) != 33))
2447 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2449 ccs_len = CHARSET_CHARS (cs);
2451 if (XVECTOR_LENGTH (table) > ccs_len)
2452 args_out_of_range (table, make_int (ccs_len));
2453 old_table = CHARSET_DECODING_TABLE(cs);
2454 CHARSET_DECODING_TABLE(cs) = table;
2457 signal_error (Qwrong_type_argument,
2458 list2 (build_translated_string ("vector-or-nil-p"),
2460 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2462 switch (CHARSET_DIMENSION (cs))
2465 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2467 Lisp_Object c = XVECTOR_DATA(table)[i];
2472 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2476 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2478 Lisp_Object v = XVECTOR_DATA(table)[i];
2484 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2486 CHARSET_DECODING_TABLE(cs) = old_table;
2487 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2489 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2491 Lisp_Object c = XVECTOR_DATA(v)[j];
2496 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2497 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2501 put_char_attribute (v, charset,
2502 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2511 /************************************************************************/
2512 /* Lisp primitives for working with characters */
2513 /************************************************************************/
/* Lisp primitive `decode-char'.  Resolves CHARSET with Fget_charset and
   builds the character with DECODE_CHAR from the integer code point c.
   NOTE(review): the extraction of c from CODE, and the adjustment made
   when the charset's graphic property is 1, are on lines not visible in
   this view. */
2516 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2517 Make a character from CHARSET and code-point CODE.
2523 charset = Fget_charset (charset);
2526 if (XCHARSET_GRAPHIC (charset) == 1)
2528 return make_char (DECODE_CHAR (charset, c));
/* Lisp primitive `make-char'.  Resolves CHARSET with Fget_charset and
   picks the valid octet range for it: 0..127 for ascii, 0..31 for
   control-1, 0..255 for 256-character sets, 33..126 for 94-sets and
   32..127 for 96-sets.  ARG1 (and ARG2 for two-dimensional charsets)
   must fall inside that range or args-out-of-range is signalled.  A
   one-dimensional charset rejects a second octet with "Charset is of
   dimension one; second octet must be nil".  The character itself is
   built with MAKE_CHAR. */
2532 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2533 Make a character from CHARSET and octets ARG1 and ARG2.
2534 ARG2 is required only for characters from two-dimensional charsets.
2535 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2536 character s with caron.
2538 (charset, arg1, arg2))
2542 int lowlim, highlim;
2544 charset = Fget_charset (charset);
2545 cs = XCHARSET (charset);
2547 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2548 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2550 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2552 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2553 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2556 /* It is useful (and safe, according to Olivier Galibert) to strip
2557 the 8th bit off ARG1 and ARG2 because it allows programmers to
2558 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2559 Latin 2 code of the character. */
2567 if (a1 < lowlim || a1 > highlim)
2568 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2570 if (CHARSET_DIMENSION (cs) == 1)
2574 ("Charset is of dimension one; second octet must be nil", arg2);
2575 return make_char (MAKE_CHAR (charset, a1, 0));
2584 a2 = XINT (arg2) & 0x7f;
2586 if (a2 < lowlim || a2 > highlim)
2587 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2589 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH is checked (and an integer
   coerced) with CHECK_CHAR_COERCE_INT; the result is the NAME of the
   charset that CHAR_CHARSET reports for the character. */
2592 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2593 Return the character set of char CH.
2597 CHECK_CHAR_COERCE_INT (ch);
2599 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `char-octet'.  CH is split with BREAKUP_CHAR into its
   charset and two octets.  N of nil or 0 selects the first octet, N of
   1 the second; any other N signals "Octet number must be 0 or 1". */
2602 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2603 Return the octet numbered N (should be 0 or 1) of char CH.
2604 N defaults to 0 if omitted.
2608 Lisp_Object charset;
2611 CHECK_CHAR_COERCE_INT (ch);
2613 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2615 if (NILP (n) || EQ (n, Qzero))
2616 return make_int (octet0);
2617 else if (EQ (n, make_int (1)))
2618 return make_int (octet1);
2620 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  Returns a list whose head is the NAME
   of the character's charset, followed by its position codes.  Two
   implementations are visible below.
   NOTE(review): presumably they are alternative builds selected by
   preprocessor lines not visible in this view -- confirm. */
2623 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2624 Return list of charset and one or two position-codes of CHAR.
2628 /* This function can GC */
2629 struct gcpro gcpro1, gcpro2;
2630 Lisp_Object charset = Qnil;
2631 Lisp_Object rc = Qnil;
2639 GCPRO2 (charset, rc);
2640 CHECK_CHAR_COERCE_INT (character);
/* Variant 1: ENCODE_CHAR yields a packed code point; its low bytes are
   consed onto rc, one per dimension, and the charset name goes on last
   so that it ends up at the head of the list. */
2643 code_point = ENCODE_CHAR (XCHAR (character), charset);
2644 dimension = XCHARSET_DIMENSION (charset);
2645 while (dimension > 0)
2647 rc = Fcons (make_int (code_point & 255), rc);
2651 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Variant 2: BREAKUP_CHAR yields the charset and two octets; the list
   has three elements for dimension 2 and two otherwise. */
2653 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2655 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2657 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2661 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2670 #ifdef ENABLE_COMPOSITE_CHARS
2671 /************************************************************************/
2672 /* composite character functions */
2673 /************************************************************************/
/* Find or create the composite character for the LEN bytes at STR.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  When a new character has to
   be made, it is built with MAKE_CHAR from Vcharset_composite and the
   next free (row, column) position, and recorded in both the
   char-to-string and the string-to-char hash tables.  The column
   counter then advances; on reaching 128 it wraps to 32 and the row
   counter advances.  Once the row counter has reached 128 the error
   "No more composite chars available" is signalled.
   NOTE(review): the test that distinguishes "already known" from "new",
   and the return statement, are on lines not visible in this view. */
2676 lookup_composite_char (Bufbyte *str, int len)
2678 Lisp_Object lispstr = make_string (str, len);
2679 Lisp_Object ch = Fgethash (lispstr,
2680 Vcomposite_char_string2char_hash_table,
2686 if (composite_char_row_next >= 128)
2687 signal_simple_error ("No more composite chars available", lispstr);
2688 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2689 composite_char_col_next);
2690 Fputhash (make_char (emch), lispstr,
2691 Vcomposite_char_char2string_hash_table);
2692 Fputhash (lispstr, make_char (emch),
2693 Vcomposite_char_string2char_hash_table);
2694 composite_char_col_next++;
2695 if (composite_char_col_next >= 128)
2697 composite_char_col_next = 32;
2698 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The character is expected to
   be present: the function asserts that the lookup did not come back
   unbound. */
2707 composite_char_string (Emchar ch)
2709 Lisp_Object str = Fgethash (make_char (ch),
2710 Vcomposite_char_char2string_hash_table,
2712 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char: STRING must be a
   string; its data and length are passed on and the resulting character
   is returned as a Lisp character. */
2716 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2717 Convert a string into a single composite character.
2718 The character is the result of overstriking all the characters in
2723 CHECK_STRING (string);
2724 return make_char (lookup_composite_char (XSTRING_DATA (string),
2725 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string.  A character whose
   leading byte is not LEADING_BYTE_COMPOSITE is rejected with "Must be
   composite char"; otherwise the recorded string is returned. */
2728 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2729 Return a string of the characters comprising a composite character.
2737 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2738 signal_simple_error ("Must be composite char", ch);
2739 return composite_char_string (emch);
2741 #endif /* ENABLE_COMPOSITE_CHARS */
2744 /************************************************************************/
2745 /* initialization */
2746 /************************************************************************/
/* Register this file's Lisp-visible objects: the lrecord
   implementations, every primitive defined with DEFUN (DEFSUBR), and
   the symbols used as property keywords, direction names, decomposition
   tags and charset names.  Each symbol is defined exactly once; Qfinal
   serves both as the 'final charset property and as a tag in the second
   symbol group, so it is defined only with the property symbols. */
2749 syms_of_mule_charset (void)
2752 INIT_LRECORD_IMPLEMENTATION (byte_table);
2753 INIT_LRECORD_IMPLEMENTATION (char_id_table);
2755 INIT_LRECORD_IMPLEMENTATION (charset);
2757 DEFSUBR (Fcharsetp);
2758 DEFSUBR (Ffind_charset);
2759 DEFSUBR (Fget_charset);
2760 DEFSUBR (Fcharset_list);
2761 DEFSUBR (Fcharset_name);
2762 DEFSUBR (Fmake_charset);
2763 DEFSUBR (Fmake_reverse_direction_charset);
2764 /* DEFSUBR (Freverse_direction_charset); */
2765 DEFSUBR (Fdefine_charset_alias);
2766 DEFSUBR (Fcharset_from_attributes);
2767 DEFSUBR (Fcharset_short_name);
2768 DEFSUBR (Fcharset_long_name);
2769 DEFSUBR (Fcharset_description);
2770 DEFSUBR (Fcharset_dimension);
2771 DEFSUBR (Fcharset_property);
2772 DEFSUBR (Fcharset_id);
2773 DEFSUBR (Fset_charset_ccl_program);
2774 DEFSUBR (Fset_charset_registry);
2776 DEFSUBR (Fchar_attribute_alist);
2777 DEFSUBR (Fget_char_attribute);
2778 DEFSUBR (Fput_char_attribute);
2779 DEFSUBR (Fremove_char_attribute);
2780 DEFSUBR (Fdefine_char);
2781 DEFSUBR (Fchar_variants);
2782 DEFSUBR (Fget_composite_char);
2783 DEFSUBR (Fcharset_mapping_table);
2784 DEFSUBR (Fset_charset_mapping_table);
2788 DEFSUBR (Fdecode_char);
2790 DEFSUBR (Fmake_char);
2791 DEFSUBR (Fchar_charset);
2792 DEFSUBR (Fchar_octet);
2793 DEFSUBR (Fsplit_char);
2795 #ifdef ENABLE_COMPOSITE_CHARS
2796 DEFSUBR (Fmake_composite_char);
2797 DEFSUBR (Fcomposite_char_string);
2800 defsymbol (&Qcharsetp, "charsetp");
2801 defsymbol (&Qregistry, "registry");
2802 defsymbol (&Qfinal, "final");
2803 defsymbol (&Qgraphic, "graphic");
2804 defsymbol (&Qdirection, "direction");
2805 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2806 defsymbol (&Qshort_name, "short-name");
2807 defsymbol (&Qlong_name, "long-name");
2809 defsymbol (&Ql2r, "l2r");
2810 defsymbol (&Qr2l, "r2l");
2812 /* Charsets, compatible with FSF 20.3
2813 Naming convention is Script-Charset[-Edition] */
2814 defsymbol (&Qascii, "ascii");
2815 defsymbol (&Qcontrol_1, "control-1");
2816 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2817 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2818 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2819 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2820 defsymbol (&Qthai_tis620, "thai-tis620");
2821 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2822 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2823 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2824 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2825 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2826 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2827 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2828 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2829 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2830 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2831 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2832 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2833 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2834 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2835 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2837 defsymbol (&Q_ucs, "->ucs");
2838 defsymbol (&Q_decomposition, "->decomposition");
2839 defsymbol (&Qcompat, "compat");
2840 defsymbol (&Qisolated, "isolated");
2841 defsymbol (&Qinitial, "initial");
2842 defsymbol (&Qmedial, "medial");
/* Qfinal ("final") is already defined with the charset property symbols
   earlier in this function, so it is not defined again in this group. */
2844 defsymbol (&Qvertical, "vertical");
2845 defsymbol (&QnoBreak, "noBreak");
2846 defsymbol (&Qfraction, "fraction");
2847 defsymbol (&Qsuper, "super");
2848 defsymbol (&Qsub, "sub");
2849 defsymbol (&Qcircle, "circle");
2850 defsymbol (&Qsquare, "square");
2851 defsymbol (&Qwide, "wide");
2852 defsymbol (&Qnarrow, "narrow");
2853 defsymbol (&Qsmall, "small");
2854 defsymbol (&Qfont, "font");
2855 defsymbol (&Qucs, "ucs");
2856 defsymbol (&Qucs_bmp, "ucs-bmp");
2857 defsymbol (&Qlatin_viscii, "latin-viscii");
2858 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2859 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2860 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2861 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2862 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2863 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2864 defsymbol (&Qmojikyo, "mojikyo");
2865 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2866 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2867 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2868 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2869 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2870 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2871 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2872 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2873 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2874 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2875 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2876 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2877 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2878 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2879 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2880 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2881 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2882 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2883 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2884 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2885 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2886 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2888 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2889 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2891 defsymbol (&Qcomposite, "composite");
/* vars_of_mule_charset: set up the charset lookup state and the
   variables owned by this file.

   Visible steps: allocate `chlook' with xnew and pass its address to
   dumpstruct together with charset_lookup_description; reset every
   slot of the lookup tables to Qnil; seed the next-allocated
   leading-byte counters from the MIN_LEADING_BYTE_PRIVATE* constants;
   then initialise leading_code_private_11, Vutf_2000_version, the
   three character tables built by make_char_id_table (Qnil), and
   Vdefault_coded_charset_priority_list.

   NOTE(review): the declarations of i, j and k and any preprocessor
   conditionals are not visible in this listing.  charset_by_attributes
   is cleared both as a two-dimensional and as a three-dimensional
   array, so the two loop nests are presumably alternative build
   configurations -- confirm.  leading_code_private_11 is assigned the
   same value before and after its DEFVAR_INT; the second assignment
   looks redundant -- confirm.  */
2895 vars_of_mule_charset (void)
2902 chlook = xnew (struct charset_lookup);
2903 dumpstruct (&chlook, &charset_lookup_description);
2905 /* Table of charsets indexed by leading byte: start with every slot empty (Qnil). */
2906 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2907 chlook->charset_by_leading_byte[i] = Qnil;
2910 /* Table of charsets indexed by type/final-byte: start with every slot empty (Qnil). */
2911 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2912 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2913 chlook->charset_by_attributes[i][j] = Qnil;
2915 /* Table of charsets indexed by type/final-byte/direction: same reset, one more dimension. */
2916 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2917 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2918 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2919 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Seed the counters that record the next leading byte to hand out. */
2923 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2925 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2926 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2930 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2931 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2932 Leading-code of private TYPE9N charset of column-width 1.
2934 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2938 Vutf_2000_version = build_string("0.15 (Sangō)");
2939 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2940 Version number of UTF-2000.
2943 staticpro (&Vcharacter_attribute_table);
2944 Vcharacter_attribute_table = make_char_id_table (Qnil);
2946 staticpro (&Vcharacter_composition_table);
2947 Vcharacter_composition_table = make_char_id_table (Qnil);
2949 staticpro (&Vcharacter_variant_table);
2950 Vcharacter_variant_table = make_char_id_table (Qnil);
2952 Vdefault_coded_charset_priority_list = Qnil;
2953 DEFVAR_LISP ("default-coded-charset-priority-list",
2954 &Vdefault_coded_charset_priority_list /*
2955 Default order of preferred coded-character-sets.
/* complex_vars_of_mule_charset: create the predefined charsets.

   Creates Vcharset_hash_table with make_lisp_hash_table, then, for
   each predefined charset, passes the address of its Vcharset_*
   variable to staticpro and stores the object returned by
   make_charset in that variable.  When ENABLE_COMPOSITE_CHARS is
   defined it additionally creates the composite charset, resets the
   composite row/column counters to 32 and creates the two composite
   hash tables.

   NOTE(review): the four build_string arguments of each make_charset
   call appear to be, in order, a short name, a long name, a
   description and a font-registry regexp -- confirm against the
   definition of make_charset.  Several continuation lines of this
   function are not visible in this listing and are left untouched.  */
2961 complex_vars_of_mule_charset (void)
2963 staticpro (&Vcharset_hash_table);
2964 Vcharset_hash_table =
2965 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2967 /* Predefined character sets. We store them into variables for
2971 staticpro (&Vcharset_ucs);
2973 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2974 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2975 build_string ("UCS"),
2976 build_string ("UCS"),
2977 build_string ("ISO/IEC 10646"),
2979 Qnil, 0, 0xFFFFFFF, 0, 0);
2980 staticpro (&Vcharset_ucs_bmp);
2982 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2983 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2984 build_string ("BMP"),
2985 build_string ("BMP"),
2986 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2987 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2988 Qnil, 0, 0xFFFF, 0, 0);
2990 # define MIN_CHAR_THAI 0
2991 # define MAX_CHAR_THAI 0
2992 # define MIN_CHAR_HEBREW 0
2993 # define MAX_CHAR_HEBREW 0
2994 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2995 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2997 staticpro (&Vcharset_ascii);
2999 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3000 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3001 build_string ("ASCII"),
3002 build_string ("ASCII"),
3003 build_string ("ASCII (ISO646 IRV)"),
3004 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3005 Qnil, 0, 0x7F, 0, 0);
3006 staticpro (&Vcharset_control_1);
3007 Vcharset_control_1 =
3008 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3009 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3010 build_string ("C1"),
3011 build_string ("Control characters"),
3012 build_string ("Control characters 128-159"),
3014 Qnil, 0x80, 0x9F, 0, 0);
3015 staticpro (&Vcharset_latin_iso8859_1);
3016 Vcharset_latin_iso8859_1 =
3017 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3018 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3019 build_string ("Latin-1"),
3020 build_string ("ISO8859-1 (Latin-1)"),
3021 build_string ("ISO8859-1 (Latin-1)"),
3022 build_string ("iso8859-1"),
3023 Qnil, 0xA0, 0xFF, 0, 32);
3024 staticpro (&Vcharset_latin_iso8859_2);
3025 Vcharset_latin_iso8859_2 =
3026 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3027 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3028 build_string ("Latin-2"),
3029 build_string ("ISO8859-2 (Latin-2)"),
3030 build_string ("ISO8859-2 (Latin-2)"),
3031 build_string ("iso8859-2"),
3033 staticpro (&Vcharset_latin_iso8859_3);
3034 Vcharset_latin_iso8859_3 =
3035 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3036 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3037 build_string ("Latin-3"),
3038 build_string ("ISO8859-3 (Latin-3)"),
3039 build_string ("ISO8859-3 (Latin-3)"),
3040 build_string ("iso8859-3"),
3042 staticpro (&Vcharset_latin_iso8859_4);
3043 Vcharset_latin_iso8859_4 =
3044 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3045 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3046 build_string ("Latin-4"),
3047 build_string ("ISO8859-4 (Latin-4)"),
3048 build_string ("ISO8859-4 (Latin-4)"),
3049 build_string ("iso8859-4"),
3051 staticpro (&Vcharset_thai_tis620);
3052 Vcharset_thai_tis620 =
3053 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3054 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3055 build_string ("TIS620"),
3056 build_string ("TIS620 (Thai)"),
3057 build_string ("TIS620.2529 (Thai)"),
3058 build_string ("tis620"),
3059 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3060 staticpro (&Vcharset_greek_iso8859_7);
3061 Vcharset_greek_iso8859_7 =
3062 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3063 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3064 build_string ("ISO8859-7"),
3065 build_string ("ISO8859-7 (Greek)"),
3066 build_string ("ISO8859-7 (Greek)"),
3067 build_string ("iso8859-7"),
3069 0 /* MIN_CHAR_GREEK */,
3070 0 /* MAX_CHAR_GREEK */, 0, 32);
3071 staticpro (&Vcharset_arabic_iso8859_6);
3072 Vcharset_arabic_iso8859_6 =
3073 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3074 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3075 build_string ("ISO8859-6"),
3076 build_string ("ISO8859-6 (Arabic)"),
3077 build_string ("ISO8859-6 (Arabic)"),
3078 build_string ("iso8859-6"),
3080 staticpro (&Vcharset_hebrew_iso8859_8);
3081 Vcharset_hebrew_iso8859_8 =
3082 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3083 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3084 build_string ("ISO8859-8"),
3085 build_string ("ISO8859-8 (Hebrew)"),
3086 build_string ("ISO8859-8 (Hebrew)"),
3087 build_string ("iso8859-8"),
3088 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3089 staticpro (&Vcharset_katakana_jisx0201);
3090 Vcharset_katakana_jisx0201 =
3091 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3092 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3093 build_string ("JISX0201 Kana"),
3094 build_string ("JISX0201.1976 (Japanese Kana)"),
3095 build_string ("JISX0201.1976 Japanese Kana"),
3096 build_string ("jisx0201\\.1976"),
3098 staticpro (&Vcharset_latin_jisx0201);
3099 Vcharset_latin_jisx0201 =
3100 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3101 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3102 build_string ("JISX0201 Roman"),
3103 build_string ("JISX0201.1976 (Japanese Roman)"),
3104 build_string ("JISX0201.1976 Japanese Roman"),
3105 build_string ("jisx0201\\.1976"),
3107 staticpro (&Vcharset_cyrillic_iso8859_5);
3108 Vcharset_cyrillic_iso8859_5 =
3109 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3110 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3111 build_string ("ISO8859-5"),
3112 build_string ("ISO8859-5 (Cyrillic)"),
3113 build_string ("ISO8859-5 (Cyrillic)"),
3114 build_string ("iso8859-5"),
3116 0 /* MIN_CHAR_CYRILLIC */,
3117 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3118 staticpro (&Vcharset_latin_iso8859_9);
3119 Vcharset_latin_iso8859_9 =
3120 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3121 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3122 build_string ("Latin-5"),
3123 build_string ("ISO8859-9 (Latin-5)"),
3124 build_string ("ISO8859-9 (Latin-5)"),
3125 build_string ("iso8859-9"),
3127 staticpro (&Vcharset_japanese_jisx0208_1978);
3128 Vcharset_japanese_jisx0208_1978 =
3129 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3130 Qjapanese_jisx0208_1978, 94, 2,
3131 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3132 build_string ("JIS X0208:1978"),
3133 build_string ("JIS X0208:1978 (Japanese)"),
3135 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3136 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3138 staticpro (&Vcharset_chinese_gb2312);
3139 Vcharset_chinese_gb2312 =
3140 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3141 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3142 build_string ("GB2312"),
3143 build_string ("GB2312"),
3144 build_string ("GB2312 Chinese simplified"),
3145 build_string ("gb2312"),
3147 staticpro (&Vcharset_japanese_jisx0208);
3148 Vcharset_japanese_jisx0208 =
3149 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3150 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3151 build_string ("JISX0208"),
3152 build_string ("JIS X0208:1983 (Japanese)"),
3153 build_string ("JIS X0208:1983 Japanese Kanji"),
3154 build_string ("jisx0208\\.1983"),
3157 staticpro (&Vcharset_japanese_jisx0208_1990);
3158 Vcharset_japanese_jisx0208_1990 =
3159 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3160 Qjapanese_jisx0208_1990, 94, 2,
3161 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3162 build_string ("JISX0208-1990"),
3163 build_string ("JIS X0208:1990 (Japanese)"),
3164 build_string ("JIS X0208:1990 Japanese Kanji"),
3165 build_string ("jisx0208\\.1990"),
3167 MIN_CHAR_JIS_X0208_1990,
3168 MAX_CHAR_JIS_X0208_1990, 0, 33);
3170 staticpro (&Vcharset_korean_ksc5601);
3171 Vcharset_korean_ksc5601 =
3172 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3173 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3174 build_string ("KSC5601"),
3175 build_string ("KSC5601 (Korean)"),
3176 build_string ("KSC5601 Korean Hangul and Hanja"),
3177 build_string ("ksc5601"),
3179 staticpro (&Vcharset_japanese_jisx0212);
3180 Vcharset_japanese_jisx0212 =
3181 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3182 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3183 build_string ("JISX0212"),
3184 build_string ("JISX0212 (Japanese)"),
3185 build_string ("JISX0212 Japanese Supplement"),
3186 build_string ("jisx0212"),
/* Registry regexp for one CNS 11643 plane; `n' must be a string
   literal because it is pasted between two adjacent literals.  */
3189 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3190 staticpro (&Vcharset_chinese_cns11643_1);
3191 Vcharset_chinese_cns11643_1 =
3192 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3193 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3194 build_string ("CNS11643-1"),
3195 build_string ("CNS11643-1 (Chinese traditional)"),
3197 ("CNS 11643 Plane 1 Chinese traditional"),
3198 build_string (CHINESE_CNS_PLANE_RE("1")),
3200 staticpro (&Vcharset_chinese_cns11643_2);
3201 Vcharset_chinese_cns11643_2 =
3202 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3203 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3204 build_string ("CNS11643-2"),
3205 build_string ("CNS11643-2 (Chinese traditional)"),
3207 ("CNS 11643 Plane 2 Chinese traditional"),
3208 build_string (CHINESE_CNS_PLANE_RE("2")),
3211 staticpro (&Vcharset_latin_tcvn5712);
3212 Vcharset_latin_tcvn5712 =
3213 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3214 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3215 build_string ("TCVN 5712"),
3216 build_string ("TCVN 5712 (VSCII-2)"),
3217 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3218 build_string ("tcvn5712-1"),
3220 staticpro (&Vcharset_latin_viscii_lower);
3221 Vcharset_latin_viscii_lower =
3222 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3223 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3224 build_string ("VISCII lower"),
3225 build_string ("VISCII lower (Vietnamese)"),
3226 build_string ("VISCII lower (Vietnamese)"),
3227 build_string ("MULEVISCII-LOWER"),
3229 staticpro (&Vcharset_latin_viscii_upper);
3230 Vcharset_latin_viscii_upper =
3231 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3232 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3233 build_string ("VISCII upper"),
3234 build_string ("VISCII upper (Vietnamese)"),
3235 build_string ("VISCII upper (Vietnamese)"),
3236 build_string ("MULEVISCII-UPPER"),
3238 staticpro (&Vcharset_latin_viscii);
3239 Vcharset_latin_viscii =
3240 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3241 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3242 build_string ("VISCII"),
3243 build_string ("VISCII 1.1 (Vietnamese)"),
3244 build_string ("VISCII 1.1 (Vietnamese)"),
3245 build_string ("VISCII1\\.1"),
3247 staticpro (&Vcharset_ideograph_daikanwa);
3248 Vcharset_ideograph_daikanwa =
3249 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3250 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3251 build_string ("Daikanwa"),
3252 build_string ("Morohashi's Daikanwa"),
3253 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3254 build_string ("Daikanwa"),
3255 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3256 staticpro (&Vcharset_mojikyo);
3258 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3259 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3260 build_string ("Mojikyo"),
3261 build_string ("Mojikyo"),
3262 build_string ("Konjaku-Mojikyo"),
3264 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
/* Mojikyo pseudo-JIS parts 1 through 21: the visible arguments are
   identical from part to part except for the part number embedded in
   the identifiers and strings.  */
3265 staticpro (&Vcharset_mojikyo_pj_1);
3266 Vcharset_mojikyo_pj_1 =
3267 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3268 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3269 build_string ("Mojikyo-PJ-1"),
3270 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3272 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3273 build_string ("jisx0208\\.Mojikyo-1$"),
3275 staticpro (&Vcharset_mojikyo_pj_2);
3276 Vcharset_mojikyo_pj_2 =
3277 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3278 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Mojikyo-PJ-2"),
3280 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3282 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3283 build_string ("jisx0208\\.Mojikyo-2$"),
3285 staticpro (&Vcharset_mojikyo_pj_3);
3286 Vcharset_mojikyo_pj_3 =
3287 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3288 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("Mojikyo-PJ-3"),
3290 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3292 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3293 build_string ("jisx0208\\.Mojikyo-3$"),
3295 staticpro (&Vcharset_mojikyo_pj_4);
3296 Vcharset_mojikyo_pj_4 =
3297 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3298 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3299 build_string ("Mojikyo-PJ-4"),
3300 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3302 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3303 build_string ("jisx0208\\.Mojikyo-4$"),
3305 staticpro (&Vcharset_mojikyo_pj_5);
3306 Vcharset_mojikyo_pj_5 =
3307 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3308 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Mojikyo-PJ-5"),
3310 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3312 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3313 build_string ("jisx0208\\.Mojikyo-5$"),
3315 staticpro (&Vcharset_mojikyo_pj_6);
3316 Vcharset_mojikyo_pj_6 =
3317 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3318 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3319 build_string ("Mojikyo-PJ-6"),
3320 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3322 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3323 build_string ("jisx0208\\.Mojikyo-6$"),
3325 staticpro (&Vcharset_mojikyo_pj_7);
3326 Vcharset_mojikyo_pj_7 =
3327 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3328 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3329 build_string ("Mojikyo-PJ-7"),
3330 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3332 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3333 build_string ("jisx0208\\.Mojikyo-7$"),
3335 staticpro (&Vcharset_mojikyo_pj_8);
3336 Vcharset_mojikyo_pj_8 =
3337 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3338 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Mojikyo-PJ-8"),
3340 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3342 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3343 build_string ("jisx0208\\.Mojikyo-8$"),
3345 staticpro (&Vcharset_mojikyo_pj_9);
3346 Vcharset_mojikyo_pj_9 =
3347 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3348 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3349 build_string ("Mojikyo-PJ-9"),
3350 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3352 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3353 build_string ("jisx0208\\.Mojikyo-9$"),
3355 staticpro (&Vcharset_mojikyo_pj_10);
3356 Vcharset_mojikyo_pj_10 =
3357 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3358 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3359 build_string ("Mojikyo-PJ-10"),
3360 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3362 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3363 build_string ("jisx0208\\.Mojikyo-10$"),
3365 staticpro (&Vcharset_mojikyo_pj_11);
3366 Vcharset_mojikyo_pj_11 =
3367 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3368 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3369 build_string ("Mojikyo-PJ-11"),
3370 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3372 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3373 build_string ("jisx0208\\.Mojikyo-11$"),
3375 staticpro (&Vcharset_mojikyo_pj_12);
3376 Vcharset_mojikyo_pj_12 =
3377 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3378 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3379 build_string ("Mojikyo-PJ-12"),
3380 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3382 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3383 build_string ("jisx0208\\.Mojikyo-12$"),
3385 staticpro (&Vcharset_mojikyo_pj_13);
3386 Vcharset_mojikyo_pj_13 =
3387 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3388 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3389 build_string ("Mojikyo-PJ-13"),
3390 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3392 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3393 build_string ("jisx0208\\.Mojikyo-13$"),
3395 staticpro (&Vcharset_mojikyo_pj_14);
3396 Vcharset_mojikyo_pj_14 =
3397 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3398 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3399 build_string ("Mojikyo-PJ-14"),
3400 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3402 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3403 build_string ("jisx0208\\.Mojikyo-14$"),
3405 staticpro (&Vcharset_mojikyo_pj_15);
3406 Vcharset_mojikyo_pj_15 =
3407 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3408 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3409 build_string ("Mojikyo-PJ-15"),
3410 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3412 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3413 build_string ("jisx0208\\.Mojikyo-15$"),
3415 staticpro (&Vcharset_mojikyo_pj_16);
3416 Vcharset_mojikyo_pj_16 =
3417 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3418 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3419 build_string ("Mojikyo-PJ-16"),
3420 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3422 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3423 build_string ("jisx0208\\.Mojikyo-16$"),
3425 staticpro (&Vcharset_mojikyo_pj_17);
3426 Vcharset_mojikyo_pj_17 =
3427 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3428 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3429 build_string ("Mojikyo-PJ-17"),
3430 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3432 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3433 build_string ("jisx0208\\.Mojikyo-17$"),
3435 staticpro (&Vcharset_mojikyo_pj_18);
3436 Vcharset_mojikyo_pj_18 =
3437 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3438 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3439 build_string ("Mojikyo-PJ-18"),
3440 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3442 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3443 build_string ("jisx0208\\.Mojikyo-18$"),
3445 staticpro (&Vcharset_mojikyo_pj_19);
3446 Vcharset_mojikyo_pj_19 =
3447 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3448 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3449 build_string ("Mojikyo-PJ-19"),
3450 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3452 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3453 build_string ("jisx0208\\.Mojikyo-19$"),
3455 staticpro (&Vcharset_mojikyo_pj_20);
3456 Vcharset_mojikyo_pj_20 =
3457 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3458 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3459 build_string ("Mojikyo-PJ-20"),
3460 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3462 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3463 build_string ("jisx0208\\.Mojikyo-20$"),
3465 staticpro (&Vcharset_mojikyo_pj_21);
3466 Vcharset_mojikyo_pj_21 =
3467 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3468 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3469 build_string ("Mojikyo-PJ-21"),
3470 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3472 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3473 build_string ("jisx0208\\.Mojikyo-21$"),
3475 staticpro (&Vcharset_ethiopic_ucs);
3476 Vcharset_ethiopic_ucs =
3477 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3478 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3479 build_string ("Ethiopic (UCS)"),
3480 build_string ("Ethiopic (UCS)"),
3481 build_string ("Ethiopic of UCS"),
3482 build_string ("Ethiopic-Unicode"),
3483 Qnil, 0x1200, 0x137F, 0x1200, 0);
/* NOTE(review): both Big5 levels are given the same first string
   ("Big5") and the same last string ("big5"); presumably intentional
   -- confirm.  */
3485 staticpro (&Vcharset_chinese_big5_1);
3486 Vcharset_chinese_big5_1 =
3487 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3488 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3489 build_string ("Big5"),
3490 build_string ("Big5 (Level-1)"),
3492 ("Big5 Level-1 Chinese traditional"),
3493 build_string ("big5"),
3495 staticpro (&Vcharset_chinese_big5_2);
3496 Vcharset_chinese_big5_2 =
3497 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3498 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3499 build_string ("Big5"),
3500 build_string ("Big5 (Level-2)"),
3502 ("Big5 Level-2 Chinese traditional"),
3503 build_string ("big5"),
3506 #ifdef ENABLE_COMPOSITE_CHARS
3507 /* #### For simplicity, we put composite chars into a 96x96 charset.
3508 This is going to lead to problems because you can run out of
3509 room, esp. as we don't yet recycle numbers. */
3510 staticpro (&Vcharset_composite);
3511 Vcharset_composite =
3512 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3513 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3514 build_string ("Composite"),
3515 build_string ("Composite characters"),
3516 build_string ("Composite characters"),
3519 /* #### not dumped properly */
3520 composite_char_row_next = 32;
3521 composite_char_col_next = 32;
/* The string->char table is created with HASH_TABLE_EQUAL, the
   char->string table with HASH_TABLE_EQ.  */
3523 Vcomposite_char_string2char_hash_table =
3524 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3525 Vcomposite_char_char2string_hash_table =
3526 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3527 staticpro (&Vcomposite_char_string2char_hash_table);
3528 staticpro (&Vcomposite_char_char2string_hash_table);
3529 #endif /* ENABLE_COMPOSITE_CHARS */