1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo_pj_1;
70 Lisp_Object Vcharset_mojikyo_pj_2;
71 Lisp_Object Vcharset_mojikyo_pj_3;
72 Lisp_Object Vcharset_mojikyo_pj_4;
73 Lisp_Object Vcharset_mojikyo_pj_5;
74 Lisp_Object Vcharset_mojikyo_pj_6;
75 Lisp_Object Vcharset_mojikyo_pj_7;
76 Lisp_Object Vcharset_mojikyo_pj_8;
77 Lisp_Object Vcharset_mojikyo_pj_9;
78 Lisp_Object Vcharset_mojikyo_pj_10;
79 Lisp_Object Vcharset_mojikyo_pj_11;
80 Lisp_Object Vcharset_mojikyo_pj_12;
81 Lisp_Object Vcharset_mojikyo_pj_13;
82 Lisp_Object Vcharset_mojikyo_pj_14;
83 Lisp_Object Vcharset_mojikyo_pj_15;
84 Lisp_Object Vcharset_mojikyo_pj_16;
85 Lisp_Object Vcharset_mojikyo_pj_17;
86 Lisp_Object Vcharset_mojikyo_pj_18;
87 Lisp_Object Vcharset_mojikyo_pj_19;
88 Lisp_Object Vcharset_mojikyo_pj_20;
89 Lisp_Object Vcharset_mojikyo_pj_21;
90 Lisp_Object Vcharset_ethiopic_ucs;
92 Lisp_Object Vcharset_chinese_big5_1;
93 Lisp_Object Vcharset_chinese_big5_2;
95 #ifdef ENABLE_COMPOSITE_CHARS
96 Lisp_Object Vcharset_composite;
98 /* Hash tables for composite chars. One maps string representing
99 composed chars to their equivalent chars; one goes the
101 Lisp_Object Vcomposite_char_char2string_hash_table;
102 Lisp_Object Vcomposite_char_string2char_hash_table;
104 static int composite_char_row_next;
105 static int composite_char_col_next;
107 #endif /* ENABLE_COMPOSITE_CHARS */
109 struct charset_lookup *chlook;
111 static const struct lrecord_description charset_lookup_description_1[] = {
112 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
121 static const struct struct_description charset_lookup_description = {
122 sizeof(struct charset_lookup),
123 charset_lookup_description_1
127 /* Table of number of bytes in the string representation of a character
128 indexed by the first byte of that representation.
130 rep_bytes_by_first_byte(c) is more efficient than the equivalent
131 canonical computation:
133 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
135 const Bytecount rep_bytes_by_first_byte[0xA0] =
136 { /* 0x00 - 0x7f are for straight ASCII */
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 /* 0x80 - 0x8f are for Dimension-1 official charsets */
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 /* 0x90 - 0x9d are for Dimension-2 official charsets */
152 /* 0x9e is for Dimension-1 private charsets */
153 /* 0x9f is for Dimension-2 private charsets */
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
161 mark_char_byte_table (Lisp_Object obj)
163 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 for (i = 0; i < 256; i++)
168 mark_object (cte->property[i]);
174 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
176 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
177 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
180 for (i = 0; i < 256; i++)
181 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
183 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
185 if (!char_byte_table_equal (cte1->property[i],
186 cte2->property[i], depth + 1))
193 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
199 char_byte_table_hash (Lisp_Object obj, int depth)
201 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
203 return internal_array_hash (cte->property, 256, depth);
206 static const struct lrecord_description char_byte_table_description[] = {
207 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
211 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
212 mark_char_byte_table,
213 internal_object_printer,
214 0, char_byte_table_equal,
215 char_byte_table_hash,
216 char_byte_table_description,
217 struct Lisp_Char_Byte_Table);
220 make_char_byte_table (Lisp_Object initval)
224 struct Lisp_Char_Byte_Table *cte =
225 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
226 &lrecord_char_byte_table);
228 for (i = 0; i < 256; i++)
229 cte->property[i] = initval;
231 XSETCHAR_BYTE_TABLE (obj, cte);
236 copy_char_byte_table (Lisp_Object entry)
238 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
241 struct Lisp_Char_Byte_Table *ctenew =
242 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
243 &lrecord_char_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (CHAR_BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_char_byte_table (new);
251 ctenew->property[i] = new;
254 XSETCHAR_BYTE_TABLE (obj, ctenew);
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_code_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_char_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_char_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
330 struct Lisp_Char_Byte_Table* cpt
331 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (CHAR_BYTE_TABLE_P (ret))
335 cpt = XCHAR_BYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (CHAR_BYTE_TABLE_P (ret))
341 cpt = XCHAR_BYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (CHAR_BYTE_TABLE_P (ret))
347 cpt = XCHAR_BYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
355 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
357 unsigned int code = ch;
358 struct Lisp_Char_Byte_Table* cpt1
359 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
360 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
362 if (CHAR_BYTE_TABLE_P (ret))
364 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
366 ret = cpt2->property[(unsigned char)(code >> 16)];
367 if (CHAR_BYTE_TABLE_P (ret))
369 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
371 ret = cpt3->property[(unsigned char)(code >> 8)];
372 if (CHAR_BYTE_TABLE_P (ret))
374 struct Lisp_Char_Byte_Table* cpt4
375 = XCHAR_BYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_char_byte_table (ret);
390 Lisp_Object cpt4 = make_char_byte_table (ret);
392 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_char_byte_table (ret);
401 Lisp_Object cpt3 = make_char_byte_table (ret);
402 Lisp_Object cpt4 = make_char_byte_table (ret);
404 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
436 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
442 else if (EQ (v, Qcompat))
444 else if (EQ (v, Qisolated))
446 else if (EQ (v, Qinitial))
448 else if (EQ (v, Qmedial))
450 else if (EQ (v, Qfinal))
452 else if (EQ (v, Qvertical))
454 else if (EQ (v, QnoBreak))
456 else if (EQ (v, Qfraction))
458 else if (EQ (v, Qsuper))
460 else if (EQ (v, Qsub))
462 else if (EQ (v, Qcircle))
464 else if (EQ (v, Qsquare))
466 else if (EQ (v, Qwide))
468 else if (EQ (v, Qnarrow))
470 else if (EQ (v, Qsmall))
472 else if (EQ (v, Qfont))
475 signal_simple_error (err_msg, err_arg);
478 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
479 Return character corresponding with list.
483 Lisp_Object table = Vcharacter_composition_table;
484 Lisp_Object rest = list;
488 Lisp_Object v = Fcar (rest);
490 Emchar c = to_char_code (v, "Invalid value for composition", list);
492 ret = get_char_code_table (c, table);
497 if (!CHAR_CODE_TABLE_P (ret))
502 else if (!CONSP (rest))
504 else if (CHAR_CODE_TABLE_P (ret))
507 signal_simple_error ("Invalid table is found with", list);
509 signal_simple_error ("Invalid value for composition", list);
512 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
513 Return variants of CHARACTER.
517 CHECK_CHAR (character);
518 return Fcopy_list (get_char_code_table (XCHAR (character),
519 Vcharacter_variant_table));
522 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
523 Return the alist of attributes of CHARACTER.
527 CHECK_CHAR (character);
528 return Fcopy_alist (get_char_code_table (XCHAR (character),
529 Vcharacter_attribute_table));
532 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
533 Return the value of CHARACTER's ATTRIBUTE.
535 (character, attribute))
540 CHECK_CHAR (character);
541 ret = get_char_code_table (XCHAR (character),
542 Vcharacter_attribute_table);
546 if (!NILP (ccs = Ffind_charset (attribute)))
549 return Fcdr (Fassq (attribute, ret));
553 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
556 Emchar char_code = XCHAR (character);
558 = get_char_code_table (char_code, Vcharacter_attribute_table);
561 cell = Fassq (attribute, ret);
565 ret = Fcons (Fcons (attribute, value), ret);
567 else if (!EQ (Fcdr (cell), value))
569 Fsetcdr (cell, value);
571 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
576 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
578 Emchar char_code = XCHAR (character);
580 = get_char_code_table (char_code, Vcharacter_attribute_table);
582 if (!EQ (attribute, Fcar (Fcar (alist))))
584 alist = Fcdr (alist);
588 Lisp_Object pr = alist;
589 Lisp_Object r = Fcdr (alist);
593 if (EQ (attribute, Fcar (Fcar (r))))
595 XCDR (pr) = Fcdr (r);
602 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
606 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
607 Store CHARACTER's ATTRIBUTE with VALUE.
609 (character, attribute, value))
613 CHECK_CHAR (character);
614 ccs = Ffind_charset (attribute);
617 Lisp_Object cpos, rest;
618 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
625 /* ad-hoc method for `ascii' */
626 if ((XCHARSET_CHARS (ccs) == 94) &&
627 (XCHARSET_BYTE_OFFSET (ccs) != 33))
628 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
630 ccs_len = XCHARSET_CHARS (ccs);
634 Lisp_Object ret = Fcar (value);
637 signal_simple_error ("Invalid value for coded-charset", value);
638 code_point = XINT (ret);
639 if (XCHARSET_GRAPHIC (ccs) == 1)
647 signal_simple_error ("Invalid value for coded-charset", value);
650 signal_simple_error ("Invalid value for coded-charset", value);
652 if (XCHARSET_GRAPHIC (ccs) == 1)
654 code_point = (code_point << 8) | i;
657 value = make_int (code_point);
659 else if (INTP (value))
661 if (XCHARSET_GRAPHIC (ccs) == 1)
662 value = make_int (XINT (value) & 0x7F7F7F7F);
665 signal_simple_error ("Invalid value for coded-charset", value);
668 cpos = Fget_char_attribute (character, attribute);
673 dim = XCHARSET_DIMENSION (ccs);
674 code_point = XINT (cpos);
678 i = ((code_point >> (8 * dim)) & 255)
679 - XCHARSET_BYTE_OFFSET (ccs);
680 nv = XVECTOR_DATA(v)[i];
686 XVECTOR_DATA(v)[i] = Qnil;
687 v = XCHARSET_DECODING_TABLE (ccs);
692 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
695 dim = XCHARSET_DIMENSION (ccs);
696 code_point = XINT (value);
701 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
702 nv = XVECTOR_DATA(v)[i];
706 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
712 XVECTOR_DATA(v)[i] = character;
714 else if (EQ (attribute, Q_decomposition))
716 Lisp_Object rest = value;
717 Lisp_Object table = Vcharacter_composition_table;
720 signal_simple_error ("Invalid value for ->decomposition",
725 Lisp_Object v = Fcar (rest);
728 = to_char_code (v, "Invalid value for ->decomposition", value);
733 put_char_code_table (c, character, table);
738 ntable = get_char_code_table (c, table);
739 if (!CHAR_CODE_TABLE_P (ntable))
741 ntable = make_char_code_table (Qnil);
742 put_char_code_table (c, ntable, table);
748 else if (EQ (attribute, Q_ucs))
754 signal_simple_error ("Invalid value for ->ucs", value);
758 ret = get_char_code_table (c, Vcharacter_variant_table);
759 if (NILP (Fmemq (character, ret)))
761 put_char_code_table (c, Fcons (character, ret),
762 Vcharacter_variant_table);
765 return put_char_attribute (character, attribute, value);
768 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
769 Remove CHARACTER's ATTRIBUTE.
771 (character, attribute))
775 CHECK_CHAR (character);
776 ccs = Ffind_charset (attribute);
780 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
787 /* ad-hoc method for `ascii' */
788 if ((XCHARSET_CHARS (ccs) == 94) &&
789 (XCHARSET_BYTE_OFFSET (ccs) != 33))
790 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
792 ccs_len = XCHARSET_CHARS (ccs);
795 cpos = Fget_char_attribute (character, attribute);
800 dim = XCHARSET_DIMENSION (ccs);
801 code_point = XINT (cpos);
805 i = ((code_point >> (8 * dim)) & 255)
806 - XCHARSET_BYTE_OFFSET (ccs);
807 nv = XVECTOR_DATA(v)[i];
813 XVECTOR_DATA(v)[i] = Qnil;
814 v = XCHARSET_DECODING_TABLE (ccs);
818 return remove_char_attribute (character, attribute);
823 EXFUN (Fmake_char, 3);
824 EXFUN (Fdecode_char, 2);
826 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
827 Store character's ATTRIBUTES.
831 Lisp_Object rest = attributes;
832 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
833 Lisp_Object character;
839 Lisp_Object cell = Fcar (rest);
843 signal_simple_error ("Invalid argument", attributes);
844 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
848 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
850 character = Fdecode_char (ccs, cell);
851 goto setup_attributes;
855 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
858 signal_simple_error ("Invalid argument", attributes);
860 character = make_char (XINT (code) + 0x100000);
861 goto setup_attributes;
865 else if (!INTP (code))
866 signal_simple_error ("Invalid argument", attributes);
868 character = make_char (XINT (code));
874 Lisp_Object cell = Fcar (rest);
877 signal_simple_error ("Invalid argument", attributes);
878 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
882 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
885 Lisp_Object Vutf_2000_version;
889 int leading_code_private_11;
892 Lisp_Object Qcharsetp;
894 /* Qdoc_string, Qdimension, Qchars defined in general.c */
895 Lisp_Object Qregistry, Qfinal, Qgraphic;
896 Lisp_Object Qdirection;
897 Lisp_Object Qreverse_direction_charset;
898 Lisp_Object Qleading_byte;
899 Lisp_Object Qshort_name, Qlong_name;
915 Qjapanese_jisx0208_1978,
918 Qjapanese_jisx0208_1990,
928 Qvietnamese_viscii_lower,
929 Qvietnamese_viscii_upper,
958 Lisp_Object Ql2r, Qr2l;
960 Lisp_Object Vcharset_hash_table;
963 static Charset_ID next_allocated_leading_byte;
965 static Charset_ID next_allocated_1_byte_leading_byte;
966 static Charset_ID next_allocated_2_byte_leading_byte;
969 /* Composite characters are characters constructed by overstriking two
970 or more regular characters.
972 1) The old Mule implementation involves storing composite characters
973 in a buffer as a tag followed by all of the actual characters
974 used to make up the composite character. I think this is a bad
975 idea; it greatly complicates code that wants to handle strings
976 one character at a time because it has to deal with the possibility
977 of great big ungainly characters. It's much more reasonable to
978 simply store an index into a table of composite characters.
980 2) The current implementation only allows for 16,384 separate
981 composite characters over the lifetime of the XEmacs process.
982 This could become a potential problem if the user
983 edited lots of different files that use composite characters.
984 Due to FSF bogosity, increasing the number of allowable
985 composite characters under Mule would decrease the number
986 of possible faces that can exist. Mule already has shrunk
987 this to 2048, and further shrinkage would become uncomfortable.
988 No such problems exist in XEmacs.
990 Composite characters could be represented as 0x80 C1 C2 C3,
991 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
992 for slightly under 2^20 (one million) composite characters
993 over the XEmacs process lifetime, and you only need to
994 increase the size of a Mule character from 19 to 21 bits.
995 Or you could use 0x80 C1 C2 C3 C4, allowing for about
996 85 million (slightly over 2^26) composite characters. */
999 /************************************************************************/
1000 /* Basic Emchar functions */
1001 /************************************************************************/
1003 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1004 string in STR. Returns the number of bytes stored.
1005 Do not call this directly. Use the macro set_charptr_emchar() instead.
1009 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1015 Lisp_Object charset;
1024 else if ( c <= 0x7ff )
1026 *p++ = (c >> 6) | 0xc0;
1027 *p++ = (c & 0x3f) | 0x80;
1029 else if ( c <= 0xffff )
1031 *p++ = (c >> 12) | 0xe0;
1032 *p++ = ((c >> 6) & 0x3f) | 0x80;
1033 *p++ = (c & 0x3f) | 0x80;
1035 else if ( c <= 0x1fffff )
1037 *p++ = (c >> 18) | 0xf0;
1038 *p++ = ((c >> 12) & 0x3f) | 0x80;
1039 *p++ = ((c >> 6) & 0x3f) | 0x80;
1040 *p++ = (c & 0x3f) | 0x80;
1042 else if ( c <= 0x3ffffff )
1044 *p++ = (c >> 24) | 0xf8;
1045 *p++ = ((c >> 18) & 0x3f) | 0x80;
1046 *p++ = ((c >> 12) & 0x3f) | 0x80;
1047 *p++ = ((c >> 6) & 0x3f) | 0x80;
1048 *p++ = (c & 0x3f) | 0x80;
1052 *p++ = (c >> 30) | 0xfc;
1053 *p++ = ((c >> 24) & 0x3f) | 0x80;
1054 *p++ = ((c >> 18) & 0x3f) | 0x80;
1055 *p++ = ((c >> 12) & 0x3f) | 0x80;
1056 *p++ = ((c >> 6) & 0x3f) | 0x80;
1057 *p++ = (c & 0x3f) | 0x80;
1060 BREAKUP_CHAR (c, charset, c1, c2);
1061 lb = CHAR_LEADING_BYTE (c);
1062 if (LEADING_BYTE_PRIVATE_P (lb))
1063 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1065 if (EQ (charset, Vcharset_control_1))
1074 /* Return the first character from a Mule-encoded string in STR,
1075 assuming it's non-ASCII. Do not call this directly.
1076 Use the macro charptr_emchar() instead. */
1079 non_ascii_charptr_emchar (CONST Bufbyte *str)
1092 else if ( b >= 0xf8 )
1097 else if ( b >= 0xf0 )
1102 else if ( b >= 0xe0 )
1107 else if ( b >= 0xc0 )
1117 for( ; len > 0; len-- )
1120 ch = ( ch << 6 ) | ( b & 0x3f );
1124 Bufbyte i0 = *str, i1, i2 = 0;
1125 Lisp_Object charset;
1127 if (i0 == LEADING_BYTE_CONTROL_1)
1128 return (Emchar) (*++str - 0x20);
1130 if (LEADING_BYTE_PREFIX_P (i0))
1135 charset = CHARSET_BY_LEADING_BYTE (i0);
1136 if (XCHARSET_DIMENSION (charset) == 2)
1139 return MAKE_CHAR (charset, i1, i2);
1143 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1144 Do not call this directly. Use the macro valid_char_p() instead. */
1148 non_ascii_valid_char_p (Emchar ch)
1152 /* Must have only lowest 19 bits set */
1156 f1 = CHAR_FIELD1 (ch);
1157 f2 = CHAR_FIELD2 (ch);
1158 f3 = CHAR_FIELD3 (ch);
1162 Lisp_Object charset;
1164 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1165 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1166 f2 > MAX_CHAR_FIELD2_PRIVATE)
1171 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1172 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1176 NOTE: This takes advantage of the fact that
1177 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1178 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1180 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1181 if (EQ (charset, Qnil))
1183 return (XCHARSET_CHARS (charset) == 96);
1187 Lisp_Object charset;
1189 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1190 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1191 f1 > MAX_CHAR_FIELD1_PRIVATE)
1193 if (f2 < 0x20 || f3 < 0x20)
1196 #ifdef ENABLE_COMPOSITE_CHARS
1197 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1199 if (UNBOUNDP (Fgethash (make_int (ch),
1200 Vcomposite_char_char2string_hash_table,
1205 #endif /* ENABLE_COMPOSITE_CHARS */
1207 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1208 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1211 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1213 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1216 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1218 if (EQ (charset, Qnil))
1220 return (XCHARSET_CHARS (charset) == 96);
1226 /************************************************************************/
1227 /* Basic string functions */
1228 /************************************************************************/
1230 /* Copy the character pointed to by PTR into STR, assuming it's
1231 non-ASCII. Do not call this directly. Use the macro
1232 charptr_copy_char() instead. */
1235 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1237 Bufbyte *strptr = str;
1239 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1241 /* Notice fallthrough. */
1243 case 6: *++strptr = *ptr++;
1244 case 5: *++strptr = *ptr++;
1246 case 4: *++strptr = *ptr++;
1247 case 3: *++strptr = *ptr++;
1248 case 2: *++strptr = *ptr;
1253 return strptr + 1 - str;
1257 /************************************************************************/
1258 /* streams of Emchars */
1259 /************************************************************************/
1261 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1262 The functions below are not meant to be called directly; use
1263 the macros in insdel.h. */
1266 Lstream_get_emchar_1 (Lstream *stream, int ch)
1268 Bufbyte str[MAX_EMCHAR_LEN];
1269 Bufbyte *strptr = str;
1271 str[0] = (Bufbyte) ch;
1272 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1274 /* Notice fallthrough. */
1277 ch = Lstream_getc (stream);
1279 *++strptr = (Bufbyte) ch;
1281 ch = Lstream_getc (stream);
1283 *++strptr = (Bufbyte) ch;
1286 ch = Lstream_getc (stream);
1288 *++strptr = (Bufbyte) ch;
1290 ch = Lstream_getc (stream);
1292 *++strptr = (Bufbyte) ch;
1294 ch = Lstream_getc (stream);
1296 *++strptr = (Bufbyte) ch;
1301 return charptr_emchar (str);
1305 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1307 Bufbyte str[MAX_EMCHAR_LEN];
1308 Bytecount len = set_charptr_emchar (str, ch);
1309 return Lstream_write (stream, str, len);
1313 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1315 Bufbyte str[MAX_EMCHAR_LEN];
1316 Bytecount len = set_charptr_emchar (str, ch);
1317 Lstream_unread (stream, str, len);
1321 /************************************************************************/
1322 /* charset object */
1323 /************************************************************************/
1326 mark_charset (Lisp_Object obj)
1328 struct Lisp_Charset *cs = XCHARSET (obj);
1330 mark_object (cs->short_name);
1331 mark_object (cs->long_name);
1332 mark_object (cs->doc_string);
1333 mark_object (cs->registry);
1334 mark_object (cs->ccl_program);
1336 mark_object (cs->decoding_table);
1342 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1344 struct Lisp_Charset *cs = XCHARSET (obj);
1348 error ("printing unreadable object #<charset %s 0x%x>",
1349 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1352 write_c_string ("#<charset ", printcharfun);
1353 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1354 write_c_string (" ", printcharfun);
1355 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1356 write_c_string (" ", printcharfun);
1357 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1358 write_c_string (" ", printcharfun);
1359 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1360 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1362 CHARSET_DIMENSION (cs),
1363 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1364 CHARSET_COLUMNS (cs),
1365 CHARSET_GRAPHIC (cs),
1366 CHARSET_FINAL (cs));
1367 write_c_string (buf, printcharfun);
1368 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1369 sprintf (buf, " 0x%x>", cs->header.uid);
1370 write_c_string (buf, printcharfun);
1373 static const struct lrecord_description charset_description[] = {
1374 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1376 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1381 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1382 mark_charset, print_charset, 0, 0, 0,
1383 charset_description,
1384 struct Lisp_Charset);
1385 /* Make a new charset. */
1388 make_charset (Charset_ID id, Lisp_Object name,
1389 unsigned short chars, unsigned char dimension,
1390 unsigned char columns, unsigned char graphic,
1391 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1392 Lisp_Object long_name, Lisp_Object doc,
1394 Lisp_Object decoding_table,
1395 Emchar ucs_min, Emchar ucs_max,
1396 Emchar code_offset, unsigned char byte_offset)
1398 unsigned char type = 0;
1400 struct Lisp_Charset *cs =
1401 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1402 XSETCHARSET (obj, cs);
1404 CHARSET_ID (cs) = id;
1405 CHARSET_NAME (cs) = name;
1406 CHARSET_SHORT_NAME (cs) = short_name;
1407 CHARSET_LONG_NAME (cs) = long_name;
1408 CHARSET_CHARS (cs) = chars;
1409 CHARSET_DIMENSION (cs) = dimension;
1410 CHARSET_DIRECTION (cs) = direction;
1411 CHARSET_COLUMNS (cs) = columns;
1412 CHARSET_GRAPHIC (cs) = graphic;
1413 CHARSET_FINAL (cs) = final;
1414 CHARSET_DOC_STRING (cs) = doc;
1415 CHARSET_REGISTRY (cs) = reg;
1416 CHARSET_CCL_PROGRAM (cs) = Qnil;
1417 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1419 CHARSET_DECODING_TABLE(cs) = Qnil;
1420 CHARSET_UCS_MIN(cs) = ucs_min;
1421 CHARSET_UCS_MAX(cs) = ucs_max;
1422 CHARSET_CODE_OFFSET(cs) = code_offset;
1423 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1426 switch (CHARSET_CHARS (cs))
1429 switch (CHARSET_DIMENSION (cs))
1432 type = CHARSET_TYPE_94;
1435 type = CHARSET_TYPE_94X94;
1440 switch (CHARSET_DIMENSION (cs))
1443 type = CHARSET_TYPE_96;
1446 type = CHARSET_TYPE_96X96;
1452 switch (CHARSET_DIMENSION (cs))
1455 type = CHARSET_TYPE_128;
1458 type = CHARSET_TYPE_128X128;
1463 switch (CHARSET_DIMENSION (cs))
1466 type = CHARSET_TYPE_256;
1469 type = CHARSET_TYPE_256X256;
1476 CHARSET_TYPE (cs) = type;
1480 if (id == LEADING_BYTE_ASCII)
1481 CHARSET_REP_BYTES (cs) = 1;
1483 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1485 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1490 /* some charsets do not have final characters. This includes
1491 ASCII, Control-1, Composite, and the two faux private
1494 if (code_offset == 0)
1496 assert (NILP (chlook->charset_by_attributes[type][final]));
1497 chlook->charset_by_attributes[type][final] = obj;
1500 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1501 chlook->charset_by_attributes[type][final][direction] = obj;
1505 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1506 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1508 /* Some charsets are "faux" and don't have names or really exist at
1509 all except in the leading-byte table. */
1511 Fputhash (name, obj, Vcharset_hash_table);
1516 get_unallocated_leading_byte (int dimension)
1521 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1524 lb = next_allocated_leading_byte++;
1528 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1531 lb = next_allocated_1_byte_leading_byte++;
1535 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1538 lb = next_allocated_2_byte_leading_byte++;
1544 ("No more character sets free for this dimension",
1545 make_int (dimension));
1552 make_builtin_char (Lisp_Object charset, int c1, int c2)
1554 if (XCHARSET_UCS_MAX (charset))
1557 = (XCHARSET_DIMENSION (charset) == 1
1559 c1 - XCHARSET_BYTE_OFFSET (charset)
1561 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1562 + c2 - XCHARSET_BYTE_OFFSET (charset))
1563 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1564 if ((code < XCHARSET_UCS_MIN (charset))
1565 || (XCHARSET_UCS_MAX (charset) < code))
1566 signal_simple_error ("Arguments makes invalid character",
1570 else if (XCHARSET_DIMENSION (charset) == 1)
1572 switch (XCHARSET_CHARS (charset))
1576 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1579 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1586 switch (XCHARSET_CHARS (charset))
1589 return MIN_CHAR_94x94
1590 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1591 + (c1 - 33) * 94 + (c2 - 33);
1593 return MIN_CHAR_96x96
1594 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1595 + (c1 - 32) * 96 + (c2 - 32);
1603 range_charset_code_point (Lisp_Object charset, Emchar ch)
1607 if ((XCHARSET_UCS_MIN (charset) <= ch)
1608 && (ch <= XCHARSET_UCS_MAX (charset)))
1610 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1612 if (XCHARSET_CHARS (charset) == 256)
1614 else if (XCHARSET_DIMENSION (charset) == 1)
1615 return d + XCHARSET_BYTE_OFFSET (charset);
1616 else if (XCHARSET_DIMENSION (charset) == 2)
1618 ((d / XCHARSET_CHARS (charset)
1619 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1620 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1621 else if (XCHARSET_DIMENSION (charset) == 3)
1623 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1624 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1625 | ((d / XCHARSET_CHARS (charset)
1626 % XCHARSET_CHARS (charset)
1627 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1628 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1629 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1631 ((d / (XCHARSET_CHARS (charset)
1632 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1633 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1634 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1635 % XCHARSET_CHARS (charset)
1636 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1637 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1638 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1639 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1641 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1643 if (XCHARSET_DIMENSION (charset) == 1)
1645 if (XCHARSET_CHARS (charset) == 94)
1647 if (((d = ch - (MIN_CHAR_94
1648 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1652 else if (XCHARSET_CHARS (charset) == 96)
1654 if (((d = ch - (MIN_CHAR_96
1655 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1662 else if (XCHARSET_DIMENSION (charset) == 2)
1664 if (XCHARSET_CHARS (charset) == 94)
1666 if (((d = ch - (MIN_CHAR_94x94
1667 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1670 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1672 else if (XCHARSET_CHARS (charset) == 96)
1674 if (((d = ch - (MIN_CHAR_96x96
1675 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1678 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1688 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1690 if (c <= MAX_CHAR_BASIC_LATIN)
1692 *charset = Vcharset_ascii;
1697 *charset = Vcharset_control_1;
1702 *charset = Vcharset_latin_iso8859_1;
1706 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1708 *charset = Vcharset_greek_iso8859_7;
1709 return c - MIN_CHAR_GREEK + 0x20;
1711 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1713 *charset = Vcharset_cyrillic_iso8859_5;
1714 return c - MIN_CHAR_CYRILLIC + 0x20;
1717 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1719 *charset = Vcharset_hebrew_iso8859_8;
1720 return c - MIN_CHAR_HEBREW + 0x20;
1722 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1724 *charset = Vcharset_thai_tis620;
1725 return c - MIN_CHAR_THAI + 0x20;
1728 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1729 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1731 return list2 (Vcharset_katakana_jisx0201,
1732 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1735 else if (c <= MAX_CHAR_BMP)
1737 *charset = Vcharset_ucs_bmp;
1740 else if (c < MIN_CHAR_DAIKANWA)
1742 *charset = Vcharset_ucs;
1745 else if (c <= MAX_CHAR_DAIKANWA)
1747 *charset = Vcharset_ideograph_daikanwa;
1748 return c - MIN_CHAR_DAIKANWA;
1750 else if (c < MIN_CHAR_94)
1752 *charset = Vcharset_ucs;
1755 else if (c <= MAX_CHAR_94)
1757 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1758 ((c - MIN_CHAR_94) / 94) + '0',
1759 CHARSET_LEFT_TO_RIGHT);
1760 if (!NILP (*charset))
1761 return ((c - MIN_CHAR_94) % 94) + 33;
1764 *charset = Vcharset_ucs;
1768 else if (c <= MAX_CHAR_96)
1770 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1771 ((c - MIN_CHAR_96) / 96) + '0',
1772 CHARSET_LEFT_TO_RIGHT);
1773 if (!NILP (*charset))
1774 return ((c - MIN_CHAR_96) % 96) + 32;
1777 *charset = Vcharset_ucs;
1781 else if (c <= MAX_CHAR_94x94)
1784 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1785 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1786 CHARSET_LEFT_TO_RIGHT);
1787 if (!NILP (*charset))
1788 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1789 | (((c - MIN_CHAR_94x94) % 94) + 33);
1792 *charset = Vcharset_ucs;
1796 else if (c <= MAX_CHAR_96x96)
1799 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1800 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1801 CHARSET_LEFT_TO_RIGHT);
1802 if (!NILP (*charset))
1803 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1804 | (((c - MIN_CHAR_96x96) % 96) + 32);
1807 *charset = Vcharset_ucs;
1813 *charset = Vcharset_ucs;
1818 Lisp_Object Vdefault_coded_charset_priority_list;
1822 /************************************************************************/
1823 /* Basic charset Lisp functions */
1824 /************************************************************************/
1826 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1827 Return non-nil if OBJECT is a charset.
1831 return CHARSETP (object) ? Qt : Qnil;
1834 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1835 Retrieve the charset of the given name.
1836 If CHARSET-OR-NAME is a charset object, it is simply returned.
1837 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1838 nil is returned. Otherwise the associated charset object is returned.
1842 if (CHARSETP (charset_or_name))
1843 return charset_or_name;
1845 CHECK_SYMBOL (charset_or_name);
1846 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1849 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1850 Retrieve the charset of the given name.
1851 Same as `find-charset' except an error is signalled if there is no such
1852 charset instead of returning nil.
1856 Lisp_Object charset = Ffind_charset (name);
1859 signal_simple_error ("No such charset", name);
1863 /* We store the charsets in hash tables with the names as the key and the
1864 actual charset object as the value. Occasionally we need to use them
1865 in a list format. These routines provide us with that. */
1866 struct charset_list_closure
1868 Lisp_Object *charset_list;
1872 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1873 void *charset_list_closure)
1875 /* This function can GC */
1876 struct charset_list_closure *chcl =
1877 (struct charset_list_closure*) charset_list_closure;
1878 Lisp_Object *charset_list = chcl->charset_list;
1880 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1884 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1885 Return a list of the names of all defined charsets.
1889 Lisp_Object charset_list = Qnil;
1890 struct gcpro gcpro1;
1891 struct charset_list_closure charset_list_closure;
1893 GCPRO1 (charset_list);
1894 charset_list_closure.charset_list = &charset_list;
1895 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1896 &charset_list_closure);
1899 return charset_list;
1902 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1903 Return the name of the given charset.
1907 return XCHARSET_NAME (Fget_charset (charset));
1910 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1911 Define a new character set.
1912 This function is for use with Mule support.
1913 NAME is a symbol, the name by which the character set is normally referred.
1914 DOC-STRING is a string describing the character set.
1915 PROPS is a property list, describing the specific nature of the
1916 character set. Recognized properties are:
1918 'short-name Short version of the charset name (ex: Latin-1)
1919 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1920 'registry A regular expression matching the font registry field for
1922 'dimension Number of octets used to index a character in this charset.
1923 Either 1 or 2. Defaults to 1.
1924 'columns Number of columns used to display a character in this charset.
1925 Only used in TTY mode. (Under X, the actual width of a
1926 character can be derived from the font used to display the
1927 characters.) If unspecified, defaults to the dimension
1928 (this is almost always the correct value).
1929 'chars Number of characters in each dimension (94 or 96).
1930 Defaults to 94. Note that if the dimension is 2, the
1931 character set thus described is 94x94 or 96x96.
1932 'final Final byte of ISO 2022 escape sequence. Must be
1933 supplied. Each combination of (DIMENSION, CHARS) defines a
1934 separate namespace for final bytes. Note that ISO
1935 2022 restricts the final byte to the range
1936 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1937 dimension == 2. Note also that final bytes in the range
1938 0x30 - 0x3F are reserved for user-defined (not official)
1940 'graphic 0 (use left half of font on output) or 1 (use right half
1941 of font on output). Defaults to 0. For example, for
1942 a font whose registry is ISO8859-1, the left half
1943 (octets 0x20 - 0x7F) is the `ascii' character set, while
1944 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1945 character set. With 'graphic set to 0, the octets
1946 will have their high bit cleared; with it set to 1,
1947 the octets will have their high bit set.
1948 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1950 'ccl-program A compiled CCL program used to convert a character in
1951 this charset into an index into the font. This is in
1952 addition to the 'graphic property. The CCL program
1953 is passed the octets of the character, with the high
1954 bit cleared and set depending upon whether the value
1955 of the 'graphic property is 0 or 1.
1957 (name, doc_string, props))
1959 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1960 int direction = CHARSET_LEFT_TO_RIGHT;
1962 Lisp_Object registry = Qnil;
1963 Lisp_Object charset;
1964 Lisp_Object rest, keyword, value;
1965 Lisp_Object ccl_program = Qnil;
1966 Lisp_Object short_name = Qnil, long_name = Qnil;
1967 int byte_offset = -1;
1969 CHECK_SYMBOL (name);
1970 if (!NILP (doc_string))
1971 CHECK_STRING (doc_string);
1973 charset = Ffind_charset (name);
1974 if (!NILP (charset))
1975 signal_simple_error ("Cannot redefine existing charset", name);
1977 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1979 if (EQ (keyword, Qshort_name))
1981 CHECK_STRING (value);
1985 if (EQ (keyword, Qlong_name))
1987 CHECK_STRING (value);
1991 else if (EQ (keyword, Qdimension))
1994 dimension = XINT (value);
1995 if (dimension < 1 || dimension > 2)
1996 signal_simple_error ("Invalid value for 'dimension", value);
1999 else if (EQ (keyword, Qchars))
2002 chars = XINT (value);
2003 if (chars != 94 && chars != 96)
2004 signal_simple_error ("Invalid value for 'chars", value);
2007 else if (EQ (keyword, Qcolumns))
2010 columns = XINT (value);
2011 if (columns != 1 && columns != 2)
2012 signal_simple_error ("Invalid value for 'columns", value);
2015 else if (EQ (keyword, Qgraphic))
2018 graphic = XINT (value);
2020 if (graphic < 0 || graphic > 2)
2022 if (graphic < 0 || graphic > 1)
2024 signal_simple_error ("Invalid value for 'graphic", value);
2027 else if (EQ (keyword, Qregistry))
2029 CHECK_STRING (value);
2033 else if (EQ (keyword, Qdirection))
2035 if (EQ (value, Ql2r))
2036 direction = CHARSET_LEFT_TO_RIGHT;
2037 else if (EQ (value, Qr2l))
2038 direction = CHARSET_RIGHT_TO_LEFT;
2040 signal_simple_error ("Invalid value for 'direction", value);
2043 else if (EQ (keyword, Qfinal))
2045 CHECK_CHAR_COERCE_INT (value);
2046 final = XCHAR (value);
2047 if (final < '0' || final > '~')
2048 signal_simple_error ("Invalid value for 'final", value);
2051 else if (EQ (keyword, Qccl_program))
2053 CHECK_VECTOR (value);
2054 ccl_program = value;
2058 signal_simple_error ("Unrecognized property", keyword);
2062 error ("'final must be specified");
2063 if (dimension == 2 && final > 0x5F)
2065 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2069 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2071 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2073 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2074 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2076 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2078 id = get_unallocated_leading_byte (dimension);
2080 if (NILP (doc_string))
2081 doc_string = build_string ("");
2083 if (NILP (registry))
2084 registry = build_string ("");
2086 if (NILP (short_name))
2087 XSETSTRING (short_name, XSYMBOL (name)->name);
2089 if (NILP (long_name))
2090 long_name = doc_string;
2093 columns = dimension;
2095 if (byte_offset < 0)
2099 else if (chars == 96)
2105 charset = make_charset (id, name, chars, dimension, columns, graphic,
2106 final, direction, short_name, long_name,
2107 doc_string, registry,
2108 Qnil, 0, 0, 0, byte_offset);
2109 if (!NILP (ccl_program))
2110 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2114 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2116 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2117 NEW-NAME is the name of the new charset. Return the new charset.
2119 (charset, new_name))
2121 Lisp_Object new_charset = Qnil;
2122 int id, chars, dimension, columns, graphic, final;
2124 Lisp_Object registry, doc_string, short_name, long_name;
2125 struct Lisp_Charset *cs;
2127 charset = Fget_charset (charset);
2128 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2129 signal_simple_error ("Charset already has reverse-direction charset",
2132 CHECK_SYMBOL (new_name);
2133 if (!NILP (Ffind_charset (new_name)))
2134 signal_simple_error ("Cannot redefine existing charset", new_name);
2136 cs = XCHARSET (charset);
2138 chars = CHARSET_CHARS (cs);
2139 dimension = CHARSET_DIMENSION (cs);
2140 columns = CHARSET_COLUMNS (cs);
2141 id = get_unallocated_leading_byte (dimension);
2143 graphic = CHARSET_GRAPHIC (cs);
2144 final = CHARSET_FINAL (cs);
2145 direction = CHARSET_RIGHT_TO_LEFT;
2146 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2147 direction = CHARSET_LEFT_TO_RIGHT;
2148 doc_string = CHARSET_DOC_STRING (cs);
2149 short_name = CHARSET_SHORT_NAME (cs);
2150 long_name = CHARSET_LONG_NAME (cs);
2151 registry = CHARSET_REGISTRY (cs);
2153 new_charset = make_charset (id, new_name, chars, dimension, columns,
2154 graphic, final, direction, short_name, long_name,
2155 doc_string, registry,
2157 CHARSET_DECODING_TABLE(cs),
2158 CHARSET_UCS_MIN(cs),
2159 CHARSET_UCS_MAX(cs),
2160 CHARSET_CODE_OFFSET(cs),
2161 CHARSET_BYTE_OFFSET(cs)
2167 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2168 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2173 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2174 Define symbol ALIAS as an alias for CHARSET.
2178 CHECK_SYMBOL (alias);
2179 charset = Fget_charset (charset);
2180 return Fputhash (alias, charset, Vcharset_hash_table);
2183 /* #### Reverse direction charsets not yet implemented. */
2185 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2187 Return the reverse-direction charset parallel to CHARSET, if any.
2188 This is the charset with the same properties (in particular, the same
2189 dimension, number of characters per dimension, and final byte) as
2190 CHARSET but whose characters are displayed in the opposite direction.
2194 charset = Fget_charset (charset);
2195 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2199 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2200 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2201 If DIRECTION is omitted, both directions will be checked (left-to-right
2202 will be returned if character sets exist for both directions).
2204 (dimension, chars, final, direction))
2206 int dm, ch, fi, di = -1;
2208 Lisp_Object obj = Qnil;
2210 CHECK_INT (dimension);
2211 dm = XINT (dimension);
2212 if (dm < 1 || dm > 2)
2213 signal_simple_error ("Invalid value for DIMENSION", dimension);
2217 if (ch != 94 && ch != 96)
2218 signal_simple_error ("Invalid value for CHARS", chars);
2220 CHECK_CHAR_COERCE_INT (final);
2222 if (fi < '0' || fi > '~')
2223 signal_simple_error ("Invalid value for FINAL", final);
2225 if (EQ (direction, Ql2r))
2226 di = CHARSET_LEFT_TO_RIGHT;
2227 else if (EQ (direction, Qr2l))
2228 di = CHARSET_RIGHT_TO_LEFT;
2229 else if (!NILP (direction))
2230 signal_simple_error ("Invalid value for DIRECTION", direction);
2232 if (dm == 2 && fi > 0x5F)
2234 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2237 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2239 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2243 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2245 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2248 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2251 return XCHARSET_NAME (obj);
2255 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2256 Return short name of CHARSET.
2260 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2263 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2264 Return long name of CHARSET.
2268 return XCHARSET_LONG_NAME (Fget_charset (charset));
2271 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2272 Return description of CHARSET.
2276 return XCHARSET_DOC_STRING (Fget_charset (charset));
2279 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2280 Return dimension of CHARSET.
2284 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2287 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2288 Return property PROP of CHARSET.
2289 Recognized properties are those listed in `make-charset', as well as
2290 'name and 'doc-string.
2294 struct Lisp_Charset *cs;
2296 charset = Fget_charset (charset);
2297 cs = XCHARSET (charset);
2299 CHECK_SYMBOL (prop);
2300 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2301 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2302 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2303 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2304 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2305 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2306 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2307 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2308 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2309 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2310 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2311 if (EQ (prop, Qdirection))
2312 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2313 if (EQ (prop, Qreverse_direction_charset))
2315 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2319 return XCHARSET_NAME (obj);
2321 signal_simple_error ("Unrecognized charset property name", prop);
2322 return Qnil; /* not reached */
2325 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2326 Return charset identification number of CHARSET.
2330 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2333 /* #### We need to figure out which properties we really want to
2336 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2337 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2339 (charset, ccl_program))
2341 charset = Fget_charset (charset);
2342 CHECK_VECTOR (ccl_program);
2343 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2348 invalidate_charset_font_caches (Lisp_Object charset)
2350 /* Invalidate font cache entries for charset on all devices. */
2351 Lisp_Object devcons, concons, hash_table;
2352 DEVICE_LOOP_NO_BREAK (devcons, concons)
2354 struct device *d = XDEVICE (XCAR (devcons));
2355 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2356 if (!UNBOUNDP (hash_table))
2357 Fclrhash (hash_table);
2361 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2362 Set the 'registry property of CHARSET to REGISTRY.
2364 (charset, registry))
2366 charset = Fget_charset (charset);
2367 CHECK_STRING (registry);
2368 XCHARSET_REGISTRY (charset) = registry;
2369 invalidate_charset_font_caches (charset);
2370 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2375 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2376 Return mapping-table of CHARSET.
2380 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2383 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2384 Set mapping-table of CHARSET to TABLE.
2388 struct Lisp_Charset *cs;
2389 Lisp_Object old_table;
2392 charset = Fget_charset (charset);
2393 cs = XCHARSET (charset);
2395 if (EQ (table, Qnil))
2397 CHARSET_DECODING_TABLE(cs) = table;
2400 else if (VECTORP (table))
2404 /* ad-hoc method for `ascii' */
2405 if ((CHARSET_CHARS (cs) == 94) &&
2406 (CHARSET_BYTE_OFFSET (cs) != 33))
2407 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2409 ccs_len = CHARSET_CHARS (cs);
2411 if (XVECTOR_LENGTH (table) > ccs_len)
2412 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2413 old_table = CHARSET_DECODING_TABLE(cs);
2414 CHARSET_DECODING_TABLE(cs) = table;
2417 signal_error (Qwrong_type_argument,
2418 list2 (build_translated_string ("vector-or-nil-p"),
2420 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2422 switch (CHARSET_DIMENSION (cs))
2425 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2427 Lisp_Object c = XVECTOR_DATA(table)[i];
2432 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2436 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2438 Lisp_Object v = XVECTOR_DATA(table)[i];
2444 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2446 CHARSET_DECODING_TABLE(cs) = old_table;
2447 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2449 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2451 Lisp_Object c = XVECTOR_DATA(v)[j];
2456 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2457 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2461 put_char_attribute (v, charset,
2462 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2471 /************************************************************************/
2472 /* Lisp primitives for working with characters */
2473 /************************************************************************/
2476 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2477 Make a character from CHARSET and code-point CODE.
2483 charset = Fget_charset (charset);
2486 if (XCHARSET_GRAPHIC (charset) == 1)
2488 return make_char (DECODE_CHAR (charset, c));
2492 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2493 Make a character from CHARSET and octets ARG1 and ARG2.
2494 ARG2 is required only for characters from two-dimensional charsets.
2495 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2496 character s with caron.
2498 (charset, arg1, arg2))
2500 struct Lisp_Charset *cs;
2502 int lowlim, highlim;
2504 charset = Fget_charset (charset);
2505 cs = XCHARSET (charset);
2507 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2508 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2510 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2512 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2513 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2516 /* It is useful (and safe, according to Olivier Galibert) to strip
2517 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2518 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2519 Latin 2 code of the character. */
2527 if (a1 < lowlim || a1 > highlim)
2528 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2530 if (CHARSET_DIMENSION (cs) == 1)
2534 ("Charset is of dimension one; second octet must be nil", arg2);
2535 return make_char (MAKE_CHAR (charset, a1, 0));
2544 a2 = XINT (arg2) & 0x7f;
2546 if (a2 < lowlim || a2 > highlim)
2547 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2549 return make_char (MAKE_CHAR (charset, a1, a2));
2552 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2553 Return the character set of char CH.
2557 CHECK_CHAR_COERCE_INT (ch);
2559 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2562 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2563 Return the octet numbered N (should be 0 or 1) of char CH.
2564 N defaults to 0 if omitted.
2568 Lisp_Object charset;
2571 CHECK_CHAR_COERCE_INT (ch);
2573 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2575 if (NILP (n) || EQ (n, Qzero))
2576 return make_int (octet0);
2577 else if (EQ (n, make_int (1)))
2578 return make_int (octet1);
2580 signal_simple_error ("Octet number must be 0 or 1", n);
2583 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2584 Return list of charset and one or two position-codes of CHAR.
2588 /* This function can GC */
2589 struct gcpro gcpro1, gcpro2;
2590 Lisp_Object charset = Qnil;
2591 Lisp_Object rc = Qnil;
2599 GCPRO2 (charset, rc);
2600 CHECK_CHAR_COERCE_INT (character);
2603 code_point = ENCODE_CHAR (XCHAR (character), charset);
2604 dimension = XCHARSET_DIMENSION (charset);
2605 while (dimension > 0)
2607 rc = Fcons (make_int (code_point & 255), rc);
2611 rc = Fcons (XCHARSET_NAME (charset), rc);
2613 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2615 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2617 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2621 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2630 #ifdef ENABLE_COMPOSITE_CHARS
2631 /************************************************************************/
2632 /* composite character functions */
2633 /************************************************************************/
2636 lookup_composite_char (Bufbyte *str, int len)
2638 Lisp_Object lispstr = make_string (str, len);
2639 Lisp_Object ch = Fgethash (lispstr,
2640 Vcomposite_char_string2char_hash_table,
2646 if (composite_char_row_next >= 128)
2647 signal_simple_error ("No more composite chars available", lispstr);
2648 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2649 composite_char_col_next);
2650 Fputhash (make_char (emch), lispstr,
2651 Vcomposite_char_char2string_hash_table);
2652 Fputhash (lispstr, make_char (emch),
2653 Vcomposite_char_string2char_hash_table);
2654 composite_char_col_next++;
2655 if (composite_char_col_next >= 128)
2657 composite_char_col_next = 32;
2658 composite_char_row_next++;
2667 composite_char_string (Emchar ch)
2669 Lisp_Object str = Fgethash (make_char (ch),
2670 Vcomposite_char_char2string_hash_table,
2672 assert (!UNBOUNDP (str));
2676 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2677 Convert a string into a single composite character.
2678 The character is the result of overstriking all the characters in
2683 CHECK_STRING (string);
2684 return make_char (lookup_composite_char (XSTRING_DATA (string),
2685 XSTRING_LENGTH (string)));
2688 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2689 Return a string of the characters comprising a composite character.
2697 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2698 signal_simple_error ("Must be composite char", ch);
2699 return composite_char_string (emch);
2701 #endif /* ENABLE_COMPOSITE_CHARS */
2704 /************************************************************************/
2705 /* initialization */
2706 /************************************************************************/
2709 syms_of_mule_charset (void)
2711 DEFSUBR (Fcharsetp);
2712 DEFSUBR (Ffind_charset);
2713 DEFSUBR (Fget_charset);
2714 DEFSUBR (Fcharset_list);
2715 DEFSUBR (Fcharset_name);
2716 DEFSUBR (Fmake_charset);
2717 DEFSUBR (Fmake_reverse_direction_charset);
2718 /* DEFSUBR (Freverse_direction_charset); */
2719 DEFSUBR (Fdefine_charset_alias);
2720 DEFSUBR (Fcharset_from_attributes);
2721 DEFSUBR (Fcharset_short_name);
2722 DEFSUBR (Fcharset_long_name);
2723 DEFSUBR (Fcharset_description);
2724 DEFSUBR (Fcharset_dimension);
2725 DEFSUBR (Fcharset_property);
2726 DEFSUBR (Fcharset_id);
2727 DEFSUBR (Fset_charset_ccl_program);
2728 DEFSUBR (Fset_charset_registry);
2730 DEFSUBR (Fchar_attribute_alist);
2731 DEFSUBR (Fget_char_attribute);
2732 DEFSUBR (Fput_char_attribute);
2733 DEFSUBR (Fremove_char_attribute);
2734 DEFSUBR (Fdefine_char);
2735 DEFSUBR (Fchar_variants);
2736 DEFSUBR (Fget_composite_char);
2737 DEFSUBR (Fcharset_mapping_table);
2738 DEFSUBR (Fset_charset_mapping_table);
2742 DEFSUBR (Fdecode_char);
2744 DEFSUBR (Fmake_char);
2745 DEFSUBR (Fchar_charset);
2746 DEFSUBR (Fchar_octet);
2747 DEFSUBR (Fsplit_char);
2749 #ifdef ENABLE_COMPOSITE_CHARS
2750 DEFSUBR (Fmake_composite_char);
2751 DEFSUBR (Fcomposite_char_string);
2754 defsymbol (&Qcharsetp, "charsetp");
2755 defsymbol (&Qregistry, "registry");
2756 defsymbol (&Qfinal, "final");
2757 defsymbol (&Qgraphic, "graphic");
2758 defsymbol (&Qdirection, "direction");
2759 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2760 defsymbol (&Qshort_name, "short-name");
2761 defsymbol (&Qlong_name, "long-name");
2763 defsymbol (&Ql2r, "l2r");
2764 defsymbol (&Qr2l, "r2l");
2766 /* Charsets, compatible with FSF 20.3
2767 Naming convention is Script-Charset[-Edition] */
2768 defsymbol (&Qascii, "ascii");
2769 defsymbol (&Qcontrol_1, "control-1");
2770 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2771 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2772 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2773 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2774 defsymbol (&Qthai_tis620, "thai-tis620");
2775 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2776 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2777 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2778 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2779 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2780 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2781 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2782 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2783 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2784 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2785 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2786 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2787 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2788 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2789 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2791 defsymbol (&Q_ucs, "->ucs");
2792 defsymbol (&Q_decomposition, "->decomposition");
2793 defsymbol (&Qcompat, "compat");
2794 defsymbol (&Qisolated, "isolated");
2795 defsymbol (&Qinitial, "initial");
2796 defsymbol (&Qmedial, "medial");
2797 defsymbol (&Qfinal, "final");
2798 defsymbol (&Qvertical, "vertical");
2799 defsymbol (&QnoBreak, "noBreak");
2800 defsymbol (&Qfraction, "fraction");
2801 defsymbol (&Qsuper, "super");
2802 defsymbol (&Qsub, "sub");
2803 defsymbol (&Qcircle, "circle");
2804 defsymbol (&Qsquare, "square");
2805 defsymbol (&Qwide, "wide");
2806 defsymbol (&Qnarrow, "narrow");
2807 defsymbol (&Qsmall, "small");
2808 defsymbol (&Qfont, "font");
2809 defsymbol (&Qucs, "ucs");
2810 defsymbol (&Qucs_bmp, "ucs-bmp");
2811 defsymbol (&Qlatin_viscii, "latin-viscii");
2812 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2813 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2814 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2815 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2816 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2817 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2818 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2819 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2820 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2821 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2822 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2823 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2824 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2825 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2826 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2827 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2828 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2829 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2830 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2831 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2832 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2833 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2834 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2835 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2836 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2837 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2838 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2840 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2841 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2843 defsymbol (&Qcomposite, "composite");
2847 vars_of_mule_charset (void)
2854 chlook = xnew (struct charset_lookup);
2855 dumpstruct (&chlook, &charset_lookup_description);
2857 /* Table of charsets indexed by leading byte. */
2858 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2859 chlook->charset_by_leading_byte[i] = Qnil;
2862 /* Table of charsets indexed by type/final-byte. */
2863 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2864 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2865 chlook->charset_by_attributes[i][j] = Qnil;
2867 /* Table of charsets indexed by type/final-byte/direction. */
2868 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2869 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2870 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2871 chlook->charset_by_attributes[i][j][k] = Qnil;
2875 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2877 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2878 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2882 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2883 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2884 Leading-code of private TYPE9N charset of column-width 1.
2886 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2890 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2891 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2892 Version number of UTF-2000.
2895 staticpro (&Vcharacter_attribute_table);
2896 Vcharacter_attribute_table = make_char_code_table (Qnil);
2898 staticpro (&Vcharacter_composition_table);
2899 Vcharacter_composition_table = make_char_code_table (Qnil);
2901 staticpro (&Vcharacter_variant_table);
2902 Vcharacter_variant_table = make_char_code_table (Qnil);
2904 Vdefault_coded_charset_priority_list = Qnil;
2905 DEFVAR_LISP ("default-coded-charset-priority-list",
2906 &Vdefault_coded_charset_priority_list /*
2907 Default order of preferred coded-character-sets.
2913 complex_vars_of_mule_charset (void)
2915 staticpro (&Vcharset_hash_table);
2916 Vcharset_hash_table =
2917 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2919 /* Predefined character sets. We store them into variables for
2923 staticpro (&Vcharset_ucs);
2925 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2926 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2927 build_string ("UCS"),
2928 build_string ("UCS"),
2929 build_string ("ISO/IEC 10646"),
2931 Qnil, 0, 0xFFFFFFF, 0, 0);
2932 staticpro (&Vcharset_ucs_bmp);
2934 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2935 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2936 build_string ("BMP"),
2937 build_string ("BMP"),
2938 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2939 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2940 Qnil, 0, 0xFFFF, 0, 0);
2942 # define MIN_CHAR_THAI 0
2943 # define MAX_CHAR_THAI 0
2944 # define MIN_CHAR_HEBREW 0
2945 # define MAX_CHAR_HEBREW 0
2946 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2947 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2949 staticpro (&Vcharset_ascii);
2951 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2952 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2953 build_string ("ASCII"),
2954 build_string ("ASCII)"),
2955 build_string ("ASCII (ISO646 IRV)"),
2956 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2957 Qnil, 0, 0x7F, 0, 0);
2958 staticpro (&Vcharset_control_1);
2959 Vcharset_control_1 =
2960 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2961 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2962 build_string ("C1"),
2963 build_string ("Control characters"),
2964 build_string ("Control characters 128-191"),
2966 Qnil, 0x80, 0x9F, 0, 0);
2967 staticpro (&Vcharset_latin_iso8859_1);
2968 Vcharset_latin_iso8859_1 =
2969 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2970 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2971 build_string ("Latin-1"),
2972 build_string ("ISO8859-1 (Latin-1)"),
2973 build_string ("ISO8859-1 (Latin-1)"),
2974 build_string ("iso8859-1"),
2975 Qnil, 0xA0, 0xFF, 0, 32);
2976 staticpro (&Vcharset_latin_iso8859_2);
2977 Vcharset_latin_iso8859_2 =
2978 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2979 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2980 build_string ("Latin-2"),
2981 build_string ("ISO8859-2 (Latin-2)"),
2982 build_string ("ISO8859-2 (Latin-2)"),
2983 build_string ("iso8859-2"),
2985 staticpro (&Vcharset_latin_iso8859_3);
2986 Vcharset_latin_iso8859_3 =
2987 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2988 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2989 build_string ("Latin-3"),
2990 build_string ("ISO8859-3 (Latin-3)"),
2991 build_string ("ISO8859-3 (Latin-3)"),
2992 build_string ("iso8859-3"),
2994 staticpro (&Vcharset_latin_iso8859_4);
2995 Vcharset_latin_iso8859_4 =
2996 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2997 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2998 build_string ("Latin-4"),
2999 build_string ("ISO8859-4 (Latin-4)"),
3000 build_string ("ISO8859-4 (Latin-4)"),
3001 build_string ("iso8859-4"),
3003 staticpro (&Vcharset_thai_tis620);
3004 Vcharset_thai_tis620 =
3005 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3006 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3007 build_string ("TIS620"),
3008 build_string ("TIS620 (Thai)"),
3009 build_string ("TIS620.2529 (Thai)"),
3010 build_string ("tis620"),
3011 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3012 staticpro (&Vcharset_greek_iso8859_7);
3013 Vcharset_greek_iso8859_7 =
3014 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3015 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("ISO8859-7"),
3017 build_string ("ISO8859-7 (Greek)"),
3018 build_string ("ISO8859-7 (Greek)"),
3019 build_string ("iso8859-7"),
3021 0 /* MIN_CHAR_GREEK */,
3022 0 /* MAX_CHAR_GREEK */, 0, 32);
3023 staticpro (&Vcharset_arabic_iso8859_6);
3024 Vcharset_arabic_iso8859_6 =
3025 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3026 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3027 build_string ("ISO8859-6"),
3028 build_string ("ISO8859-6 (Arabic)"),
3029 build_string ("ISO8859-6 (Arabic)"),
3030 build_string ("iso8859-6"),
3032 staticpro (&Vcharset_hebrew_iso8859_8);
3033 Vcharset_hebrew_iso8859_8 =
3034 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3035 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3036 build_string ("ISO8859-8"),
3037 build_string ("ISO8859-8 (Hebrew)"),
3038 build_string ("ISO8859-8 (Hebrew)"),
3039 build_string ("iso8859-8"),
3040 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3041 staticpro (&Vcharset_katakana_jisx0201);
3042 Vcharset_katakana_jisx0201 =
3043 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3044 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("JISX0201 Kana"),
3046 build_string ("JISX0201.1976 (Japanese Kana)"),
3047 build_string ("JISX0201.1976 Japanese Kana"),
3048 build_string ("jisx0201\\.1976"),
3050 staticpro (&Vcharset_latin_jisx0201);
3051 Vcharset_latin_jisx0201 =
3052 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3053 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3054 build_string ("JISX0201 Roman"),
3055 build_string ("JISX0201.1976 (Japanese Roman)"),
3056 build_string ("JISX0201.1976 Japanese Roman"),
3057 build_string ("jisx0201\\.1976"),
3059 staticpro (&Vcharset_cyrillic_iso8859_5);
3060 Vcharset_cyrillic_iso8859_5 =
3061 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3062 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3063 build_string ("ISO8859-5"),
3064 build_string ("ISO8859-5 (Cyrillic)"),
3065 build_string ("ISO8859-5 (Cyrillic)"),
3066 build_string ("iso8859-5"),
3068 0 /* MIN_CHAR_CYRILLIC */,
3069 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3070 staticpro (&Vcharset_latin_iso8859_9);
3071 Vcharset_latin_iso8859_9 =
3072 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3073 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3074 build_string ("Latin-5"),
3075 build_string ("ISO8859-9 (Latin-5)"),
3076 build_string ("ISO8859-9 (Latin-5)"),
3077 build_string ("iso8859-9"),
3079 staticpro (&Vcharset_japanese_jisx0208_1978);
3080 Vcharset_japanese_jisx0208_1978 =
3081 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3082 Qjapanese_jisx0208_1978, 94, 2,
3083 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3084 build_string ("JIS X0208:1978"),
3085 build_string ("JIS X0208:1978 (Japanese)"),
3087 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3088 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3090 staticpro (&Vcharset_chinese_gb2312);
3091 Vcharset_chinese_gb2312 =
3092 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3093 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3094 build_string ("GB2312"),
3095 build_string ("GB2312)"),
3096 build_string ("GB2312 Chinese simplified"),
3097 build_string ("gb2312"),
3099 staticpro (&Vcharset_japanese_jisx0208);
3100 Vcharset_japanese_jisx0208 =
3101 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3102 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3103 build_string ("JISX0208"),
3104 build_string ("JIS X0208:1983 (Japanese)"),
3105 build_string ("JIS X0208:1983 Japanese Kanji"),
3106 build_string ("jisx0208\\.1983"),
3109 staticpro (&Vcharset_japanese_jisx0208_1990);
3110 Vcharset_japanese_jisx0208_1990 =
3111 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3112 Qjapanese_jisx0208_1990, 94, 2,
3113 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3114 build_string ("JISX0208-1990"),
3115 build_string ("JIS X0208:1990 (Japanese)"),
3116 build_string ("JIS X0208:1990 Japanese Kanji"),
3117 build_string ("jisx0208\\.1990"),
3119 MIN_CHAR_JIS_X0208_1990,
3120 MAX_CHAR_JIS_X0208_1990, 0, 33);
3122 staticpro (&Vcharset_korean_ksc5601);
3123 Vcharset_korean_ksc5601 =
3124 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3125 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3126 build_string ("KSC5601"),
3127 build_string ("KSC5601 (Korean"),
3128 build_string ("KSC5601 Korean Hangul and Hanja"),
3129 build_string ("ksc5601"),
3131 staticpro (&Vcharset_japanese_jisx0212);
3132 Vcharset_japanese_jisx0212 =
3133 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3134 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3135 build_string ("JISX0212"),
3136 build_string ("JISX0212 (Japanese)"),
3137 build_string ("JISX0212 Japanese Supplement"),
3138 build_string ("jisx0212"),
3141 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3142 staticpro (&Vcharset_chinese_cns11643_1);
3143 Vcharset_chinese_cns11643_1 =
3144 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3145 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3146 build_string ("CNS11643-1"),
3147 build_string ("CNS11643-1 (Chinese traditional)"),
3149 ("CNS 11643 Plane 1 Chinese traditional"),
3150 build_string (CHINESE_CNS_PLANE_RE("1")),
3152 staticpro (&Vcharset_chinese_cns11643_2);
3153 Vcharset_chinese_cns11643_2 =
3154 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3155 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3156 build_string ("CNS11643-2"),
3157 build_string ("CNS11643-2 (Chinese traditional)"),
3159 ("CNS 11643 Plane 2 Chinese traditional"),
3160 build_string (CHINESE_CNS_PLANE_RE("2")),
3163 staticpro (&Vcharset_latin_viscii_lower);
3164 Vcharset_latin_viscii_lower =
3165 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3166 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3167 build_string ("VISCII lower"),
3168 build_string ("VISCII lower (Vietnamese)"),
3169 build_string ("VISCII lower (Vietnamese)"),
3170 build_string ("MULEVISCII-LOWER"),
3172 staticpro (&Vcharset_latin_viscii_upper);
3173 Vcharset_latin_viscii_upper =
3174 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3175 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3176 build_string ("VISCII upper"),
3177 build_string ("VISCII upper (Vietnamese)"),
3178 build_string ("VISCII upper (Vietnamese)"),
3179 build_string ("MULEVISCII-UPPER"),
3181 staticpro (&Vcharset_latin_viscii);
3182 Vcharset_latin_viscii =
3183 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3184 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3185 build_string ("VISCII"),
3186 build_string ("VISCII 1.1 (Vietnamese)"),
3187 build_string ("VISCII 1.1 (Vietnamese)"),
3188 build_string ("VISCII1\\.1"),
3190 staticpro (&Vcharset_ideograph_daikanwa);
3191 Vcharset_ideograph_daikanwa =
3192 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3193 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3194 build_string ("Daikanwa"),
3195 build_string ("Morohashi's Daikanwa"),
3196 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3197 build_string ("Daikanwa"),
3198 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3199 staticpro (&Vcharset_mojikyo_pj_1);
3200 Vcharset_mojikyo_pj_1 =
3201 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3202 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3203 build_string ("Mojikyo-PJ-1"),
3204 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3206 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3207 build_string ("jisx0208\\.Mojikyo-1$"),
3209 staticpro (&Vcharset_mojikyo_pj_2);
3210 Vcharset_mojikyo_pj_2 =
3211 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3212 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3213 build_string ("Mojikyo-PJ-2"),
3214 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3216 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3217 build_string ("jisx0208\\.Mojikyo-2$"),
3219 staticpro (&Vcharset_mojikyo_pj_3);
3220 Vcharset_mojikyo_pj_3 =
3221 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3222 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3223 build_string ("Mojikyo-PJ-3"),
3224 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3226 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3227 build_string ("jisx0208\\.Mojikyo-3$"),
3229 staticpro (&Vcharset_mojikyo_pj_4);
3230 Vcharset_mojikyo_pj_4 =
3231 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3232 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3233 build_string ("Mojikyo-PJ-4"),
3234 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3236 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3237 build_string ("jisx0208\\.Mojikyo-4$"),
3239 staticpro (&Vcharset_mojikyo_pj_5);
3240 Vcharset_mojikyo_pj_5 =
3241 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3242 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3243 build_string ("Mojikyo-PJ-5"),
3244 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3246 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3247 build_string ("jisx0208\\.Mojikyo-5$"),
3249 staticpro (&Vcharset_mojikyo_pj_6);
3250 Vcharset_mojikyo_pj_6 =
3251 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3252 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3253 build_string ("Mojikyo-PJ-6"),
3254 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3256 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3257 build_string ("jisx0208\\.Mojikyo-6$"),
3259 staticpro (&Vcharset_mojikyo_pj_7);
3260 Vcharset_mojikyo_pj_7 =
3261 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3262 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3263 build_string ("Mojikyo-PJ-7"),
3264 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3266 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3267 build_string ("jisx0208\\.Mojikyo-7$"),
3269 staticpro (&Vcharset_mojikyo_pj_8);
3270 Vcharset_mojikyo_pj_8 =
3271 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3272 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3273 build_string ("Mojikyo-PJ-8"),
3274 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3276 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3277 build_string ("jisx0208\\.Mojikyo-8$"),
3279 staticpro (&Vcharset_mojikyo_pj_9);
3280 Vcharset_mojikyo_pj_9 =
3281 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3282 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3283 build_string ("Mojikyo-PJ-9"),
3284 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3286 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3287 build_string ("jisx0208\\.Mojikyo-9$"),
3289 staticpro (&Vcharset_mojikyo_pj_10);
3290 Vcharset_mojikyo_pj_10 =
3291 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3292 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3293 build_string ("Mojikyo-PJ-10"),
3294 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3296 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3297 build_string ("jisx0208\\.Mojikyo-10$"),
3299 staticpro (&Vcharset_mojikyo_pj_11);
3300 Vcharset_mojikyo_pj_11 =
3301 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3302 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3303 build_string ("Mojikyo-PJ-11"),
3304 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3306 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3307 build_string ("jisx0208\\.Mojikyo-11$"),
3309 staticpro (&Vcharset_mojikyo_pj_12);
3310 Vcharset_mojikyo_pj_12 =
3311 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3312 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3313 build_string ("Mojikyo-PJ-12"),
3314 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3316 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3317 build_string ("jisx0208\\.Mojikyo-12$"),
3319 staticpro (&Vcharset_mojikyo_pj_13);
3320 Vcharset_mojikyo_pj_13 =
3321 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3322 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3323 build_string ("Mojikyo-PJ-13"),
3324 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3326 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3327 build_string ("jisx0208\\.Mojikyo-13$"),
3329 staticpro (&Vcharset_mojikyo_pj_14);
3330 Vcharset_mojikyo_pj_14 =
3331 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3332 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3333 build_string ("Mojikyo-PJ-14"),
3334 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3336 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3337 build_string ("jisx0208\\.Mojikyo-14$"),
3339 staticpro (&Vcharset_mojikyo_pj_15);
3340 Vcharset_mojikyo_pj_15 =
3341 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3342 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3343 build_string ("Mojikyo-PJ-15"),
3344 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3346 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3347 build_string ("jisx0208\\.Mojikyo-15$"),
3349 staticpro (&Vcharset_mojikyo_pj_16);
3350 Vcharset_mojikyo_pj_16 =
3351 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3352 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3353 build_string ("Mojikyo-PJ-16"),
3354 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3356 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3357 build_string ("jisx0208\\.Mojikyo-16$"),
3359 staticpro (&Vcharset_mojikyo_pj_17);
3360 Vcharset_mojikyo_pj_17 =
3361 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3362 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3363 build_string ("Mojikyo-PJ-17"),
3364 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3366 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3367 build_string ("jisx0208\\.Mojikyo-17$"),
3369 staticpro (&Vcharset_mojikyo_pj_18);
3370 Vcharset_mojikyo_pj_18 =
3371 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3372 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3373 build_string ("Mojikyo-PJ-18"),
3374 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3376 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3377 build_string ("jisx0208\\.Mojikyo-18$"),
3379 staticpro (&Vcharset_mojikyo_pj_19);
3380 Vcharset_mojikyo_pj_19 =
3381 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3382 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3383 build_string ("Mojikyo-PJ-19"),
3384 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3386 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3387 build_string ("jisx0208\\.Mojikyo-19$"),
3389 staticpro (&Vcharset_mojikyo_pj_20);
3390 Vcharset_mojikyo_pj_20 =
3391 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3392 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3393 build_string ("Mojikyo-PJ-20"),
3394 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3396 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3397 build_string ("jisx0208\\.Mojikyo-20$"),
3399 staticpro (&Vcharset_mojikyo_pj_21);
3400 Vcharset_mojikyo_pj_21 =
3401 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3402 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3403 build_string ("Mojikyo-PJ-21"),
3404 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3406 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3407 build_string ("jisx0208\\.Mojikyo-21$"),
3409 staticpro (&Vcharset_ethiopic_ucs);
3410 Vcharset_ethiopic_ucs =
3411 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3412 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3413 build_string ("Ethiopic (UCS)"),
3414 build_string ("Ethiopic (UCS)"),
3415 build_string ("Ethiopic of UCS"),
3416 build_string ("Ethiopic-Unicode"),
3417 Qnil, 0x1200, 0x137F, 0x1200, 0);
3419 staticpro (&Vcharset_chinese_big5_1);
3420 Vcharset_chinese_big5_1 =
3421 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3422 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3423 build_string ("Big5"),
3424 build_string ("Big5 (Level-1)"),
3426 ("Big5 Level-1 Chinese traditional"),
3427 build_string ("big5"),
3429 staticpro (&Vcharset_chinese_big5_2);
3430 Vcharset_chinese_big5_2 =
3431 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3432 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3433 build_string ("Big5"),
3434 build_string ("Big5 (Level-2)"),
3436 ("Big5 Level-2 Chinese traditional"),
3437 build_string ("big5"),
3440 #ifdef ENABLE_COMPOSITE_CHARS
3441 /* #### For simplicity, we put composite chars into a 96x96 charset.
3442 This is going to lead to problems because you can run out of
3443 room, esp. as we don't yet recycle numbers. */
3444 staticpro (&Vcharset_composite);
3445 Vcharset_composite =
3446 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3447 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3448 build_string ("Composite"),
3449 build_string ("Composite characters"),
3450 build_string ("Composite characters"),
3453 /* #### not dumped properly */
3454 composite_char_row_next = 32;
3455 composite_char_col_next = 32;
3457 Vcomposite_char_string2char_hash_table =
3458 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3459 Vcomposite_char_char2string_hash_table =
3460 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3461 staticpro (&Vcomposite_char_string2char_hash_table);
3462 staticpro (&Vcomposite_char_char2string_hash_table);
3463 #endif /* ENABLE_COMPOSITE_CHARS */