1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
100 /* Hash tables for composite chars. One maps string representing
101 composed chars to their equivalent chars; one goes the
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof(struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
163 mark_char_byte_table (Lisp_Object obj)
165 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
176 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
179 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
185 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
187 if (!char_byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
201 char_byte_table_hash (Lisp_Object obj, int depth)
203 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
208 static const struct lrecord_description char_byte_table_description[] = {
209 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
214 mark_char_byte_table,
215 internal_object_printer,
216 0, char_byte_table_equal,
217 char_byte_table_hash,
218 char_byte_table_description,
219 struct Lisp_Char_Byte_Table);
222 make_char_byte_table (Lisp_Object initval)
226 struct Lisp_Char_Byte_Table *cte =
227 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
228 &lrecord_char_byte_table);
230 for (i = 0; i < 256; i++)
231 cte->property[i] = initval;
233 XSETCHAR_BYTE_TABLE (obj, cte);
238 copy_char_byte_table (Lisp_Object entry)
240 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
243 struct Lisp_Char_Byte_Table *ctenew =
244 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
245 &lrecord_char_byte_table);
247 for (i = 0; i < 256; i++)
249 Lisp_Object new = cte->property[i];
250 if (CHAR_BYTE_TABLE_P (new))
251 ctenew->property[i] = copy_char_byte_table (new);
253 ctenew->property[i] = new;
256 XSETCHAR_BYTE_TABLE (obj, ctenew);
262 mark_char_code_table (Lisp_Object obj)
264 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
270 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
272 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
273 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
275 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
279 char_code_table_hash (Lisp_Object obj, int depth)
281 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
283 return char_code_table_hash (cte->table, depth + 1);
286 static const struct lrecord_description char_code_table_description[] = {
287 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
291 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
292 mark_char_code_table,
293 internal_object_printer,
294 0, char_code_table_equal,
295 char_code_table_hash,
296 char_code_table_description,
297 struct Lisp_Char_Code_Table);
300 make_char_code_table (Lisp_Object initval)
303 struct Lisp_Char_Code_Table *cte =
304 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
305 &lrecord_char_code_table);
307 cte->table = make_char_byte_table (initval);
309 XSETCHAR_CODE_TABLE (obj, cte);
314 copy_char_code_table (Lisp_Object entry)
316 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
318 struct Lisp_Char_Code_Table *ctenew =
319 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
320 &lrecord_char_code_table);
322 ctenew->table = copy_char_byte_table (cte->table);
323 XSETCHAR_CODE_TABLE (obj, ctenew);
329 get_char_code_table (Emchar ch, Lisp_Object table)
331 unsigned int code = ch;
332 struct Lisp_Char_Byte_Table* cpt
333 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
334 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
336 if (CHAR_BYTE_TABLE_P (ret))
337 cpt = XCHAR_BYTE_TABLE (ret);
341 ret = cpt->property [(unsigned char) (code >> 16)];
342 if (CHAR_BYTE_TABLE_P (ret))
343 cpt = XCHAR_BYTE_TABLE (ret);
347 ret = cpt->property [(unsigned char) (code >> 8)];
348 if (CHAR_BYTE_TABLE_P (ret))
349 cpt = XCHAR_BYTE_TABLE (ret);
353 return cpt->property [(unsigned char) code];
357 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
359 unsigned int code = ch;
360 struct Lisp_Char_Byte_Table* cpt1
361 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
362 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
364 if (CHAR_BYTE_TABLE_P (ret))
366 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
368 ret = cpt2->property[(unsigned char)(code >> 16)];
369 if (CHAR_BYTE_TABLE_P (ret))
371 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
373 ret = cpt3->property[(unsigned char)(code >> 8)];
374 if (CHAR_BYTE_TABLE_P (ret))
376 struct Lisp_Char_Byte_Table* cpt4
377 = XCHAR_BYTE_TABLE (ret);
379 cpt4->property[(unsigned char)code] = value;
381 else if (!EQ (ret, value))
383 Lisp_Object cpt4 = make_char_byte_table (ret);
385 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
386 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
389 else if (!EQ (ret, value))
391 Lisp_Object cpt3 = make_char_byte_table (ret);
392 Lisp_Object cpt4 = make_char_byte_table (ret);
394 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
395 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
397 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
400 else if (!EQ (ret, value))
402 Lisp_Object cpt2 = make_char_byte_table (ret);
403 Lisp_Object cpt3 = make_char_byte_table (ret);
404 Lisp_Object cpt4 = make_char_byte_table (ret);
406 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
407 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
408 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
409 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
414 Lisp_Object Vcharacter_attribute_table;
415 Lisp_Object Vcharacter_composition_table;
416 Lisp_Object Vcharacter_variant_table;
418 Lisp_Object Q_decomposition;
421 Lisp_Object Qisolated;
422 Lisp_Object Qinitial;
425 Lisp_Object Qvertical;
426 Lisp_Object QnoBreak;
427 Lisp_Object Qfraction;
438 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
444 else if (EQ (v, Qcompat))
446 else if (EQ (v, Qisolated))
448 else if (EQ (v, Qinitial))
450 else if (EQ (v, Qmedial))
452 else if (EQ (v, Qfinal))
454 else if (EQ (v, Qvertical))
456 else if (EQ (v, QnoBreak))
458 else if (EQ (v, Qfraction))
460 else if (EQ (v, Qsuper))
462 else if (EQ (v, Qsub))
464 else if (EQ (v, Qcircle))
466 else if (EQ (v, Qsquare))
468 else if (EQ (v, Qwide))
470 else if (EQ (v, Qnarrow))
472 else if (EQ (v, Qsmall))
474 else if (EQ (v, Qfont))
477 signal_simple_error (err_msg, err_arg);
480 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
481 Return character corresponding with list.
485 Lisp_Object table = Vcharacter_composition_table;
486 Lisp_Object rest = list;
490 Lisp_Object v = Fcar (rest);
492 Emchar c = to_char_code (v, "Invalid value for composition", list);
494 ret = get_char_code_table (c, table);
499 if (!CHAR_CODE_TABLE_P (ret))
504 else if (!CONSP (rest))
506 else if (CHAR_CODE_TABLE_P (ret))
509 signal_simple_error ("Invalid table is found with", list);
511 signal_simple_error ("Invalid value for composition", list);
514 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
515 Return variants of CHARACTER.
519 CHECK_CHAR (character);
520 return Fcopy_list (get_char_code_table (XCHAR (character),
521 Vcharacter_variant_table));
524 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
525 Return the alist of attributes of CHARACTER.
529 CHECK_CHAR (character);
530 return Fcopy_alist (get_char_code_table (XCHAR (character),
531 Vcharacter_attribute_table));
534 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
535 Return the value of CHARACTER's ATTRIBUTE.
537 (character, attribute))
542 CHECK_CHAR (character);
543 ret = get_char_code_table (XCHAR (character),
544 Vcharacter_attribute_table);
548 if (!NILP (ccs = Ffind_charset (attribute)))
551 return Fcdr (Fassq (attribute, ret));
555 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
558 Emchar char_code = XCHAR (character);
560 = get_char_code_table (char_code, Vcharacter_attribute_table);
563 cell = Fassq (attribute, ret);
567 ret = Fcons (Fcons (attribute, value), ret);
569 else if (!EQ (Fcdr (cell), value))
571 Fsetcdr (cell, value);
573 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
578 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
580 Emchar char_code = XCHAR (character);
582 = get_char_code_table (char_code, Vcharacter_attribute_table);
584 if (EQ (attribute, Fcar (Fcar (alist))))
586 alist = Fcdr (alist);
590 Lisp_Object pr = alist;
591 Lisp_Object r = Fcdr (alist);
595 if (EQ (attribute, Fcar (Fcar (r))))
597 XCDR (pr) = Fcdr (r);
604 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
610 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
611 Store CHARACTER's ATTRIBUTE with VALUE.
613 (character, attribute, value))
617 CHECK_CHAR (character);
618 ccs = Ffind_charset (attribute);
621 if (!EQ (XCHARSET_NAME (ccs), Qucs)
622 || (XCHAR (character) != XINT (value)))
624 Lisp_Object cpos, rest;
625 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
632 /* ad-hoc method for `ascii' */
633 if ((XCHARSET_CHARS (ccs) == 94) &&
634 (XCHARSET_BYTE_OFFSET (ccs) != 33))
635 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
637 ccs_len = XCHARSET_CHARS (ccs);
641 Lisp_Object ret = Fcar (value);
644 signal_simple_error ("Invalid value for coded-charset", value);
645 code_point = XINT (ret);
646 if (XCHARSET_GRAPHIC (ccs) == 1)
654 signal_simple_error ("Invalid value for coded-charset",
658 signal_simple_error ("Invalid value for coded-charset",
661 if (XCHARSET_GRAPHIC (ccs) == 1)
663 code_point = (code_point << 8) | i;
666 value = make_int (code_point);
668 else if (INTP (value))
670 if (XCHARSET_GRAPHIC (ccs) == 1)
671 value = make_int (XINT (value) & 0x7F7F7F7F);
674 signal_simple_error ("Invalid value for coded-charset", value);
677 cpos = Fget_char_attribute (character, attribute);
682 dim = XCHARSET_DIMENSION (ccs);
683 code_point = XINT (cpos);
687 i = ((code_point >> (8 * dim)) & 255)
688 - XCHARSET_BYTE_OFFSET (ccs);
689 nv = XVECTOR_DATA(v)[i];
695 XVECTOR_DATA(v)[i] = Qnil;
696 v = XCHARSET_DECODING_TABLE (ccs);
701 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
704 dim = XCHARSET_DIMENSION (ccs);
705 code_point = XINT (value);
710 i = ((code_point >> (8 * dim)) & 255)
711 - XCHARSET_BYTE_OFFSET (ccs);
712 nv = XVECTOR_DATA(v)[i];
716 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
722 XVECTOR_DATA(v)[i] = character;
727 else if (EQ (attribute, Q_decomposition))
730 signal_simple_error ("Invalid value for ->decomposition",
733 if (CONSP (Fcdr (value)))
735 Lisp_Object rest = value;
736 Lisp_Object table = Vcharacter_composition_table;
740 Lisp_Object v = Fcar (rest);
744 "Invalid value for ->decomposition", value);
749 put_char_code_table (c, character, table);
754 ntable = get_char_code_table (c, table);
755 if (!CHAR_CODE_TABLE_P (ntable))
757 ntable = make_char_code_table (Qnil);
758 put_char_code_table (c, ntable, table);
766 Lisp_Object v = Fcar (value);
772 = get_char_code_table (c, Vcharacter_variant_table);
774 if (NILP (Fmemq (v, ret)))
776 put_char_code_table (c, Fcons (character, ret),
777 Vcharacter_variant_table);
782 else if (EQ (attribute, Q_ucs))
788 signal_simple_error ("Invalid value for ->ucs", value);
792 ret = get_char_code_table (c, Vcharacter_variant_table);
793 if (NILP (Fmemq (character, ret)))
795 put_char_code_table (c, Fcons (character, ret),
796 Vcharacter_variant_table);
799 return put_char_attribute (character, attribute, value);
802 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
803 Remove CHARACTER's ATTRIBUTE.
805 (character, attribute))
809 CHECK_CHAR (character);
810 ccs = Ffind_charset (attribute);
814 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
821 /* ad-hoc method for `ascii' */
822 if ((XCHARSET_CHARS (ccs) == 94) &&
823 (XCHARSET_BYTE_OFFSET (ccs) != 33))
824 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
826 ccs_len = XCHARSET_CHARS (ccs);
829 cpos = Fget_char_attribute (character, attribute);
834 dim = XCHARSET_DIMENSION (ccs);
835 code_point = XINT (cpos);
839 i = ((code_point >> (8 * dim)) & 255)
840 - XCHARSET_BYTE_OFFSET (ccs);
841 nv = XVECTOR_DATA(v)[i];
847 XVECTOR_DATA(v)[i] = Qnil;
848 v = XCHARSET_DECODING_TABLE (ccs);
852 return remove_char_attribute (character, attribute);
855 EXFUN (Fmake_char, 3);
856 EXFUN (Fdecode_char, 2);
858 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
859 Store character's ATTRIBUTES.
863 Lisp_Object rest = attributes;
864 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
865 Lisp_Object character;
871 Lisp_Object cell = Fcar (rest);
875 signal_simple_error ("Invalid argument", attributes);
876 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
877 && ((XCHARSET_FINAL (ccs) != 0) ||
878 (XCHARSET_UCS_MAX (ccs) > 0)) )
882 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
884 character = Fdecode_char (ccs, cell);
885 goto setup_attributes;
889 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
892 signal_simple_error ("Invalid argument", attributes);
894 character = make_char (XINT (code) + 0x100000);
895 goto setup_attributes;
899 else if (!INTP (code))
900 signal_simple_error ("Invalid argument", attributes);
902 character = make_char (XINT (code));
908 Lisp_Object cell = Fcar (rest);
911 signal_simple_error ("Invalid argument", attributes);
912 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
916 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
919 Lisp_Object Vutf_2000_version;
923 int leading_code_private_11;
926 Lisp_Object Qcharsetp;
928 /* Qdoc_string, Qdimension, Qchars defined in general.c */
929 Lisp_Object Qregistry, Qfinal, Qgraphic;
930 Lisp_Object Qdirection;
931 Lisp_Object Qreverse_direction_charset;
932 Lisp_Object Qleading_byte;
933 Lisp_Object Qshort_name, Qlong_name;
949 Qjapanese_jisx0208_1978,
952 Qjapanese_jisx0208_1990,
963 Qvietnamese_viscii_lower,
964 Qvietnamese_viscii_upper,
994 Lisp_Object Ql2r, Qr2l;
996 Lisp_Object Vcharset_hash_table;
999 static Charset_ID next_allocated_leading_byte;
1001 static Charset_ID next_allocated_1_byte_leading_byte;
1002 static Charset_ID next_allocated_2_byte_leading_byte;
1005 /* Composite characters are characters constructed by overstriking two
1006 or more regular characters.
1008 1) The old Mule implementation involves storing composite characters
1009 in a buffer as a tag followed by all of the actual characters
1010 used to make up the composite character. I think this is a bad
1011 idea; it greatly complicates code that wants to handle strings
1012 one character at a time because it has to deal with the possibility
1013 of great big ungainly characters. It's much more reasonable to
1014 simply store an index into a table of composite characters.
1016 2) The current implementation only allows for 16,384 separate
1017 composite characters over the lifetime of the XEmacs process.
1018 This could become a potential problem if the user
1019 edited lots of different files that use composite characters.
1020 Due to FSF bogosity, increasing the number of allowable
1021 composite characters under Mule would decrease the number
1022 of possible faces that can exist. Mule already has shrunk
1023 this to 2048, and further shrinkage would become uncomfortable.
1024 No such problems exist in XEmacs.
1026 Composite characters could be represented as 0x80 C1 C2 C3,
1027 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1028 for slightly under 2^20 (one million) composite characters
1029 over the XEmacs process lifetime, and you only need to
1030 increase the size of a Mule character from 19 to 21 bits.
1031 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1032 85 million (slightly over 2^26) composite characters. */
1035 /************************************************************************/
1036 /* Basic Emchar functions */
1037 /************************************************************************/
1039 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1040 string in STR. Returns the number of bytes stored.
1041 Do not call this directly. Use the macro set_charptr_emchar() instead.
1045 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1051 Lisp_Object charset;
1060 else if ( c <= 0x7ff )
1062 *p++ = (c >> 6) | 0xc0;
1063 *p++ = (c & 0x3f) | 0x80;
1065 else if ( c <= 0xffff )
1067 *p++ = (c >> 12) | 0xe0;
1068 *p++ = ((c >> 6) & 0x3f) | 0x80;
1069 *p++ = (c & 0x3f) | 0x80;
1071 else if ( c <= 0x1fffff )
1073 *p++ = (c >> 18) | 0xf0;
1074 *p++ = ((c >> 12) & 0x3f) | 0x80;
1075 *p++ = ((c >> 6) & 0x3f) | 0x80;
1076 *p++ = (c & 0x3f) | 0x80;
1078 else if ( c <= 0x3ffffff )
1080 *p++ = (c >> 24) | 0xf8;
1081 *p++ = ((c >> 18) & 0x3f) | 0x80;
1082 *p++ = ((c >> 12) & 0x3f) | 0x80;
1083 *p++ = ((c >> 6) & 0x3f) | 0x80;
1084 *p++ = (c & 0x3f) | 0x80;
1088 *p++ = (c >> 30) | 0xfc;
1089 *p++ = ((c >> 24) & 0x3f) | 0x80;
1090 *p++ = ((c >> 18) & 0x3f) | 0x80;
1091 *p++ = ((c >> 12) & 0x3f) | 0x80;
1092 *p++ = ((c >> 6) & 0x3f) | 0x80;
1093 *p++ = (c & 0x3f) | 0x80;
1096 BREAKUP_CHAR (c, charset, c1, c2);
1097 lb = CHAR_LEADING_BYTE (c);
1098 if (LEADING_BYTE_PRIVATE_P (lb))
1099 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1101 if (EQ (charset, Vcharset_control_1))
1110 /* Return the first character from a Mule-encoded string in STR,
1111 assuming it's non-ASCII. Do not call this directly.
1112 Use the macro charptr_emchar() instead. */
1115 non_ascii_charptr_emchar (CONST Bufbyte *str)
1128 else if ( b >= 0xf8 )
1133 else if ( b >= 0xf0 )
1138 else if ( b >= 0xe0 )
1143 else if ( b >= 0xc0 )
1153 for( ; len > 0; len-- )
1156 ch = ( ch << 6 ) | ( b & 0x3f );
1160 Bufbyte i0 = *str, i1, i2 = 0;
1161 Lisp_Object charset;
1163 if (i0 == LEADING_BYTE_CONTROL_1)
1164 return (Emchar) (*++str - 0x20);
1166 if (LEADING_BYTE_PREFIX_P (i0))
1171 charset = CHARSET_BY_LEADING_BYTE (i0);
1172 if (XCHARSET_DIMENSION (charset) == 2)
1175 return MAKE_CHAR (charset, i1, i2);
1179 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1180 Do not call this directly. Use the macro valid_char_p() instead. */
1184 non_ascii_valid_char_p (Emchar ch)
1188 /* Must have only lowest 19 bits set */
1192 f1 = CHAR_FIELD1 (ch);
1193 f2 = CHAR_FIELD2 (ch);
1194 f3 = CHAR_FIELD3 (ch);
1198 Lisp_Object charset;
1200 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1201 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1202 f2 > MAX_CHAR_FIELD2_PRIVATE)
1207 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1208 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1212 NOTE: This takes advantage of the fact that
1213 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1214 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1216 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1217 if (EQ (charset, Qnil))
1219 return (XCHARSET_CHARS (charset) == 96);
1223 Lisp_Object charset;
1225 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1226 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1227 f1 > MAX_CHAR_FIELD1_PRIVATE)
1229 if (f2 < 0x20 || f3 < 0x20)
1232 #ifdef ENABLE_COMPOSITE_CHARS
1233 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1235 if (UNBOUNDP (Fgethash (make_int (ch),
1236 Vcomposite_char_char2string_hash_table,
1241 #endif /* ENABLE_COMPOSITE_CHARS */
1243 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1244 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1247 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1249 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1252 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1254 if (EQ (charset, Qnil))
1256 return (XCHARSET_CHARS (charset) == 96);
1262 /************************************************************************/
1263 /* Basic string functions */
1264 /************************************************************************/
1266 /* Copy the character pointed to by PTR into STR, assuming it's
1267 non-ASCII. Do not call this directly. Use the macro
1268 charptr_copy_char() instead. */
1271 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1273 Bufbyte *strptr = str;
1275 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1277 /* Notice fallthrough. */
1279 case 6: *++strptr = *ptr++;
1280 case 5: *++strptr = *ptr++;
1282 case 4: *++strptr = *ptr++;
1283 case 3: *++strptr = *ptr++;
1284 case 2: *++strptr = *ptr;
1289 return strptr + 1 - str;
1293 /************************************************************************/
1294 /* streams of Emchars */
1295 /************************************************************************/
1297 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1298 The functions below are not meant to be called directly; use
1299 the macros in insdel.h. */
1302 Lstream_get_emchar_1 (Lstream *stream, int ch)
1304 Bufbyte str[MAX_EMCHAR_LEN];
1305 Bufbyte *strptr = str;
1307 str[0] = (Bufbyte) ch;
1308 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1310 /* Notice fallthrough. */
1313 ch = Lstream_getc (stream);
1315 *++strptr = (Bufbyte) ch;
1317 ch = Lstream_getc (stream);
1319 *++strptr = (Bufbyte) ch;
1322 ch = Lstream_getc (stream);
1324 *++strptr = (Bufbyte) ch;
1326 ch = Lstream_getc (stream);
1328 *++strptr = (Bufbyte) ch;
1330 ch = Lstream_getc (stream);
1332 *++strptr = (Bufbyte) ch;
1337 return charptr_emchar (str);
1341 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1343 Bufbyte str[MAX_EMCHAR_LEN];
1344 Bytecount len = set_charptr_emchar (str, ch);
1345 return Lstream_write (stream, str, len);
1349 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1351 Bufbyte str[MAX_EMCHAR_LEN];
1352 Bytecount len = set_charptr_emchar (str, ch);
1353 Lstream_unread (stream, str, len);
1357 /************************************************************************/
1358 /* charset object */
1359 /************************************************************************/
1362 mark_charset (Lisp_Object obj)
1364 struct Lisp_Charset *cs = XCHARSET (obj);
1366 mark_object (cs->short_name);
1367 mark_object (cs->long_name);
1368 mark_object (cs->doc_string);
1369 mark_object (cs->registry);
1370 mark_object (cs->ccl_program);
1372 mark_object (cs->decoding_table);
1378 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1380 struct Lisp_Charset *cs = XCHARSET (obj);
1384 error ("printing unreadable object #<charset %s 0x%x>",
1385 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1388 write_c_string ("#<charset ", printcharfun);
1389 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1390 write_c_string (" ", printcharfun);
1391 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1392 write_c_string (" ", printcharfun);
1393 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1394 write_c_string (" ", printcharfun);
1395 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1396 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1398 CHARSET_DIMENSION (cs),
1399 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1400 CHARSET_COLUMNS (cs),
1401 CHARSET_GRAPHIC (cs),
1402 CHARSET_FINAL (cs));
1403 write_c_string (buf, printcharfun);
1404 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1405 sprintf (buf, " 0x%x>", cs->header.uid);
1406 write_c_string (buf, printcharfun);
1409 static const struct lrecord_description charset_description[] = {
1410 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1412 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1417 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1418 mark_charset, print_charset, 0, 0, 0,
1419 charset_description,
1420 struct Lisp_Charset);
1421 /* Make a new charset. */
1424 make_charset (Charset_ID id, Lisp_Object name,
1425 unsigned short chars, unsigned char dimension,
1426 unsigned char columns, unsigned char graphic,
1427 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1428 Lisp_Object long_name, Lisp_Object doc,
1430 Lisp_Object decoding_table,
1431 Emchar ucs_min, Emchar ucs_max,
1432 Emchar code_offset, unsigned char byte_offset)
1434 unsigned char type = 0;
1436 struct Lisp_Charset *cs =
1437 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1438 XSETCHARSET (obj, cs);
1440 CHARSET_ID (cs) = id;
1441 CHARSET_NAME (cs) = name;
1442 CHARSET_SHORT_NAME (cs) = short_name;
1443 CHARSET_LONG_NAME (cs) = long_name;
1444 CHARSET_CHARS (cs) = chars;
1445 CHARSET_DIMENSION (cs) = dimension;
1446 CHARSET_DIRECTION (cs) = direction;
1447 CHARSET_COLUMNS (cs) = columns;
1448 CHARSET_GRAPHIC (cs) = graphic;
1449 CHARSET_FINAL (cs) = final;
1450 CHARSET_DOC_STRING (cs) = doc;
1451 CHARSET_REGISTRY (cs) = reg;
1452 CHARSET_CCL_PROGRAM (cs) = Qnil;
1453 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1455 CHARSET_DECODING_TABLE(cs) = Qnil;
1456 CHARSET_UCS_MIN(cs) = ucs_min;
1457 CHARSET_UCS_MAX(cs) = ucs_max;
1458 CHARSET_CODE_OFFSET(cs) = code_offset;
1459 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1462 switch (CHARSET_CHARS (cs))
1465 switch (CHARSET_DIMENSION (cs))
1468 type = CHARSET_TYPE_94;
1471 type = CHARSET_TYPE_94X94;
1476 switch (CHARSET_DIMENSION (cs))
1479 type = CHARSET_TYPE_96;
1482 type = CHARSET_TYPE_96X96;
1488 switch (CHARSET_DIMENSION (cs))
1491 type = CHARSET_TYPE_128;
1494 type = CHARSET_TYPE_128X128;
1499 switch (CHARSET_DIMENSION (cs))
1502 type = CHARSET_TYPE_256;
1505 type = CHARSET_TYPE_256X256;
1512 CHARSET_TYPE (cs) = type;
1516 if (id == LEADING_BYTE_ASCII)
1517 CHARSET_REP_BYTES (cs) = 1;
1519 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1521 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1526 /* some charsets do not have final characters. This includes
1527 ASCII, Control-1, Composite, and the two faux private
1530 if (code_offset == 0)
1532 assert (NILP (chlook->charset_by_attributes[type][final]));
1533 chlook->charset_by_attributes[type][final] = obj;
1536 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1537 chlook->charset_by_attributes[type][final][direction] = obj;
1541 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1542 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1544 /* Some charsets are "faux" and don't have names or really exist at
1545 all except in the leading-byte table. */
1547 Fputhash (name, obj, Vcharset_hash_table);
1552 get_unallocated_leading_byte (int dimension)
1557 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1560 lb = next_allocated_leading_byte++;
1564 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1567 lb = next_allocated_1_byte_leading_byte++;
1571 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1574 lb = next_allocated_2_byte_leading_byte++;
1580 ("No more character sets free for this dimension",
1581 make_int (dimension));
1588 make_builtin_char (Lisp_Object charset, int c1, int c2)
1590 if (XCHARSET_UCS_MAX (charset))
1593 = (XCHARSET_DIMENSION (charset) == 1
1595 c1 - XCHARSET_BYTE_OFFSET (charset)
1597 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1598 + c2 - XCHARSET_BYTE_OFFSET (charset))
1599 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1600 if ((code < XCHARSET_UCS_MIN (charset))
1601 || (XCHARSET_UCS_MAX (charset) < code))
1602 signal_simple_error ("Arguments makes invalid character",
1606 else if (XCHARSET_DIMENSION (charset) == 1)
1608 switch (XCHARSET_CHARS (charset))
1612 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1615 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1622 switch (XCHARSET_CHARS (charset))
1625 return MIN_CHAR_94x94
1626 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1627 + (c1 - 33) * 94 + (c2 - 33);
1629 return MIN_CHAR_96x96
1630 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1631 + (c1 - 32) * 96 + (c2 - 32);
1639 range_charset_code_point (Lisp_Object charset, Emchar ch)
1643 if ((XCHARSET_UCS_MIN (charset) <= ch)
1644 && (ch <= XCHARSET_UCS_MAX (charset)))
1646 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1648 if (XCHARSET_CHARS (charset) == 256)
1650 else if (XCHARSET_DIMENSION (charset) == 1)
1651 return d + XCHARSET_BYTE_OFFSET (charset);
1652 else if (XCHARSET_DIMENSION (charset) == 2)
1654 ((d / XCHARSET_CHARS (charset)
1655 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1656 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1657 else if (XCHARSET_DIMENSION (charset) == 3)
1659 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1660 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1661 | ((d / XCHARSET_CHARS (charset)
1662 % XCHARSET_CHARS (charset)
1663 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1664 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1665 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1667 ((d / (XCHARSET_CHARS (charset)
1668 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1669 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1670 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1671 % XCHARSET_CHARS (charset)
1672 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1673 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1674 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1675 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1677 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1679 if (XCHARSET_DIMENSION (charset) == 1)
1681 if (XCHARSET_CHARS (charset) == 94)
1683 if (((d = ch - (MIN_CHAR_94
1684 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1688 else if (XCHARSET_CHARS (charset) == 96)
1690 if (((d = ch - (MIN_CHAR_96
1691 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1698 else if (XCHARSET_DIMENSION (charset) == 2)
1700 if (XCHARSET_CHARS (charset) == 94)
1702 if (((d = ch - (MIN_CHAR_94x94
1703 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1706 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1708 else if (XCHARSET_CHARS (charset) == 96)
1710 if (((d = ch - (MIN_CHAR_96x96
1711 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1714 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1724 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1726 if (c <= MAX_CHAR_BASIC_LATIN)
1728 *charset = Vcharset_ascii;
1733 *charset = Vcharset_control_1;
1738 *charset = Vcharset_latin_iso8859_1;
1742 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1744 *charset = Vcharset_greek_iso8859_7;
1745 return c - MIN_CHAR_GREEK + 0x20;
1747 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1749 *charset = Vcharset_cyrillic_iso8859_5;
1750 return c - MIN_CHAR_CYRILLIC + 0x20;
1753 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1755 *charset = Vcharset_hebrew_iso8859_8;
1756 return c - MIN_CHAR_HEBREW + 0x20;
1758 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1760 *charset = Vcharset_thai_tis620;
1761 return c - MIN_CHAR_THAI + 0x20;
1764 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1765 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1767 return list2 (Vcharset_katakana_jisx0201,
1768 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1771 else if (c <= MAX_CHAR_BMP)
1773 *charset = Vcharset_ucs_bmp;
1776 else if (c < MIN_CHAR_DAIKANWA)
1778 *charset = Vcharset_ucs;
1782 else if (c <= MAX_CHAR_DAIKANWA)
1784 *charset = Vcharset_ideograph_daikanwa;
1785 return c - MIN_CHAR_DAIKANWA;
1788 else if (c <= MAX_CHAR_MOJIKYO)
1790 *charset = Vcharset_mojikyo;
1791 return c - MIN_CHAR_MOJIKYO;
1793 else if (c < MIN_CHAR_94)
1795 *charset = Vcharset_ucs;
1798 else if (c <= MAX_CHAR_94)
1800 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1801 ((c - MIN_CHAR_94) / 94) + '0',
1802 CHARSET_LEFT_TO_RIGHT);
1803 if (!NILP (*charset))
1804 return ((c - MIN_CHAR_94) % 94) + 33;
1807 *charset = Vcharset_ucs;
1811 else if (c <= MAX_CHAR_96)
1813 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1814 ((c - MIN_CHAR_96) / 96) + '0',
1815 CHARSET_LEFT_TO_RIGHT);
1816 if (!NILP (*charset))
1817 return ((c - MIN_CHAR_96) % 96) + 32;
1820 *charset = Vcharset_ucs;
1824 else if (c <= MAX_CHAR_94x94)
1827 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1828 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1829 CHARSET_LEFT_TO_RIGHT);
1830 if (!NILP (*charset))
1831 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1832 | (((c - MIN_CHAR_94x94) % 94) + 33);
1835 *charset = Vcharset_ucs;
1839 else if (c <= MAX_CHAR_96x96)
1842 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1843 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1844 CHARSET_LEFT_TO_RIGHT);
1845 if (!NILP (*charset))
1846 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1847 | (((c - MIN_CHAR_96x96) % 96) + 32);
1850 *charset = Vcharset_ucs;
1856 *charset = Vcharset_ucs;
1861 Lisp_Object Vdefault_coded_charset_priority_list;
1865 /************************************************************************/
1866 /* Basic charset Lisp functions */
1867 /************************************************************************/
1869 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1870 Return non-nil if OBJECT is a charset.
1874 return CHARSETP (object) ? Qt : Qnil;
1877 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1878 Retrieve the charset of the given name.
1879 If CHARSET-OR-NAME is a charset object, it is simply returned.
1880 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1881 nil is returned. Otherwise the associated charset object is returned.
1885 if (CHARSETP (charset_or_name))
1886 return charset_or_name;
1888 CHECK_SYMBOL (charset_or_name);
1889 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1892 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1893 Retrieve the charset of the given name.
1894 Same as `find-charset' except an error is signalled if there is no such
1895 charset instead of returning nil.
1899 Lisp_Object charset = Ffind_charset (name);
1902 signal_simple_error ("No such charset", name);
1906 /* We store the charsets in hash tables with the names as the key and the
1907 actual charset object as the value. Occasionally we need to use them
1908 in a list format. These routines provide us with that. */
1909 struct charset_list_closure
1911 Lisp_Object *charset_list;
1915 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1916 void *charset_list_closure)
1918 /* This function can GC */
1919 struct charset_list_closure *chcl =
1920 (struct charset_list_closure*) charset_list_closure;
1921 Lisp_Object *charset_list = chcl->charset_list;
1923 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1927 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1928 Return a list of the names of all defined charsets.
1932 Lisp_Object charset_list = Qnil;
1933 struct gcpro gcpro1;
1934 struct charset_list_closure charset_list_closure;
1936 GCPRO1 (charset_list);
1937 charset_list_closure.charset_list = &charset_list;
1938 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1939 &charset_list_closure);
1942 return charset_list;
1945 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1946 Return the name of the given charset.
1950 return XCHARSET_NAME (Fget_charset (charset));
1953 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1954 Define a new character set.
1955 This function is for use with Mule support.
1956 NAME is a symbol, the name by which the character set is normally referred.
1957 DOC-STRING is a string describing the character set.
1958 PROPS is a property list, describing the specific nature of the
1959 character set. Recognized properties are:
1961 'short-name Short version of the charset name (ex: Latin-1)
1962 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1963 'registry A regular expression matching the font registry field for
1965 'dimension Number of octets used to index a character in this charset.
1966 Either 1 or 2. Defaults to 1.
1967 'columns Number of columns used to display a character in this charset.
1968 Only used in TTY mode. (Under X, the actual width of a
1969 character can be derived from the font used to display the
1970 characters.) If unspecified, defaults to the dimension
1971 (this is almost always the correct value).
1972 'chars Number of characters in each dimension (94 or 96).
1973 Defaults to 94. Note that if the dimension is 2, the
1974 character set thus described is 94x94 or 96x96.
1975 'final Final byte of ISO 2022 escape sequence. Must be
1976 supplied. Each combination of (DIMENSION, CHARS) defines a
1977 separate namespace for final bytes. Note that ISO
1978 2022 restricts the final byte to the range
1979 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1980 dimension == 2. Note also that final bytes in the range
1981 0x30 - 0x3F are reserved for user-defined (not official)
1983 'graphic 0 (use left half of font on output) or 1 (use right half
1984 of font on output). Defaults to 0. For example, for
1985 a font whose registry is ISO8859-1, the left half
1986 (octets 0x20 - 0x7F) is the `ascii' character set, while
1987 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1988 character set. With 'graphic set to 0, the octets
1989 will have their high bit cleared; with it set to 1,
1990 the octets will have their high bit set.
1991 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1993 'ccl-program A compiled CCL program used to convert a character in
1994 this charset into an index into the font. This is in
1995 addition to the 'graphic property. The CCL program
1996 is passed the octets of the character, with the high
1997 bit cleared and set depending upon whether the value
1998 of the 'graphic property is 0 or 1.
2000 (name, doc_string, props))
2002 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2003 int direction = CHARSET_LEFT_TO_RIGHT;
2005 Lisp_Object registry = Qnil;
2006 Lisp_Object charset;
2007 Lisp_Object rest, keyword, value;
2008 Lisp_Object ccl_program = Qnil;
2009 Lisp_Object short_name = Qnil, long_name = Qnil;
2010 int byte_offset = -1;
2012 CHECK_SYMBOL (name);
2013 if (!NILP (doc_string))
2014 CHECK_STRING (doc_string);
2016 charset = Ffind_charset (name);
2017 if (!NILP (charset))
2018 signal_simple_error ("Cannot redefine existing charset", name);
2020 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2022 if (EQ (keyword, Qshort_name))
2024 CHECK_STRING (value);
2028 if (EQ (keyword, Qlong_name))
2030 CHECK_STRING (value);
2034 else if (EQ (keyword, Qdimension))
2037 dimension = XINT (value);
2038 if (dimension < 1 || dimension > 2)
2039 signal_simple_error ("Invalid value for 'dimension", value);
2042 else if (EQ (keyword, Qchars))
2045 chars = XINT (value);
2046 if (chars != 94 && chars != 96)
2047 signal_simple_error ("Invalid value for 'chars", value);
2050 else if (EQ (keyword, Qcolumns))
2053 columns = XINT (value);
2054 if (columns != 1 && columns != 2)
2055 signal_simple_error ("Invalid value for 'columns", value);
2058 else if (EQ (keyword, Qgraphic))
2061 graphic = XINT (value);
2063 if (graphic < 0 || graphic > 2)
2065 if (graphic < 0 || graphic > 1)
2067 signal_simple_error ("Invalid value for 'graphic", value);
2070 else if (EQ (keyword, Qregistry))
2072 CHECK_STRING (value);
2076 else if (EQ (keyword, Qdirection))
2078 if (EQ (value, Ql2r))
2079 direction = CHARSET_LEFT_TO_RIGHT;
2080 else if (EQ (value, Qr2l))
2081 direction = CHARSET_RIGHT_TO_LEFT;
2083 signal_simple_error ("Invalid value for 'direction", value);
2086 else if (EQ (keyword, Qfinal))
2088 CHECK_CHAR_COERCE_INT (value);
2089 final = XCHAR (value);
2090 if (final < '0' || final > '~')
2091 signal_simple_error ("Invalid value for 'final", value);
2094 else if (EQ (keyword, Qccl_program))
2096 CHECK_VECTOR (value);
2097 ccl_program = value;
2101 signal_simple_error ("Unrecognized property", keyword);
2105 error ("'final must be specified");
2106 if (dimension == 2 && final > 0x5F)
2108 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2112 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2114 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2116 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2117 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2119 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2121 id = get_unallocated_leading_byte (dimension);
2123 if (NILP (doc_string))
2124 doc_string = build_string ("");
2126 if (NILP (registry))
2127 registry = build_string ("");
2129 if (NILP (short_name))
2130 XSETSTRING (short_name, XSYMBOL (name)->name);
2132 if (NILP (long_name))
2133 long_name = doc_string;
2136 columns = dimension;
2138 if (byte_offset < 0)
2142 else if (chars == 96)
2148 charset = make_charset (id, name, chars, dimension, columns, graphic,
2149 final, direction, short_name, long_name,
2150 doc_string, registry,
2151 Qnil, 0, 0, 0, byte_offset);
2152 if (!NILP (ccl_program))
2153 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2157 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2159 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2160 NEW-NAME is the name of the new charset. Return the new charset.
2162 (charset, new_name))
2164 Lisp_Object new_charset = Qnil;
2165 int id, chars, dimension, columns, graphic, final;
2167 Lisp_Object registry, doc_string, short_name, long_name;
2168 struct Lisp_Charset *cs;
2170 charset = Fget_charset (charset);
2171 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2172 signal_simple_error ("Charset already has reverse-direction charset",
2175 CHECK_SYMBOL (new_name);
2176 if (!NILP (Ffind_charset (new_name)))
2177 signal_simple_error ("Cannot redefine existing charset", new_name);
2179 cs = XCHARSET (charset);
2181 chars = CHARSET_CHARS (cs);
2182 dimension = CHARSET_DIMENSION (cs);
2183 columns = CHARSET_COLUMNS (cs);
2184 id = get_unallocated_leading_byte (dimension);
2186 graphic = CHARSET_GRAPHIC (cs);
2187 final = CHARSET_FINAL (cs);
2188 direction = CHARSET_RIGHT_TO_LEFT;
2189 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2190 direction = CHARSET_LEFT_TO_RIGHT;
2191 doc_string = CHARSET_DOC_STRING (cs);
2192 short_name = CHARSET_SHORT_NAME (cs);
2193 long_name = CHARSET_LONG_NAME (cs);
2194 registry = CHARSET_REGISTRY (cs);
2196 new_charset = make_charset (id, new_name, chars, dimension, columns,
2197 graphic, final, direction, short_name, long_name,
2198 doc_string, registry,
2200 CHARSET_DECODING_TABLE(cs),
2201 CHARSET_UCS_MIN(cs),
2202 CHARSET_UCS_MAX(cs),
2203 CHARSET_CODE_OFFSET(cs),
2204 CHARSET_BYTE_OFFSET(cs)
2210 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2211 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2216 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2217 Define symbol ALIAS as an alias for CHARSET.
2221 CHECK_SYMBOL (alias);
2222 charset = Fget_charset (charset);
2223 return Fputhash (alias, charset, Vcharset_hash_table);
2226 /* #### Reverse direction charsets not yet implemented. */
2228 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2230 Return the reverse-direction charset parallel to CHARSET, if any.
2231 This is the charset with the same properties (in particular, the same
2232 dimension, number of characters per dimension, and final byte) as
2233 CHARSET but whose characters are displayed in the opposite direction.
2237 charset = Fget_charset (charset);
2238 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2242 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2243 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2244 If DIRECTION is omitted, both directions will be checked (left-to-right
2245 will be returned if character sets exist for both directions).
2247 (dimension, chars, final, direction))
2249 int dm, ch, fi, di = -1;
2251 Lisp_Object obj = Qnil;
2253 CHECK_INT (dimension);
2254 dm = XINT (dimension);
2255 if (dm < 1 || dm > 2)
2256 signal_simple_error ("Invalid value for DIMENSION", dimension);
2260 if (ch != 94 && ch != 96)
2261 signal_simple_error ("Invalid value for CHARS", chars);
2263 CHECK_CHAR_COERCE_INT (final);
2265 if (fi < '0' || fi > '~')
2266 signal_simple_error ("Invalid value for FINAL", final);
2268 if (EQ (direction, Ql2r))
2269 di = CHARSET_LEFT_TO_RIGHT;
2270 else if (EQ (direction, Qr2l))
2271 di = CHARSET_RIGHT_TO_LEFT;
2272 else if (!NILP (direction))
2273 signal_simple_error ("Invalid value for DIRECTION", direction);
2275 if (dm == 2 && fi > 0x5F)
2277 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2280 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2282 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2286 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2288 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2291 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2294 return XCHARSET_NAME (obj);
2298 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2299 Return short name of CHARSET.
2303 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2306 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2307 Return long name of CHARSET.
2311 return XCHARSET_LONG_NAME (Fget_charset (charset));
2314 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2315 Return description of CHARSET.
2319 return XCHARSET_DOC_STRING (Fget_charset (charset));
2322 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2323 Return dimension of CHARSET.
2327 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2330 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2331 Return property PROP of CHARSET.
2332 Recognized properties are those listed in `make-charset', as well as
2333 'name and 'doc-string.
2337 struct Lisp_Charset *cs;
2339 charset = Fget_charset (charset);
2340 cs = XCHARSET (charset);
2342 CHECK_SYMBOL (prop);
2343 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2344 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2345 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2346 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2347 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2348 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2349 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2350 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2351 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2352 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2353 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2354 if (EQ (prop, Qdirection))
2355 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2356 if (EQ (prop, Qreverse_direction_charset))
2358 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2362 return XCHARSET_NAME (obj);
2364 signal_simple_error ("Unrecognized charset property name", prop);
2365 return Qnil; /* not reached */
2368 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2369 Return charset identification number of CHARSET.
2373 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2376 /* #### We need to figure out which properties we really want to
2379 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2380 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2382 (charset, ccl_program))
2384 charset = Fget_charset (charset);
2385 CHECK_VECTOR (ccl_program);
2386 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2391 invalidate_charset_font_caches (Lisp_Object charset)
2393 /* Invalidate font cache entries for charset on all devices. */
2394 Lisp_Object devcons, concons, hash_table;
2395 DEVICE_LOOP_NO_BREAK (devcons, concons)
2397 struct device *d = XDEVICE (XCAR (devcons));
2398 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2399 if (!UNBOUNDP (hash_table))
2400 Fclrhash (hash_table);
2404 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2405 Set the 'registry property of CHARSET to REGISTRY.
2407 (charset, registry))
2409 charset = Fget_charset (charset);
2410 CHECK_STRING (registry);
2411 XCHARSET_REGISTRY (charset) = registry;
2412 invalidate_charset_font_caches (charset);
2413 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2418 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2419 Return mapping-table of CHARSET.
2423 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2426 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2427 Set mapping-table of CHARSET to TABLE.
2431 struct Lisp_Charset *cs;
2432 Lisp_Object old_table;
2435 charset = Fget_charset (charset);
2436 cs = XCHARSET (charset);
2438 if (EQ (table, Qnil))
2440 CHARSET_DECODING_TABLE(cs) = table;
2443 else if (VECTORP (table))
2447 /* ad-hoc method for `ascii' */
2448 if ((CHARSET_CHARS (cs) == 94) &&
2449 (CHARSET_BYTE_OFFSET (cs) != 33))
2450 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2452 ccs_len = CHARSET_CHARS (cs);
2454 if (XVECTOR_LENGTH (table) > ccs_len)
2455 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2456 old_table = CHARSET_DECODING_TABLE(cs);
2457 CHARSET_DECODING_TABLE(cs) = table;
2460 signal_error (Qwrong_type_argument,
2461 list2 (build_translated_string ("vector-or-nil-p"),
2463 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2465 switch (CHARSET_DIMENSION (cs))
2468 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2470 Lisp_Object c = XVECTOR_DATA(table)[i];
2475 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2479 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2481 Lisp_Object v = XVECTOR_DATA(table)[i];
2487 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2489 CHARSET_DECODING_TABLE(cs) = old_table;
2490 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2492 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2494 Lisp_Object c = XVECTOR_DATA(v)[j];
2499 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2500 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2504 put_char_attribute (v, charset,
2505 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2514 /************************************************************************/
2515 /* Lisp primitives for working with characters */
2516 /************************************************************************/
2519 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2520 Make a character from CHARSET and code-point CODE.
2526 charset = Fget_charset (charset);
2529 if (XCHARSET_GRAPHIC (charset) == 1)
2531 return make_char (DECODE_CHAR (charset, c));
2535 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2536 Make a character from CHARSET and octets ARG1 and ARG2.
2537 ARG2 is required only for characters from two-dimensional charsets.
2538 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2539 character s with caron.
2541 (charset, arg1, arg2))
2543 struct Lisp_Charset *cs;
2545 int lowlim, highlim;
2547 charset = Fget_charset (charset);
2548 cs = XCHARSET (charset);
2550 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2551 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2553 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2555 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2556 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2559 /* It is useful (and safe, according to Olivier Galibert) to strip
2560 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2561 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2562 Latin 2 code of the character. */
2570 if (a1 < lowlim || a1 > highlim)
2571 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2573 if (CHARSET_DIMENSION (cs) == 1)
2577 ("Charset is of dimension one; second octet must be nil", arg2);
2578 return make_char (MAKE_CHAR (charset, a1, 0));
2587 a2 = XINT (arg2) & 0x7f;
2589 if (a2 < lowlim || a2 > highlim)
2590 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2592 return make_char (MAKE_CHAR (charset, a1, a2));
2595 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2596 Return the character set of char CH.
2600 CHECK_CHAR_COERCE_INT (ch);
2602 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2605 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2606 Return the octet numbered N (should be 0 or 1) of char CH.
2607 N defaults to 0 if omitted.
2611 Lisp_Object charset;
2614 CHECK_CHAR_COERCE_INT (ch);
2616 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2618 if (NILP (n) || EQ (n, Qzero))
2619 return make_int (octet0);
2620 else if (EQ (n, make_int (1)))
2621 return make_int (octet1);
2623 signal_simple_error ("Octet number must be 0 or 1", n);
2626 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2627 Return list of charset and one or two position-codes of CHAR.
2631 /* This function can GC */
2632 struct gcpro gcpro1, gcpro2;
2633 Lisp_Object charset = Qnil;
2634 Lisp_Object rc = Qnil;
2642 GCPRO2 (charset, rc);
2643 CHECK_CHAR_COERCE_INT (character);
2646 code_point = ENCODE_CHAR (XCHAR (character), charset);
2647 dimension = XCHARSET_DIMENSION (charset);
2648 while (dimension > 0)
2650 rc = Fcons (make_int (code_point & 255), rc);
2654 rc = Fcons (XCHARSET_NAME (charset), rc);
2656 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2658 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2660 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2664 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2673 #ifdef ENABLE_COMPOSITE_CHARS
2674 /************************************************************************/
2675 /* composite character functions */
2676 /************************************************************************/
2679 lookup_composite_char (Bufbyte *str, int len)
2681 Lisp_Object lispstr = make_string (str, len);
2682 Lisp_Object ch = Fgethash (lispstr,
2683 Vcomposite_char_string2char_hash_table,
2689 if (composite_char_row_next >= 128)
2690 signal_simple_error ("No more composite chars available", lispstr);
2691 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2692 composite_char_col_next);
2693 Fputhash (make_char (emch), lispstr,
2694 Vcomposite_char_char2string_hash_table);
2695 Fputhash (lispstr, make_char (emch),
2696 Vcomposite_char_string2char_hash_table);
2697 composite_char_col_next++;
2698 if (composite_char_col_next >= 128)
2700 composite_char_col_next = 32;
2701 composite_char_row_next++;
2710 composite_char_string (Emchar ch)
2712 Lisp_Object str = Fgethash (make_char (ch),
2713 Vcomposite_char_char2string_hash_table,
2715 assert (!UNBOUNDP (str));
2719 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2720 Convert a string into a single composite character.
2721 The character is the result of overstriking all the characters in
2726 CHECK_STRING (string);
2727 return make_char (lookup_composite_char (XSTRING_DATA (string),
2728 XSTRING_LENGTH (string)));
2731 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2732 Return a string of the characters comprising a composite character.
2740 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2741 signal_simple_error ("Must be composite char", ch);
2742 return composite_char_string (emch);
2744 #endif /* ENABLE_COMPOSITE_CHARS */
2747 /************************************************************************/
2748 /* initialization */
2749 /************************************************************************/
2752 syms_of_mule_charset (void)
2754 DEFSUBR (Fcharsetp);
2755 DEFSUBR (Ffind_charset);
2756 DEFSUBR (Fget_charset);
2757 DEFSUBR (Fcharset_list);
2758 DEFSUBR (Fcharset_name);
2759 DEFSUBR (Fmake_charset);
2760 DEFSUBR (Fmake_reverse_direction_charset);
2761 /* DEFSUBR (Freverse_direction_charset); */
2762 DEFSUBR (Fdefine_charset_alias);
2763 DEFSUBR (Fcharset_from_attributes);
2764 DEFSUBR (Fcharset_short_name);
2765 DEFSUBR (Fcharset_long_name);
2766 DEFSUBR (Fcharset_description);
2767 DEFSUBR (Fcharset_dimension);
2768 DEFSUBR (Fcharset_property);
2769 DEFSUBR (Fcharset_id);
2770 DEFSUBR (Fset_charset_ccl_program);
2771 DEFSUBR (Fset_charset_registry);
2773 DEFSUBR (Fchar_attribute_alist);
2774 DEFSUBR (Fget_char_attribute);
2775 DEFSUBR (Fput_char_attribute);
2776 DEFSUBR (Fremove_char_attribute);
2777 DEFSUBR (Fdefine_char);
2778 DEFSUBR (Fchar_variants);
2779 DEFSUBR (Fget_composite_char);
2780 DEFSUBR (Fcharset_mapping_table);
2781 DEFSUBR (Fset_charset_mapping_table);
2785 DEFSUBR (Fdecode_char);
2787 DEFSUBR (Fmake_char);
2788 DEFSUBR (Fchar_charset);
2789 DEFSUBR (Fchar_octet);
2790 DEFSUBR (Fsplit_char);
2792 #ifdef ENABLE_COMPOSITE_CHARS
2793 DEFSUBR (Fmake_composite_char);
2794 DEFSUBR (Fcomposite_char_string);
2797 defsymbol (&Qcharsetp, "charsetp");
2798 defsymbol (&Qregistry, "registry");
2799 defsymbol (&Qfinal, "final");
2800 defsymbol (&Qgraphic, "graphic");
2801 defsymbol (&Qdirection, "direction");
2802 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2803 defsymbol (&Qshort_name, "short-name");
2804 defsymbol (&Qlong_name, "long-name");
2806 defsymbol (&Ql2r, "l2r");
2807 defsymbol (&Qr2l, "r2l");
2809 /* Charsets, compatible with FSF 20.3
2810 Naming convention is Script-Charset[-Edition] */
2811 defsymbol (&Qascii, "ascii");
2812 defsymbol (&Qcontrol_1, "control-1");
2813 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2814 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2815 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2816 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2817 defsymbol (&Qthai_tis620, "thai-tis620");
2818 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2819 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2820 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2821 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2822 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2823 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2824 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2825 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2826 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2827 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2828 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2829 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2830 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2831 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2832 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2834 defsymbol (&Q_ucs, "->ucs");
2835 defsymbol (&Q_decomposition, "->decomposition");
2836 defsymbol (&Qcompat, "compat");
2837 defsymbol (&Qisolated, "isolated");
2838 defsymbol (&Qinitial, "initial");
2839 defsymbol (&Qmedial, "medial");
2840 defsymbol (&Qfinal, "final");
2841 defsymbol (&Qvertical, "vertical");
2842 defsymbol (&QnoBreak, "noBreak");
2843 defsymbol (&Qfraction, "fraction");
2844 defsymbol (&Qsuper, "super");
2845 defsymbol (&Qsub, "sub");
2846 defsymbol (&Qcircle, "circle");
2847 defsymbol (&Qsquare, "square");
2848 defsymbol (&Qwide, "wide");
2849 defsymbol (&Qnarrow, "narrow");
2850 defsymbol (&Qsmall, "small");
2851 defsymbol (&Qfont, "font");
2852 defsymbol (&Qucs, "ucs");
2853 defsymbol (&Qucs_bmp, "ucs-bmp");
2854 defsymbol (&Qlatin_viscii, "latin-viscii");
2855 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2856 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2857 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2858 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2859 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2860 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2861 defsymbol (&Qmojikyo, "mojikyo");
2862 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2863 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2864 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2865 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2866 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2867 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2868 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2869 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2870 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2871 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2872 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2873 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2874 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2875 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2876 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2877 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2878 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2879 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2880 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2881 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2882 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2883 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2885 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2886 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2888 defsymbol (&Qcomposite, "composite");
2892 vars_of_mule_charset (void)
2899 chlook = xnew (struct charset_lookup);
2900 dumpstruct (&chlook, &charset_lookup_description);
2902 /* Table of charsets indexed by leading byte. */
2903 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2904 chlook->charset_by_leading_byte[i] = Qnil;
2907 /* Table of charsets indexed by type/final-byte. */
2908 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2909 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2910 chlook->charset_by_attributes[i][j] = Qnil;
2912 /* Table of charsets indexed by type/final-byte/direction. */
2913 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2914 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2915 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2916 chlook->charset_by_attributes[i][j][k] = Qnil;
2920 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2922 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2923 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2927 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2928 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2929 Leading-code of private TYPE9N charset of column-width 1.
2931 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2935 Vutf_2000_version = build_string("0.15 (Sangō)");
2936 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2937 Version number of UTF-2000.
2940 staticpro (&Vcharacter_attribute_table);
2941 Vcharacter_attribute_table = make_char_code_table (Qnil);
2943 staticpro (&Vcharacter_composition_table);
2944 Vcharacter_composition_table = make_char_code_table (Qnil);
2946 staticpro (&Vcharacter_variant_table);
2947 Vcharacter_variant_table = make_char_code_table (Qnil);
2949 Vdefault_coded_charset_priority_list = Qnil;
2950 DEFVAR_LISP ("default-coded-charset-priority-list",
2951 &Vdefault_coded_charset_priority_list /*
2952 Default order of preferred coded-character-sets.
2958 complex_vars_of_mule_charset (void)
2960 staticpro (&Vcharset_hash_table);
2961 Vcharset_hash_table =
2962 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2964 /* Predefined character sets. We store them into variables for
2968 staticpro (&Vcharset_ucs);
2970 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2971 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2972 build_string ("UCS"),
2973 build_string ("UCS"),
2974 build_string ("ISO/IEC 10646"),
2976 Qnil, 0, 0xFFFFFFF, 0, 0);
2977 staticpro (&Vcharset_ucs_bmp);
2979 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2980 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2981 build_string ("BMP"),
2982 build_string ("BMP"),
2983 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2984 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2985 Qnil, 0, 0xFFFF, 0, 0);
2987 # define MIN_CHAR_THAI 0
2988 # define MAX_CHAR_THAI 0
2989 # define MIN_CHAR_HEBREW 0
2990 # define MAX_CHAR_HEBREW 0
2991 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2992 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2994 staticpro (&Vcharset_ascii);
2996 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2997 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2998 build_string ("ASCII"),
2999 build_string ("ASCII)"),
3000 build_string ("ASCII (ISO646 IRV)"),
3001 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3002 Qnil, 0, 0x7F, 0, 0);
3003 staticpro (&Vcharset_control_1);
3004 Vcharset_control_1 =
3005 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3006 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3007 build_string ("C1"),
3008 build_string ("Control characters"),
3009 build_string ("Control characters 128-191"),
3011 Qnil, 0x80, 0x9F, 0, 0);
3012 staticpro (&Vcharset_latin_iso8859_1);
3013 Vcharset_latin_iso8859_1 =
3014 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3015 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("Latin-1"),
3017 build_string ("ISO8859-1 (Latin-1)"),
3018 build_string ("ISO8859-1 (Latin-1)"),
3019 build_string ("iso8859-1"),
3020 Qnil, 0xA0, 0xFF, 0, 32);
3021 staticpro (&Vcharset_latin_iso8859_2);
3022 Vcharset_latin_iso8859_2 =
3023 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3024 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3025 build_string ("Latin-2"),
3026 build_string ("ISO8859-2 (Latin-2)"),
3027 build_string ("ISO8859-2 (Latin-2)"),
3028 build_string ("iso8859-2"),
3030 staticpro (&Vcharset_latin_iso8859_3);
3031 Vcharset_latin_iso8859_3 =
3032 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3033 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3034 build_string ("Latin-3"),
3035 build_string ("ISO8859-3 (Latin-3)"),
3036 build_string ("ISO8859-3 (Latin-3)"),
3037 build_string ("iso8859-3"),
3039 staticpro (&Vcharset_latin_iso8859_4);
3040 Vcharset_latin_iso8859_4 =
3041 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3042 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3043 build_string ("Latin-4"),
3044 build_string ("ISO8859-4 (Latin-4)"),
3045 build_string ("ISO8859-4 (Latin-4)"),
3046 build_string ("iso8859-4"),
3048 staticpro (&Vcharset_thai_tis620);
3049 Vcharset_thai_tis620 =
3050 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3051 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3052 build_string ("TIS620"),
3053 build_string ("TIS620 (Thai)"),
3054 build_string ("TIS620.2529 (Thai)"),
3055 build_string ("tis620"),
3056 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3057 staticpro (&Vcharset_greek_iso8859_7);
3058 Vcharset_greek_iso8859_7 =
3059 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3060 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3061 build_string ("ISO8859-7"),
3062 build_string ("ISO8859-7 (Greek)"),
3063 build_string ("ISO8859-7 (Greek)"),
3064 build_string ("iso8859-7"),
3066 0 /* MIN_CHAR_GREEK */,
3067 0 /* MAX_CHAR_GREEK */, 0, 32);
3068 staticpro (&Vcharset_arabic_iso8859_6);
3069 Vcharset_arabic_iso8859_6 =
3070 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3071 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3072 build_string ("ISO8859-6"),
3073 build_string ("ISO8859-6 (Arabic)"),
3074 build_string ("ISO8859-6 (Arabic)"),
3075 build_string ("iso8859-6"),
3077 staticpro (&Vcharset_hebrew_iso8859_8);
3078 Vcharset_hebrew_iso8859_8 =
3079 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3080 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3081 build_string ("ISO8859-8"),
3082 build_string ("ISO8859-8 (Hebrew)"),
3083 build_string ("ISO8859-8 (Hebrew)"),
3084 build_string ("iso8859-8"),
3085 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3086 staticpro (&Vcharset_katakana_jisx0201);
3087 Vcharset_katakana_jisx0201 =
3088 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3089 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3090 build_string ("JISX0201 Kana"),
3091 build_string ("JISX0201.1976 (Japanese Kana)"),
3092 build_string ("JISX0201.1976 Japanese Kana"),
3093 build_string ("jisx0201\\.1976"),
3095 staticpro (&Vcharset_latin_jisx0201);
3096 Vcharset_latin_jisx0201 =
3097 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3098 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3099 build_string ("JISX0201 Roman"),
3100 build_string ("JISX0201.1976 (Japanese Roman)"),
3101 build_string ("JISX0201.1976 Japanese Roman"),
3102 build_string ("jisx0201\\.1976"),
3104 staticpro (&Vcharset_cyrillic_iso8859_5);
3105 Vcharset_cyrillic_iso8859_5 =
3106 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3107 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3108 build_string ("ISO8859-5"),
3109 build_string ("ISO8859-5 (Cyrillic)"),
3110 build_string ("ISO8859-5 (Cyrillic)"),
3111 build_string ("iso8859-5"),
3113 0 /* MIN_CHAR_CYRILLIC */,
3114 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3115 staticpro (&Vcharset_latin_iso8859_9);
3116 Vcharset_latin_iso8859_9 =
3117 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3118 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Latin-5"),
3120 build_string ("ISO8859-9 (Latin-5)"),
3121 build_string ("ISO8859-9 (Latin-5)"),
3122 build_string ("iso8859-9"),
3124 staticpro (&Vcharset_japanese_jisx0208_1978);
3125 Vcharset_japanese_jisx0208_1978 =
3126 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3127 Qjapanese_jisx0208_1978, 94, 2,
3128 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3129 build_string ("JIS X0208:1978"),
3130 build_string ("JIS X0208:1978 (Japanese)"),
3132 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3133 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3135 staticpro (&Vcharset_chinese_gb2312);
3136 Vcharset_chinese_gb2312 =
3137 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3138 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3139 build_string ("GB2312"),
3140 build_string ("GB2312)"),
3141 build_string ("GB2312 Chinese simplified"),
3142 build_string ("gb2312"),
3144 staticpro (&Vcharset_japanese_jisx0208);
3145 Vcharset_japanese_jisx0208 =
3146 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3147 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3148 build_string ("JISX0208"),
3149 build_string ("JIS X0208:1983 (Japanese)"),
3150 build_string ("JIS X0208:1983 Japanese Kanji"),
3151 build_string ("jisx0208\\.1983"),
3154 staticpro (&Vcharset_japanese_jisx0208_1990);
3155 Vcharset_japanese_jisx0208_1990 =
3156 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3157 Qjapanese_jisx0208_1990, 94, 2,
3158 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3159 build_string ("JISX0208-1990"),
3160 build_string ("JIS X0208:1990 (Japanese)"),
3161 build_string ("JIS X0208:1990 Japanese Kanji"),
3162 build_string ("jisx0208\\.1990"),
3164 MIN_CHAR_JIS_X0208_1990,
3165 MAX_CHAR_JIS_X0208_1990, 0, 33);
3167 staticpro (&Vcharset_korean_ksc5601);
3168 Vcharset_korean_ksc5601 =
3169 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3170 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3171 build_string ("KSC5601"),
3172 build_string ("KSC5601 (Korean"),
3173 build_string ("KSC5601 Korean Hangul and Hanja"),
3174 build_string ("ksc5601"),
3176 staticpro (&Vcharset_japanese_jisx0212);
3177 Vcharset_japanese_jisx0212 =
3178 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3179 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3180 build_string ("JISX0212"),
3181 build_string ("JISX0212 (Japanese)"),
3182 build_string ("JISX0212 Japanese Supplement"),
3183 build_string ("jisx0212"),
3186 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3187 staticpro (&Vcharset_chinese_cns11643_1);
3188 Vcharset_chinese_cns11643_1 =
3189 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3190 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3191 build_string ("CNS11643-1"),
3192 build_string ("CNS11643-1 (Chinese traditional)"),
3194 ("CNS 11643 Plane 1 Chinese traditional"),
3195 build_string (CHINESE_CNS_PLANE_RE("1")),
3197 staticpro (&Vcharset_chinese_cns11643_2);
3198 Vcharset_chinese_cns11643_2 =
3199 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3200 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3201 build_string ("CNS11643-2"),
3202 build_string ("CNS11643-2 (Chinese traditional)"),
3204 ("CNS 11643 Plane 2 Chinese traditional"),
3205 build_string (CHINESE_CNS_PLANE_RE("2")),
3208 staticpro (&Vcharset_latin_tcvn5712);
3209 Vcharset_latin_tcvn5712 =
3210 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3211 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3212 build_string ("TCVN 5712"),
3213 build_string ("TCVN 5712 (VSCII-2)"),
3214 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3215 build_string ("tcvn5712-1"),
3217 staticpro (&Vcharset_latin_viscii_lower);
3218 Vcharset_latin_viscii_lower =
3219 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3220 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3221 build_string ("VISCII lower"),
3222 build_string ("VISCII lower (Vietnamese)"),
3223 build_string ("VISCII lower (Vietnamese)"),
3224 build_string ("MULEVISCII-LOWER"),
3226 staticpro (&Vcharset_latin_viscii_upper);
3227 Vcharset_latin_viscii_upper =
3228 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3229 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3230 build_string ("VISCII upper"),
3231 build_string ("VISCII upper (Vietnamese)"),
3232 build_string ("VISCII upper (Vietnamese)"),
3233 build_string ("MULEVISCII-UPPER"),
3235 staticpro (&Vcharset_latin_viscii);
3236 Vcharset_latin_viscii =
3237 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3238 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3239 build_string ("VISCII"),
3240 build_string ("VISCII 1.1 (Vietnamese)"),
3241 build_string ("VISCII 1.1 (Vietnamese)"),
3242 build_string ("VISCII1\\.1"),
3244 staticpro (&Vcharset_ideograph_daikanwa);
3245 Vcharset_ideograph_daikanwa =
3246 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3247 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3248 build_string ("Daikanwa"),
3249 build_string ("Morohashi's Daikanwa"),
3250 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3251 build_string ("Daikanwa"),
3252 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3253 staticpro (&Vcharset_mojikyo);
3255 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3256 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3257 build_string ("Mojikyo"),
3258 build_string ("Mojikyo"),
3259 build_string ("Konjaku-Mojikyo"),
3261 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3262 staticpro (&Vcharset_mojikyo_pj_1);
3263 Vcharset_mojikyo_pj_1 =
3264 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3265 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3266 build_string ("Mojikyo-PJ-1"),
3267 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3269 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3270 build_string ("jisx0208\\.Mojikyo-1$"),
3272 staticpro (&Vcharset_mojikyo_pj_2);
3273 Vcharset_mojikyo_pj_2 =
3274 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3275 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3276 build_string ("Mojikyo-PJ-2"),
3277 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3279 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3280 build_string ("jisx0208\\.Mojikyo-2$"),
3282 staticpro (&Vcharset_mojikyo_pj_3);
3283 Vcharset_mojikyo_pj_3 =
3284 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3285 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3286 build_string ("Mojikyo-PJ-3"),
3287 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3289 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3290 build_string ("jisx0208\\.Mojikyo-3$"),
3292 staticpro (&Vcharset_mojikyo_pj_4);
3293 Vcharset_mojikyo_pj_4 =
3294 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3295 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3296 build_string ("Mojikyo-PJ-4"),
3297 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3299 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3300 build_string ("jisx0208\\.Mojikyo-4$"),
3302 staticpro (&Vcharset_mojikyo_pj_5);
3303 Vcharset_mojikyo_pj_5 =
3304 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3305 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3306 build_string ("Mojikyo-PJ-5"),
3307 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3309 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3310 build_string ("jisx0208\\.Mojikyo-5$"),
3312 staticpro (&Vcharset_mojikyo_pj_6);
3313 Vcharset_mojikyo_pj_6 =
3314 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3315 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3316 build_string ("Mojikyo-PJ-6"),
3317 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3319 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3320 build_string ("jisx0208\\.Mojikyo-6$"),
3322 staticpro (&Vcharset_mojikyo_pj_7);
3323 Vcharset_mojikyo_pj_7 =
3324 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3325 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3326 build_string ("Mojikyo-PJ-7"),
3327 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3329 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3330 build_string ("jisx0208\\.Mojikyo-7$"),
3332 staticpro (&Vcharset_mojikyo_pj_8);
3333 Vcharset_mojikyo_pj_8 =
3334 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3335 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3336 build_string ("Mojikyo-PJ-8"),
3337 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3339 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3340 build_string ("jisx0208\\.Mojikyo-8$"),
3342 staticpro (&Vcharset_mojikyo_pj_9);
3343 Vcharset_mojikyo_pj_9 =
3344 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3345 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3346 build_string ("Mojikyo-PJ-9"),
3347 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3349 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3350 build_string ("jisx0208\\.Mojikyo-9$"),
3352 staticpro (&Vcharset_mojikyo_pj_10);
3353 Vcharset_mojikyo_pj_10 =
3354 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3355 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3356 build_string ("Mojikyo-PJ-10"),
3357 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3359 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3360 build_string ("jisx0208\\.Mojikyo-10$"),
3362 staticpro (&Vcharset_mojikyo_pj_11);
3363 Vcharset_mojikyo_pj_11 =
3364 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3365 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3366 build_string ("Mojikyo-PJ-11"),
3367 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3369 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3370 build_string ("jisx0208\\.Mojikyo-11$"),
3372 staticpro (&Vcharset_mojikyo_pj_12);
3373 Vcharset_mojikyo_pj_12 =
3374 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3375 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3376 build_string ("Mojikyo-PJ-12"),
3377 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3379 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3380 build_string ("jisx0208\\.Mojikyo-12$"),
3382 staticpro (&Vcharset_mojikyo_pj_13);
3383 Vcharset_mojikyo_pj_13 =
3384 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3385 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3386 build_string ("Mojikyo-PJ-13"),
3387 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3389 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3390 build_string ("jisx0208\\.Mojikyo-13$"),
3392 staticpro (&Vcharset_mojikyo_pj_14);
3393 Vcharset_mojikyo_pj_14 =
3394 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3395 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3396 build_string ("Mojikyo-PJ-14"),
3397 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3399 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3400 build_string ("jisx0208\\.Mojikyo-14$"),
3402 staticpro (&Vcharset_mojikyo_pj_15);
3403 Vcharset_mojikyo_pj_15 =
3404 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3405 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3406 build_string ("Mojikyo-PJ-15"),
3407 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3409 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3410 build_string ("jisx0208\\.Mojikyo-15$"),
3412 staticpro (&Vcharset_mojikyo_pj_16);
3413 Vcharset_mojikyo_pj_16 =
3414 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3415 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3416 build_string ("Mojikyo-PJ-16"),
3417 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3419 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3420 build_string ("jisx0208\\.Mojikyo-16$"),
3422 staticpro (&Vcharset_mojikyo_pj_17);
3423 Vcharset_mojikyo_pj_17 =
3424 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3425 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3426 build_string ("Mojikyo-PJ-17"),
3427 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3429 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3430 build_string ("jisx0208\\.Mojikyo-17$"),
3432 staticpro (&Vcharset_mojikyo_pj_18);
3433 Vcharset_mojikyo_pj_18 =
3434 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3435 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3436 build_string ("Mojikyo-PJ-18"),
3437 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3439 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3440 build_string ("jisx0208\\.Mojikyo-18$"),
3442 staticpro (&Vcharset_mojikyo_pj_19);
3443 Vcharset_mojikyo_pj_19 =
3444 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3445 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3446 build_string ("Mojikyo-PJ-19"),
3447 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3449 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3450 build_string ("jisx0208\\.Mojikyo-19$"),
3452 staticpro (&Vcharset_mojikyo_pj_20);
3453 Vcharset_mojikyo_pj_20 =
3454 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3455 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3456 build_string ("Mojikyo-PJ-20"),
3457 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3459 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3460 build_string ("jisx0208\\.Mojikyo-20$"),
3462 staticpro (&Vcharset_mojikyo_pj_21);
3463 Vcharset_mojikyo_pj_21 =
3464 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3465 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3466 build_string ("Mojikyo-PJ-21"),
3467 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3469 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3470 build_string ("jisx0208\\.Mojikyo-21$"),
3472 staticpro (&Vcharset_ethiopic_ucs);
3473 Vcharset_ethiopic_ucs =
3474 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3475 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3476 build_string ("Ethiopic (UCS)"),
3477 build_string ("Ethiopic (UCS)"),
3478 build_string ("Ethiopic of UCS"),
3479 build_string ("Ethiopic-Unicode"),
3480 Qnil, 0x1200, 0x137F, 0x1200, 0);
3482 staticpro (&Vcharset_chinese_big5_1);
3483 Vcharset_chinese_big5_1 =
3484 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3485 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3486 build_string ("Big5"),
3487 build_string ("Big5 (Level-1)"),
3489 ("Big5 Level-1 Chinese traditional"),
3490 build_string ("big5"),
3492 staticpro (&Vcharset_chinese_big5_2);
3493 Vcharset_chinese_big5_2 =
3494 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3495 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3496 build_string ("Big5"),
3497 build_string ("Big5 (Level-2)"),
3499 ("Big5 Level-2 Chinese traditional"),
3500 build_string ("big5"),
3503 #ifdef ENABLE_COMPOSITE_CHARS
3504 /* #### For simplicity, we put composite chars into a 96x96 charset.
3505 This is going to lead to problems because you can run out of
3506 room, esp. as we don't yet recycle numbers. */
3507 staticpro (&Vcharset_composite);
3508 Vcharset_composite =
3509 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3510 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3511 build_string ("Composite"),
3512 build_string ("Composite characters"),
3513 build_string ("Composite characters"),
3516 /* #### not dumped properly */
3517 composite_char_row_next = 32;
3518 composite_char_col_next = 32;
3520 Vcomposite_char_string2char_hash_table =
3521 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3522 Vcomposite_char_char2string_hash_table =
3523 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3524 staticpro (&Vcomposite_char_string2char_hash_table);
3525 staticpro (&Vcomposite_char_char2string_hash_table);
3526 #endif /* ENABLE_COMPOSITE_CHARS */