1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
/* Hash tables for composite chars.  One maps string representing
   composed chars to their equivalent chars; one goes the
   other way. */
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
163 mark_byte_table (Lisp_Object obj)
165 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
176 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
179 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (BYTE_TABLE_P (cte1->property[i]))
185 if (BYTE_TABLE_P (cte2->property[i]))
187 if (!byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
201 byte_table_hash (Lisp_Object obj, int depth)
203 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
208 static const struct lrecord_description byte_table_description[] = {
209 { XD_LISP_OBJECT, offsetof(Lisp_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
215 internal_object_printer,
218 byte_table_description,
222 make_byte_table (Lisp_Object initval)
227 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
229 for (i = 0; i < 256; i++)
230 cte->property[i] = initval;
232 XSETBYTE_TABLE (obj, cte);
237 copy_byte_table (Lisp_Object entry)
239 Lisp_Byte_Table *cte = XBYTE_TABLE (entry);
242 Lisp_Byte_Table *ctenew
243 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_byte_table (new);
251 ctenew->property[i] = new;
254 XSETBYTE_TABLE (obj, ctenew);
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_code_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
331 = XBYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (BYTE_TABLE_P (ret))
335 cpt = XBYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (BYTE_TABLE_P (ret))
341 cpt = XBYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (BYTE_TABLE_P (ret))
347 cpt = XBYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
354 void put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table);
356 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
358 unsigned int code = ch;
359 Lisp_Byte_Table* cpt1
360 = XBYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
361 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
363 if (BYTE_TABLE_P (ret))
365 Lisp_Byte_Table* cpt2 = XBYTE_TABLE (ret);
367 ret = cpt2->property[(unsigned char)(code >> 16)];
368 if (BYTE_TABLE_P (ret))
370 Lisp_Byte_Table* cpt3 = XBYTE_TABLE (ret);
372 ret = cpt3->property[(unsigned char)(code >> 8)];
373 if (BYTE_TABLE_P (ret))
375 Lisp_Byte_Table* cpt4 = XBYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_byte_table (ret);
383 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_byte_table (ret);
390 Lisp_Object cpt4 = make_byte_table (ret);
392 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_byte_table (ret);
401 Lisp_Object cpt3 = make_byte_table (ret);
402 Lisp_Object cpt4 = make_byte_table (ret);
404 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XBYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
435 Emchar to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
437 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
443 else if (EQ (v, Qcompat))
445 else if (EQ (v, Qisolated))
447 else if (EQ (v, Qinitial))
449 else if (EQ (v, Qmedial))
451 else if (EQ (v, Qfinal))
453 else if (EQ (v, Qvertical))
455 else if (EQ (v, QnoBreak))
457 else if (EQ (v, Qfraction))
459 else if (EQ (v, Qsuper))
461 else if (EQ (v, Qsub))
463 else if (EQ (v, Qcircle))
465 else if (EQ (v, Qsquare))
467 else if (EQ (v, Qwide))
469 else if (EQ (v, Qnarrow))
471 else if (EQ (v, Qsmall))
473 else if (EQ (v, Qfont))
476 signal_simple_error (err_msg, err_arg);
479 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
480 Return character corresponding with list.
484 Lisp_Object table = Vcharacter_composition_table;
485 Lisp_Object rest = list;
489 Lisp_Object v = Fcar (rest);
491 Emchar c = to_char_code (v, "Invalid value for composition", list);
493 ret = get_char_code_table (c, table);
498 if (!CHAR_CODE_TABLE_P (ret))
503 else if (!CONSP (rest))
505 else if (CHAR_CODE_TABLE_P (ret))
508 signal_simple_error ("Invalid table is found with", list);
510 signal_simple_error ("Invalid value for composition", list);
513 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
514 Return variants of CHARACTER.
518 CHECK_CHAR (character);
519 return Fcopy_list (get_char_code_table (XCHAR (character),
520 Vcharacter_variant_table));
523 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
524 Return the alist of attributes of CHARACTER.
528 CHECK_CHAR (character);
529 return Fcopy_alist (get_char_code_table (XCHAR (character),
530 Vcharacter_attribute_table));
533 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
534 Return the value of CHARACTER's ATTRIBUTE.
536 (character, attribute))
541 CHECK_CHAR (character);
542 ret = get_char_code_table (XCHAR (character),
543 Vcharacter_attribute_table);
547 if (!NILP (ccs = Ffind_charset (attribute)))
550 return Fcdr (Fassq (attribute, ret));
553 Lisp_Object put_char_attribute (Lisp_Object character,
554 Lisp_Object attribute, Lisp_Object value);
556 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
559 Emchar char_code = XCHAR (character);
561 = get_char_code_table (char_code, Vcharacter_attribute_table);
564 cell = Fassq (attribute, ret);
568 ret = Fcons (Fcons (attribute, value), ret);
570 else if (!EQ (Fcdr (cell), value))
572 Fsetcdr (cell, value);
574 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
578 Lisp_Object remove_char_attribute (Lisp_Object character,
579 Lisp_Object attribute);
581 remove_char_attribute (Lisp_Object character, Lisp_Object attribute)
583 Emchar char_code = XCHAR (character);
585 = get_char_code_table (char_code, Vcharacter_attribute_table);
587 if (EQ (attribute, Fcar (Fcar (alist))))
589 alist = Fcdr (alist);
593 Lisp_Object pr = alist;
594 Lisp_Object r = Fcdr (alist);
598 if (EQ (attribute, Fcar (Fcar (r))))
600 XCDR (pr) = Fcdr (r);
607 put_char_code_table (char_code, alist, Vcharacter_attribute_table);
613 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
614 Store CHARACTER's ATTRIBUTE with VALUE.
616 (character, attribute, value))
620 CHECK_CHAR (character);
621 ccs = Ffind_charset (attribute);
624 if (!EQ (XCHARSET_NAME (ccs), Qucs)
625 || (XCHAR (character) != XINT (value)))
627 Lisp_Object cpos, rest;
628 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
635 /* ad-hoc method for `ascii' */
636 if ((XCHARSET_CHARS (ccs) == 94) &&
637 (XCHARSET_BYTE_OFFSET (ccs) != 33))
638 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
640 ccs_len = XCHARSET_CHARS (ccs);
644 Lisp_Object ret = Fcar (value);
647 signal_simple_error ("Invalid value for coded-charset", value);
648 code_point = XINT (ret);
649 if (XCHARSET_GRAPHIC (ccs) == 1)
657 signal_simple_error ("Invalid value for coded-charset",
661 signal_simple_error ("Invalid value for coded-charset",
664 if (XCHARSET_GRAPHIC (ccs) == 1)
666 code_point = (code_point << 8) | j;
669 value = make_int (code_point);
671 else if (INTP (value))
673 if (XCHARSET_GRAPHIC (ccs) == 1)
674 value = make_int (XINT (value) & 0x7F7F7F7F);
677 signal_simple_error ("Invalid value for coded-charset", value);
680 cpos = Fget_char_attribute (character, attribute);
685 dim = XCHARSET_DIMENSION (ccs);
686 code_point = XINT (cpos);
690 i = ((code_point >> (8 * dim)) & 255)
691 - XCHARSET_BYTE_OFFSET (ccs);
692 nv = XVECTOR_DATA(v)[i];
698 XVECTOR_DATA(v)[i] = Qnil;
699 v = XCHARSET_DECODING_TABLE (ccs);
704 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
707 dim = XCHARSET_DIMENSION (ccs);
708 code_point = XINT (value);
713 i = ((code_point >> (8 * dim)) & 255)
714 - XCHARSET_BYTE_OFFSET (ccs);
715 nv = XVECTOR_DATA(v)[i];
719 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
725 XVECTOR_DATA(v)[i] = character;
730 else if (EQ (attribute, Q_decomposition))
733 signal_simple_error ("Invalid value for ->decomposition",
736 if (CONSP (Fcdr (value)))
738 Lisp_Object rest = value;
739 Lisp_Object table = Vcharacter_composition_table;
743 Lisp_Object v = Fcar (rest);
747 "Invalid value for ->decomposition", value);
752 put_char_code_table (c, character, table);
757 ntable = get_char_code_table (c, table);
758 if (!CHAR_CODE_TABLE_P (ntable))
760 ntable = make_char_code_table (Qnil);
761 put_char_code_table (c, ntable, table);
769 Lisp_Object v = Fcar (value);
775 = get_char_code_table (c, Vcharacter_variant_table);
777 if (NILP (Fmemq (v, ret)))
779 put_char_code_table (c, Fcons (character, ret),
780 Vcharacter_variant_table);
785 else if (EQ (attribute, Q_ucs))
791 signal_simple_error ("Invalid value for ->ucs", value);
795 ret = get_char_code_table (c, Vcharacter_variant_table);
796 if (NILP (Fmemq (character, ret)))
798 put_char_code_table (c, Fcons (character, ret),
799 Vcharacter_variant_table);
802 return put_char_attribute (character, attribute, value);
805 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
806 Remove CHARACTER's ATTRIBUTE.
808 (character, attribute))
812 CHECK_CHAR (character);
813 ccs = Ffind_charset (attribute);
817 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
824 /* ad-hoc method for `ascii' */
825 if ((XCHARSET_CHARS (ccs) == 94) &&
826 (XCHARSET_BYTE_OFFSET (ccs) != 33))
827 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
829 ccs_len = XCHARSET_CHARS (ccs);
832 cpos = Fget_char_attribute (character, attribute);
837 dim = XCHARSET_DIMENSION (ccs);
838 code_point = XINT (cpos);
842 i = ((code_point >> (8 * dim)) & 255)
843 - XCHARSET_BYTE_OFFSET (ccs);
844 nv = XVECTOR_DATA(v)[i];
850 XVECTOR_DATA(v)[i] = Qnil;
851 v = XCHARSET_DECODING_TABLE (ccs);
855 return remove_char_attribute (character, attribute);
858 EXFUN (Fmake_char, 3);
859 EXFUN (Fdecode_char, 2);
861 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
862 Store character's ATTRIBUTES.
866 Lisp_Object rest = attributes;
867 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
868 Lisp_Object character;
874 Lisp_Object cell = Fcar (rest);
878 signal_simple_error ("Invalid argument", attributes);
879 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
880 && ((XCHARSET_FINAL (ccs) != 0) ||
881 (XCHARSET_UCS_MAX (ccs) > 0)) )
885 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
887 character = Fdecode_char (ccs, cell);
888 goto setup_attributes;
892 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
895 signal_simple_error ("Invalid argument", attributes);
897 character = make_char (XINT (code) + 0x100000);
898 goto setup_attributes;
902 else if (!INTP (code))
903 signal_simple_error ("Invalid argument", attributes);
905 character = make_char (XINT (code));
911 Lisp_Object cell = Fcar (rest);
914 signal_simple_error ("Invalid argument", attributes);
915 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
919 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
922 Lisp_Object Vutf_2000_version;
926 int leading_code_private_11;
929 Lisp_Object Qcharsetp;
931 /* Qdoc_string, Qdimension, Qchars defined in general.c */
932 Lisp_Object Qregistry, Qfinal, Qgraphic;
933 Lisp_Object Qdirection;
934 Lisp_Object Qreverse_direction_charset;
935 Lisp_Object Qleading_byte;
936 Lisp_Object Qshort_name, Qlong_name;
952 Qjapanese_jisx0208_1978,
955 Qjapanese_jisx0208_1990,
966 Qvietnamese_viscii_lower,
967 Qvietnamese_viscii_upper,
997 Lisp_Object Ql2r, Qr2l;
999 Lisp_Object Vcharset_hash_table;
1001 /* Composite characters are characters constructed by overstriking two
1002 or more regular characters.
1004 1) The old Mule implementation involves storing composite characters
1005 in a buffer as a tag followed by all of the actual characters
1006 used to make up the composite character. I think this is a bad
1007 idea; it greatly complicates code that wants to handle strings
1008 one character at a time because it has to deal with the possibility
1009 of great big ungainly characters. It's much more reasonable to
1010 simply store an index into a table of composite characters.
1012 2) The current implementation only allows for 16,384 separate
1013 composite characters over the lifetime of the XEmacs process.
1014 This could become a potential problem if the user
1015 edited lots of different files that use composite characters.
1016 Due to FSF bogosity, increasing the number of allowable
1017 composite characters under Mule would decrease the number
1018 of possible faces that can exist. Mule already has shrunk
1019 this to 2048, and further shrinkage would become uncomfortable.
1020 No such problems exist in XEmacs.
1022 Composite characters could be represented as 0x80 C1 C2 C3,
1023 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1024 for slightly under 2^20 (one million) composite characters
1025 over the XEmacs process lifetime, and you only need to
1026 increase the size of a Mule character from 19 to 21 bits.
1027 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1028 85 million (slightly over 2^26) composite characters. */
1031 /************************************************************************/
1032 /* Basic Emchar functions */
1033 /************************************************************************/
/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
   string in STR.  Returns the number of bytes stored.
   Do not call this directly.  Use the macro set_charptr_emchar() instead.
 */
1041 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1047 Lisp_Object charset;
1056 else if ( c <= 0x7ff )
1058 *p++ = (c >> 6) | 0xc0;
1059 *p++ = (c & 0x3f) | 0x80;
1061 else if ( c <= 0xffff )
1063 *p++ = (c >> 12) | 0xe0;
1064 *p++ = ((c >> 6) & 0x3f) | 0x80;
1065 *p++ = (c & 0x3f) | 0x80;
1067 else if ( c <= 0x1fffff )
1069 *p++ = (c >> 18) | 0xf0;
1070 *p++ = ((c >> 12) & 0x3f) | 0x80;
1071 *p++ = ((c >> 6) & 0x3f) | 0x80;
1072 *p++ = (c & 0x3f) | 0x80;
1074 else if ( c <= 0x3ffffff )
1076 *p++ = (c >> 24) | 0xf8;
1077 *p++ = ((c >> 18) & 0x3f) | 0x80;
1078 *p++ = ((c >> 12) & 0x3f) | 0x80;
1079 *p++ = ((c >> 6) & 0x3f) | 0x80;
1080 *p++ = (c & 0x3f) | 0x80;
1084 *p++ = (c >> 30) | 0xfc;
1085 *p++ = ((c >> 24) & 0x3f) | 0x80;
1086 *p++ = ((c >> 18) & 0x3f) | 0x80;
1087 *p++ = ((c >> 12) & 0x3f) | 0x80;
1088 *p++ = ((c >> 6) & 0x3f) | 0x80;
1089 *p++ = (c & 0x3f) | 0x80;
1092 BREAKUP_CHAR (c, charset, c1, c2);
1093 lb = CHAR_LEADING_BYTE (c);
1094 if (LEADING_BYTE_PRIVATE_P (lb))
1095 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1097 if (EQ (charset, Vcharset_control_1))
1106 /* Return the first character from a Mule-encoded string in STR,
1107 assuming it's non-ASCII. Do not call this directly.
1108 Use the macro charptr_emchar() instead. */
1111 non_ascii_charptr_emchar (const Bufbyte *str)
1124 else if ( b >= 0xf8 )
1129 else if ( b >= 0xf0 )
1134 else if ( b >= 0xe0 )
1139 else if ( b >= 0xc0 )
1149 for( ; len > 0; len-- )
1152 ch = ( ch << 6 ) | ( b & 0x3f );
1156 Bufbyte i0 = *str, i1, i2 = 0;
1157 Lisp_Object charset;
1159 if (i0 == LEADING_BYTE_CONTROL_1)
1160 return (Emchar) (*++str - 0x20);
1162 if (LEADING_BYTE_PREFIX_P (i0))
1167 charset = CHARSET_BY_LEADING_BYTE (i0);
1168 if (XCHARSET_DIMENSION (charset) == 2)
1171 return MAKE_CHAR (charset, i1, i2);
1175 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1176 Do not call this directly. Use the macro valid_char_p() instead. */
1180 non_ascii_valid_char_p (Emchar ch)
1184 /* Must have only lowest 19 bits set */
1188 f1 = CHAR_FIELD1 (ch);
1189 f2 = CHAR_FIELD2 (ch);
1190 f3 = CHAR_FIELD3 (ch);
1194 Lisp_Object charset;
1196 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1197 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1198 f2 > MAX_CHAR_FIELD2_PRIVATE)
1203 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1204 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1208 NOTE: This takes advantage of the fact that
1209 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1210 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1212 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1213 if (EQ (charset, Qnil))
1215 return (XCHARSET_CHARS (charset) == 96);
1219 Lisp_Object charset;
1221 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1222 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1223 f1 > MAX_CHAR_FIELD1_PRIVATE)
1225 if (f2 < 0x20 || f3 < 0x20)
1228 #ifdef ENABLE_COMPOSITE_CHARS
1229 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1231 if (UNBOUNDP (Fgethash (make_int (ch),
1232 Vcomposite_char_char2string_hash_table,
1237 #endif /* ENABLE_COMPOSITE_CHARS */
1239 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1240 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1243 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1245 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1248 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1250 if (EQ (charset, Qnil))
1252 return (XCHARSET_CHARS (charset) == 96);
1258 /************************************************************************/
1259 /* Basic string functions */
1260 /************************************************************************/
1262 /* Copy the character pointed to by PTR into STR, assuming it's
1263 non-ASCII. Do not call this directly. Use the macro
1264 charptr_copy_char() instead. */
1267 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1269 Bufbyte *strptr = str;
1271 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1273 /* Notice fallthrough. */
1275 case 6: *++strptr = *ptr++;
1276 case 5: *++strptr = *ptr++;
1278 case 4: *++strptr = *ptr++;
1279 case 3: *++strptr = *ptr++;
1280 case 2: *++strptr = *ptr;
1285 return strptr + 1 - str;
1289 /************************************************************************/
1290 /* streams of Emchars */
1291 /************************************************************************/
1293 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1294 The functions below are not meant to be called directly; use
1295 the macros in insdel.h. */
1298 Lstream_get_emchar_1 (Lstream *stream, int ch)
1300 Bufbyte str[MAX_EMCHAR_LEN];
1301 Bufbyte *strptr = str;
1303 str[0] = (Bufbyte) ch;
1304 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1306 /* Notice fallthrough. */
1309 ch = Lstream_getc (stream);
1311 *++strptr = (Bufbyte) ch;
1313 ch = Lstream_getc (stream);
1315 *++strptr = (Bufbyte) ch;
1318 ch = Lstream_getc (stream);
1320 *++strptr = (Bufbyte) ch;
1322 ch = Lstream_getc (stream);
1324 *++strptr = (Bufbyte) ch;
1326 ch = Lstream_getc (stream);
1328 *++strptr = (Bufbyte) ch;
1333 return charptr_emchar (str);
1337 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1339 Bufbyte str[MAX_EMCHAR_LEN];
1340 Bytecount len = set_charptr_emchar (str, ch);
1341 return Lstream_write (stream, str, len);
1345 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1347 Bufbyte str[MAX_EMCHAR_LEN];
1348 Bytecount len = set_charptr_emchar (str, ch);
1349 Lstream_unread (stream, str, len);
1353 /************************************************************************/
1354 /* charset object */
1355 /************************************************************************/
1358 mark_charset (Lisp_Object obj)
1360 Lisp_Charset *cs = XCHARSET (obj);
1362 mark_object (cs->short_name);
1363 mark_object (cs->long_name);
1364 mark_object (cs->doc_string);
1365 mark_object (cs->registry);
1366 mark_object (cs->ccl_program);
1368 mark_object (cs->decoding_table);
1374 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1376 Lisp_Charset *cs = XCHARSET (obj);
1380 error ("printing unreadable object #<charset %s 0x%x>",
1381 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1384 write_c_string ("#<charset ", printcharfun);
1385 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1386 write_c_string (" ", printcharfun);
1387 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1388 write_c_string (" ", printcharfun);
1389 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1390 write_c_string (" ", printcharfun);
1391 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1392 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1394 CHARSET_DIMENSION (cs),
1395 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1396 CHARSET_COLUMNS (cs),
1397 CHARSET_GRAPHIC (cs),
1398 CHARSET_FINAL (cs));
1399 write_c_string (buf, printcharfun);
1400 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1401 sprintf (buf, " 0x%x>", cs->header.uid);
1402 write_c_string (buf, printcharfun);
1405 static const struct lrecord_description charset_description[] = {
1406 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1407 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1408 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1409 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1410 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1411 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1412 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1414 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
1419 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1420 mark_charset, print_charset, 0, 0, 0,
1421 charset_description,
1423 /* Make a new charset. */
1426 make_charset (Charset_ID id, Lisp_Object name,
1427 unsigned short chars, unsigned char dimension,
1428 unsigned char columns, unsigned char graphic,
1429 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1430 Lisp_Object long_name, Lisp_Object doc,
1432 Lisp_Object decoding_table,
1433 Emchar ucs_min, Emchar ucs_max,
1434 Emchar code_offset, unsigned char byte_offset)
1436 unsigned char type = 0;
1438 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1442 XSETCHARSET (obj, cs);
1444 CHARSET_ID (cs) = id;
1445 CHARSET_NAME (cs) = name;
1446 CHARSET_SHORT_NAME (cs) = short_name;
1447 CHARSET_LONG_NAME (cs) = long_name;
1448 CHARSET_CHARS (cs) = chars;
1449 CHARSET_DIMENSION (cs) = dimension;
1450 CHARSET_DIRECTION (cs) = direction;
1451 CHARSET_COLUMNS (cs) = columns;
1452 CHARSET_GRAPHIC (cs) = graphic;
1453 CHARSET_FINAL (cs) = final;
1454 CHARSET_DOC_STRING (cs) = doc;
1455 CHARSET_REGISTRY (cs) = reg;
1456 CHARSET_CCL_PROGRAM (cs) = Qnil;
1457 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1459 CHARSET_DECODING_TABLE(cs) = Qnil;
1460 CHARSET_UCS_MIN(cs) = ucs_min;
1461 CHARSET_UCS_MAX(cs) = ucs_max;
1462 CHARSET_CODE_OFFSET(cs) = code_offset;
1463 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1466 switch (CHARSET_CHARS (cs))
1469 switch (CHARSET_DIMENSION (cs))
1472 type = CHARSET_TYPE_94;
1475 type = CHARSET_TYPE_94X94;
1480 switch (CHARSET_DIMENSION (cs))
1483 type = CHARSET_TYPE_96;
1486 type = CHARSET_TYPE_96X96;
1492 switch (CHARSET_DIMENSION (cs))
1495 type = CHARSET_TYPE_128;
1498 type = CHARSET_TYPE_128X128;
1503 switch (CHARSET_DIMENSION (cs))
1506 type = CHARSET_TYPE_256;
1509 type = CHARSET_TYPE_256X256;
1516 CHARSET_TYPE (cs) = type;
1520 if (id == LEADING_BYTE_ASCII)
1521 CHARSET_REP_BYTES (cs) = 1;
1523 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1525 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
      /* some charsets do not have final characters.  This includes
         ASCII, Control-1, Composite, and the two faux private
         charsets. */
1534 if (code_offset == 0)
1536 assert (NILP (chlook->charset_by_attributes[type][final]));
1537 chlook->charset_by_attributes[type][final] = obj;
1540 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1541 chlook->charset_by_attributes[type][final][direction] = obj;
1545 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1546 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1548 /* Some charsets are "faux" and don't have names or really exist at
1549 all except in the leading-byte table. */
1551 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out a leading byte for a new charset of the given DIMENSION.
   The visible code takes the value from one of three post-incremented
   counters kept in `chlook' (a general one, a 1-byte one and a 2-byte
   one); each counter is first compared with its limit
   (MAX_LEADING_BYTE_PRIVATE, MAX_LEADING_BYTE_PRIVATE_1,
   MAX_LEADING_BYTE_PRIVATE_2).
   NOTE(review): the tests that select one of the three counters, the
   action taken when a limit is exceeded, and the return statement are
   on lines not shown here -- presumably build-option and dimension
   checks; confirm against the full function.
   The error "No more character sets free for this dimension" carries
   DIMENSION as a Lisp integer. */
1556 get_unallocated_leading_byte (int dimension)
1561 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1564 lb = chlook->next_allocated_leading_byte++;
1568 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1571 lb = chlook->next_allocated_1_byte_leading_byte++;
1575 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1578 lb = chlook->next_allocated_2_byte_leading_byte++;
1584 ("No more character sets free for this dimension",
1585 make_int (dimension));
/* Compute the character for position (C1, C2) of CHARSET by arithmetic.

   Two schemes are visible:

   - XCHARSET_UCS_MAX (charset) is non-zero: the position is turned into
     a linear index -- c1 - BYTE_OFFSET for dimension 1, otherwise
     (c1 - BYTE_OFFSET) * CHARS + (c2 - BYTE_OFFSET) -- and then
     index - CODE_OFFSET + UCS_MIN is the candidate code.  A candidate
     outside [UCS_MIN, UCS_MAX] signals "Arguments makes invalid
     character".

   - otherwise the charset's final byte selects a slot:
     (FINAL - '0') slots of 94 or 96 code points for dimension 1, of
     94 * 94 or 96 * 96 code points for dimension 2, with C1/C2 rebased
     by 33 (94-sets) or 32 (96-sets).

   NOTE(review): the base constants of the one-dimensional formulas,
   the `case' labels and the results for other values of CHARS are on
   lines not shown here. */
1592 make_builtin_char (Lisp_Object charset, int c1, int c2)
/* Scheme 1: the charset is mapped onto an explicit UCS range. */
1594 if (XCHARSET_UCS_MAX (charset))
1597 = (XCHARSET_DIMENSION (charset) == 1
1599 c1 - XCHARSET_BYTE_OFFSET (charset)
1601 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1602 + c2 - XCHARSET_BYTE_OFFSET (charset))
1603 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1604 if ((code < XCHARSET_UCS_MIN (charset))
1605 || (XCHARSET_UCS_MAX (charset) < code))
1606 signal_simple_error ("Arguments makes invalid character",
/* Scheme 2, one-dimensional: slot chosen by the final byte. */
1610 else if (XCHARSET_DIMENSION (charset) == 1)
1612 switch (XCHARSET_CHARS (charset))
1616 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1619 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1626 switch (XCHARSET_CHARS (charset))
1629 return MIN_CHAR_94x94
1630 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1631 + (c1 - 33) * 94 + (c2 - 33);
1633 return MIN_CHAR_96x96
1634 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1635 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of character CH in CHARSET by range
   arithmetic.

   - CH inside [UCS_MIN, UCS_MAX] of the charset:
     d = ch - UCS_MIN + CODE_OFFSET.  For dimension 1 the result is
     d + BYTE_OFFSET.  For dimensions 2, 3 and 4, d is split into
     base-CHARS digits; BYTE_OFFSET is added to every digit and the
     digits are packed one per byte, most significant digit in the
     highest byte.  The final branch is taken for every dimension that
     is not 1, 2 or 3 (the commented-out test documents that 4 is
     expected).
     NOTE(review): the result for CHARS == 256 is on a line not shown.

   - otherwise, only when CODE_OFFSET is 0: CH is compared with the
     slot that the charset's final byte selects inside the
     MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96 areas
     (slot size 94, 96, 94 * 94, 96 * 96).  For the two-dimensional
     cases the visible result packs d / N + bias and d % N + bias into
     two bytes, with bias 33 for N = 94 and 32 for N = 96.
     NOTE(review): the upper-bound half of each slot test, the
     one-dimensional results and the value returned when CH is not in
     the charset are on lines not shown -- confirm against the full
     function. */
1643 range_charset_code_point (Lisp_Object charset, Emchar ch)
1647 if ((XCHARSET_UCS_MIN (charset) <= ch)
1648 && (ch <= XCHARSET_UCS_MAX (charset)))
1650 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1652 if (XCHARSET_CHARS (charset) == 256)
1654 else if (XCHARSET_DIMENSION (charset) == 1)
1655 return d + XCHARSET_BYTE_OFFSET (charset);
1656 else if (XCHARSET_DIMENSION (charset) == 2)
1658 ((d / XCHARSET_CHARS (charset)
1659 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1660 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1661 else if (XCHARSET_DIMENSION (charset) == 3)
1663 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1664 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1665 | ((d / XCHARSET_CHARS (charset)
1666 % XCHARSET_CHARS (charset)
1667 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1668 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1669 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1671 ((d / (XCHARSET_CHARS (charset)
1672 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1673 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1674 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1675 % XCHARSET_CHARS (charset)
1676 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1677 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1678 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1679 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* No UCS range matched: fall back to the slot selected by the final
   byte, which is only attempted for charsets whose code offset is 0. */
1681 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1683 if (XCHARSET_DIMENSION (charset) == 1)
1685 if (XCHARSET_CHARS (charset) == 94)
1687 if (((d = ch - (MIN_CHAR_94
1688 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1692 else if (XCHARSET_CHARS (charset) == 96)
1694 if (((d = ch - (MIN_CHAR_96
1695 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1702 else if (XCHARSET_DIMENSION (charset) == 2)
1704 if (XCHARSET_CHARS (charset) == 94)
1706 if (((d = ch - (MIN_CHAR_94x94
1707 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1710 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1712 else if (XCHARSET_CHARS (charset) == 96)
1714 if (((d = ch - (MIN_CHAR_96x96
1715 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1718 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C, store it through CHARSET
   and return C's code point in that charset.

   The tests form one if / else-if chain, so the first range that
   contains C decides the result:

   - Greek, Cyrillic, Hebrew and Thai ranges: offset of C from the
     start of the range, plus 0x20.
   - Daikanwa and Mojikyo ranges: offset of C from the start of the
     range.
   - the MIN_CHAR_94 / 96 / 94x94 / 96x96 areas: the slot index
     (offset divided by the slot size) plus '0' is used as a final byte
     to look the charset up with CHARSET_BY_ATTRIBUTES (left-to-right);
     when such a charset exists the position inside the slot is
     returned with a bias of 33 (94-sets) or 32 (96-sets), two bytes
     packed for the two-dimensional sets.  When no charset is
     registered there, *CHARSET becomes Vcharset_ucs.
   - everything else is attributed to Vcharset_ucs_bmp or Vcharset_ucs.

   NOTE(review): the conditions of the control-1 and latin-iso8859-1
   branches and the values returned for the ascii, control-1,
   latin-iso8859-1, ucs-bmp and ucs branches are on lines not shown.
   NOTE(review): the half-width katakana branch returns the value of
   list2 () instead of storing through CHARSET like every other branch;
   from this listing it cannot be told whether that branch is compiled
   at all (it may sit inside a comment or a disabled region whose
   delimiters are not shown) -- confirm before relying on it. */
1728 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1730 if (c <= MAX_CHAR_BASIC_LATIN)
1732 *charset = Vcharset_ascii;
1737 *charset = Vcharset_control_1;
1742 *charset = Vcharset_latin_iso8859_1;
1746 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1748 *charset = Vcharset_greek_iso8859_7;
1749 return c - MIN_CHAR_GREEK + 0x20;
1751 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1753 *charset = Vcharset_cyrillic_iso8859_5;
1754 return c - MIN_CHAR_CYRILLIC + 0x20;
1757 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1759 *charset = Vcharset_hebrew_iso8859_8;
1760 return c - MIN_CHAR_HEBREW + 0x20;
1762 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1764 *charset = Vcharset_thai_tis620;
1765 return c - MIN_CHAR_THAI + 0x20;
1768 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1769 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1771 return list2 (Vcharset_katakana_jisx0201,
1772 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1775 else if (c <= MAX_CHAR_BMP)
1777 *charset = Vcharset_ucs_bmp;
1780 else if (c < MIN_CHAR_DAIKANWA)
1782 *charset = Vcharset_ucs;
1786 else if (c <= MAX_CHAR_DAIKANWA)
1788 *charset = Vcharset_ideograph_daikanwa;
1789 return c - MIN_CHAR_DAIKANWA;
1792 else if (c <= MAX_CHAR_MOJIKYO)
1794 *charset = Vcharset_mojikyo;
1795 return c - MIN_CHAR_MOJIKYO;
1797 else if (c < MIN_CHAR_94)
1799 *charset = Vcharset_ucs;
1802 else if (c <= MAX_CHAR_94)
1804 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1805 ((c - MIN_CHAR_94) / 94) + '0',
1806 CHARSET_LEFT_TO_RIGHT);
1807 if (!NILP (*charset))
1808 return ((c - MIN_CHAR_94) % 94) + 33;
1811 *charset = Vcharset_ucs;
1815 else if (c <= MAX_CHAR_96)
1817 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1818 ((c - MIN_CHAR_96) / 96) + '0',
1819 CHARSET_LEFT_TO_RIGHT);
1820 if (!NILP (*charset))
1821 return ((c - MIN_CHAR_96) % 96) + 32;
1824 *charset = Vcharset_ucs;
1828 else if (c <= MAX_CHAR_94x94)
1831 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1832 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1833 CHARSET_LEFT_TO_RIGHT);
1834 if (!NILP (*charset))
1835 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1836 | (((c - MIN_CHAR_94x94) % 94) + 33);
1839 *charset = Vcharset_ucs;
1843 else if (c <= MAX_CHAR_96x96)
1846 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1847 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1848 CHARSET_LEFT_TO_RIGHT);
1849 if (!NILP (*charset))
1850 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1851 | (((c - MIN_CHAR_96x96) % 96) + 32);
1854 *charset = Vcharset_ucs;
1860 *charset = Vcharset_ucs;
/* Global Lisp object.  Judging by its name it holds the default
   priority list of coded charsets; its readers and writers are outside
   this section -- confirm there. */
1865 Lisp_Object Vdefault_coded_charset_priority_list;
1869 /************************************************************************/
1870 /* Basic charset Lisp functions */
1871 /************************************************************************/
/* Lisp predicate `charsetp': Qt when CHARSETP holds for OBJECT,
   Qnil otherwise. */
1873 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1874 Return non-nil if OBJECT is a charset.
1878 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is handed back
   unchanged; any other argument must pass CHECK_SYMBOL and is then
   looked up in Vcharset_hash_table, with Qnil as the value for a
   missing entry. */
1881 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1882 Retrieve the charset of the given name.
1883 If CHARSET-OR-NAME is a charset object, it is simply returned.
1884 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1885 nil is returned. Otherwise the associated charset object is returned.
1889 if (CHARSETP (charset_or_name))
1890 return charset_or_name;
1892 CHECK_SYMBOL (charset_or_name);
1893 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset'.  Delegates the lookup to Ffind_charset
   and signals "No such charset" with NAME as error data.
   NOTE(review): the test guarding the signal and the return statement
   are on lines not shown; per the doc string the error is raised when
   the lookup finds nothing. */
1896 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1897 Retrieve the charset of the given name.
1898 Same as `find-charset' except an error is signalled if there is no such
1899 charset instead of returning nil.
1903 Lisp_Object charset = Ffind_charset (name);
1906 signal_simple_error ("No such charset", name);
1910 /* We store the charsets in hash tables with the names as the key and the
1911 actual charset object as the value. Occasionally we need to use them
1912 in a list format. These routines provide us with that. */
/* Closure passed as the opaque argument of the hash-table mapping
   callback add_charset_to_list_mapper. */
1913 struct charset_list_closure
/* Address of the Lisp list being accumulated by the callback. */
1915 Lisp_Object *charset_list;
/* Hash-table mapping callback.  CHARSET_LIST_CLOSURE is really a
   struct charset_list_closure; the name of the charset VALUE
   (XCHARSET_NAME) is consed onto the front of the list it points to.
   KEY is not used by the visible code.
   NOTE(review): the return statement is on a line not shown. */
1919 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1920 void *charset_list_closure)
1922 /* This function can GC */
1923 struct charset_list_closure *chcl =
1924 (struct charset_list_closure*) charset_list_closure;
1925 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend: the resulting list is in reverse visiting order. */
1927 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Walks Vcharset_hash_table with
   elisp_maphash, letting add_charset_to_list_mapper push one charset
   name per table entry onto a local list; the list is protected with
   GCPRO1 for the duration of the walk.
   NOTE(review): the mapper records the name of each entry's value, so
   a charset reachable under several keys of the table (for example
   after `define-charset-alias') presumably contributes its name once
   per key -- confirm whether duplicates are intended. */
1931 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1932 Return a list of the names of all defined charsets.
1936 Lisp_Object charset_list = Qnil;
1937 struct gcpro gcpro1;
1938 struct charset_list_closure charset_list_closure;
1940 GCPRO1 (charset_list);
1941 charset_list_closure.charset_list = &charset_list;
1942 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1943 &charset_list_closure);
1946 return charset_list;
/* Lisp primitive `charset-name'.  The argument is resolved with
   Fget_charset and the charset's NAME slot is returned. */
1949 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1950 Return the name of the given charset.
1954 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.

   Flow of the visible code:
   - NAME must be a symbol, DOC-STRING a string or nil, and NAME must
     not already name a charset ("Cannot redefine existing charset").
   - PROPS is walked as a property list.  Every recognised keyword is
     validated ('dimension 1..2, 'chars 94 or 96, 'columns 1 or 2,
     'graphic range, 'direction l2r / r2l, 'final between '0' and '~',
     strings for the names and the registry, a vector for
     'ccl-program).  All keyword tests, including 'short-name and
     'long-name, belong to one if / else-if chain, so that an entry
     already handled as 'short-name is not tested again and cannot
     reach the "Unrecognized property" error.
   - after the loop: a final byte is required, it must not exceed 0x5F
     for dimension 2, and the (type, final) pair must be unused in both
     directions.
   - defaults: empty strings for DOC-STRING and the registry, the
     symbol name of NAME for the short name, DOC-STRING for the long
     name, the dimension for the column count.
   - the charset is created by make_charset with a nil decoding table
     and zero UCS minimum, UCS maximum and code offset; a supplied
     'ccl-program is stored afterwards.

   NOTE(review): several lines of this function (braces, the
   assignments of the validated values, preprocessor conditionals, the
   computation of byte_offset and the return) are not shown in this
   listing. */
1957 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1958 Define a new character set.
1959 This function is for use with Mule support.
1960 NAME is a symbol, the name by which the character set is normally referred.
1961 DOC-STRING is a string describing the character set.
1962 PROPS is a property list, describing the specific nature of the
1963 character set. Recognized properties are:
1965 'short-name Short version of the charset name (ex: Latin-1)
1966 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1967 'registry A regular expression matching the font registry field for
1969 'dimension Number of octets used to index a character in this charset.
1970 Either 1 or 2. Defaults to 1.
1971 'columns Number of columns used to display a character in this charset.
1972 Only used in TTY mode. (Under X, the actual width of a
1973 character can be derived from the font used to display the
1974 characters.) If unspecified, defaults to the dimension
1975 (this is almost always the correct value).
1976 'chars Number of characters in each dimension (94 or 96).
1977 Defaults to 94. Note that if the dimension is 2, the
1978 character set thus described is 94x94 or 96x96.
1979 'final Final byte of ISO 2022 escape sequence. Must be
1980 supplied. Each combination of (DIMENSION, CHARS) defines a
1981 separate namespace for final bytes. Note that ISO
1982 2022 restricts the final byte to the range
1983 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1984 dimension == 2. Note also that final bytes in the range
1985 0x30 - 0x3F are reserved for user-defined (not official)
1987 'graphic 0 (use left half of font on output) or 1 (use right half
1988 of font on output). Defaults to 0. For example, for
1989 a font whose registry is ISO8859-1, the left half
1990 (octets 0x20 - 0x7F) is the `ascii' character set, while
1991 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1992 character set. With 'graphic set to 0, the octets
1993 will have their high bit cleared; with it set to 1,
1994 the octets will have their high bit set.
1995 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1997 'ccl-program A compiled CCL program used to convert a character in
1998 this charset into an index into the font. This is in
1999 addition to the 'graphic property. The CCL program
2000 is passed the octets of the character, with the high
2001 bit cleared and set depending upon whether the value
2002 of the 'graphic property is 0 or 1.
2004 (name, doc_string, props))
2006 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2007 int direction = CHARSET_LEFT_TO_RIGHT;
2009 Lisp_Object registry = Qnil;
2010 Lisp_Object charset;
2011 Lisp_Object rest, keyword, value;
2012 Lisp_Object ccl_program = Qnil;
2013 Lisp_Object short_name = Qnil, long_name = Qnil;
2014 int byte_offset = -1;
2016 CHECK_SYMBOL (name);
2017 if (!NILP (doc_string))
2018 CHECK_STRING (doc_string);
2020 charset = Ffind_charset (name);
2021 if (!NILP (charset))
2022 signal_simple_error ("Cannot redefine existing charset", name);
2024 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2026 if (EQ (keyword, Qshort_name))
2028 CHECK_STRING (value);
2032 else if (EQ (keyword, Qlong_name))
2034 CHECK_STRING (value);
2038 else if (EQ (keyword, Qdimension))
2041 dimension = XINT (value);
2042 if (dimension < 1 || dimension > 2)
2043 signal_simple_error ("Invalid value for 'dimension", value);
2046 else if (EQ (keyword, Qchars))
2049 chars = XINT (value);
2050 if (chars != 94 && chars != 96)
2051 signal_simple_error ("Invalid value for 'chars", value);
2054 else if (EQ (keyword, Qcolumns))
2057 columns = XINT (value);
2058 if (columns != 1 && columns != 2)
2059 signal_simple_error ("Invalid value for 'columns", value);
2062 else if (EQ (keyword, Qgraphic))
2065 graphic = XINT (value);
2067 if (graphic < 0 || graphic > 2)
2069 if (graphic < 0 || graphic > 1)
2071 signal_simple_error ("Invalid value for 'graphic", value);
2074 else if (EQ (keyword, Qregistry))
2076 CHECK_STRING (value);
2080 else if (EQ (keyword, Qdirection))
2082 if (EQ (value, Ql2r))
2083 direction = CHARSET_LEFT_TO_RIGHT;
2084 else if (EQ (value, Qr2l))
2085 direction = CHARSET_RIGHT_TO_LEFT;
2087 signal_simple_error ("Invalid value for 'direction", value);
2090 else if (EQ (keyword, Qfinal))
2092 CHECK_CHAR_COERCE_INT (value);
2093 final = XCHAR (value);
2094 if (final < '0' || final > '~')
2095 signal_simple_error ("Invalid value for 'final", value);
2098 else if (EQ (keyword, Qccl_program))
2100 CHECK_VECTOR (value);
2101 ccl_program = value;
2105 signal_simple_error ("Unrecognized property", keyword);
2109 error ("'final must be specified");
2110 if (dimension == 2 && final > 0x5F)
2112 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2116 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2118 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2120 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2121 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2123 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2125 id = get_unallocated_leading_byte (dimension);
2127 if (NILP (doc_string))
2128 doc_string = build_string ("");
2130 if (NILP (registry))
2131 registry = build_string ("");
2133 if (NILP (short_name))
2134 XSETSTRING (short_name, XSYMBOL (name)->name);
2136 if (NILP (long_name))
2137 long_name = doc_string;
2140 columns = dimension;
2142 if (byte_offset < 0)
2146 else if (chars == 96)
2152 charset = make_charset (id, name, chars, dimension, columns, graphic,
2153 final, direction, short_name, long_name,
2154 doc_string, registry,
2155 Qnil, 0, 0, 0, byte_offset);
2156 if (!NILP (ccl_program))
2157 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   CHARSET is resolved with Fget_charset.  An error is signalled when
   it already has a reverse-direction charset, or when NEW-NAME (which
   must be a symbol) already names a charset.  The new charset copies
   CHARS, DIMENSION, COLUMNS, GRAPHIC, FINAL, the doc string, the short
   and long names, the registry and -- in the visible make_charset
   call -- the decoding table, UCS minimum / maximum, code offset and
   byte offset of the original.  It gets a freshly allocated leading
   byte and the opposite direction.  Finally the two charsets are
   linked to each other through their REVERSE_DIRECTION_CHARSET slots.
   NOTE(review): the return statement and any preprocessor conditional
   around the extra make_charset arguments are on lines not shown. */
2161 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2163 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2164 NEW-NAME is the name of the new charset. Return the new charset.
2166 (charset, new_name))
2168 Lisp_Object new_charset = Qnil;
2169 int id, chars, dimension, columns, graphic, final;
2171 Lisp_Object registry, doc_string, short_name, long_name;
2174 charset = Fget_charset (charset);
2175 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2176 signal_simple_error ("Charset already has reverse-direction charset",
2179 CHECK_SYMBOL (new_name);
2180 if (!NILP (Ffind_charset (new_name)))
2181 signal_simple_error ("Cannot redefine existing charset", new_name);
2183 cs = XCHARSET (charset);
2185 chars = CHARSET_CHARS (cs);
2186 dimension = CHARSET_DIMENSION (cs);
2187 columns = CHARSET_COLUMNS (cs);
2188 id = get_unallocated_leading_byte (dimension);
2190 graphic = CHARSET_GRAPHIC (cs);
2191 final = CHARSET_FINAL (cs);
2192 direction = CHARSET_RIGHT_TO_LEFT;
2193 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2194 direction = CHARSET_LEFT_TO_RIGHT;
2195 doc_string = CHARSET_DOC_STRING (cs);
2196 short_name = CHARSET_SHORT_NAME (cs);
2197 long_name = CHARSET_LONG_NAME (cs);
2198 registry = CHARSET_REGISTRY (cs);
2200 new_charset = make_charset (id, new_name, chars, dimension, columns,
2201 graphic, final, direction, short_name, long_name,
2202 doc_string, registry,
2204 CHARSET_DECODING_TABLE(cs),
2205 CHARSET_UCS_MIN(cs),
2206 CHARSET_UCS_MAX(cs),
2207 CHARSET_CODE_OFFSET(cs),
2208 CHARSET_BYTE_OFFSET(cs)
2214 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2215 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol;
   CHARSET is resolved with Fget_charset and then stored in
   Vcharset_hash_table under the key ALIAS.  The value of Fputhash is
   returned. */
2220 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2221 Define symbol ALIAS as an alias for CHARSET.
2225 CHECK_SYMBOL (alias);
2226 charset = Fget_charset (charset);
2227 return Fputhash (alias, charset, Vcharset_hash_table);
2230 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  CHARSET is
   resolved with Fget_charset and the content of its
   REVERSE_DIRECTION_CHARSET slot is returned as it is. */
2232 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2234 Return the reverse-direction charset parallel to CHARSET, if any.
2235 This is the charset with the same properties (in particular, the same
2236 dimension, number of characters per dimension, and final byte) as
2237 CHARSET but whose characters are displayed in the opposite direction.
2241 charset = Fget_charset (charset);
2242 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Validation in the visible code: DIMENSION must be an integer 1 or 2,
   CHARS 94 or 96, FINAL a character between '0' and '~' (and not above
   0x5F when the dimension is 2), DIRECTION one of l2r, r2l or nil.
   The charset type is derived from CHARS and the dimension, and the
   charset is fetched with CHARSET_BY_ATTRIBUTES: lookups for
   left-to-right, right-to-left and for the requested direction are
   all present.
   The visible return hands back XCHARSET_NAME of the charset found,
   i.e. its name rather than the charset object.
   NOTE(review): the assignments of `ch' and `fi', the conditions that
   choose between the three lookups and the handling of "nothing
   found" are on lines not shown. */
2246 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2247 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2248 If DIRECTION is omitted, both directions will be checked (left-to-right
2249 will be returned if character sets exist for both directions).
2251 (dimension, chars, final, direction))
2253 int dm, ch, fi, di = -1;
2255 Lisp_Object obj = Qnil;
2257 CHECK_INT (dimension);
2258 dm = XINT (dimension);
2259 if (dm < 1 || dm > 2)
2260 signal_simple_error ("Invalid value for DIMENSION", dimension);
2264 if (ch != 94 && ch != 96)
2265 signal_simple_error ("Invalid value for CHARS", chars);
2267 CHECK_CHAR_COERCE_INT (final);
2269 if (fi < '0' || fi > '~')
2270 signal_simple_error ("Invalid value for FINAL", final);
2272 if (EQ (direction, Ql2r))
2273 di = CHARSET_LEFT_TO_RIGHT;
2274 else if (EQ (direction, Qr2l))
2275 di = CHARSET_RIGHT_TO_LEFT;
2276 else if (!NILP (direction))
2277 signal_simple_error ("Invalid value for DIRECTION", direction);
2279 if (dm == 2 && fi > 0x5F)
2281 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2284 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2286 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2290 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2292 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2295 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2298 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its SHORT_NAME slot. */
2302 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2303 Return short name of CHARSET.
2307 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its LONG_NAME slot. */
2310 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2311 Return long name of CHARSET.
2315 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its DOC_STRING slot. */
2318 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2319 Return description of CHARSET.
2323 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension as a Lisp integer. */
2326 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2327 Return dimension of CHARSET.
2331 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   CHARSET is resolved with Fget_charset and PROP must be a symbol.
   PROP is compared against the known property symbols in turn and the
   matching slot is returned: Lisp objects as they are, integer slots
   wrapped with make_int, the final byte as a character (make_char),
   the direction as the symbol l2r or r2l.  For
   `reverse-direction-charset' the visible return yields the name of
   the charset stored in the slot.
   Any other symbol signals "Unrecognized charset property name".
   NOTE(review): the treatment of an empty reverse-direction slot is
   on lines not shown. */
2334 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2335 Return property PROP of CHARSET.
2336 Recognized properties are those listed in `make-charset', as well as
2337 'name and 'doc-string.
2343 charset = Fget_charset (charset);
2344 cs = XCHARSET (charset);
2346 CHECK_SYMBOL (prop);
2347 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2348 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2349 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2350 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2351 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2352 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2353 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2354 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2355 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2356 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2357 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2358 if (EQ (prop, Qdirection))
2359 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2360 if (EQ (prop, Qreverse_direction_charset))
2362 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2366 return XCHARSET_NAME (obj);
2368 signal_simple_error ("Unrecognized charset property name", prop);
2369 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its leading byte (XCHARSET_LEADING_BYTE) as a Lisp
   integer. */
2372 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2373 Return charset identification number of CHARSET.
2377 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2380 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is resolved with
   Fget_charset, CCL-PROGRAM must be a vector (CHECK_VECTOR), and the
   vector is stored in the charset's CCL_PROGRAM slot.
   NOTE(review): the return statement is on a line not shown. */
2383 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2384 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2386 (charset, ccl_program))
2388 charset = Fget_charset (charset);
2389 CHECK_VECTOR (ccl_program);
2390 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop the cached font information of CHARSET on every device:
   for each device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked
   up in the device's charset_font_cache and, when an entry exists, that
   entry is emptied with Fclrhash. */
2395 invalidate_charset_font_caches (Lisp_Object charset)
2397 /* Invalidate font cache entries for charset on all devices. */
2398 Lisp_Object devcons, concons, hash_table;
2399 DEVICE_LOOP_NO_BREAK (devcons, concons)
2401 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the "no entry" marker, so a missing entry is skipped. */
2402 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2403 if (!UNBOUNDP (hash_table))
2404 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  CHARSET is resolved with
   Fget_charset, REGISTRY must be a string, and the string is stored in
   the charset's REGISTRY slot.  Afterwards the per-device font caches
   for the charset are invalidated and a change of the `font' property
   of Vdefault_face is reported (face_property_was_changed with
   Qglobal).
   NOTE(review): the return statement is on a line not shown. */
2408 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2409 Set the 'registry property of CHARSET to REGISTRY.
2411 (charset, registry))
2413 charset = Fget_charset (charset);
2414 CHECK_STRING (registry);
2415 XCHARSET_REGISTRY (charset) = registry;
2416 invalidate_charset_font_caches (charset);
2417 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its DECODING_TABLE slot. */
2422 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2423 Return mapping-table of CHARSET.
2427 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   - TABLE nil: stored directly as the decoding table.
   - TABLE a vector: its length is limited to `ccs_len' (the number of
     characters per dimension, or 128 - BYTE_OFFSET for 94-character
     sets whose byte offset is not 33); the previous table is kept in
     `old_table' and the new one installed.
   - anything else: a wrong-type-argument error is signalled.

   The installed vector is then walked according to the dimension of
   the charset and entries are registered with put_char_attribute
   under the code point i + BYTE_OFFSET (dimension 1, and
   non-vector rows of dimension 2) or
   ((i + BYTE_OFFSET) << 8) | (j + BYTE_OFFSET) (rows of dimension 2).
   A row longer than CHARS restores `old_table' before the range error
   is signalled.

   NOTE(review): the rest of the nil case, the tests applied to each
   entry before it is registered and the return statement are on lines
   not shown. */
2430 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2431 Set mapping-table of CHARSET to TABLE.
2435 struct Lisp_Charset *cs;
2436 Lisp_Object old_table;
2439 charset = Fget_charset (charset);
2440 cs = XCHARSET (charset);
2442 if (EQ (table, Qnil))
2444 CHARSET_DECODING_TABLE(cs) = table;
2447 else if (VECTORP (table))
2451 /* ad-hoc method for `ascii' */
2452 if ((CHARSET_CHARS (cs) == 94) &&
2453 (CHARSET_BYTE_OFFSET (cs) != 33))
2454 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2456 ccs_len = CHARSET_CHARS (cs);
/* NOTE(review): the limit tested is ccs_len, but the error data
   reports CHARSET_CHARS (cs); the two differ in the special case
   above, so the reported bound can be misleading. */
2458 if (XVECTOR_LENGTH (table) > ccs_len)
2459 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2460 old_table = CHARSET_DECODING_TABLE(cs);
2461 CHARSET_DECODING_TABLE(cs) = table;
2464 signal_error (Qwrong_type_argument,
2465 list2 (build_translated_string ("vector-or-nil-p"),
2467 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2469 switch (CHARSET_DIMENSION (cs))
2472 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2474 Lisp_Object c = XVECTOR_DATA(table)[i];
2479 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2483 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2485 Lisp_Object v = XVECTOR_DATA(table)[i];
2491 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2493 CHARSET_DECODING_TABLE(cs) = old_table;
2494 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2496 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2498 Lisp_Object c = XVECTOR_DATA(v)[j];
2503 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2504 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2508 put_char_attribute (v, charset,
2509 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2518 /************************************************************************/
2519 /* Lisp primitives for working with characters */
2520 /************************************************************************/
/* Lisp primitive `decode-char'.  CHARSET is resolved with
   Fget_charset and the result is make_char (DECODE_CHAR (charset, c)).
   NOTE(review): how `c' is obtained from CODE, and what is done when
   XCHARSET_GRAPHIC (charset) is 1, are on lines not shown. */
2523 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2524 Make a character from CHARSET and code-point CODE.
2530 charset = Fget_charset (charset);
2533 if (XCHARSET_GRAPHIC (charset) == 1)
2535 return make_char (DECODE_CHAR (charset, c));
/* Lisp primitive `make-char'.

   The permitted octet range depends on the charset: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  The first octet is
   checked against that range.  For a one-dimensional charset a
   second octet is rejected ("Charset is of dimension one; second
   octet must be nil") and the character is MAKE_CHAR (charset, a1, 0).
   Otherwise the second octet is taken from ARG2 with the high bit
   masked off (& 0x7f), range-checked the same way, and the character
   is MAKE_CHAR (charset, a1, a2).
   NOTE(review): the computation of `a1', the nil test on ARG2 and any
   preprocessor conditional around the 256-character case are on lines
   not shown. */
2539 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2540 Make a character from CHARSET and octets ARG1 and ARG2.
2541 ARG2 is required only for characters from two-dimensional charsets.
2542 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2543 character s with caron.
2545 (charset, arg1, arg2))
2549 int lowlim, highlim;
2551 charset = Fget_charset (charset);
2552 cs = XCHARSET (charset);
2554 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2555 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2557 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2559 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2560 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2563 /* It is useful (and safe, according to Olivier Galibert) to strip
2564 the 8th bit off ARG1 and ARG2 because it allows programmers to
2565 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2566 Latin 2 code of the character. */
2574 if (a1 < lowlim || a1 > highlim)
2575 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2577 if (CHARSET_DIMENSION (cs) == 1)
2581 ("Charset is of dimension one; second octet must be nil", arg2);
2582 return make_char (MAKE_CHAR (charset, a1, 0));
2591 a2 = XINT (arg2) & 0x7f;
2593 if (a2 < lowlim || a2 > highlim)
2594 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2596 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH is coerced to a character
   (CHECK_CHAR_COERCE_INT) and the name of CHAR_CHARSET of that
   character is returned -- the charset's name, not the charset
   object. */
2599 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2600 Return the character set of char CH.
2604 CHECK_CHAR_COERCE_INT (ch);
2606 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `char-octet'.  CH is coerced to a character and
   split with BREAKUP_CHAR into a charset and two octets.  N nil or 0
   selects the first octet, N equal to 1 the second; any other N
   signals "Octet number must be 0 or 1". */
2609 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2610 Return the octet numbered N (should be 0 or 1) of char CH.
2611 N defaults to 0 if omitted.
2615 Lisp_Object charset;
2618 CHECK_CHAR_COERCE_INT (ch);
2620 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2622 if (NILP (n) || EQ (n, Qzero))
2623 return make_int (octet0);
2624 else if (EQ (n, make_int (1)))
2625 return make_int (octet1);
2627 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  CHARACTER is coerced to a character
   while `charset' and `rc' are GC-protected.

   Two ways of building the result are visible:
   - ENCODE_CHAR yields a code point and the charset; the low byte of
     the code point is consed onto `rc' while the dimension count is
     positive, and the charset name is consed on last, so it ends up
     first in the list.
   - BREAKUP_CHAR yields the charset and two octets; the result is a
     three-element list for dimension 2 and a two-element list
     otherwise.

   NOTE(review): presumably a preprocessor conditional on lines not
   shown selects one of the two variants; the statements that advance
   `code_point' and `dimension' inside the loop, and the return, are
   not shown either -- confirm against the full function. */
2630 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2631 Return list of charset and one or two position-codes of CHAR.
2635 /* This function can GC */
2636 struct gcpro gcpro1, gcpro2;
2637 Lisp_Object charset = Qnil;
2638 Lisp_Object rc = Qnil;
2646 GCPRO2 (charset, rc);
2647 CHECK_CHAR_COERCE_INT (character);
2650 code_point = ENCODE_CHAR (XCHAR (character), charset);
2651 dimension = XCHARSET_DIMENSION (charset);
2652 while (dimension > 0)
2654 rc = Fcons (make_int (code_point & 255), rc);
2658 rc = Fcons (XCHARSET_NAME (charset), rc);
2660 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2662 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2664 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2668 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2677 #ifdef ENABLE_COMPOSITE_CHARS
2678 /************************************************************************/
2679 /* composite character functions */
2680 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.

   The bytes are turned into a Lisp string, which is looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path that
   is visible builds a new character in Vcharset_composite from the
   next free (row, column) position, records it in both directions
   (character -> string and string -> character), and advances the
   position: the column is incremented and, on reaching 128, reset to
   32 while the row is incremented.  When the row has reached 128,
   "No more composite chars available" is signalled.
   NOTE(review): the test that distinguishes "already registered" from
   "must allocate" and the return statements are on lines not shown. */
2683 lookup_composite_char (Bufbyte *str, int len)
2685 Lisp_Object lispstr = make_string (str, len);
2686 Lisp_Object ch = Fgethash (lispstr,
2687 Vcomposite_char_string2char_hash_table,
2693 if (composite_char_row_next >= 128)
2694 signal_simple_error ("No more composite chars available", lispstr);
2695 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2696 composite_char_col_next);
/* Register the new character in both lookup directions. */
2697 Fputhash (make_char (emch), lispstr,
2698 Vcomposite_char_char2string_hash_table);
2699 Fputhash (lispstr, make_char (emch),
2700 Vcomposite_char_string2char_hash_table);
/* Advance to the next free position, wrapping the column at 128. */
2701 composite_char_col_next++;
2702 if (composite_char_col_next >= 128)
2704 composite_char_col_next = 32;
2705 composite_char_row_next++;
/* Look up the string registered for composite character CH in
   Vcomposite_char_char2string_hash_table.  The lookup is asserted to
   succeed, so CH must have been registered beforehand.
   NOTE(review): the return statement is on a line not shown. */
2714 composite_char_string (Emchar ch)
2716 Lisp_Object str = Fgethash (make_char (ch),
2717 Vcomposite_char_char2string_hash_table,
2719 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN).  STRING must be a
   Lisp string; its data and length are passed to
   lookup_composite_char and the resulting character is returned as a
   Lisp character. */
2723 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2724 Convert a string into a single composite character.
2725 The character is the result of overstriking all the characters in
2730 CHECK_STRING (string);
2731 return make_char (lookup_composite_char (XSTRING_DATA (string),
2732 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN).  A character whose
   leading byte is not LEADING_BYTE_COMPOSITE is rejected with "Must be
   composite char"; otherwise the string registered for it is returned
   via composite_char_string.
   NOTE(review): the extraction of `emch' from CH is on lines not
   shown. */
2735 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2736 Return a string of the characters comprising a composite character.
2744 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2745 signal_simple_error ("Must be composite char", ch);
2746 return composite_char_string (emch);
2748 #endif /* ENABLE_COMPOSITE_CHARS */
2751 /************************************************************************/
2752 /* initialization */
2753 /************************************************************************/
2756 syms_of_mule_charset (void)
2759 INIT_LRECORD_IMPLEMENTATION (byte_table);
2760 INIT_LRECORD_IMPLEMENTATION (char_code_table);
2762 INIT_LRECORD_IMPLEMENTATION (charset);
2764 DEFSUBR (Fcharsetp);
2765 DEFSUBR (Ffind_charset);
2766 DEFSUBR (Fget_charset);
2767 DEFSUBR (Fcharset_list);
2768 DEFSUBR (Fcharset_name);
2769 DEFSUBR (Fmake_charset);
2770 DEFSUBR (Fmake_reverse_direction_charset);
2771 /* DEFSUBR (Freverse_direction_charset); */
2772 DEFSUBR (Fdefine_charset_alias);
2773 DEFSUBR (Fcharset_from_attributes);
2774 DEFSUBR (Fcharset_short_name);
2775 DEFSUBR (Fcharset_long_name);
2776 DEFSUBR (Fcharset_description);
2777 DEFSUBR (Fcharset_dimension);
2778 DEFSUBR (Fcharset_property);
2779 DEFSUBR (Fcharset_id);
2780 DEFSUBR (Fset_charset_ccl_program);
2781 DEFSUBR (Fset_charset_registry);
2783 DEFSUBR (Fchar_attribute_alist);
2784 DEFSUBR (Fget_char_attribute);
2785 DEFSUBR (Fput_char_attribute);
2786 DEFSUBR (Fremove_char_attribute);
2787 DEFSUBR (Fdefine_char);
2788 DEFSUBR (Fchar_variants);
2789 DEFSUBR (Fget_composite_char);
2790 DEFSUBR (Fcharset_mapping_table);
2791 DEFSUBR (Fset_charset_mapping_table);
2795 DEFSUBR (Fdecode_char);
2797 DEFSUBR (Fmake_char);
2798 DEFSUBR (Fchar_charset);
2799 DEFSUBR (Fchar_octet);
2800 DEFSUBR (Fsplit_char);
2802 #ifdef ENABLE_COMPOSITE_CHARS
2803 DEFSUBR (Fmake_composite_char);
2804 DEFSUBR (Fcomposite_char_string);
2807 defsymbol (&Qcharsetp, "charsetp");
2808 defsymbol (&Qregistry, "registry");
2809 defsymbol (&Qfinal, "final");
2810 defsymbol (&Qgraphic, "graphic");
2811 defsymbol (&Qdirection, "direction");
2812 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2813 defsymbol (&Qshort_name, "short-name");
2814 defsymbol (&Qlong_name, "long-name");
2816 defsymbol (&Ql2r, "l2r");
2817 defsymbol (&Qr2l, "r2l");
2819 /* Charsets, compatible with FSF 20.3
2820 Naming convention is Script-Charset[-Edition] */
2821 defsymbol (&Qascii, "ascii");
2822 defsymbol (&Qcontrol_1, "control-1");
2823 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2824 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2825 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2826 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2827 defsymbol (&Qthai_tis620, "thai-tis620");
2828 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2829 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2830 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2831 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2832 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2833 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2834 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2835 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2836 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2837 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2838 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2839 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2840 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2841 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2842 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2844 defsymbol (&Q_ucs, "->ucs");
2845 defsymbol (&Q_decomposition, "->decomposition");
2846 defsymbol (&Qcompat, "compat");
2847 defsymbol (&Qisolated, "isolated");
2848 defsymbol (&Qinitial, "initial");
2849 defsymbol (&Qmedial, "medial");
2850 defsymbol (&Qfinal, "final");
2851 defsymbol (&Qvertical, "vertical");
2852 defsymbol (&QnoBreak, "noBreak");
2853 defsymbol (&Qfraction, "fraction");
2854 defsymbol (&Qsuper, "super");
2855 defsymbol (&Qsub, "sub");
2856 defsymbol (&Qcircle, "circle");
2857 defsymbol (&Qsquare, "square");
2858 defsymbol (&Qwide, "wide");
2859 defsymbol (&Qnarrow, "narrow");
2860 defsymbol (&Qsmall, "small");
2861 defsymbol (&Qfont, "font");
2862 defsymbol (&Qucs, "ucs");
2863 defsymbol (&Qucs_bmp, "ucs-bmp");
2864 defsymbol (&Qlatin_viscii, "latin-viscii");
2865 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2866 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2867 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2868 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2869 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2870 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2871 defsymbol (&Qmojikyo, "mojikyo");
2872 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2873 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2874 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2875 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2876 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2877 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2878 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2879 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2880 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2881 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2882 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2883 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2884 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2885 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2886 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2887 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2888 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2889 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2890 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2891 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2892 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2893 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2895 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2896 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2898 defsymbol (&Qcomposite, "composite");
/* Initialize the C-level state of the charset module.

   Allocates the global `chlook' (struct charset_lookup) with xnew and
   hands it to dumpstruct together with charset_lookup_description,
   resets every slot of its lookup tables to Qnil, and sets the
   next_allocated_* leading-byte fields to their MIN_LEADING_BYTE_PRIVATE*
   starting values.  It then defines the variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list' through DEFVAR_INT /
   DEFVAR_LISP, and creates and staticpro's the character attribute,
   composition and variant tables with make_char_code_table (Qnil).

   Takes no arguments.  */
2902 vars_of_mule_charset (void)
2909 chlook = xnew (struct charset_lookup);
2910 dumpstruct (&chlook, &charset_lookup_description);
2912 /* Table of charsets indexed by leading byte. */
2913 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2914 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): the next two loop nests both clear
   chlook->charset_by_attributes, once with two subscripts and once
   with three.  Presumably they are alternatives selected by
   conditional compilation that is not visible here -- confirm.  */
2917 /* Table of charsets indexed by type/final-byte. */
2918 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2919 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2920 chlook->charset_by_attributes[i][j] = Qnil;
2922 /* Table of charsets indexed by type/final-byte/direction. */
2923 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2924 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2925 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2926 chlook->charset_by_attributes[i][j][k] = Qnil;
/* NOTE(review): both a single next_allocated_leading_byte field and
   separate 1-byte / 2-byte fields are seeded below; presumably only
   one variant exists in any given build -- confirm.  */
2930 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2932 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2933 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* NOTE(review): leading_code_private_11 receives the same value both
   before and after its DEFVAR_INT below; one of the two assignments
   looks redundant unless they sit in different conditional branches
   -- confirm.  */
2937 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2938 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2939 Leading-code of private TYPE9N charset of column-width 1.
2941 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2945 Vutf_2000_version = build_string("0.15 (Sangō)");
2946 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2947 Version number of UTF-2000.
2950 staticpro (&Vcharacter_attribute_table);
2951 Vcharacter_attribute_table = make_char_code_table (Qnil);
2953 staticpro (&Vcharacter_composition_table);
2954 Vcharacter_composition_table = make_char_code_table (Qnil);
2956 staticpro (&Vcharacter_variant_table);
2957 Vcharacter_variant_table = make_char_code_table (Qnil);
2959 Vdefault_coded_charset_priority_list = Qnil;
2960 DEFVAR_LISP ("default-coded-charset-priority-list",
2961 &Vdefault_coded_charset_priority_list /*
2962 Default order of preferred coded-character-sets.
2968 complex_vars_of_mule_charset (void)
2970 staticpro (&Vcharset_hash_table);
2971 Vcharset_hash_table =
2972 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2974 /* Predefined character sets. We store them into variables for ease of access. */
2978 staticpro (&Vcharset_ucs);
2980 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2981 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2982 build_string ("UCS"),
2983 build_string ("UCS"),
2984 build_string ("ISO/IEC 10646"),
2986 Qnil, 0, 0xFFFFFFF, 0, 0);
2987 staticpro (&Vcharset_ucs_bmp);
2989 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2990 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2991 build_string ("BMP"),
2992 build_string ("BMP"),
2993 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2994 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2995 Qnil, 0, 0xFFFF, 0, 0);
2997 # define MIN_CHAR_THAI 0
2998 # define MAX_CHAR_THAI 0
2999 # define MIN_CHAR_HEBREW 0
3000 # define MAX_CHAR_HEBREW 0
3001 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3002 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* Register the ASCII charset (94 characters, one dimension, final
   byte 'B').  The build_string arguments that follow the direction are
   presumably the short name, long name, doc string and registry regexp
   -- confirm against make_charset.  The second of them is a plain
   "ASCII", with no unbalanced ")".  */
3004 staticpro (&Vcharset_ascii);
3006 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3007 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3008 build_string ("ASCII"),
3009 build_string ("ASCII"),
3010 build_string ("ASCII (ISO646 IRV)"),
3011 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3012 Qnil, 0, 0x7F, 0, 0);
3013 staticpro (&Vcharset_control_1);
3014 Vcharset_control_1 =
3015 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3016 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3017 build_string ("C1"),
3018 build_string ("Control characters"),
3019 build_string ("Control characters 128-191"),
3021 Qnil, 0x80, 0x9F, 0, 0);
3022 staticpro (&Vcharset_latin_iso8859_1);
3023 Vcharset_latin_iso8859_1 =
3024 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3025 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3026 build_string ("Latin-1"),
3027 build_string ("ISO8859-1 (Latin-1)"),
3028 build_string ("ISO8859-1 (Latin-1)"),
3029 build_string ("iso8859-1"),
3030 Qnil, 0xA0, 0xFF, 0, 32);
3031 staticpro (&Vcharset_latin_iso8859_2);
3032 Vcharset_latin_iso8859_2 =
3033 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3034 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3035 build_string ("Latin-2"),
3036 build_string ("ISO8859-2 (Latin-2)"),
3037 build_string ("ISO8859-2 (Latin-2)"),
3038 build_string ("iso8859-2"),
3040 staticpro (&Vcharset_latin_iso8859_3);
3041 Vcharset_latin_iso8859_3 =
3042 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3043 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3044 build_string ("Latin-3"),
3045 build_string ("ISO8859-3 (Latin-3)"),
3046 build_string ("ISO8859-3 (Latin-3)"),
3047 build_string ("iso8859-3"),
3049 staticpro (&Vcharset_latin_iso8859_4);
3050 Vcharset_latin_iso8859_4 =
3051 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3052 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Latin-4"),
3054 build_string ("ISO8859-4 (Latin-4)"),
3055 build_string ("ISO8859-4 (Latin-4)"),
3056 build_string ("iso8859-4"),
3058 staticpro (&Vcharset_thai_tis620);
3059 Vcharset_thai_tis620 =
3060 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3061 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3062 build_string ("TIS620"),
3063 build_string ("TIS620 (Thai)"),
3064 build_string ("TIS620.2529 (Thai)"),
3065 build_string ("tis620"),
3066 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3067 staticpro (&Vcharset_greek_iso8859_7);
3068 Vcharset_greek_iso8859_7 =
3069 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3070 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3071 build_string ("ISO8859-7"),
3072 build_string ("ISO8859-7 (Greek)"),
3073 build_string ("ISO8859-7 (Greek)"),
3074 build_string ("iso8859-7"),
3076 0 /* MIN_CHAR_GREEK */,
3077 0 /* MAX_CHAR_GREEK */, 0, 32);
3078 staticpro (&Vcharset_arabic_iso8859_6);
3079 Vcharset_arabic_iso8859_6 =
3080 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3081 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3082 build_string ("ISO8859-6"),
3083 build_string ("ISO8859-6 (Arabic)"),
3084 build_string ("ISO8859-6 (Arabic)"),
3085 build_string ("iso8859-6"),
3087 staticpro (&Vcharset_hebrew_iso8859_8);
3088 Vcharset_hebrew_iso8859_8 =
3089 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3090 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3091 build_string ("ISO8859-8"),
3092 build_string ("ISO8859-8 (Hebrew)"),
3093 build_string ("ISO8859-8 (Hebrew)"),
3094 build_string ("iso8859-8"),
3095 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3096 staticpro (&Vcharset_katakana_jisx0201);
3097 Vcharset_katakana_jisx0201 =
3098 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3099 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("JISX0201 Kana"),
3101 build_string ("JISX0201.1976 (Japanese Kana)"),
3102 build_string ("JISX0201.1976 Japanese Kana"),
3103 build_string ("jisx0201\\.1976"),
3105 staticpro (&Vcharset_latin_jisx0201);
3106 Vcharset_latin_jisx0201 =
3107 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3108 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3109 build_string ("JISX0201 Roman"),
3110 build_string ("JISX0201.1976 (Japanese Roman)"),
3111 build_string ("JISX0201.1976 Japanese Roman"),
3112 build_string ("jisx0201\\.1976"),
3114 staticpro (&Vcharset_cyrillic_iso8859_5);
3115 Vcharset_cyrillic_iso8859_5 =
3116 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3117 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3118 build_string ("ISO8859-5"),
3119 build_string ("ISO8859-5 (Cyrillic)"),
3120 build_string ("ISO8859-5 (Cyrillic)"),
3121 build_string ("iso8859-5"),
3123 0 /* MIN_CHAR_CYRILLIC */,
3124 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3125 staticpro (&Vcharset_latin_iso8859_9);
3126 Vcharset_latin_iso8859_9 =
3127 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3128 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3129 build_string ("Latin-5"),
3130 build_string ("ISO8859-9 (Latin-5)"),
3131 build_string ("ISO8859-9 (Latin-5)"),
3132 build_string ("iso8859-9"),
3134 staticpro (&Vcharset_japanese_jisx0208_1978);
3135 Vcharset_japanese_jisx0208_1978 =
3136 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3137 Qjapanese_jisx0208_1978, 94, 2,
3138 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3139 build_string ("JIS X0208:1978"),
3140 build_string ("JIS X0208:1978 (Japanese)"),
3142 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3143 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* Register the GB2312 (simplified Chinese) charset: 94x94, final byte
   'A'.  The build_string arguments that follow the direction are
   presumably the short name, long name, doc string and registry regexp
   -- confirm against make_charset.  The second of them is a plain
   "GB2312", with no unbalanced ")".  */
3145 staticpro (&Vcharset_chinese_gb2312);
3146 Vcharset_chinese_gb2312 =
3147 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3148 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3149 build_string ("GB2312"),
3150 build_string ("GB2312"),
3151 build_string ("GB2312 Chinese simplified"),
3152 build_string ("gb2312"),
3154 staticpro (&Vcharset_japanese_jisx0208);
3155 Vcharset_japanese_jisx0208 =
3156 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3157 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3158 build_string ("JISX0208"),
3159 build_string ("JIS X0208:1983 (Japanese)"),
3160 build_string ("JIS X0208:1983 Japanese Kanji"),
3161 build_string ("jisx0208\\.1983"),
3164 staticpro (&Vcharset_japanese_jisx0208_1990);
3165 Vcharset_japanese_jisx0208_1990 =
3166 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3167 Qjapanese_jisx0208_1990, 94, 2,
3168 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3169 build_string ("JISX0208-1990"),
3170 build_string ("JIS X0208:1990 (Japanese)"),
3171 build_string ("JIS X0208:1990 Japanese Kanji"),
3172 build_string ("jisx0208\\.1990"),
3174 MIN_CHAR_JIS_X0208_1990,
3175 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* Register the KSC5601 (Korean) charset: 94x94, final byte 'C'.  The
   build_string arguments that follow the direction are presumably the
   short name, long name, doc string and registry regexp -- confirm
   against make_charset.  The second of them keeps its parenthetical
   balanced, matching the "NAME (Language)" form used by the
   neighbouring charsets.  */
3177 staticpro (&Vcharset_korean_ksc5601);
3178 Vcharset_korean_ksc5601 =
3179 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3180 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("KSC5601"),
3182 build_string ("KSC5601 (Korean)"),
3183 build_string ("KSC5601 Korean Hangul and Hanja"),
3184 build_string ("ksc5601"),
3186 staticpro (&Vcharset_japanese_jisx0212);
3187 Vcharset_japanese_jisx0212 =
3188 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3189 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3190 build_string ("JISX0212"),
3191 build_string ("JISX0212 (Japanese)"),
3192 build_string ("JISX0212 Japanese Supplement"),
3193 build_string ("jisx0212"),
3196 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3197 staticpro (&Vcharset_chinese_cns11643_1);
3198 Vcharset_chinese_cns11643_1 =
3199 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3200 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3201 build_string ("CNS11643-1"),
3202 build_string ("CNS11643-1 (Chinese traditional)"),
3204 ("CNS 11643 Plane 1 Chinese traditional"),
3205 build_string (CHINESE_CNS_PLANE_RE("1")),
3207 staticpro (&Vcharset_chinese_cns11643_2);
3208 Vcharset_chinese_cns11643_2 =
3209 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3210 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3211 build_string ("CNS11643-2"),
3212 build_string ("CNS11643-2 (Chinese traditional)"),
3214 ("CNS 11643 Plane 2 Chinese traditional"),
3215 build_string (CHINESE_CNS_PLANE_RE("2")),
3218 staticpro (&Vcharset_latin_tcvn5712);
3219 Vcharset_latin_tcvn5712 =
3220 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3221 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3222 build_string ("TCVN 5712"),
3223 build_string ("TCVN 5712 (VSCII-2)"),
3224 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3225 build_string ("tcvn5712-1"),
3227 staticpro (&Vcharset_latin_viscii_lower);
3228 Vcharset_latin_viscii_lower =
3229 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3230 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3231 build_string ("VISCII lower"),
3232 build_string ("VISCII lower (Vietnamese)"),
3233 build_string ("VISCII lower (Vietnamese)"),
3234 build_string ("MULEVISCII-LOWER"),
3236 staticpro (&Vcharset_latin_viscii_upper);
3237 Vcharset_latin_viscii_upper =
3238 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3239 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3240 build_string ("VISCII upper"),
3241 build_string ("VISCII upper (Vietnamese)"),
3242 build_string ("VISCII upper (Vietnamese)"),
3243 build_string ("MULEVISCII-UPPER"),
3245 staticpro (&Vcharset_latin_viscii);
3246 Vcharset_latin_viscii =
3247 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3248 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3249 build_string ("VISCII"),
3250 build_string ("VISCII 1.1 (Vietnamese)"),
3251 build_string ("VISCII 1.1 (Vietnamese)"),
3252 build_string ("VISCII1\\.1"),
3254 staticpro (&Vcharset_ideograph_daikanwa);
3255 Vcharset_ideograph_daikanwa =
3256 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3257 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3258 build_string ("Daikanwa"),
3259 build_string ("Morohashi's Daikanwa"),
3260 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3261 build_string ("Daikanwa"),
3262 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3263 staticpro (&Vcharset_mojikyo);
3265 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3266 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3267 build_string ("Mojikyo"),
3268 build_string ("Mojikyo"),
3269 build_string ("Konjaku-Mojikyo"),
3271 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3272 staticpro (&Vcharset_mojikyo_pj_1);
3273 Vcharset_mojikyo_pj_1 =
3274 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3275 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3276 build_string ("Mojikyo-PJ-1"),
3277 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3279 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3280 build_string ("jisx0208\\.Mojikyo-1$"),
3282 staticpro (&Vcharset_mojikyo_pj_2);
3283 Vcharset_mojikyo_pj_2 =
3284 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3285 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3286 build_string ("Mojikyo-PJ-2"),
3287 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3289 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3290 build_string ("jisx0208\\.Mojikyo-2$"),
3292 staticpro (&Vcharset_mojikyo_pj_3);
3293 Vcharset_mojikyo_pj_3 =
3294 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3295 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3296 build_string ("Mojikyo-PJ-3"),
3297 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3299 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3300 build_string ("jisx0208\\.Mojikyo-3$"),
3302 staticpro (&Vcharset_mojikyo_pj_4);
3303 Vcharset_mojikyo_pj_4 =
3304 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3305 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3306 build_string ("Mojikyo-PJ-4"),
3307 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3309 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3310 build_string ("jisx0208\\.Mojikyo-4$"),
3312 staticpro (&Vcharset_mojikyo_pj_5);
3313 Vcharset_mojikyo_pj_5 =
3314 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3315 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3316 build_string ("Mojikyo-PJ-5"),
3317 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3319 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3320 build_string ("jisx0208\\.Mojikyo-5$"),
3322 staticpro (&Vcharset_mojikyo_pj_6);
3323 Vcharset_mojikyo_pj_6 =
3324 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3325 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3326 build_string ("Mojikyo-PJ-6"),
3327 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3329 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3330 build_string ("jisx0208\\.Mojikyo-6$"),
3332 staticpro (&Vcharset_mojikyo_pj_7);
3333 Vcharset_mojikyo_pj_7 =
3334 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3335 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3336 build_string ("Mojikyo-PJ-7"),
3337 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3339 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3340 build_string ("jisx0208\\.Mojikyo-7$"),
3342 staticpro (&Vcharset_mojikyo_pj_8);
3343 Vcharset_mojikyo_pj_8 =
3344 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3345 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3346 build_string ("Mojikyo-PJ-8"),
3347 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3349 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3350 build_string ("jisx0208\\.Mojikyo-8$"),
3352 staticpro (&Vcharset_mojikyo_pj_9);
3353 Vcharset_mojikyo_pj_9 =
3354 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3355 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3356 build_string ("Mojikyo-PJ-9"),
3357 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3359 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3360 build_string ("jisx0208\\.Mojikyo-9$"),
3362 staticpro (&Vcharset_mojikyo_pj_10);
3363 Vcharset_mojikyo_pj_10 =
3364 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3365 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3366 build_string ("Mojikyo-PJ-10"),
3367 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3369 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3370 build_string ("jisx0208\\.Mojikyo-10$"),
3372 staticpro (&Vcharset_mojikyo_pj_11);
3373 Vcharset_mojikyo_pj_11 =
3374 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3375 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3376 build_string ("Mojikyo-PJ-11"),
3377 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3379 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3380 build_string ("jisx0208\\.Mojikyo-11$"),
3382 staticpro (&Vcharset_mojikyo_pj_12);
3383 Vcharset_mojikyo_pj_12 =
3384 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3385 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3386 build_string ("Mojikyo-PJ-12"),
3387 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3389 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3390 build_string ("jisx0208\\.Mojikyo-12$"),
3392 staticpro (&Vcharset_mojikyo_pj_13);
3393 Vcharset_mojikyo_pj_13 =
3394 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3395 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3396 build_string ("Mojikyo-PJ-13"),
3397 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3399 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3400 build_string ("jisx0208\\.Mojikyo-13$"),
3402 staticpro (&Vcharset_mojikyo_pj_14);
3403 Vcharset_mojikyo_pj_14 =
3404 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3405 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3406 build_string ("Mojikyo-PJ-14"),
3407 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3409 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3410 build_string ("jisx0208\\.Mojikyo-14$"),
3412 staticpro (&Vcharset_mojikyo_pj_15);
3413 Vcharset_mojikyo_pj_15 =
3414 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3415 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3416 build_string ("Mojikyo-PJ-15"),
3417 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3419 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3420 build_string ("jisx0208\\.Mojikyo-15$"),
3422 staticpro (&Vcharset_mojikyo_pj_16);
3423 Vcharset_mojikyo_pj_16 =
3424 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3425 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3426 build_string ("Mojikyo-PJ-16"),
3427 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3429 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3430 build_string ("jisx0208\\.Mojikyo-16$"),
3432 staticpro (&Vcharset_mojikyo_pj_17);
3433 Vcharset_mojikyo_pj_17 =
3434 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3435 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3436 build_string ("Mojikyo-PJ-17"),
3437 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3439 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3440 build_string ("jisx0208\\.Mojikyo-17$"),
3442 staticpro (&Vcharset_mojikyo_pj_18);
3443 Vcharset_mojikyo_pj_18 =
3444 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3445 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3446 build_string ("Mojikyo-PJ-18"),
3447 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3449 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3450 build_string ("jisx0208\\.Mojikyo-18$"),
3452 staticpro (&Vcharset_mojikyo_pj_19);
3453 Vcharset_mojikyo_pj_19 =
3454 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3455 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3456 build_string ("Mojikyo-PJ-19"),
3457 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3459 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3460 build_string ("jisx0208\\.Mojikyo-19$"),
3462 staticpro (&Vcharset_mojikyo_pj_20);
3463 Vcharset_mojikyo_pj_20 =
3464 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3465 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3466 build_string ("Mojikyo-PJ-20"),
3467 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3469 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3470 build_string ("jisx0208\\.Mojikyo-20$"),
3472 staticpro (&Vcharset_mojikyo_pj_21);
3473 Vcharset_mojikyo_pj_21 =
3474 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3475 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3476 build_string ("Mojikyo-PJ-21"),
3477 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3479 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3480 build_string ("jisx0208\\.Mojikyo-21$"),
3482 staticpro (&Vcharset_ethiopic_ucs);
3483 Vcharset_ethiopic_ucs =
3484 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3485 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3486 build_string ("Ethiopic (UCS)"),
3487 build_string ("Ethiopic (UCS)"),
3488 build_string ("Ethiopic of UCS"),
3489 build_string ("Ethiopic-Unicode"),
3490 Qnil, 0x1200, 0x137F, 0x1200, 0);
3492 staticpro (&Vcharset_chinese_big5_1);
3493 Vcharset_chinese_big5_1 =
3494 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3495 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3496 build_string ("Big5"),
3497 build_string ("Big5 (Level-1)"),
3499 ("Big5 Level-1 Chinese traditional"),
3500 build_string ("big5"),
3502 staticpro (&Vcharset_chinese_big5_2);
3503 Vcharset_chinese_big5_2 =
3504 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3505 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3506 build_string ("Big5"),
3507 build_string ("Big5 (Level-2)"),
3509 ("Big5 Level-2 Chinese traditional"),
3510 build_string ("big5"),
3513 #ifdef ENABLE_COMPOSITE_CHARS
3514 /* #### For simplicity, we put composite chars into a 96x96 charset.
3515 This is going to lead to problems because you can run out of
3516 room, esp. as we don't yet recycle numbers. */
3517 staticpro (&Vcharset_composite);
3518 Vcharset_composite =
3519 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3520 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3521 build_string ("Composite"),
3522 build_string ("Composite characters"),
3523 build_string ("Composite characters"),
3526 /* #### not dumped properly */
3527 composite_char_row_next = 32;
3528 composite_char_col_next = 32;
3530 Vcomposite_char_string2char_hash_table =
3531 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3532 Vcomposite_char_char2string_hash_table =
3533 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3534 staticpro (&Vcomposite_char_string2char_hash_table);
3535 staticpro (&Vcomposite_char_char2string_hash_table);
3536 #endif /* ENABLE_COMPOSITE_CHARS */