1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets.  Each variable is a Lisp_Object
   named after the charset it refers to; the code that fills them in is
   not part of this section of the file. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_japanese_jisx0208_1990;
57 Lisp_Object Vcharset_korean_ksc5601;
58 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2;
62 Lisp_Object Vcharset_ucs_bmp;
63 Lisp_Object Vcharset_latin_viscii;
64 Lisp_Object Vcharset_latin_viscii_lower;
65 Lisp_Object Vcharset_latin_viscii_upper;
66 Lisp_Object Vcharset_ideograph_daikanwa;
67 Lisp_Object Vcharset_mojikyo_pj_1;
68 Lisp_Object Vcharset_mojikyo_pj_2;
69 Lisp_Object Vcharset_mojikyo_pj_3;
70 Lisp_Object Vcharset_mojikyo_pj_4;
71 Lisp_Object Vcharset_mojikyo_pj_5;
72 Lisp_Object Vcharset_mojikyo_pj_6;
73 Lisp_Object Vcharset_mojikyo_pj_7;
74 Lisp_Object Vcharset_mojikyo_pj_8;
75 Lisp_Object Vcharset_mojikyo_pj_9;
76 Lisp_Object Vcharset_mojikyo_pj_10;
77 Lisp_Object Vcharset_mojikyo_pj_11;
78 Lisp_Object Vcharset_mojikyo_pj_12;
79 Lisp_Object Vcharset_mojikyo_pj_13;
80 Lisp_Object Vcharset_mojikyo_pj_14;
81 Lisp_Object Vcharset_mojikyo_pj_15;
82 Lisp_Object Vcharset_mojikyo_pj_16;
83 Lisp_Object Vcharset_mojikyo_pj_17;
84 Lisp_Object Vcharset_mojikyo_pj_18;
85 Lisp_Object Vcharset_mojikyo_pj_19;
86 Lisp_Object Vcharset_mojikyo_pj_20;
87 Lisp_Object Vcharset_mojikyo_pj_21;
88 Lisp_Object Vcharset_ethiopic_ucs;
90 Lisp_Object Vcharset_chinese_big5_1;
91 Lisp_Object Vcharset_chinese_big5_2;
/* State that exists only when composite characters are compiled in
   (ENABLE_COMPOSITE_CHARS).  non_ascii_valid_char_p looks characters up
   in Vcomposite_char_char2string_hash_table with Fgethash.
   NOTE(review): composite_char_row_next / composite_char_col_next look
   like cursors for handing out new composite characters -- confirm
   against the code that uses them, which is not visible here. */
93 #ifdef ENABLE_COMPOSITE_CHARS
94 Lisp_Object Vcharset_composite;
96 /* Hash tables for composite chars. One maps string representing
97 composed chars to their equivalent chars; one goes the
99 Lisp_Object Vcomposite_char_char2string_hash_table;
100 Lisp_Object Vcomposite_char_string2char_hash_table;
102 static int composite_char_row_next;
103 static int composite_char_col_next;
105 #endif /* ENABLE_COMPOSITE_CHARS */
/* Lookup tables that make_charset fills in when a charset is created.
   NOTE(review): charset_by_attributes appears twice below with
   different ranks.  Presumably the two declarations belong to
   alternative preprocessor branches that are not visible here --
   confirm.  make_charset stores into the 2-D form as [type][final] and
   into the 3-D form as [type][final][direction]. */
107 /* Table of charsets indexed by leading byte.  make_charset stores
   each charset at index (id - MIN_LEADING_BYTE). */
108 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
110 /* Table of charsets indexed by type/final-byte/direction. */
112 Lisp_Object charset_by_attributes[4][128];
114 Lisp_Object charset_by_attributes[4][128][2];
/* NOTE(review): two alternative initializer rows follow the
   "0x80 - 0x8f" comment (one ending in 3, one consisting only of 2s).
   Presumably only one of them is compiled, selected by a conditional
   that is not visible here -- confirm.  The table is not constant:
   make_charset also assigns rep_bytes_by_first_byte[id] at run time. */
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
126 Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* GC mark method for char-byte-tables (registered in the
   DEFINE_LRECORD_IMPLEMENTATION for "char-byte-table").  Calls MARKOBJ
   on each of the 256 slots of the table's property array.  The value
   returned is on a line not visible here. */
152 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
154 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
157 for (i = 0; i < 256; i++)
159 markobj (cte->property[i]);
/* Equality method for char-byte-tables.  Walks all 256 slots of OBJ1
   and OBJ2 in parallel.  Where a slot of OBJ1 holds a nested
   char-byte-table, the matching slot of OBJ2 is tested for being one
   as well and the pair is compared recursively; all other slots are
   compared with internal_equal.  DEPTH + 1 is passed down in both
   cases.  The statements that produce the result are on lines not
   visible here. */
165 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
167 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
168 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
171 for (i = 0; i < 256; i++)
172 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
174 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
176 if (!char_byte_table_equal (cte1->property[i],
177 cte2->property[i], depth + 1))
184 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-tables: hashes the 256-element property
   array with internal_array_hash, passing DEPTH through unchanged. */
190 char_byte_table_hash (Lisp_Object obj, int depth)
192 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
194 return internal_array_hash (cte->property, 256, depth);
/* Layout description and lrecord registration for "char-byte-table".
   The description lists 256 Lisp_Object slots starting at the
   `property' member.  The registration wires up the mark, equal and
   hash functions defined above and uses internal_object_printer for
   printing. */
197 static const struct lrecord_description char_byte_table_description[] = {
198 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
202 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
203 mark_char_byte_table,
204 internal_object_printer,
205 0, char_byte_table_equal,
206 char_byte_table_hash,
207 char_byte_table_description,
208 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table, set each of its 256 slots to
   INITVAL, and wrap it in a Lisp_Object with XSETCHAR_BYTE_TABLE.
   The return statement is not visible here; callers use the result as
   the new table object. */
211 make_char_byte_table (Lisp_Object initval)
215 struct Lisp_Char_Byte_Table *cte =
216 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
217 &lrecord_char_byte_table);
219 for (i = 0; i < 256; i++)
220 cte->property[i] = initval;
222 XSETCHAR_BYTE_TABLE (obj, cte);
/* Make a copy of the char-byte-table ENTRY.  A fresh table is
   allocated and the 256 slots are copied across.  A slot that holds a
   nested char-byte-table is copied recursively, so sub-tables are not
   shared; any other slot value is stored as is. */
227 copy_char_byte_table (Lisp_Object entry)
229 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
232 struct Lisp_Char_Byte_Table *ctenew =
233 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
234 &lrecord_char_byte_table);
236 for (i = 0; i < 256; i++)
238 Lisp_Object new = cte->property[i];
239 if (CHAR_BYTE_TABLE_P (new))
240 ctenew->property[i] = copy_char_byte_table (new);
242 ctenew->property[i] = new;
245 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* GC mark method for char-code-tables (registered in the
   DEFINE_LRECORD_IMPLEMENTATION for "char-code-table").  Only the
   fetch of the underlying struct is visible here; what is marked or
   returned is on lines not shown. */
251 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
253 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality method for char-code-tables: two tables are equal when
   their `table' members (char-byte-tables) compare equal under
   char_byte_table_equal, called with DEPTH + 1. */
259 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
261 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
262 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
264 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash method for char-code-tables.  The hash of a char-code-table is
   the hash of its `table' member, computed with DEPTH + 1.  That
   member is a char-byte-table (see make_char_code_table), so it must
   be hashed with char_byte_table_hash.  Calling char_code_table_hash
   on it, as this function used to, applied XCHAR_CODE_TABLE to an
   object of the wrong type and recursed without end. */
268 char_code_table_hash (Lisp_Object obj, int depth)
270 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
272 return char_byte_table_hash (cte->table, depth + 1);
/* Layout description and lrecord registration for "char-code-table".
   The description lists a single Lisp_Object at the `table' member.
   The registration wires up the mark, equal and hash functions defined
   above and uses internal_object_printer for printing. */
275 static const struct lrecord_description char_code_table_description[] = {
276 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
280 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
281 mark_char_code_table,
282 internal_object_printer,
283 0, char_code_table_equal,
284 char_code_table_hash,
285 char_code_table_description,
286 struct Lisp_Char_Code_Table);
/* Allocate a new char-code-table whose `table' member is a fresh
   char-byte-table with every slot set to INITVAL, and wrap it in a
   Lisp_Object with XSETCHAR_CODE_TABLE.  The return statement is not
   visible here. */
289 make_char_code_table (Lisp_Object initval)
292 struct Lisp_Char_Code_Table *cte =
293 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
294 &lrecord_char_code_table);
296 cte->table = make_char_byte_table (initval);
298 XSETCHAR_CODE_TABLE (obj, cte);
/* Make a copy of the char-code-table ENTRY: a new record is allocated
   and its `table' member is set to a copy of ENTRY's table made with
   copy_char_byte_table. */
303 copy_char_code_table (Lisp_Object entry)
305 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
307 struct Lisp_Char_Code_Table *ctenew =
308 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
309 &lrecord_char_code_table);
311 ctenew->table = copy_char_byte_table (cte->table);
312 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-code-table
   TABLE.  The character code is treated as four bytes, most
   significant first (code >> 24, >> 16, >> 8, then the low byte), and
   each byte indexes one level of nested char-byte-tables.  When the
   slot fetched at a level is itself a char-byte-table the walk
   descends into it.  The branch taken when it is not a table is on
   lines not visible here (presumably the slot value is returned for
   the whole range -- confirm).  At the last level the slot for the low
   byte is returned. */
318 get_char_code_table (Emchar ch, Lisp_Object table)
320 unsigned int code = ch;
321 struct Lisp_Char_Byte_Table* cpt
322 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
323 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
325 if (CHAR_BYTE_TABLE_P (ret))
326 cpt = XCHAR_BYTE_TABLE (ret);
330 ret = cpt->property [(unsigned char) (code >> 16)];
331 if (CHAR_BYTE_TABLE_P (ret))
332 cpt = XCHAR_BYTE_TABLE (ret);
336 ret = cpt->property [(unsigned char) (code >> 8)];
337 if (CHAR_BYTE_TABLE_P (ret))
338 cpt = XCHAR_BYTE_TABLE (ret);
342 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-code-table TABLE.  The
   code is split into four bytes, most significant first, one per level
   of nested char-byte-tables.  While the slot at a level already holds
   a char-byte-table, the walk descends and finally writes VALUE into
   the leaf slot.  When a slot holds a non-table value that is not EQ
   to VALUE, the missing lower levels are created with
   make_char_byte_table (ret), i.e. pre-filled with the old slot value
   so every other code in that range keeps its previous value.  VALUE
   is then written at the leaf and the new chain is linked into the
   parent slot.  Every visible creating branch is guarded by
   !EQ (ret, value), so no tables are allocated when the range already
   maps to VALUE. */
346 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
348 unsigned int code = ch;
349 struct Lisp_Char_Byte_Table* cpt1
350 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
351 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
353 if (CHAR_BYTE_TABLE_P (ret))
355 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
357 ret = cpt2->property[(unsigned char)(code >> 16)];
358 if (CHAR_BYTE_TABLE_P (ret))
360 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
362 ret = cpt3->property[(unsigned char)(code >> 8)];
363 if (CHAR_BYTE_TABLE_P (ret))
365 struct Lisp_Char_Byte_Table* cpt4
366 = XCHAR_BYTE_TABLE (ret);
368 cpt4->property[(unsigned char)code] = value;
370 else if (!EQ (ret, value))
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
378 else if (!EQ (ret, value))
380 Lisp_Object cpt3 = make_char_byte_table (ret);
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
386 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
389 else if (!EQ (ret, value))
391 Lisp_Object cpt2 = make_char_byte_table (ret);
392 Lisp_Object cpt3 = make_char_byte_table (ret);
393 Lisp_Object cpt4 = make_char_byte_table (ret);
395 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
396 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
397 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
398 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
/* Char-code-tables consulted by the functions in this section (all
   are read with get_char_code_table):
     Vcharacter_attribute_table   - per-character attribute alists
                                    (char-attribute-alist,
                                    get/put-char-attribute);
     Vcharacter_composition_table - walked by get-composite-char and
                                    filled by the ->decomposition
                                    branch of put-char-attribute;
     Vcharacter_variant_table     - read by char-variants and extended
                                    by the ->ucs branch of
                                    put-char-attribute.
   The Q... objects are compared with EQ in to_char_code and
   put-char-attribute. */
403 Lisp_Object Vcharacter_attribute_table;
404 Lisp_Object Vcharacter_composition_table;
405 Lisp_Object Vcharacter_variant_table;
407 Lisp_Object Q_decomposition;
410 Lisp_Object Qisolated;
411 Lisp_Object Qinitial;
414 Lisp_Object Qvertical;
415 Lisp_Object QnoBreak;
416 Lisp_Object Qfraction;
/* Convert V, one element of a composition/decomposition list, into a
   character code.  V is compared with EQ against a series of tag
   symbols (compat, isolated, initial, medial, final, vertical,
   noBreak, fraction, super, sub, circle, square, wide, narrow, small,
   font).  The code produced for each tag, and the handling that
   precedes the first `else if', are on lines not visible here.  If
   nothing matches, an error is signalled with ERR_MSG and ERR_ARG. */
427 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
433 else if (EQ (v, Qcompat))
435 else if (EQ (v, Qisolated))
437 else if (EQ (v, Qinitial))
439 else if (EQ (v, Qmedial))
441 else if (EQ (v, Qfinal))
443 else if (EQ (v, Qvertical))
445 else if (EQ (v, QnoBreak))
447 else if (EQ (v, Qfraction))
449 else if (EQ (v, Qsuper))
451 else if (EQ (v, Qsub))
453 else if (EQ (v, Qcircle))
455 else if (EQ (v, Qsquare))
457 else if (EQ (v, Qwide))
459 else if (EQ (v, Qnarrow))
461 else if (EQ (v, Qsmall))
463 else if (EQ (v, Qfont))
466 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char'.  Starting from
   Vcharacter_composition_table, each element of LIST is converted
   with to_char_code and looked up in the current table with
   get_char_code_table.  The tests on the result (char-code-table or
   not, more list elements or not) decide whether to continue, return,
   or signal one of the two errors below; the loop structure and the
   return statements are on lines not visible here. */
469 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
470 Return character corresponding with list.
474 Lisp_Object table = Vcharacter_composition_table;
475 Lisp_Object rest = list;
479 Lisp_Object v = Fcar (rest);
481 Emchar c = to_char_code (v, "Invalid value for composition", list);
483 ret = get_char_code_table (c, table);
488 if (!CHAR_CODE_TABLE_P (ret))
493 else if (!CONSP (rest))
495 else if (CHAR_CODE_TABLE_P (ret))
498 signal_simple_error ("Invalid table is found with", list);
500 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants'.  Checks that CHARACTER is a
   character, looks it up in Vcharacter_variant_table and returns a
   copy (Fcopy_list) of the stored list, so the caller does not receive
   the list held in the table itself. */
503 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
504 Return variants of CHARACTER.
508 CHECK_CHAR (character);
509 return Fcopy_list (get_char_code_table (XCHAR (character),
510 Vcharacter_variant_table));
/* Lisp primitive `char-attribute-alist'.  Checks that CHARACTER is a
   character, looks it up in Vcharacter_attribute_table and returns a
   copy of the stored alist made with Fcopy_alist. */
513 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
514 Return the alist of attributes of CHARACTER.
518 CHECK_CHAR (character);
519 return Fcopy_alist (get_char_code_table (XCHAR (character),
520 Vcharacter_attribute_table));
/* Lisp primitive `get-char-attribute'.  Fetches CHARACTER's attribute
   alist from Vcharacter_attribute_table and returns the cdr of the
   entry whose key is ATTRIBUTE (Fassq / Fcdr).  Before that, ATTRIBUTE
   is passed to Ffind_charset; what happens when it names a charset is
   on lines not visible here. */
523 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
524 Return the value of CHARACTER's ATTRIBUTE.
526 (character, attribute))
531 CHECK_CHAR (character);
532 ret = get_char_code_table (XCHAR (character),
533 Vcharacter_attribute_table);
537 if (!NILP (ccs = Ffind_charset (attribute)))
540 return Fcdr (Fassq (attribute, ret));
/* Record (ATTRIBUTE . VALUE) in CHARACTER's attribute alist.  The
   current alist is read from Vcharacter_attribute_table and searched
   with Fassq.  Depending on a test not visible here, a new pair is
   consed onto the front of the alist, or, when the existing cell's
   cdr is not EQ to VALUE, the cell is updated in place with Fsetcdr.
   The alist is written back with put_char_code_table. */
544 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
547 Emchar char_code = XCHAR (character);
549 = get_char_code_table (char_code, Vcharacter_attribute_table);
552 cell = Fassq (attribute, ret);
556 ret = Fcons (Fcons (attribute, value), ret);
558 else if (!EQ (Fcdr (cell), value))
560 Fsetcdr (cell, value);
562 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute'.  Besides storing the attribute
   through put_char_attribute (last line), three kinds of ATTRIBUTE
   have visible extra handling:

   - ATTRIBUTE names a charset (Ffind_charset): VALUE is a list of
     code-point integers.  The charset's decoding table is a vector,
     nested one level per list element.  The old code point of
     CHARACTER, obtained with Fget_char_attribute, is followed through
     the vectors and the final slot is reset to Qnil.  The new code
     point is then followed, creating vectors of length ccs_len where
     needed, and CHARACTER is stored in the final slot.  Each element
     must lie in 0..255, and XCHARSET_BYTE_OFFSET is subtracted before
     indexing.  For charsets whose GRAPHIC is 1 VALUE is copied first
     and its elements are rewritten with Fsetcar (the adjustment
     applied is on a line not visible here).

   - ATTRIBUTE is Q_decomposition: the elements of VALUE are converted
     with to_char_code and followed through
     Vcharacter_composition_table, creating intermediate
     char-code-tables as needed, and CHARACTER is stored for the last
     element.

   - ATTRIBUTE is Q_ucs: CHARACTER is added to the variant list of the
     code in Vcharacter_variant_table unless Fmemq already finds it.

   NOTE(review): in the visible lines the index into XVECTOR_DATA for
   the OLD code point is not range-checked, unlike the new one --
   confirm against the hidden lines.  Loop headers and several
   conditions are not visible here. */
566 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
567 Store CHARACTER's ATTRIBUTE with VALUE.
569 (character, attribute, value))
573 CHECK_CHAR (character);
574 ccs = Ffind_charset (attribute);
578 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
583 /* ad-hoc method for `ascii' */
584 if ((XCHARSET_CHARS (ccs) == 94) &&
585 (XCHARSET_BYTE_OFFSET (ccs) != 33))
586 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
588 ccs_len = XCHARSET_CHARS (ccs);
591 signal_simple_error ("Invalid value for coded-charset",
595 rest = Fget_char_attribute (character, attribute);
602 Lisp_Object ei = Fcar (rest);
604 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
605 nv = XVECTOR_DATA(v)[i];
612 XVECTOR_DATA(v)[i] = Qnil;
613 v = XCHARSET_DECODING_TABLE (ccs);
618 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
622 value = Fcopy_list (value);
627 Lisp_Object ei = Fcar (rest);
630 signal_simple_error ("Invalid value for coded-charset", value);
632 if ((i < 0) || (255 < i))
633 signal_simple_error ("Invalid value for coded-charset", value);
634 if (XCHARSET_GRAPHIC (ccs) == 1)
637 Fsetcar (rest, make_int (i));
639 i -= XCHARSET_BYTE_OFFSET (ccs);
640 nv = XVECTOR_DATA(v)[i];
646 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
653 XVECTOR_DATA(v)[i] = character;
655 else if (EQ (attribute, Q_decomposition))
657 Lisp_Object rest = value;
658 Lisp_Object table = Vcharacter_composition_table;
661 signal_simple_error ("Invalid value for ->decomposition",
666 Lisp_Object v = Fcar (rest);
669 = to_char_code (v, "Invalid value for ->decomposition", value);
674 put_char_code_table (c, character, table);
679 ntable = get_char_code_table (c, table);
680 if (!CHAR_CODE_TABLE_P (ntable))
682 ntable = make_char_code_table (Qnil);
683 put_char_code_table (c, ntable, table);
689 else if (EQ (attribute, Q_ucs))
695 signal_simple_error ("Invalid value for ->ucs", value);
699 ret = get_char_code_table (c, Vcharacter_variant_table);
700 if (NILP (Fmemq (character, ret)))
702 put_char_code_table (c, Fcons (character, ret),
703 Vcharacter_variant_table);
706 return put_char_attribute (character, attribute, value);
/* Lisp primitive `define-char'.  ATTRIBUTES is an alist.  The
   character to define is determined in this order, as far as the
   visible lines show:
     1. from the Qucs entry of ATTRIBUTES, when present (it must be an
        integer, otherwise "Invalid argument" is signalled);
     2. otherwise from the first entry whose key names a charset
        (Ffind_charset), by calling Fmake_char;
     3. otherwise from the Q_ucs entry, as integer code + 0x100000.
   Every (key . value) cell of ATTRIBUTES is then stored with
   Fput_char_attribute, and the final statement reads back the
   character's entry in Vcharacter_attribute_table.  Loop headers and
   some conditions are on lines not visible here. */
711 EXFUN (Fmake_char, 3);
713 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
714 Store character's ATTRIBUTES.
718 Lisp_Object rest = attributes;
719 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
720 Lisp_Object character;
726 Lisp_Object cell = Fcar (rest);
730 signal_simple_error ("Invalid argument", attributes);
731 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
734 character = Fmake_char (ccs, Fcar (cell),
736 goto setup_attributes;
740 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
743 signal_simple_error ("Invalid argument", attributes);
745 character = make_char (XINT (code) + 0x100000);
746 goto setup_attributes;
750 else if (!INTP (code))
751 signal_simple_error ("Invalid argument", attributes);
753 character = make_char (XINT (code));
759 Lisp_Object cell = Fcar (rest);
762 signal_simple_error ("Invalid argument", attributes);
763 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
767 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Miscellaneous file-scope state for charset objects.  Usage visible
   in this section of the file:
     Vcharset_hash_table is where make_charset registers each charset
       under its name (Fputhash);
     the next_allocated_*leading_byte counters are advanced by
       get_unallocated_leading_byte.
   Several declarations below are continuation lines of multi-line
   declarations whose other lines are not visible here. */
770 Lisp_Object Vutf_2000_version;
774 int leading_code_private_11;
777 Lisp_Object Qcharsetp;
779 /* Qdoc_string, Qdimension, Qchars defined in general.c */
780 Lisp_Object Qregistry, Qfinal, Qgraphic;
781 Lisp_Object Qdirection;
782 Lisp_Object Qreverse_direction_charset;
783 Lisp_Object Qleading_byte;
784 Lisp_Object Qshort_name, Qlong_name;
800 Qjapanese_jisx0208_1978,
803 Qjapanese_jisx0208_1990,
813 Qvietnamese_viscii_lower,
814 Qvietnamese_viscii_upper,
843 Lisp_Object Ql2r, Qr2l;
845 Lisp_Object Vcharset_hash_table;
848 static Charset_ID next_allocated_leading_byte;
850 static Charset_ID next_allocated_1_byte_leading_byte;
851 static Charset_ID next_allocated_2_byte_leading_byte;
854 /* Composite characters are characters constructed by overstriking two
855 or more regular characters.
857 1) The old Mule implementation involves storing composite characters
858 in a buffer as a tag followed by all of the actual characters
859 used to make up the composite character. I think this is a bad
860 idea; it greatly complicates code that wants to handle strings
861 one character at a time because it has to deal with the possibility
862 of great big ungainly characters. It's much more reasonable to
863 simply store an index into a table of composite characters.
865 2) The current implementation only allows for 16,384 separate
866 composite characters over the lifetime of the XEmacs process.
867 This could become a potential problem if the user
868 edited lots of different files that use composite characters.
869 Due to FSF bogosity, increasing the number of allowable
870 composite characters under Mule would decrease the number
871 of possible faces that can exist. Mule already has shrunk
872 this to 2048, and further shrinkage would become uncomfortable.
873 No such problems exist in XEmacs.
875 Composite characters could be represented as 0x80 C1 C2 C3,
876 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
877 for slightly under 2^20 (one million) composite characters
878 over the XEmacs process lifetime, and you only need to
879 increase the size of a Mule character from 19 to 21 bits.
880 Or you could use 0x80 C1 C2 C3 C4, allowing for about
881 85 million (slightly over 2^26) composite characters. */
884 /************************************************************************/
885 /* Basic Emchar functions */
886 /************************************************************************/
/* Two encoders are visible in the body below; presumably they belong
   to alternative preprocessor branches that are not shown.
   1. A range-driven encoder using the lead-byte / continuation-byte
      bit patterns of UTF-8 in its original form of up to six bytes:
      codes up to 0x7ff take 2 bytes (lead 0xc0), up to 0xffff 3 bytes
      (0xe0), up to 0x1fffff 4 bytes (0xf0), up to 0x3ffffff 5 bytes
      (0xf8), and anything larger 6 bytes (0xfc).  Every continuation
      byte carries 6 bits OR-ed with 0x80.
   2. The leading-byte encoder: BREAKUP_CHAR splits C into charset and
      position codes, and a private leading byte is preceded by its
      prefix byte.  The remaining stores are on lines not visible
      here. */
888 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
889 string in STR. Returns the number of bytes stored.
890 Do not call this directly. Use the macro set_charptr_emchar() instead.
894 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
909 else if ( c <= 0x7ff )
911 *p++ = (c >> 6) | 0xc0;
912 *p++ = (c & 0x3f) | 0x80;
914 else if ( c <= 0xffff )
916 *p++ = (c >> 12) | 0xe0;
917 *p++ = ((c >> 6) & 0x3f) | 0x80;
918 *p++ = (c & 0x3f) | 0x80;
920 else if ( c <= 0x1fffff )
922 *p++ = (c >> 18) | 0xf0;
923 *p++ = ((c >> 12) & 0x3f) | 0x80;
924 *p++ = ((c >> 6) & 0x3f) | 0x80;
925 *p++ = (c & 0x3f) | 0x80;
927 else if ( c <= 0x3ffffff )
929 *p++ = (c >> 24) | 0xf8;
930 *p++ = ((c >> 18) & 0x3f) | 0x80;
931 *p++ = ((c >> 12) & 0x3f) | 0x80;
932 *p++ = ((c >> 6) & 0x3f) | 0x80;
933 *p++ = (c & 0x3f) | 0x80;
937 *p++ = (c >> 30) | 0xfc;
938 *p++ = ((c >> 24) & 0x3f) | 0x80;
939 *p++ = ((c >> 18) & 0x3f) | 0x80;
940 *p++ = ((c >> 12) & 0x3f) | 0x80;
941 *p++ = ((c >> 6) & 0x3f) | 0x80;
942 *p++ = (c & 0x3f) | 0x80;
945 BREAKUP_CHAR (c, charset, c1, c2);
946 lb = CHAR_LEADING_BYTE (c);
947 if (LEADING_BYTE_PRIVATE_P (lb))
948 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
950 if (EQ (charset, Vcharset_control_1))
959 /* Return the first character from a Mule-encoded string in STR,
960 assuming it's non-ASCII. Do not call this directly.
961 Use the macro charptr_emchar() instead. */
/* Decoder counterpart of non_ascii_set_charptr_emchar.  Two decoders
   are visible; presumably they belong to alternative preprocessor
   branches that are not shown.
   1. The first byte is classified by thresholds 0xf8 / 0xf0 / 0xe0 /
      0xc0 (what each branch sets is not visible), then a loop runs
      `len' times, each time shifting the accumulated code left by 6
      and OR-ing in the low 6 bits of a byte.
   2. The leading-byte decoder: a LEADING_BYTE_CONTROL_1 first byte
      yields the next byte minus 0x20.  Otherwise a prefix byte is
      handled on lines not shown, the charset is found with
      CHARSET_BY_LEADING_BYTE, dimension-2 charsets get extra handling
      that is also not shown, and MAKE_CHAR builds the result. */
964 non_ascii_charptr_emchar (CONST Bufbyte *str)
977 else if ( b >= 0xf8 )
982 else if ( b >= 0xf0 )
987 else if ( b >= 0xe0 )
992 else if ( b >= 0xc0 )
1002 for( ; len > 0; len-- )
1005 ch = ( ch << 6 ) | ( b & 0x3f );
1009 Bufbyte i0 = *str, i1, i2 = 0;
1010 Lisp_Object charset;
1012 if (i0 == LEADING_BYTE_CONTROL_1)
1013 return (Emchar) (*++str - 0x20);
1015 if (LEADING_BYTE_PREFIX_P (i0))
1020 charset = CHARSET_BY_LEADING_BYTE (i0);
1021 if (XCHARSET_DIMENSION (charset) == 2)
1024 return MAKE_CHAR (charset, i1, i2);
1028 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1029 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test on the three bit fields of CH (CHAR_FIELD1..3).
   Two cases are visible; the test that separates them is not.
   - One-dimensional case: field 2 must fall in the official or the
     private range.  If field 3 is 0x20 or 0x7F, the charset is looked
     up and the result is whether it has 96 characters per dimension.
   - Two-dimensional case: field 1 must fall in the official or the
     private range; fields 2 and 3 below 0x20 are tested and handled
     on lines not shown.  With ENABLE_COMPOSITE_CHARS, composite
     characters are checked against
     Vcomposite_char_char2string_hash_table.  Otherwise, unless the
     0x20/0x7F test at line 1089 short-circuits, the charset is looked
     up (official or private offset depending on field 1) and the
     result is again whether it has 96 characters.
   The early return statements are on lines not visible here. */
1033 non_ascii_valid_char_p (Emchar ch)
1037 /* Must have only lowest 19 bits set */
1041 f1 = CHAR_FIELD1 (ch);
1042 f2 = CHAR_FIELD2 (ch);
1043 f3 = CHAR_FIELD3 (ch);
1047 Lisp_Object charset;
1049 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1050 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1051 f2 > MAX_CHAR_FIELD2_PRIVATE)
1056 if (f3 != 0x20 && f3 != 0x7F)
1060 NOTE: This takes advantage of the fact that
1061 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1062 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1064 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1065 return (XCHARSET_CHARS (charset) == 96);
1069 Lisp_Object charset;
1071 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1072 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1073 f1 > MAX_CHAR_FIELD1_PRIVATE)
1075 if (f2 < 0x20 || f3 < 0x20)
1078 #ifdef ENABLE_COMPOSITE_CHARS
1079 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1081 if (UNBOUNDP (Fgethash (make_int (ch),
1082 Vcomposite_char_char2string_hash_table,
1087 #endif /* ENABLE_COMPOSITE_CHARS */
1089 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1092 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1094 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1097 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1099 return (XCHARSET_CHARS (charset) == 96);
1105 /************************************************************************/
1106 /* Basic string functions */
1107 /************************************************************************/
1109 /* Copy the character pointed to by PTR into STR, assuming it's
1110 non-ASCII. Do not call this directly. Use the macro
1111 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR.  The switch is keyed
   on REP_BYTES_BY_FIRST_BYTE of the first destination byte (stored by
   a statement not visible here) and falls through on purpose, so a
   character of N bytes copies N - 1 further bytes.  Returns the
   number of bytes written, computed as strptr + 1 - str. */
1114 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1116 Bufbyte *strptr = str;
1118 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1120 /* Notice fallthrough. */
1122 case 6: *++strptr = *ptr++;
1123 case 5: *++strptr = *ptr++;
1125 case 4: *++strptr = *ptr++;
1126 case 3: *++strptr = *ptr++;
1127 case 2: *++strptr = *ptr;
1132 return strptr + 1 - str;
1136 /************************************************************************/
1137 /* streams of Emchars */
1138 /************************************************************************/
1140 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1141 The functions below are not meant to be called directly; use
1142 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is the first
   byte, which the caller has already read; it is stored in a local
   buffer and REP_BYTES_BY_FIRST_BYTE (ch) selects how many further
   bytes are fetched with Lstream_getc (the cases fall through).  The
   assembled bytes are decoded with charptr_emchar.  What happens when
   Lstream_getc fails is on lines not visible here. */
1145 Lstream_get_emchar_1 (Lstream *stream, int ch)
1147 Bufbyte str[MAX_EMCHAR_LEN];
1148 Bufbyte *strptr = str;
1150 str[0] = (Bufbyte) ch;
1151 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1153 /* Notice fallthrough. */
1156 ch = Lstream_getc (stream);
1158 *++strptr = (Bufbyte) ch;
1160 ch = Lstream_getc (stream);
1162 *++strptr = (Bufbyte) ch;
1165 ch = Lstream_getc (stream);
1167 *++strptr = (Bufbyte) ch;
1169 ch = Lstream_getc (stream);
1171 *++strptr = (Bufbyte) ch;
1173 ch = Lstream_getc (stream);
1175 *++strptr = (Bufbyte) ch;
1180 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar and hand the resulting bytes to Lstream_write,
   whose result is returned. */
1184 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1186 Bufbyte str[MAX_EMCHAR_LEN];
1187 Bytecount len = set_charptr_emchar (str, ch);
1188 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar and pass the resulting bytes to
   Lstream_unread. */
1192 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1194 Bufbyte str[MAX_EMCHAR_LEN];
1195 Bytecount len = set_charptr_emchar (str, ch);
1196 Lstream_unread (stream, str, len);
1200 /************************************************************************/
1201 /* charset object */
1202 /************************************************************************/
/* GC mark method for charset objects.  Calls MARKOBJ on the
   short_name, long_name, doc_string, registry, ccl_program and
   decoding_table members.  The value returned is on a line not
   visible here. */
1205 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1207 struct Lisp_Charset *cs = XCHARSET (obj);
1209 markobj (cs->short_name);
1210 markobj (cs->long_name);
1211 markobj (cs->doc_string);
1212 markobj (cs->registry);
1213 markobj (cs->ccl_program);
1215 markobj (cs->decoding_table);
/* Print method for charset objects.  Output has the form
     #<charset NAME SHORT-NAME LONG-NAME DOC TYPE DIR cols=N gN
       final='C' reg=REGISTRY 0xUID>
   The error at the top is raised under a condition not visible here
   (presumably when readable printing is requested -- confirm).
   NOTE(review): both sprintf calls write into `buf', whose
   declaration is not visible; its size should be checked against the
   longest possible formatted text.  The fallback string of the TYPE
   conditional chain is also on a line not shown. */
1221 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1223 struct Lisp_Charset *cs = XCHARSET (obj);
1227 error ("printing unreadable object #<charset %s 0x%x>",
1228 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1231 write_c_string ("#<charset ", printcharfun);
1232 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1233 write_c_string (" ", printcharfun);
1234 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1235 write_c_string (" ", printcharfun);
1236 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1237 write_c_string (" ", printcharfun);
1238 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1239 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1240 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1241 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1242 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1244 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1245 CHARSET_COLUMNS (cs),
1246 CHARSET_GRAPHIC (cs),
1247 CHARSET_FINAL (cs));
1248 write_c_string (buf, printcharfun);
1249 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1250 sprintf (buf, " 0x%x>", cs->header.uid);
1251 write_c_string (buf, printcharfun);
/* Layout description and lrecord registration for "charset".  The
   description lists 7 consecutive Lisp_Object members starting at
   `name' and 2 starting at `decoding_table'.  The registration
   supplies mark_charset and print_charset; the three slots after the
   print method are 0. */
1254 static const struct lrecord_description charset_description[] = {
1255 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1257 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1262 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1263 mark_charset, print_charset, 0, 0, 0,
1264 charset_description,
1265 struct Lisp_Charset);
/* make_charset allocates a charset record, fills in its fields from
   the arguments, and registers the new object.  Steps visible below:
   - The plain fields are copied from the arguments; the CCL program
     and the reverse-direction charset start out as Qnil.
   - TYPE determines DIMENSION (1 or 2) and CHARS per dimension (94,
     96, 128 or 256).
   - REP_BYTES is 1 for LEADING_BYTE_ASCII, otherwise DIMENSION + 1 or
     DIMENSION + 2; the test choosing between the last two is not
     visible here.
   - The object is entered into charset_by_attributes (2-D or 3-D
     form), charset_by_leading_byte[id - MIN_LEADING_BYTE],
     rep_bytes_by_first_byte[id] and Vcharset_hash_table (keyed by
     NAME).  The asserts require the attribute and leading-byte slots
     to be empty beforehand.  Some of the guarding conditions are on
     lines not shown.
   NOTE(review): the DECODING_TABLE argument is accepted, but in the
   visible lines the decoding-table slot is unconditionally set to
   Qnil -- confirm whether ignoring the argument is intended.
   NOTE(review): `reg' is stored as the registry; its parameter
   declaration is on a line not visible here. */
1267 /* Make a new charset. */
1270 make_charset (Charset_ID id, Lisp_Object name,
1271 unsigned char type, unsigned char columns, unsigned char graphic,
1272 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1273 Lisp_Object long_name, Lisp_Object doc,
1275 Lisp_Object decoding_table,
1276 Emchar ucs_min, Emchar ucs_max,
1277 Emchar code_offset, unsigned char byte_offset)
1280 struct Lisp_Charset *cs =
1281 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1282 XSETCHARSET (obj, cs);
1284 CHARSET_ID (cs) = id;
1285 CHARSET_NAME (cs) = name;
1286 CHARSET_SHORT_NAME (cs) = short_name;
1287 CHARSET_LONG_NAME (cs) = long_name;
1288 CHARSET_DIRECTION (cs) = direction;
1289 CHARSET_TYPE (cs) = type;
1290 CHARSET_COLUMNS (cs) = columns;
1291 CHARSET_GRAPHIC (cs) = graphic;
1292 CHARSET_FINAL (cs) = final;
1293 CHARSET_DOC_STRING (cs) = doc;
1294 CHARSET_REGISTRY (cs) = reg;
1295 CHARSET_CCL_PROGRAM (cs) = Qnil;
1296 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1298 CHARSET_DECODING_TABLE(cs) = Qnil;
1299 CHARSET_UCS_MIN(cs) = ucs_min;
1300 CHARSET_UCS_MAX(cs) = ucs_max;
1301 CHARSET_CODE_OFFSET(cs) = code_offset;
1302 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1305 switch (CHARSET_TYPE (cs))
1307 case CHARSET_TYPE_94:
1308 CHARSET_DIMENSION (cs) = 1;
1309 CHARSET_CHARS (cs) = 94;
1311 case CHARSET_TYPE_96:
1312 CHARSET_DIMENSION (cs) = 1;
1313 CHARSET_CHARS (cs) = 96;
1315 case CHARSET_TYPE_94X94:
1316 CHARSET_DIMENSION (cs) = 2;
1317 CHARSET_CHARS (cs) = 94;
1319 case CHARSET_TYPE_96X96:
1320 CHARSET_DIMENSION (cs) = 2;
1321 CHARSET_CHARS (cs) = 96;
1324 case CHARSET_TYPE_128:
1325 CHARSET_DIMENSION (cs) = 1;
1326 CHARSET_CHARS (cs) = 128;
1328 case CHARSET_TYPE_128X128:
1329 CHARSET_DIMENSION (cs) = 2;
1330 CHARSET_CHARS (cs) = 128;
1332 case CHARSET_TYPE_256:
1333 CHARSET_DIMENSION (cs) = 1;
1334 CHARSET_CHARS (cs) = 256;
1336 case CHARSET_TYPE_256X256:
1337 CHARSET_DIMENSION (cs) = 2;
1338 CHARSET_CHARS (cs) = 256;
1344 if (id == LEADING_BYTE_ASCII)
1345 CHARSET_REP_BYTES (cs) = 1;
1347 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1349 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1354 /* some charsets do not have final characters. This includes
1355 ASCII, Control-1, Composite, and the two faux private
1358 if (code_offset == 0)
1360 assert (NILP (charset_by_attributes[type][final]));
1361 charset_by_attributes[type][final] = obj;
1364 assert (NILP (charset_by_attributes[type][final][direction]));
1365 charset_by_attributes[type][final][direction] = obj;
1369 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1370 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1373 /* official leading byte */
1374 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1377 /* Some charsets are "faux" and don't have names or really exist at
1378 all except in the leading-byte table. */
1380 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of the
   given DIMENSION.  Three counters are visible: a single
   next_allocated_leading_byte, and separate 1-byte and 2-byte
   counters.  Which are used under which build or dimension is decided
   on lines not shown.  Each counter is first compared against its
   MAX_LEADING_BYTE_PRIVATE* limit and then post-incremented.  The
   error "No more character sets free for this dimension" is signalled
   with DIMENSION as its datum; its exact trigger is not visible
   here. */
1385 get_unallocated_leading_byte (int dimension)
1390 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1393 lb = next_allocated_leading_byte++;
1397 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1400 lb = next_allocated_1_byte_leading_byte++;
1404 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1407 lb = next_allocated_2_byte_leading_byte++;
1413 ("No more character sets free for this dimension",
1414 make_int (dimension));
/* Compute the code point of character CH in CHARSET from the
   charset's numeric ranges, as a list of integers with one element
   per dimension.

   - If CH lies in [UCS_MIN, UCS_MAX] of CHARSET, then
     d = ch - UCS_MIN + CODE_OFFSET is written as DIMENSION digits in
     base CHARS, most significant first, and BYTE_OFFSET is added to
     each digit.  Dimensions 1 to 4 are handled; the final `else'
     treats anything else as dimension 4.

   - Otherwise, for charsets whose CODE_OFFSET is 0, CH is tested
     against the block assigned to the charset's final byte:
     MIN_CHAR_94 + (final - '0') * 94 for 94-sets, and likewise for
     96, 94x94 and 96x96 sets.  The position bytes are offset by 33
     for 94-character sets and by 32 for 96-character sets.

   The upper-bound halves of the range tests and the value returned
   when nothing matches are on lines not visible here. */
1421 range_charset_code_point (Lisp_Object charset, Emchar ch)
1425 if ((XCHARSET_UCS_MIN (charset) <= ch)
1426 && (ch <= XCHARSET_UCS_MAX (charset)))
1428 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1430 if (XCHARSET_DIMENSION (charset) == 1)
1431 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1432 else if (XCHARSET_DIMENSION (charset) == 2)
1433 return list2 (make_int (d / XCHARSET_CHARS (charset)
1434 + XCHARSET_BYTE_OFFSET (charset)),
1435 make_int (d % XCHARSET_CHARS (charset)
1436 + XCHARSET_BYTE_OFFSET (charset)));
1437 else if (XCHARSET_DIMENSION (charset) == 3)
1438 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1439 * XCHARSET_CHARS (charset))
1440 + XCHARSET_BYTE_OFFSET (charset)),
1441 make_int (d / XCHARSET_CHARS (charset)
1442 % XCHARSET_CHARS (charset)
1443 + XCHARSET_BYTE_OFFSET (charset)),
1444 make_int (d % XCHARSET_CHARS (charset)
1445 + XCHARSET_BYTE_OFFSET (charset)));
1446 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1447 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1448 * XCHARSET_CHARS (charset)
1449 * XCHARSET_CHARS (charset))
1450 + XCHARSET_BYTE_OFFSET (charset)),
1451 make_int (d / (XCHARSET_CHARS (charset)
1452 * XCHARSET_CHARS (charset))
1453 % XCHARSET_CHARS (charset)
1454 + XCHARSET_BYTE_OFFSET (charset)),
1455 make_int (d / XCHARSET_CHARS (charset)
1456 % XCHARSET_CHARS (charset)
1457 + XCHARSET_BYTE_OFFSET (charset)),
1458 make_int (d % XCHARSET_CHARS (charset)
1459 + XCHARSET_BYTE_OFFSET (charset)));
1461 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1463 if (XCHARSET_DIMENSION (charset) == 1)
1465 if (XCHARSET_CHARS (charset) == 94)
1467 if (((d = ch - (MIN_CHAR_94
1468 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1470 return list1 (make_int (d + 33));
1472 else if (XCHARSET_CHARS (charset) == 96)
1474 if (((d = ch - (MIN_CHAR_96
1475 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1477 return list1 (make_int (d + 32));
1482 else if (XCHARSET_DIMENSION (charset) == 2)
1484 if (XCHARSET_CHARS (charset) == 94)
1486 if (((d = ch - (MIN_CHAR_94x94
1487 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1490 return list2 (make_int ((d / 94) + 33),
1491 make_int (d % 94 + 33));
1493 else if (XCHARSET_CHARS (charset) == 96)
1495 if (((d = ch - (MIN_CHAR_96x96
1496 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1499 return list2 (make_int ((d / 96) + 32),
1500 make_int (d % 96 + 32));
/* Decompose the built-in character C into a list whose first element
   is a charset and whose remaining elements are position codes.

   Characters below MIN_CHAR_OBS_94x94 are mapped to ascii, control-1,
   latin-iso8859-1, greek-iso8859-7, cyrillic-iso8859-5,
   hebrew-iso8859-8 or thai-tis620, or to ucs-bmp as the two octets
   (C >> 8, C & 0xff).  Above that, the successive ranges bounded by
   MAX_CHAR_OBS_94x94, MAX_CHAR_DAIKANWA, MAX_CHAR_94, MAX_CHAR_96,
   MAX_CHAR_94x94 and MAX_CHAR_96x96 are tested in that order; for the
   94/96-based ranges the charset is looked up with
   CHARSET_BY_ATTRIBUTES using a final byte derived from C's offset
   within the range.  */
1508 split_builtin_char (Emchar c)
1510 if (c < MIN_CHAR_OBS_94x94)
1512 if (c <= MAX_CHAR_BASIC_LATIN)
1514 return list2 (Vcharset_ascii, make_int (c));
1518 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1522 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1524 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1526 return list2 (Vcharset_greek_iso8859_7,
1527 make_int (c - MIN_CHAR_GREEK + 0x20));
1529 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1531 return list2 (Vcharset_cyrillic_iso8859_5,
1532 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1534 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1536 return list2 (Vcharset_hebrew_iso8859_8,
1537 make_int (c - MIN_CHAR_HEBREW + 0x20));
1539 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1541 return list2 (Vcharset_thai_tis620,
1542 make_int (c - MIN_CHAR_THAI + 0x20));
1546 return list3 (Vcharset_ucs_bmp,
1547 make_int (c >> 8), make_int (c & 0xff));
1550 else if (c <= MAX_CHAR_OBS_94x94)
1552 return list3 (CHARSET_BY_ATTRIBUTES
1553 (CHARSET_TYPE_94X94,
1554 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1555 CHARSET_LEFT_TO_RIGHT),
1556 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1557 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1559 else if (c <= MAX_CHAR_DAIKANWA)
1561 return list3 (Vcharset_ideograph_daikanwa,
1562 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1563 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
1565 else if (c <= MAX_CHAR_94)
1567 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1568 ((c - MIN_CHAR_94) / 94) + '0',
1569 CHARSET_LEFT_TO_RIGHT),
1570 make_int (((c - MIN_CHAR_94) % 94) + 33));
1572 else if (c <= MAX_CHAR_96)
1574 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1575 ((c - MIN_CHAR_96) / 96) + '0',
1576 CHARSET_LEFT_TO_RIGHT),
1577 make_int (((c - MIN_CHAR_96) % 96) + 32));
1579 else if (c <= MAX_CHAR_94x94)
1581 return list3 (CHARSET_BY_ATTRIBUTES
1582 (CHARSET_TYPE_94X94,
1583 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1584 CHARSET_LEFT_TO_RIGHT),
1585 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1586 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1588 else if (c <= MAX_CHAR_96x96)
1590 return list3 (CHARSET_BY_ATTRIBUTES
1591 (CHARSET_TYPE_96X96,
1592 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1593 CHARSET_LEFT_TO_RIGHT),
1594 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1595 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
/* Return the code point of character CH in CHARSET.  The entry for CH
   in Vcharacter_attribute_table is consulted first: if it is non-nil
   and Fassq finds CHARSET in it, the cdr of that association is
   returned.  Otherwise the result is computed by
   range_charset_code_point.  */
1604 charset_code_point (Lisp_Object charset, Emchar ch)
1606 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1608 if (!EQ (cdef, Qnil))
1610 Lisp_Object field = Fassq (charset, cdef);
1612 if (!EQ (field, Qnil))
1613 return Fcdr (field);
1615 return range_charset_code_point (charset, ch);
/* Lisp-visible as `default-coded-charset-priority-list'; set to Qnil
   and registered with DEFVAR_LISP in vars_of_mule_charset.  */
1618 Lisp_Object Vdefault_coded_charset_priority_list;
1622 /************************************************************************/
1623 /* Basic charset Lisp functions */
1624 /************************************************************************/
/* Lisp predicate: yields Qt when OBJECT satisfies CHARSETP, else Qnil.  */
1626 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1627 Return non-nil if OBJECT is a charset.
1631 return CHARSETP (object) ? Qt : Qnil;
/* Look up a charset without signalling for unknown names.  A charset
   object is returned as is; any other argument is checked to be a
   symbol and looked up in Vcharset_hash_table with Qnil as the
   default value.  */
1634 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1635 Retrieve the charset of the given name.
1636 If CHARSET-OR-NAME is a charset object, it is simply returned.
1637 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1638 nil is returned. Otherwise the associated charset object is returned.
1642 if (CHARSETP (charset_or_name))
1643 return charset_or_name;
1645 CHECK_SYMBOL (charset_or_name);
1646 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Strict variant of Ffind_charset, which it calls on NAME; signals
   "No such charset" with NAME as the error datum.
   NOTE(review): the condition guarding the signal is not visible in
   this excerpt; per the doc string it fires when no charset is
   found -- confirm.  */
1649 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1650 Retrieve the charset of the given name.
1651 Same as `find-charset' except an error is signalled if there is no such
1652 charset instead of returning nil.
1656 Lisp_Object charset = Ffind_charset (name);
1659 signal_simple_error ("No such charset", name);
1663 /* We store the charsets in hash tables with the names as the key and the
1664 actual charset object as the value. Occasionally we need to use them
1665 in a list format. These routines provide us with that. */
/* Closure handed to add_charset_to_list_mapper through elisp_maphash;
   charset_list points at the list being accumulated.  */
1666 struct charset_list_closure
1668 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  CHARSET_LIST_CLOSURE
   is cast to a struct charset_list_closure *; the name of VALUE
   (a charset) is consed onto the front of the list it points to.
   KEY is not used in the visible code.  */
1672 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1673 void *charset_list_closure)
1675 /* This function can GC */
1676 struct charset_list_closure *chcl =
1677 (struct charset_list_closure*) charset_list_closure;
1678 Lisp_Object *charset_list = chcl->charset_list;
1680 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Build the result by mapping add_charset_to_list_mapper over
   Vcharset_hash_table; the accumulator is protected with GCPRO1
   before the traversal starts.
   NOTE(review): Fdefine_charset_alias stores aliases in the same hash
   table, so a charset that has aliases appears to contribute its name
   once per key -- confirm whether duplicates are intended.  */
1684 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1685 Return a list of the names of all defined charsets.
1689 Lisp_Object charset_list = Qnil;
1690 struct gcpro gcpro1;
1691 struct charset_list_closure charset_list_closure;
1693 GCPRO1 (charset_list);
1694 charset_list_closure.charset_list = &charset_list;
1695 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1696 &charset_list_closure);
1699 return charset_list;
/* Resolve CHARSET through Fget_charset and return its XCHARSET_NAME.  */
1702 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1703 Return the name of the given charset.
1707 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp entry point that defines a new charset from NAME, DOC-STRING
   and the property list PROPS.

   NAME must be a symbol that does not already name a charset and
   DOC-STRING must be nil or a string.  PROPS is walked with
   EXTERNAL_PROPERTY_LIST_LOOP; every keyword is matched by a single
   if / else-if chain ending in the "Unrecognized property" error.
   The `long-name' test has to be part of that chain: written as a
   separate `if', it lets a `short-name' entry fall through to the
   final error branch after it has already been handled.

   After the loop the final byte is validated, the CHARSET_TYPE_* value
   is derived from the chars count, the (type, final) slot is checked
   to be free for both directions, a leading byte is obtained from
   get_unallocated_leading_byte, and defaults are filled in for
   DOC-STRING, registry, short name (the symbol's name), long name
   (the doc string) and columns (the dimension).  The charset is then
   created with make_charset, and a supplied ccl-program is stored in
   it afterwards.  */
1710 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1711 Define a new character set.
1712 This function is for use with Mule support.
1713 NAME is a symbol, the name by which the character set is normally referred.
1714 DOC-STRING is a string describing the character set.
1715 PROPS is a property list, describing the specific nature of the
1716 character set. Recognized properties are:
1718 'short-name Short version of the charset name (ex: Latin-1)
1719 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1720 'registry A regular expression matching the font registry field for
1722 'dimension Number of octets used to index a character in this charset.
1723 Either 1 or 2. Defaults to 1.
1724 'columns Number of columns used to display a character in this charset.
1725 Only used in TTY mode. (Under X, the actual width of a
1726 character can be derived from the font used to display the
1727 characters.) If unspecified, defaults to the dimension
1728 (this is almost always the correct value).
1729 'chars Number of characters in each dimension (94 or 96).
1730 Defaults to 94. Note that if the dimension is 2, the
1731 character set thus described is 94x94 or 96x96.
1732 'final Final byte of ISO 2022 escape sequence. Must be
1733 supplied. Each combination of (DIMENSION, CHARS) defines a
1734 separate namespace for final bytes. Note that ISO
1735 2022 restricts the final byte to the range
1736 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1737 dimension == 2. Note also that final bytes in the range
1738 0x30 - 0x3F are reserved for user-defined (not official)
1740 'graphic 0 (use left half of font on output) or 1 (use right half
1741 of font on output). Defaults to 0. For example, for
1742 a font whose registry is ISO8859-1, the left half
1743 (octets 0x20 - 0x7F) is the `ascii' character set, while
1744 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1745 character set. With 'graphic set to 0, the octets
1746 will have their high bit cleared; with it set to 1,
1747 the octets will have their high bit set.
1748 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1750 'ccl-program A compiled CCL program used to convert a character in
1751 this charset into an index into the font. This is in
1752 addition to the 'graphic property. The CCL program
1753 is passed the octets of the character, with the high
1754 bit cleared and set depending upon whether the value
1755 of the 'graphic property is 0 or 1.
1757 (name, doc_string, props))
1759 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1760 int direction = CHARSET_LEFT_TO_RIGHT;
1762 Lisp_Object registry = Qnil;
1763 Lisp_Object charset;
1764 Lisp_Object rest, keyword, value;
1765 Lisp_Object ccl_program = Qnil;
1766 Lisp_Object short_name = Qnil, long_name = Qnil;
1767 int byte_offset = -1;
1769 CHECK_SYMBOL (name);
1770 if (!NILP (doc_string))
1771 CHECK_STRING (doc_string);
1773 charset = Ffind_charset (name);
1774 if (!NILP (charset))
1775 signal_simple_error ("Cannot redefine existing charset", name);
1777 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1779 if (EQ (keyword, Qshort_name))
1781 CHECK_STRING (value);
1785 else if (EQ (keyword, Qlong_name))
1787 CHECK_STRING (value);
1791 else if (EQ (keyword, Qdimension))
1794 dimension = XINT (value);
1795 if (dimension < 1 || dimension > 2)
1796 signal_simple_error ("Invalid value for 'dimension", value);
1799 else if (EQ (keyword, Qchars))
1802 chars = XINT (value);
1803 if (chars != 94 && chars != 96)
1804 signal_simple_error ("Invalid value for 'chars", value);
1807 else if (EQ (keyword, Qcolumns))
1810 columns = XINT (value);
1811 if (columns != 1 && columns != 2)
1812 signal_simple_error ("Invalid value for 'columns", value);
1815 else if (EQ (keyword, Qgraphic))
1818 graphic = XINT (value);
1820 if (graphic < 0 || graphic > 2)
1822 if (graphic < 0 || graphic > 1)
1824 signal_simple_error ("Invalid value for 'graphic", value);
1827 else if (EQ (keyword, Qregistry))
1829 CHECK_STRING (value);
1833 else if (EQ (keyword, Qdirection))
1835 if (EQ (value, Ql2r))
1836 direction = CHARSET_LEFT_TO_RIGHT;
1837 else if (EQ (value, Qr2l))
1838 direction = CHARSET_RIGHT_TO_LEFT;
1840 signal_simple_error ("Invalid value for 'direction", value);
1843 else if (EQ (keyword, Qfinal))
1845 CHECK_CHAR_COERCE_INT (value);
1846 final = XCHAR (value);
1847 if (final < '0' || final > '~')
1848 signal_simple_error ("Invalid value for 'final", value);
1851 else if (EQ (keyword, Qccl_program))
1853 CHECK_VECTOR (value);
1854 ccl_program = value;
1858 signal_simple_error ("Unrecognized property", keyword);
1862 error ("'final must be specified");
1863 if (dimension == 2 && final > 0x5F)
1865 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1869 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1871 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1873 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1874 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1876 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1878 id = get_unallocated_leading_byte (dimension);
1880 if (NILP (doc_string))
1881 doc_string = build_string ("");
1883 if (NILP (registry))
1884 registry = build_string ("");
1886 if (NILP (short_name))
1887 XSETSTRING (short_name, XSYMBOL (name)->name);
1889 if (NILP (long_name))
1890 long_name = doc_string;
1893 columns = dimension;
1895 if (byte_offset < 0)
1899 else if (chars == 96)
1905 charset = make_charset (id, name, type, columns, graphic,
1906 final, direction, short_name, long_name,
1907 doc_string, registry,
1908 Qnil, 0, 0, 0, byte_offset);
1909 if (!NILP (ccl_program))
1910 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Create NEW-NAME as a copy of CHARSET with the direction flipped.
   Signals if CHARSET already has a reverse-direction charset or if
   NEW-NAME already names a charset.  The new charset gets a fresh
   leading byte from get_unallocated_leading_byte and otherwise reuses
   CHARSET's type, columns, graphic, final, names, doc string and
   registry (and, in the argument list shown, its decoding table, UCS
   range and code/byte offsets).  Afterwards the two charsets are
   linked to each other through their reverse-direction-charset
   slots.  */
1914 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1916 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1917 NEW-NAME is the name of the new charset. Return the new charset.
1919 (charset, new_name))
1921 Lisp_Object new_charset = Qnil;
1922 int id, dimension, columns, graphic, final;
1923 int direction, type;
1924 Lisp_Object registry, doc_string, short_name, long_name;
1925 struct Lisp_Charset *cs;
1927 charset = Fget_charset (charset);
1928 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1929 signal_simple_error ("Charset already has reverse-direction charset",
1932 CHECK_SYMBOL (new_name);
1933 if (!NILP (Ffind_charset (new_name)))
1934 signal_simple_error ("Cannot redefine existing charset", new_name);
1936 cs = XCHARSET (charset);
1938 type = CHARSET_TYPE (cs);
1939 columns = CHARSET_COLUMNS (cs);
1940 dimension = CHARSET_DIMENSION (cs);
1941 id = get_unallocated_leading_byte (dimension);
1943 graphic = CHARSET_GRAPHIC (cs);
1944 final = CHARSET_FINAL (cs);
1945 direction = CHARSET_RIGHT_TO_LEFT;
1946 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1947 direction = CHARSET_LEFT_TO_RIGHT;
1948 doc_string = CHARSET_DOC_STRING (cs);
1949 short_name = CHARSET_SHORT_NAME (cs);
1950 long_name = CHARSET_LONG_NAME (cs);
1951 registry = CHARSET_REGISTRY (cs);
1953 new_charset = make_charset (id, new_name, type, columns,
1954 graphic, final, direction, short_name, long_name,
1955 doc_string, registry,
1957 CHARSET_DECODING_TABLE(cs),
1958 CHARSET_UCS_MIN(cs),
1959 CHARSET_UCS_MAX(cs),
1960 CHARSET_CODE_OFFSET(cs),
1961 CHARSET_BYTE_OFFSET(cs)
1967 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1968 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Check that ALIAS is a symbol, resolve CHARSET with Fget_charset, and
   store the charset under ALIAS in Vcharset_hash_table.  The value of
   the Fputhash call is returned.  */
1973 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1974 Define symbol ALIAS as an alias for CHARSET.
1978 CHECK_SYMBOL (alias);
1979 charset = Fget_charset (charset);
1980 return Fputhash (alias, charset, Vcharset_hash_table);
1983 /* #### Reverse direction charsets not yet implemented. */
/* Resolve CHARSET with Fget_charset and return the contents of its
   reverse-direction-charset slot.  */
1985 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1987 Return the reverse-direction charset parallel to CHARSET, if any.
1988 This is the charset with the same properties (in particular, the same
1989 dimension, number of characters per dimension, and final byte) as
1990 CHARSET but whose characters are displayed in the opposite direction.
1994 charset = Fget_charset (charset);
1995 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Validate DIMENSION (1 or 2), CHARS (94 or 96), FINAL ('0' .. '~',
   and no greater than 0x5F when DIMENSION is 2) and DIRECTION (l2r,
   r2l or nil), derive the CHARSET_TYPE_* value from them, and look the
   charset up with CHARSET_BY_ATTRIBUTES.  When a charset is found its
   name is returned via XCHARSET_NAME.  */
1999 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2000 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2001 If DIRECTION is omitted, both directions will be checked (left-to-right
2002 will be returned if character sets exist for both directions).
2004 (dimension, chars, final, direction))
2006 int dm, ch, fi, di = -1;
2008 Lisp_Object obj = Qnil;
2010 CHECK_INT (dimension);
2011 dm = XINT (dimension);
2012 if (dm < 1 || dm > 2)
2013 signal_simple_error ("Invalid value for DIMENSION", dimension);
2017 if (ch != 94 && ch != 96)
2018 signal_simple_error ("Invalid value for CHARS", chars);
2020 CHECK_CHAR_COERCE_INT (final);
2022 if (fi < '0' || fi > '~')
2023 signal_simple_error ("Invalid value for FINAL", final);
2025 if (EQ (direction, Ql2r))
2026 di = CHARSET_LEFT_TO_RIGHT;
2027 else if (EQ (direction, Qr2l))
2028 di = CHARSET_RIGHT_TO_LEFT;
2029 else if (!NILP (direction))
2030 signal_simple_error ("Invalid value for DIRECTION", direction);
2032 if (dm == 2 && fi > 0x5F)
2034 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2037 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2039 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2043 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2045 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2048 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2051 return XCHARSET_NAME (obj);
/* Resolve CHARSET with Fget_charset and return XCHARSET_SHORT_NAME.  */
2055 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2056 Return short name of CHARSET.
2060 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return XCHARSET_LONG_NAME.  */
2063 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2064 Return long name of CHARSET.
2068 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its doc string
   (XCHARSET_DOC_STRING).  */
2071 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2072 Return description of CHARSET.
2076 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return XCHARSET_DIMENSION as a
   Lisp integer.  */
2079 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2080 Return dimension of CHARSET.
2084 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property accessor.  CHARSET is resolved with Fget_charset
   and PROP, which must be a symbol, is compared with EQ against the
   known property symbols in turn.  Integer-valued properties are
   boxed with make_int, `final' with make_char, and the direction is
   reported as l2r or r2l.  A PROP that matches none of them signals
   "Unrecognized charset property name".  */
2087 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2088 Return property PROP of CHARSET.
2089 Recognized properties are those listed in `make-charset', as well as
2090 'name and 'doc-string.
2094 struct Lisp_Charset *cs;
2096 charset = Fget_charset (charset);
2097 cs = XCHARSET (charset);
2099 CHECK_SYMBOL (prop);
2100 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2101 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2102 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2103 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2104 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2105 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2106 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2107 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2108 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2109 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2110 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2111 if (EQ (prop, Qdirection))
2112 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2113 if (EQ (prop, Qreverse_direction_charset))
2115 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2119 return XCHARSET_NAME (obj);
2121 signal_simple_error ("Unrecognized charset property name", prop);
2122 return Qnil; /* not reached */
/* Resolve CHARSET with Fget_charset and return its leading byte
   (XCHARSET_LEADING_BYTE) as a Lisp integer.  */
2125 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2126 Return charset identification number of CHARSET.
2130 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2133 /* #### We need to figure out which properties we really want to
/* Resolve CHARSET with Fget_charset, check that CCL-PROGRAM is a
   vector, and store it in the charset's ccl-program slot.  */
2136 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2137 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2139 (charset, ccl_program))
2141 charset = Fget_charset (charset);
2142 CHECK_VECTOR (ccl_program);
2143 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look CHARSET up in
   the device's charset_font_cache and, when an entry exists (the
   lookup does not yield Qunbound), empty that hash table with
   Fclrhash.  */
2148 invalidate_charset_font_caches (Lisp_Object charset)
2150 /* Invalidate font cache entries for charset on all devices. */
2151 Lisp_Object devcons, concons, hash_table;
2152 DEVICE_LOOP_NO_BREAK (devcons, concons)
2154 struct device *d = XDEVICE (XCAR (devcons));
2155 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2156 if (!UNBOUNDP (hash_table))
2157 Fclrhash (hash_table);
/* Resolve CHARSET with Fget_charset, check that REGISTRY is a string
   and store it in the charset.  Because cached fonts were chosen under
   the old registry, the per-device font caches for the charset are
   cleared and face_property_was_changed is called for the font
   property of Vdefault_face.  */
2161 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2162 Set the 'registry property of CHARSET to REGISTRY.
2164 (charset, registry))
2166 charset = Fget_charset (charset);
2167 CHECK_STRING (registry);
2168 XCHARSET_REGISTRY (charset) = registry;
2169 invalidate_charset_font_caches (charset);
2170 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Resolve CHARSET with Fget_charset and return its decoding table
   (XCHARSET_DECODING_TABLE).  */
2175 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2176 Return mapping-table of CHARSET.
2180 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Install TABLE as the decoding table of CHARSET.  TABLE must be nil
   or a vector; anything else signals a wrong-type-argument error.
   A vector longer than the charset allows (CHARSET_CHARS, or
   128 - CHARSET_BYTE_OFFSET for a 94-char set whose byte offset is
   not 33) is rejected with args_out_of_range.  The table is then
   walked according to CHARSET_DIMENSION, and put_char_attribute is
   called with code points formed by adding CHARSET_BYTE_OFFSET to the
   vector indices.  In the nested-vector case a row that is longer
   than CHARSET_CHARS restores the previous table before signalling.
   NOTE(review): the datum reported by the first length check is
   CHARSET_CHARS even though the limit compared against is ccs_len.  */
2183 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2184 Set mapping-table of CHARSET to TABLE.
2188 struct Lisp_Charset *cs;
2189 Lisp_Object old_table;
2192 charset = Fget_charset (charset);
2193 cs = XCHARSET (charset);
2195 if (EQ (table, Qnil))
2197 CHARSET_DECODING_TABLE(cs) = table;
2200 else if (VECTORP (table))
2204 /* ad-hoc method for `ascii' */
2205 if ((CHARSET_CHARS (cs) == 94) &&
2206 (CHARSET_BYTE_OFFSET (cs) != 33))
2207 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2209 ccs_len = CHARSET_CHARS (cs);
2211 if (XVECTOR_LENGTH (table) > ccs_len)
2212 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2213 old_table = CHARSET_DECODING_TABLE(cs);
2214 CHARSET_DECODING_TABLE(cs) = table;
2217 signal_error (Qwrong_type_argument,
2218 list2 (build_translated_string ("vector-or-nil-p"),
2220 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2222 switch (CHARSET_DIMENSION (cs))
2225 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2227 Lisp_Object c = XVECTOR_DATA(table)[i];
2232 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2236 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2238 Lisp_Object v = XVECTOR_DATA(table)[i];
2244 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2246 CHARSET_DECODING_TABLE(cs) = old_table;
2247 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2249 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2251 Lisp_Object c = XVECTOR_DATA(v)[j];
2254 put_char_attribute (c, charset,
2257 (i + CHARSET_BYTE_OFFSET (cs)),
2259 (j + CHARSET_BYTE_OFFSET (cs))));
2263 put_char_attribute (v, charset,
2265 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2274 /************************************************************************/
2275 /* Lisp primitives for working with characters */
2276 /************************************************************************/
/* Build a character from CHARSET and one or two octets.  The valid
   octet range is chosen per charset: 0-127 for ascii, 0-31 for
   control-1, 0-255 when CHARSET_CHARS is 256, 33-126 for 94-char sets
   and 32-127 otherwise.  An octet outside that range signals
   args_out_of_range_3.  For a charset of dimension one the character
   is built from ARG1 alone and a second octet is an error; otherwise
   ARG2 is masked with 0x7f, range-checked and combined with ARG1 by
   MAKE_CHAR.  */
2278 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2279 Make a character from CHARSET and octets ARG1 and ARG2.
2280 ARG2 is required only for characters from two-dimensional charsets.
2281 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2282 character s with caron.
2284 (charset, arg1, arg2))
2286 struct Lisp_Charset *cs;
2288 int lowlim, highlim;
2290 charset = Fget_charset (charset);
2291 cs = XCHARSET (charset);
2293 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2294 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2296 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2298 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2299 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2302 /* It is useful (and safe, according to Olivier Galibert) to strip
2303 the 8th bit off ARG1 and ARG2 because it allows programmers to
2304 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2305 Latin 2 code of the character. */
2313 if (a1 < lowlim || a1 > highlim)
2314 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2316 if (CHARSET_DIMENSION (cs) == 1)
2320 ("Charset is of dimension one; second octet must be nil", arg2);
2321 return make_char (MAKE_CHAR (charset, a1, 0));
2330 a2 = XINT (arg2) & 0x7f;
2332 if (a2 < lowlim || a2 > highlim)
2333 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2335 return make_char (MAKE_CHAR (charset, a1, a2));
/* Coerce CH to a character and return the name of the charset that
   CHAR_CHARSET reports for it.  */
2338 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2339 Return the character set of char CH.
2343 CHECK_CHAR_COERCE_INT (ch);
2345 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Return a list made of the charset name followed by the position
   codes of CHARACTER.  Two implementations are visible below: one
   takes the list produced by SPLIT_CHAR and replaces its first
   element by the charset's name, the other uses BREAKUP_CHAR and
   builds a two- or three-element list depending on the charset's
   dimension.
   NOTE(review): they are presumably alternatives selected by a
   preprocessor conditional that is not shown here -- confirm.  */
2348 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2349 Return list of charset and one or two position-codes of CHAR.
2355 Lisp_Object charset;
2357 CHECK_CHAR_COERCE_INT (character);
2358 ret = SPLIT_CHAR (XCHAR (character));
2359 charset = Fcar (ret);
2360 if (CHARSETP (charset))
2361 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2365 /* This function can GC */
2366 struct gcpro gcpro1, gcpro2;
2367 Lisp_Object charset = Qnil;
2368 Lisp_Object rc = Qnil;
2371 GCPRO2 (charset, rc);
2372 CHECK_CHAR_COERCE_INT (character);
2374 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2376 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2378 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2382 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2390 #ifdef ENABLE_COMPOSITE_CHARS
2391 /************************************************************************/
2392 /* composite character functions */
2393 /************************************************************************/
/* Return the composite character registered for the LEN bytes at STR,
   allocating a new one when needed.  The bytes are turned into a Lisp
   string and looked up in Vcomposite_char_string2char_hash_table.
   A new character is made from Vcharset_composite at position
   (composite_char_row_next, composite_char_col_next) and entered into
   both the char-to-string and string-to-char hash tables; the column
   counter is then advanced, wrapping to 32 and bumping the row counter
   when it reaches 128.  Signals "No more composite chars available"
   once the row counter has reached 128.  */
2396 lookup_composite_char (Bufbyte *str, int len)
2398 Lisp_Object lispstr = make_string (str, len);
2399 Lisp_Object ch = Fgethash (lispstr,
2400 Vcomposite_char_string2char_hash_table,
2406 if (composite_char_row_next >= 128)
2407 signal_simple_error ("No more composite chars available", lispstr);
2408 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2409 composite_char_col_next);
2410 Fputhash (make_char (emch), lispstr,
2411 Vcomposite_char_char2string_hash_table);
2412 Fputhash (lispstr, make_char (emch),
2413 Vcomposite_char_string2char_hash_table);
2414 composite_char_col_next++;
2415 if (composite_char_col_next >= 128)
2417 composite_char_col_next = 32;
2418 composite_char_row_next++;
/* Look up the string stored for composite character CH in
   Vcomposite_char_char2string_hash_table; asserts that the lookup did
   not come back unbound.  */
2427 composite_char_string (Emchar ch)
2429 Lisp_Object str = Fgethash (make_char (ch),
2430 Vcomposite_char_char2string_hash_table,
2432 assert (!UNBOUNDP (str));
/* Check that STRING is a string and return, as a Lisp character, the
   result of lookup_composite_char on its data and length.  */
2436 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2437 Convert a string into a single composite character.
2438 The character is the result of overstriking all the characters in
2443 CHECK_STRING (string);
2444 return make_char (lookup_composite_char (XSTRING_DATA (string),
2445 XSTRING_LENGTH (string)));
/* Signal "Must be composite char" unless the leading byte of the
   character is LEADING_BYTE_COMPOSITE; otherwise return the result of
   composite_char_string for it.  */
2448 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2449 Return a string of the characters comprising a composite character.
2457 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2458 signal_simple_error ("Must be composite char", ch);
2459 return composite_char_string (emch);
2461 #endif /* ENABLE_COMPOSITE_CHARS */
2464 /************************************************************************/
2465 /* initialization */
2466 /************************************************************************/
/* Register this file's Lisp primitives with DEFSUBR and intern the
   symbols it uses with defsymbol: the charset property keywords, the
   direction names, the predefined charset names and the
   character-attribute tags.  Qfinal serves both as the `final'
   property keyword and as one of the decomposition tags, so it is
   defined once, together with the property keywords, rather than a
   second time among the tags.  */
2469 syms_of_mule_charset (void)
2471 DEFSUBR (Fcharsetp);
2472 DEFSUBR (Ffind_charset);
2473 DEFSUBR (Fget_charset);
2474 DEFSUBR (Fcharset_list);
2475 DEFSUBR (Fcharset_name);
2476 DEFSUBR (Fmake_charset);
2477 DEFSUBR (Fmake_reverse_direction_charset);
2478 /* DEFSUBR (Freverse_direction_charset); */
2479 DEFSUBR (Fdefine_charset_alias);
2480 DEFSUBR (Fcharset_from_attributes);
2481 DEFSUBR (Fcharset_short_name);
2482 DEFSUBR (Fcharset_long_name);
2483 DEFSUBR (Fcharset_description);
2484 DEFSUBR (Fcharset_dimension);
2485 DEFSUBR (Fcharset_property);
2486 DEFSUBR (Fcharset_id);
2487 DEFSUBR (Fset_charset_ccl_program);
2488 DEFSUBR (Fset_charset_registry);
2490 DEFSUBR (Fchar_attribute_alist);
2491 DEFSUBR (Fget_char_attribute);
2492 DEFSUBR (Fput_char_attribute);
2493 DEFSUBR (Fdefine_char);
2494 DEFSUBR (Fchar_variants);
2495 DEFSUBR (Fget_composite_char);
2496 DEFSUBR (Fcharset_mapping_table);
2497 DEFSUBR (Fset_charset_mapping_table);
2500 DEFSUBR (Fmake_char);
2501 DEFSUBR (Fchar_charset);
2502 DEFSUBR (Fsplit_char);
2504 #ifdef ENABLE_COMPOSITE_CHARS
2505 DEFSUBR (Fmake_composite_char);
2506 DEFSUBR (Fcomposite_char_string);
2509 defsymbol (&Qcharsetp, "charsetp");
2510 defsymbol (&Qregistry, "registry");
2511 defsymbol (&Qfinal, "final");
2512 defsymbol (&Qgraphic, "graphic");
2513 defsymbol (&Qdirection, "direction");
2514 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2515 defsymbol (&Qshort_name, "short-name");
2516 defsymbol (&Qlong_name, "long-name");
2518 defsymbol (&Ql2r, "l2r");
2519 defsymbol (&Qr2l, "r2l");
2521 /* Charsets, compatible with FSF 20.3
2522 Naming convention is Script-Charset[-Edition] */
2523 defsymbol (&Qascii, "ascii");
2524 defsymbol (&Qcontrol_1, "control-1");
2525 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2526 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2527 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2528 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2529 defsymbol (&Qthai_tis620, "thai-tis620");
2530 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2531 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2532 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2533 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2534 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2535 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2536 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2537 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2538 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2539 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2540 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2541 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2542 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2543 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2544 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2546 defsymbol (&Q_ucs, "->ucs");
2547 defsymbol (&Q_decomposition, "->decomposition");
2548 defsymbol (&Qcompat, "compat");
2549 defsymbol (&Qisolated, "isolated");
2550 defsymbol (&Qinitial, "initial");
2551 defsymbol (&Qmedial, "medial");
2553 defsymbol (&Qvertical, "vertical");
2554 defsymbol (&QnoBreak, "noBreak");
2555 defsymbol (&Qfraction, "fraction");
2556 defsymbol (&Qsuper, "super");
2557 defsymbol (&Qsub, "sub");
2558 defsymbol (&Qcircle, "circle");
2559 defsymbol (&Qsquare, "square");
2560 defsymbol (&Qwide, "wide");
2561 defsymbol (&Qnarrow, "narrow");
2562 defsymbol (&Qsmall, "small");
2563 defsymbol (&Qfont, "font");
2564 defsymbol (&Qucs, "ucs");
2565 defsymbol (&Qucs_bmp, "ucs-bmp");
2566 defsymbol (&Qlatin_viscii, "latin-viscii");
2567 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2568 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2569 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2570 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2571 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2572 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2573 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2574 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2575 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2576 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2577 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2578 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2579 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2580 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2581 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2582 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2583 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2584 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2585 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2586 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2587 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2588 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2589 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2590 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2591 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2592 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2593 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2595 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2596 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2598 defsymbol (&Qcomposite, "composite");
/* Set up this file's tables and Lisp variables.  The lookup tables
   charset_by_leading_byte and charset_by_attributes are filled with
   Qnil, the leading-byte allocation counters are reset to their
   MIN_LEADING_BYTE_PRIVATE* starting values, and the Lisp variables
   leading-code-private-11, utf-2000-version and
   default-coded-charset-priority-list are defined.  The character
   attribute, composition and variant tables are staticpro'd and
   created with make_char_code_table.  leading_code_private_11 is
   assigned on both sides of its DEFVAR_INT.
   NOTE(review): two initialisation loops for charset_by_attributes
   (two- and three-level indexing) and two sets of allocation counters
   are visible; presumably alternatives under a preprocessor
   conditional that is not shown here -- confirm.  */
2602 vars_of_mule_charset (void)
2609 /* Table of charsets indexed by leading byte. */
2610 for (i = 0; i < countof (charset_by_leading_byte); i++)
2611 charset_by_leading_byte[i] = Qnil;
2614 /* Table of charsets indexed by type/final-byte. */
2615 for (i = 0; i < countof (charset_by_attributes); i++)
2616 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2617 charset_by_attributes[i][j] = Qnil;
2619 /* Table of charsets indexed by type/final-byte/direction. */
2620 for (i = 0; i < countof (charset_by_attributes); i++)
2621 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2622 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2623 charset_by_attributes[i][j][k] = Qnil;
2627 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2629 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2630 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2634 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2635 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2636 Leading-code of private TYPE9N charset of column-width 1.
2638 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2642 Vutf_2000_version = build_string("0.13 (Takaida)");
2643 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2644 Version number of UTF-2000.
2647 staticpro (&Vcharacter_attribute_table);
2648 Vcharacter_attribute_table = make_char_code_table (Qnil);
2650 staticpro (&Vcharacter_composition_table);
2651 Vcharacter_composition_table = make_char_code_table (Qnil);
2653 staticpro (&Vcharacter_variant_table);
2654 Vcharacter_variant_table = make_char_code_table (Qnil);
2656 Vdefault_coded_charset_priority_list = Qnil;
2657 DEFVAR_LISP ("default-coded-charset-priority-list",
2658 &Vdefault_coded_charset_priority_list /*
2659 Default order of preferred coded-character-sets.
2665 complex_vars_of_mule_charset (void)
2667 staticpro (&Vcharset_hash_table);
2668 Vcharset_hash_table =
2669 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2671 /* Predefined character sets. We store them into variables for
2676 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2677 CHARSET_TYPE_256X256, 1, 2, 0,
2678 CHARSET_LEFT_TO_RIGHT,
2679 build_string ("BMP"),
2680 build_string ("BMP"),
2681 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2682 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2683 Qnil, 0, 0xFFFF, 0, 0);
2685 # define MIN_CHAR_THAI 0
2686 # define MAX_CHAR_THAI 0
2687 # define MIN_CHAR_GREEK 0
2688 # define MAX_CHAR_GREEK 0
2689 # define MIN_CHAR_HEBREW 0
2690 # define MAX_CHAR_HEBREW 0
2691 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2692 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2693 # define MIN_CHAR_CYRILLIC 0
2694 # define MAX_CHAR_CYRILLIC 0
2697 make_charset (LEADING_BYTE_ASCII, Qascii,
2698 CHARSET_TYPE_94, 1, 0, 'B',
2699 CHARSET_LEFT_TO_RIGHT,
2700 build_string ("ASCII"),
2701 build_string ("ASCII)"),
2702 build_string ("ASCII (ISO646 IRV)"),
2703 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2704 Qnil, 0, 0x7F, 0, 0);
2705 Vcharset_control_1 =
2706 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2707 CHARSET_TYPE_94, 1, 1, 0,
2708 CHARSET_LEFT_TO_RIGHT,
2709 build_string ("C1"),
2710 build_string ("Control characters"),
2711 build_string ("Control characters 128-191"),
2713 Qnil, 0x80, 0x9F, 0, 0);
2714 Vcharset_latin_iso8859_1 =
2715 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2716 CHARSET_TYPE_96, 1, 1, 'A',
2717 CHARSET_LEFT_TO_RIGHT,
2718 build_string ("Latin-1"),
2719 build_string ("ISO8859-1 (Latin-1)"),
2720 build_string ("ISO8859-1 (Latin-1)"),
2721 build_string ("iso8859-1"),
2722 Qnil, 0xA0, 0xFF, 0, 32);
2723 Vcharset_latin_iso8859_2 =
2724 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2725 CHARSET_TYPE_96, 1, 1, 'B',
2726 CHARSET_LEFT_TO_RIGHT,
2727 build_string ("Latin-2"),
2728 build_string ("ISO8859-2 (Latin-2)"),
2729 build_string ("ISO8859-2 (Latin-2)"),
2730 build_string ("iso8859-2"),
2732 Vcharset_latin_iso8859_3 =
2733 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2734 CHARSET_TYPE_96, 1, 1, 'C',
2735 CHARSET_LEFT_TO_RIGHT,
2736 build_string ("Latin-3"),
2737 build_string ("ISO8859-3 (Latin-3)"),
2738 build_string ("ISO8859-3 (Latin-3)"),
2739 build_string ("iso8859-3"),
2741 Vcharset_latin_iso8859_4 =
2742 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2743 CHARSET_TYPE_96, 1, 1, 'D',
2744 CHARSET_LEFT_TO_RIGHT,
2745 build_string ("Latin-4"),
2746 build_string ("ISO8859-4 (Latin-4)"),
2747 build_string ("ISO8859-4 (Latin-4)"),
2748 build_string ("iso8859-4"),
2750 Vcharset_thai_tis620 =
2751 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2752 CHARSET_TYPE_96, 1, 1, 'T',
2753 CHARSET_LEFT_TO_RIGHT,
2754 build_string ("TIS620"),
2755 build_string ("TIS620 (Thai)"),
2756 build_string ("TIS620.2529 (Thai)"),
2757 build_string ("tis620"),
2758 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2759 Vcharset_greek_iso8859_7 =
2760 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2761 CHARSET_TYPE_96, 1, 1, 'F',
2762 CHARSET_LEFT_TO_RIGHT,
2763 build_string ("ISO8859-7"),
2764 build_string ("ISO8859-7 (Greek)"),
2765 build_string ("ISO8859-7 (Greek)"),
2766 build_string ("iso8859-7"),
2767 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2768 Vcharset_arabic_iso8859_6 =
2769 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2770 CHARSET_TYPE_96, 1, 1, 'G',
2771 CHARSET_RIGHT_TO_LEFT,
2772 build_string ("ISO8859-6"),
2773 build_string ("ISO8859-6 (Arabic)"),
2774 build_string ("ISO8859-6 (Arabic)"),
2775 build_string ("iso8859-6"),
2777 Vcharset_hebrew_iso8859_8 =
2778 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2779 CHARSET_TYPE_96, 1, 1, 'H',
2780 CHARSET_RIGHT_TO_LEFT,
2781 build_string ("ISO8859-8"),
2782 build_string ("ISO8859-8 (Hebrew)"),
2783 build_string ("ISO8859-8 (Hebrew)"),
2784 build_string ("iso8859-8"),
2785 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2786 Vcharset_katakana_jisx0201 =
2787 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2788 CHARSET_TYPE_94, 1, 1, 'I',
2789 CHARSET_LEFT_TO_RIGHT,
2790 build_string ("JISX0201 Kana"),
2791 build_string ("JISX0201.1976 (Japanese Kana)"),
2792 build_string ("JISX0201.1976 Japanese Kana"),
2793 build_string ("jisx0201\\.1976"),
2795 Vcharset_latin_jisx0201 =
2796 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2797 CHARSET_TYPE_94, 1, 0, 'J',
2798 CHARSET_LEFT_TO_RIGHT,
2799 build_string ("JISX0201 Roman"),
2800 build_string ("JISX0201.1976 (Japanese Roman)"),
2801 build_string ("JISX0201.1976 Japanese Roman"),
2802 build_string ("jisx0201\\.1976"),
2804 Vcharset_cyrillic_iso8859_5 =
2805 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2806 CHARSET_TYPE_96, 1, 1, 'L',
2807 CHARSET_LEFT_TO_RIGHT,
2808 build_string ("ISO8859-5"),
2809 build_string ("ISO8859-5 (Cyrillic)"),
2810 build_string ("ISO8859-5 (Cyrillic)"),
2811 build_string ("iso8859-5"),
2812 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2813 Vcharset_latin_iso8859_9 =
2814 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2815 CHARSET_TYPE_96, 1, 1, 'M',
2816 CHARSET_LEFT_TO_RIGHT,
2817 build_string ("Latin-5"),
2818 build_string ("ISO8859-9 (Latin-5)"),
2819 build_string ("ISO8859-9 (Latin-5)"),
2820 build_string ("iso8859-9"),
2822 Vcharset_japanese_jisx0208_1978 =
2823 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2824 CHARSET_TYPE_94X94, 2, 0, '@',
2825 CHARSET_LEFT_TO_RIGHT,
2826 build_string ("JIS X0208:1978"),
2827 build_string ("JIS X0208:1978 (Japanese)"),
2829 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2830 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2832 Vcharset_chinese_gb2312 =
2833 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2834 CHARSET_TYPE_94X94, 2, 0, 'A',
2835 CHARSET_LEFT_TO_RIGHT,
2836 build_string ("GB2312"),
2837 build_string ("GB2312)"),
2838 build_string ("GB2312 Chinese simplified"),
2839 build_string ("gb2312"),
2841 Vcharset_japanese_jisx0208 =
2842 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2843 CHARSET_TYPE_94X94, 2, 0, 'B',
2844 CHARSET_LEFT_TO_RIGHT,
2845 build_string ("JISX0208"),
2846 build_string ("JIS X0208:1983 (Japanese)"),
2847 build_string ("JIS X0208:1983 Japanese Kanji"),
2848 build_string ("jisx0208\\.1983"),
2850 Vcharset_japanese_jisx0208_1990 =
2851 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2852 Qjapanese_jisx0208_1990,
2853 CHARSET_TYPE_94X94, 2, 0, 0,
2854 CHARSET_LEFT_TO_RIGHT,
2855 build_string ("JISX0208-1990"),
2856 build_string ("JIS X0208:1990 (Japanese)"),
2857 build_string ("JIS X0208:1990 Japanese Kanji"),
2858 build_string ("jisx0208\\.1990"),
2860 MIN_CHAR_JIS_X0208_1990,
2861 MAX_CHAR_JIS_X0208_1990, 0, 33);
2862 Vcharset_korean_ksc5601 =
2863 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2864 CHARSET_TYPE_94X94, 2, 0, 'C',
2865 CHARSET_LEFT_TO_RIGHT,
2866 build_string ("KSC5601"),
2867 build_string ("KSC5601 (Korean"),
2868 build_string ("KSC5601 Korean Hangul and Hanja"),
2869 build_string ("ksc5601"),
2871 Vcharset_japanese_jisx0212 =
2872 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2873 CHARSET_TYPE_94X94, 2, 0, 'D',
2874 CHARSET_LEFT_TO_RIGHT,
2875 build_string ("JISX0212"),
2876 build_string ("JISX0212 (Japanese)"),
2877 build_string ("JISX0212 Japanese Supplement"),
2878 build_string ("jisx0212"),
2881 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2882 Vcharset_chinese_cns11643_1 =
2883 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2884 CHARSET_TYPE_94X94, 2, 0, 'G',
2885 CHARSET_LEFT_TO_RIGHT,
2886 build_string ("CNS11643-1"),
2887 build_string ("CNS11643-1 (Chinese traditional)"),
2889 ("CNS 11643 Plane 1 Chinese traditional"),
2890 build_string (CHINESE_CNS_PLANE_RE("1")),
2892 Vcharset_chinese_cns11643_2 =
2893 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2894 CHARSET_TYPE_94X94, 2, 0, 'H',
2895 CHARSET_LEFT_TO_RIGHT,
2896 build_string ("CNS11643-2"),
2897 build_string ("CNS11643-2 (Chinese traditional)"),
2899 ("CNS 11643 Plane 2 Chinese traditional"),
2900 build_string (CHINESE_CNS_PLANE_RE("2")),
2903 Vcharset_latin_viscii_lower =
2904 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2905 CHARSET_TYPE_96, 1, 1, '1',
2906 CHARSET_LEFT_TO_RIGHT,
2907 build_string ("VISCII lower"),
2908 build_string ("VISCII lower (Vietnamese)"),
2909 build_string ("VISCII lower (Vietnamese)"),
2910 build_string ("MULEVISCII-LOWER"),
2912 Vcharset_latin_viscii_upper =
2913 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2914 CHARSET_TYPE_96, 1, 1, '2',
2915 CHARSET_LEFT_TO_RIGHT,
2916 build_string ("VISCII upper"),
2917 build_string ("VISCII upper (Vietnamese)"),
2918 build_string ("VISCII upper (Vietnamese)"),
2919 build_string ("MULEVISCII-UPPER"),
2921 Vcharset_latin_viscii =
2922 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2923 CHARSET_TYPE_256, 1, 2, 0,
2924 CHARSET_LEFT_TO_RIGHT,
2925 build_string ("VISCII"),
2926 build_string ("VISCII 1.1 (Vietnamese)"),
2927 build_string ("VISCII 1.1 (Vietnamese)"),
2928 build_string ("VISCII1\\.1"),
2930 Vcharset_ideograph_daikanwa =
2931 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2932 CHARSET_TYPE_256X256, 2, 2, 0,
2933 CHARSET_LEFT_TO_RIGHT,
2934 build_string ("Daikanwa"),
2935 build_string ("Morohashi's Daikanwa"),
2936 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2937 build_string ("Daikanwa"),
2938 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2939 Vcharset_mojikyo_pj_1 =
2940 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
2941 CHARSET_TYPE_94X94, 2, 0, 0,
2942 CHARSET_LEFT_TO_RIGHT,
2943 build_string ("Mojikyo-PJ-1"),
2944 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
2946 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
2947 build_string ("jisx0208\\.Mojikyo-1$"),
2949 Vcharset_mojikyo_pj_2 =
2950 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
2951 CHARSET_TYPE_94X94, 2, 0, 0,
2952 CHARSET_LEFT_TO_RIGHT,
2953 build_string ("Mojikyo-PJ-2"),
2954 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
2956 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
2957 build_string ("jisx0208\\.Mojikyo-2$"),
2959 Vcharset_mojikyo_pj_3 =
2960 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
2961 CHARSET_TYPE_94X94, 2, 0, 0,
2962 CHARSET_LEFT_TO_RIGHT,
2963 build_string ("Mojikyo-PJ-3"),
2964 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
2966 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
2967 build_string ("jisx0208\\.Mojikyo-3$"),
2969 Vcharset_mojikyo_pj_4 =
2970 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
2971 CHARSET_TYPE_94X94, 2, 0, 0,
2972 CHARSET_LEFT_TO_RIGHT,
2973 build_string ("Mojikyo-PJ-4"),
2974 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
2976 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
2977 build_string ("jisx0208\\.Mojikyo-4$"),
2979 Vcharset_mojikyo_pj_5 =
2980 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
2981 CHARSET_TYPE_94X94, 2, 0, 0,
2982 CHARSET_LEFT_TO_RIGHT,
2983 build_string ("Mojikyo-PJ-5"),
2984 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
2986 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
2987 build_string ("jisx0208\\.Mojikyo-5$"),
2989 Vcharset_mojikyo_pj_6 =
2990 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
2991 CHARSET_TYPE_94X94, 2, 0, 0,
2992 CHARSET_LEFT_TO_RIGHT,
2993 build_string ("Mojikyo-PJ-6"),
2994 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
2996 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
2997 build_string ("jisx0208\\.Mojikyo-6$"),
2999 Vcharset_mojikyo_pj_7 =
3000 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3001 CHARSET_TYPE_94X94, 2, 0, 0,
3002 CHARSET_LEFT_TO_RIGHT,
3003 build_string ("Mojikyo-PJ-7"),
3004 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3006 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3007 build_string ("jisx0208\\.Mojikyo-7$"),
3009 Vcharset_mojikyo_pj_8 =
3010 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3011 CHARSET_TYPE_94X94, 2, 0, 0,
3012 CHARSET_LEFT_TO_RIGHT,
3013 build_string ("Mojikyo-PJ-8"),
3014 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3016 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3017 build_string ("jisx0208\\.Mojikyo-8$"),
3019 Vcharset_mojikyo_pj_9 =
3020 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3021 CHARSET_TYPE_94X94, 2, 0, 0,
3022 CHARSET_LEFT_TO_RIGHT,
3023 build_string ("Mojikyo-PJ-9"),
3024 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3026 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3027 build_string ("jisx0208\\.Mojikyo-9$"),
3029 Vcharset_mojikyo_pj_10 =
3030 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3031 CHARSET_TYPE_94X94, 2, 0, 0,
3032 CHARSET_LEFT_TO_RIGHT,
3033 build_string ("Mojikyo-PJ-10"),
3034 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3036 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3037 build_string ("jisx0208\\.Mojikyo-10$"),
3039 Vcharset_mojikyo_pj_11 =
3040 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3041 CHARSET_TYPE_94X94, 2, 0, 0,
3042 CHARSET_LEFT_TO_RIGHT,
3043 build_string ("Mojikyo-PJ-11"),
3044 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3046 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3047 build_string ("jisx0208\\.Mojikyo-11$"),
3049 Vcharset_mojikyo_pj_12 =
3050 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3051 CHARSET_TYPE_94X94, 2, 0, 0,
3052 CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Mojikyo-PJ-12"),
3054 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3056 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3057 build_string ("jisx0208\\.Mojikyo-12$"),
3059 Vcharset_mojikyo_pj_13 =
3060 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3061 CHARSET_TYPE_94X94, 2, 0, 0,
3062 CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Mojikyo-PJ-13"),
3064 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3066 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3067 build_string ("jisx0208\\.Mojikyo-13$"),
3069 Vcharset_mojikyo_pj_14 =
3070 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3071 CHARSET_TYPE_94X94, 2, 0, 0,
3072 CHARSET_LEFT_TO_RIGHT,
3073 build_string ("Mojikyo-PJ-14"),
3074 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3076 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3077 build_string ("jisx0208\\.Mojikyo-14$"),
3079 Vcharset_mojikyo_pj_15 =
3080 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3081 CHARSET_TYPE_94X94, 2, 0, 0,
3082 CHARSET_LEFT_TO_RIGHT,
3083 build_string ("Mojikyo-PJ-15"),
3084 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3086 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3087 build_string ("jisx0208\\.Mojikyo-15$"),
3089 Vcharset_mojikyo_pj_16 =
3090 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3091 CHARSET_TYPE_94X94, 2, 0, 0,
3092 CHARSET_LEFT_TO_RIGHT,
3093 build_string ("Mojikyo-PJ-16"),
3094 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3096 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3097 build_string ("jisx0208\\.Mojikyo-16$"),
3099 Vcharset_mojikyo_pj_17 =
3100 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3101 CHARSET_TYPE_94X94, 2, 0, 0,
3102 CHARSET_LEFT_TO_RIGHT,
3103 build_string ("Mojikyo-PJ-17"),
3104 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3106 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3107 build_string ("jisx0208\\.Mojikyo-17$"),
3109 Vcharset_mojikyo_pj_18 =
3110 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3111 CHARSET_TYPE_94X94, 2, 0, 0,
3112 CHARSET_LEFT_TO_RIGHT,
3113 build_string ("Mojikyo-PJ-18"),
3114 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3116 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3117 build_string ("jisx0208\\.Mojikyo-18$"),
3119 Vcharset_mojikyo_pj_19 =
3120 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3121 CHARSET_TYPE_94X94, 2, 0, 0,
3122 CHARSET_LEFT_TO_RIGHT,
3123 build_string ("Mojikyo-PJ-19"),
3124 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3126 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3127 build_string ("jisx0208\\.Mojikyo-19$"),
3129 Vcharset_mojikyo_pj_20 =
3130 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3131 CHARSET_TYPE_94X94, 2, 0, 0,
3132 CHARSET_LEFT_TO_RIGHT,
3133 build_string ("Mojikyo-PJ-20"),
3134 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3136 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3137 build_string ("jisx0208\\.Mojikyo-20$"),
3139 Vcharset_mojikyo_pj_21 =
3140 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3141 CHARSET_TYPE_94X94, 2, 0, 0,
3142 CHARSET_LEFT_TO_RIGHT,
3143 build_string ("Mojikyo-PJ-21"),
3144 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3146 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3147 build_string ("jisx0208\\.Mojikyo-21$"),
3149 Vcharset_ethiopic_ucs =
3150 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3151 CHARSET_TYPE_256X256, 2, 2, 0,
3152 CHARSET_LEFT_TO_RIGHT,
3153 build_string ("Ethiopic (UCS)"),
3154 build_string ("Ethiopic (UCS)"),
3155 build_string ("Ethiopic of UCS"),
3156 build_string ("Ethiopic-Unicode"),
3157 Qnil, 0x1200, 0x137F, 0x1200, 0);
3159 Vcharset_chinese_big5_1 =
3160 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3161 CHARSET_TYPE_94X94, 2, 0, '0',
3162 CHARSET_LEFT_TO_RIGHT,
3163 build_string ("Big5"),
3164 build_string ("Big5 (Level-1)"),
3166 ("Big5 Level-1 Chinese traditional"),
3167 build_string ("big5"),
3169 Vcharset_chinese_big5_2 =
3170 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3171 CHARSET_TYPE_94X94, 2, 0, '1',
3172 CHARSET_LEFT_TO_RIGHT,
3173 build_string ("Big5"),
3174 build_string ("Big5 (Level-2)"),
3176 ("Big5 Level-2 Chinese traditional"),
3177 build_string ("big5"),
3180 #ifdef ENABLE_COMPOSITE_CHARS
3181 /* #### For simplicity, we put composite chars into a 96x96 charset.
3182 This is going to lead to problems because you can run out of
3183 room, esp. as we don't yet recycle numbers. */
3184 Vcharset_composite =
3185 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3186 CHARSET_TYPE_96X96, 2, 0, 0,
3187 CHARSET_LEFT_TO_RIGHT,
3188 build_string ("Composite"),
3189 build_string ("Composite characters"),
3190 build_string ("Composite characters"),
3193 composite_char_row_next = 32;
3194 composite_char_col_next = 32;
3196 Vcomposite_char_string2char_hash_table =
3197 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3198 Vcomposite_char_char2string_hash_table =
3199 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3200 staticpro (&Vcomposite_char_string2char_hash_table);
3201 staticpro (&Vcomposite_char_char2string_hash_table);
3202 #endif /* ENABLE_COMPOSITE_CHARS */