1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_japanese_jisx0208_1990;
57 Lisp_Object Vcharset_korean_ksc5601;
58 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2;
62 Lisp_Object Vcharset_ucs_bmp;
63 Lisp_Object Vcharset_latin_viscii;
64 Lisp_Object Vcharset_latin_viscii_lower;
65 Lisp_Object Vcharset_latin_viscii_upper;
66 Lisp_Object Vcharset_ideograph_daikanwa;
67 Lisp_Object Vcharset_mojikyo_pj_1;
68 Lisp_Object Vcharset_mojikyo_pj_2;
69 Lisp_Object Vcharset_mojikyo_pj_3;
70 Lisp_Object Vcharset_mojikyo_pj_4;
71 Lisp_Object Vcharset_mojikyo_pj_5;
72 Lisp_Object Vcharset_mojikyo_pj_6;
73 Lisp_Object Vcharset_mojikyo_pj_7;
74 Lisp_Object Vcharset_mojikyo_pj_8;
75 Lisp_Object Vcharset_mojikyo_pj_9;
76 Lisp_Object Vcharset_mojikyo_pj_10;
77 Lisp_Object Vcharset_mojikyo_pj_11;
78 Lisp_Object Vcharset_mojikyo_pj_12;
79 Lisp_Object Vcharset_mojikyo_pj_13;
80 Lisp_Object Vcharset_mojikyo_pj_14;
81 Lisp_Object Vcharset_mojikyo_pj_15;
82 Lisp_Object Vcharset_mojikyo_pj_16;
83 Lisp_Object Vcharset_mojikyo_pj_17;
84 Lisp_Object Vcharset_mojikyo_pj_18;
85 Lisp_Object Vcharset_mojikyo_pj_19;
86 Lisp_Object Vcharset_mojikyo_pj_20;
87 Lisp_Object Vcharset_mojikyo_pj_21;
88 Lisp_Object Vcharset_ethiopic_ucs;
90 Lisp_Object Vcharset_chinese_big5_1;
91 Lisp_Object Vcharset_chinese_big5_2;
93 #ifdef ENABLE_COMPOSITE_CHARS
94 Lisp_Object Vcharset_composite;
96 /* Hash tables for composite chars. One maps strings representing
97 composed chars to their equivalent chars; one goes the other way. */
99 Lisp_Object Vcomposite_char_char2string_hash_table;
100 Lisp_Object Vcomposite_char_string2char_hash_table;
102 static int composite_char_row_next;
103 static int composite_char_col_next;
105 #endif /* ENABLE_COMPOSITE_CHARS */
107 /* Table of charsets indexed by leading byte. */
108 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
110 /* Table of charsets indexed by type/final-byte/direction. */
112 Lisp_Object charset_by_attributes[4][128];
114 Lisp_Object charset_by_attributes[4][128][2];
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
126 Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method registered for char-byte-table objects (see the lrecord
   implementation below): calls MARKOBJ on each of the 256 property
   slots of OBJ.  The declaration of `i' and the return statement are
   not visible in this view. */
152 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
154 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
157 for (i = 0; i < 256; i++)
159 markobj (cte->property[i]);
/* Equality method for char-byte-table objects.  Walks the 256 property
   slots of OBJ1 and OBJ2 in parallel.  A slot holding a nested
   char-byte-table in both objects is compared recursively; other slots
   are compared with internal_equal.  DEPTH is passed on as DEPTH + 1.
   NOTE(review): the values returned on match/mismatch are on lines
   outside this view. */
165 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
167 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
168 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
171 for (i = 0; i < 256; i++)
/* Slot I of OBJ1 is itself a table: OBJ2's slot is checked for the
   same shape before recursing. */
172 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
174 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
176 if (!char_byte_table_equal (cte1->property[i],
177 cte2->property[i], depth + 1))
184 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-table objects: the hash is whatever
   internal_array_hash yields over the 256 property slots of OBJ at the
   given DEPTH. */
190 char_byte_table_hash (Lisp_Object obj, int depth)
192 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
194 return internal_array_hash (cte->property, 256, depth);
/* Memory description and lrecord implementation for "char-byte-table".
   The description declares 256 Lisp_Object slots starting at the
   `property' member.  The implementation wires in the mark, equal and
   hash methods defined above and uses internal_object_printer for
   printing. */
197 static const struct lrecord_description char_byte_table_description[] = {
198 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
202 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
203 mark_char_byte_table,
204 internal_object_printer,
205 0, char_byte_table_equal,
206 char_byte_table_hash,
207 char_byte_table_description,
208 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table and set every one of its 256 property
   slots to INITVAL.  The new record is wrapped into the Lisp_Object
   `obj' (its declaration and the return statement are not visible
   here). */
211 make_char_byte_table (Lisp_Object initval)
215 struct Lisp_Char_Byte_Table *cte =
216 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
217 &lrecord_char_byte_table);
219 for (i = 0; i < 256; i++)
220 cte->property[i] = initval;
222 XSETCHAR_BYTE_TABLE (obj, cte);
/* Return a copy of the char-byte-table ENTRY.  Slots that hold nested
   char-byte-tables are copied recursively; every other slot value is
   stored in the copy as-is (shared with the original). */
227 copy_char_byte_table (Lisp_Object entry)
229 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
232 struct Lisp_Char_Byte_Table *ctenew =
233 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
234 &lrecord_char_byte_table);
236 for (i = 0; i < 256; i++)
238 Lisp_Object new = cte->property[i];
/* Nested table: copy it too, so the two tables share no sub-table. */
239 if (CHAR_BYTE_TABLE_P (new))
240 ctenew->property[i] = copy_char_byte_table (new);
242 ctenew->property[i] = new;
245 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark method registered for char-code-table objects (see the lrecord
   implementation below).  NOTE(review): only the unwrapping of OBJ is
   visible here; presumably the wrapped `table' slot is what gets
   marked/returned -- confirm against the full source. */
251 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
253 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality method for char-code-table objects: two char-code-tables
   are equal when the char-byte-tables in their `table' slots are equal
   according to char_byte_table_equal (called with DEPTH + 1). */
259 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
261 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
262 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
264 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash method for char-code-table objects.
   OBJ wraps a single char-byte-table in its `table' slot (see
   make_char_code_table), so the hash is delegated to
   char_byte_table_hash on that slot with DEPTH + 1, mirroring the way
   char_code_table_equal delegates to char_byte_table_equal.
   This must not call char_code_table_hash itself: the slot is not a
   char-code-table, and doing so would recurse without end. */
268 char_code_table_hash (Lisp_Object obj, int depth)
270 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
272 return char_byte_table_hash (cte->table, depth + 1);
/* Memory description and lrecord implementation for "char-code-table".
   The description declares one Lisp_Object slot, the `table' member.
   The implementation wires in the mark, equal and hash methods defined
   above and uses internal_object_printer for printing. */
275 static const struct lrecord_description char_code_table_description[] = {
276 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
280 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
281 mark_char_code_table,
282 internal_object_printer,
283 0, char_code_table_equal,
284 char_code_table_hash,
285 char_code_table_description,
286 struct Lisp_Char_Code_Table);
/* Allocate a new char-code-table whose `table' slot is a fresh
   char-byte-table with every slot set to INITVAL. */
289 make_char_code_table (Lisp_Object initval)
292 struct Lisp_Char_Code_Table *cte =
293 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
294 &lrecord_char_code_table);
296 cte->table = make_char_byte_table (initval);
298 XSETCHAR_CODE_TABLE (obj, cte);
/* Return a copy of the char-code-table ENTRY.  The wrapped
   char-byte-table is duplicated with copy_char_byte_table, so nested
   byte tables are not shared between the original and the copy. */
303 copy_char_code_table (Lisp_Object entry)
305 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
307 struct Lisp_Char_Code_Table *ctenew =
308 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
309 &lrecord_char_code_table);
311 ctenew->table = copy_char_byte_table (cte->table);
312 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-code-table
   TABLE.  The code of CH is split into four bytes, most significant
   first; each byte indexes one level of nested char-byte-tables.
   While the slot found is itself a char-byte-table the walk descends
   into it; the last level is indexed by the low byte.
   NOTE(review): what happens when an upper-level slot is NOT a table
   is on lines outside this view -- presumably that slot value is
   returned directly; confirm against the full source. */
318 get_char_code_table (Emchar ch, Lisp_Object table)
320 unsigned int code = ch;
321 struct Lisp_Char_Byte_Table* cpt
322 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
/* Level 1: bits 24..31 of the code. */
323 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
325 if (CHAR_BYTE_TABLE_P (ret))
326 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 2: bits 16..23. */
330 ret = cpt->property [(unsigned char) (code >> 16)];
331 if (CHAR_BYTE_TABLE_P (ret))
332 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 3: bits 8..15. */
336 ret = cpt->property [(unsigned char) (code >> 8)];
337 if (CHAR_BYTE_TABLE_P (ret))
338 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 4: the low byte selects the final value. */
342 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-code-table TABLE.
   The table has four levels indexed by the bytes of CH's code, most
   significant first.  At each level the slot either holds a nested
   char-byte-table (descend into it) or a plain value that stands for
   the whole sub-range.  In the latter case, if that value is not EQ to
   VALUE, the missing lower-level tables are created with every slot
   initialised to the old value, CH's own slot is set to VALUE, and the
   new tables are linked into the parent.  If the plain value is
   already EQ to VALUE nothing is changed. */
346 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
348 unsigned int code = ch;
349 struct Lisp_Char_Byte_Table* cpt1
350 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
351 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
353 if (CHAR_BYTE_TABLE_P (ret))
355 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
357 ret = cpt2->property[(unsigned char)(code >> 16)];
358 if (CHAR_BYTE_TABLE_P (ret))
360 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
362 ret = cpt3->property[(unsigned char)(code >> 8)];
363 if (CHAR_BYTE_TABLE_P (ret))
365 struct Lisp_Char_Byte_Table* cpt4
366 = XCHAR_BYTE_TABLE (ret);
/* All four levels already exist: overwrite the leaf slot. */
368 cpt4->property[(unsigned char)code] = value;
/* Level 4 missing: build it from the old range value. */
370 else if (!EQ (ret, value))
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
/* Levels 3 and 4 missing. */
378 else if (!EQ (ret, value))
380 Lisp_Object cpt3 = make_char_byte_table (ret);
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
386 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
/* Levels 2, 3 and 4 missing. */
389 else if (!EQ (ret, value))
391 Lisp_Object cpt2 = make_char_byte_table (ret);
392 Lisp_Object cpt3 = make_char_byte_table (ret);
393 Lisp_Object cpt4 = make_char_byte_table (ret);
395 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
396 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
397 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
398 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
403 Lisp_Object Vcharacter_attribute_table;
404 Lisp_Object Vcharacter_composition_table;
405 Lisp_Object Vcharacter_variant_table;
407 Lisp_Object Q_decomposition;
410 Lisp_Object Qisolated;
411 Lisp_Object Qinitial;
414 Lisp_Object Qvertical;
415 Lisp_Object QnoBreak;
416 Lisp_Object Qfraction;
/* Convert the Lisp value V to a character code.
   The branches visible here compare V (with EQ) against the symbols
   Qcompat, Qisolated, Qinitial, Qmedial, Qfinal, Qvertical, QnoBreak,
   Qfraction, Qsuper, Qsub, Qcircle, Qsquare, Qwide, Qnarrow, Qsmall
   and Qfont.  The last visible statement signals ERR_MSG with ERR_ARG
   through signal_simple_error.
   NOTE(review): the first test of the chain and the code returned by
   each branch are on lines outside this view. */
427 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
433 else if (EQ (v, Qcompat))
435 else if (EQ (v, Qisolated))
437 else if (EQ (v, Qinitial))
439 else if (EQ (v, Qmedial))
441 else if (EQ (v, Qfinal))
443 else if (EQ (v, Qvertical))
445 else if (EQ (v, QnoBreak))
447 else if (EQ (v, Qfraction))
449 else if (EQ (v, Qsuper))
451 else if (EQ (v, Qsub))
453 else if (EQ (v, Qcircle))
455 else if (EQ (v, Qsquare))
457 else if (EQ (v, Qwide))
459 else if (EQ (v, Qnarrow))
461 else if (EQ (v, Qsmall))
463 else if (EQ (v, Qfont))
466 signal_simple_error (err_msg, err_arg);
469 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
470 Return character corresponding with list.
Each element of LIST is converted to a character code and looked up,
one level per element, in the character composition table.
An error is signaled if LIST contains an invalid element or if the
composition table is found to be malformed.
474 Lisp_Object table = Vcharacter_composition_table;
475 Lisp_Object rest = list;
479 Lisp_Object v = Fcar (rest);
481 Emchar c = to_char_code (v, "Invalid value for composition", list);
483 ret = get_char_code_table (c, table);
488 if (!CHAR_CODE_TABLE_P (ret))
493 else if (!CONSP (rest))
495 else if (CHAR_CODE_TABLE_P (ret))
498 signal_simple_error ("Invalid table is found with", list);
500 signal_simple_error ("Invalid value for composition", list);
503 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
504 Return variants of CHARACTER.
The value is a copy of the list stored for CHARACTER in the character
variant table, so modifying it does not alter the table.
508 CHECK_CHAR (character);
509 return Fcopy_list (get_char_code_table (XCHAR (character),
510 Vcharacter_variant_table));
513 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
514 Return the alist of attributes of CHARACTER.
The value is a copy of the alist stored for CHARACTER in the character
attribute table, so modifying it does not alter the table.
518 CHECK_CHAR (character);
519 return Fcopy_alist (get_char_code_table (XCHAR (character),
520 Vcharacter_attribute_table));
523 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
524 Return the value of CHARACTER's ATTRIBUTE.
The value is the cdr of ATTRIBUTE's entry in the attribute alist
stored for CHARACTER in the character attribute table.
526 (character, attribute))
531 CHECK_CHAR (character);
532 ret = get_char_code_table (XCHAR (character),
533 Vcharacter_attribute_table);
537 if (!NILP (ccs = Ffind_charset (attribute)))
540 return Fcdr (Fassq (attribute, ret));
/* Record ATTRIBUTE -> VALUE in the attribute alist stored for
   CHARACTER in Vcharacter_attribute_table.  The existing alist is
   searched with Fassq.  The visible lines show two updates: consing a
   new (ATTRIBUTE . VALUE) pair onto the front of the alist, and
   replacing the cdr of an existing cell when it is not already EQ to
   VALUE.  The alist is then written back with put_char_code_table.
   NOTE(review): the conditions selecting each path and the return
   value are on lines outside this view. */
544 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
547 Emchar char_code = XCHAR (character);
549 = get_char_code_table (char_code, Vcharacter_attribute_table);
552 cell = Fassq (attribute, ret);
556 ret = Fcons (Fcons (attribute, value), ret);
558 else if (!EQ (Fcdr (cell), value))
560 Fsetcdr (cell, value);
562 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
566 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
567 Store CHARACTER's ATTRIBUTE with VALUE.
If ATTRIBUTE names a coded charset, VALUE gives CHARACTER's code point
in that charset; each byte must lie in the range 0..255, and the
charset's decoding table is updated to map that code point to
CHARACTER.
The attributes `->decomposition' and `->ucs' additionally update the
character composition table and the character variant table.
569 (character, attribute, value))
573 CHECK_CHAR (character);
574 ccs = Ffind_charset (attribute);
578 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
583 /* ad-hoc method for `ascii' */
584 if ((XCHARSET_CHARS (ccs) == 94) &&
585 (XCHARSET_BYTE_OFFSET (ccs) != 33))
586 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
588 ccs_len = XCHARSET_CHARS (ccs);
591 signal_simple_error ("Invalid value for coded-charset",
595 rest = Fget_char_attribute (character, attribute);
602 Lisp_Object ei = Fcar (rest);
604 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
605 nv = XVECTOR_DATA(v)[i];
612 XVECTOR_DATA(v)[i] = Qnil;
613 v = XCHARSET_DECODING_TABLE (ccs);
618 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
622 value = Fcopy_list (value);
627 Lisp_Object ei = Fcar (rest);
630 signal_simple_error ("Invalid value for coded-charset", value);
632 if ((i < 0) || (255 < i))
633 signal_simple_error ("Invalid value for coded-charset", value);
634 if (XCHARSET_GRAPHIC (ccs) == 1)
637 Fsetcar (rest, make_int (i));
639 i -= XCHARSET_BYTE_OFFSET (ccs);
640 nv = XVECTOR_DATA(v)[i];
646 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
653 XVECTOR_DATA(v)[i] = character;
/* Decomposition: each element of VALUE is converted with to_char_code
   and used as a key into nested char-code-tables rooted at
   Vcharacter_composition_table; CHARACTER is stored in that structure
   and sub-tables are created on demand. */
655 else if (EQ (attribute, Q_decomposition))
657 Lisp_Object rest = value;
658 Lisp_Object table = Vcharacter_composition_table;
661 signal_simple_error ("Invalid value for ->decomposition",
666 Lisp_Object v = Fcar (rest);
669 = to_char_code (v, "Invalid value for ->decomposition", value);
674 put_char_code_table (c, character, table);
679 ntable = get_char_code_table (c, table);
680 if (!CHAR_CODE_TABLE_P (ntable))
682 ntable = make_char_code_table (Qnil);
683 put_char_code_table (c, ntable, table);
/* ->ucs: add CHARACTER to the variant list stored for code C unless it
   is already a member (Fmemq). */
689 else if (EQ (attribute, Q_ucs))
695 signal_simple_error ("Invalid value for ->ucs", value);
699 ret = get_char_code_table (c, Vcharacter_variant_table);
700 if (NILP (Fmemq (character, ret)))
702 put_char_code_table (c, Fcons (character, ret),
703 Vcharacter_variant_table);
/* In the visible flow the attribute is finally recorded in CHARACTER's
   attribute alist. */
706 return put_char_attribute (character, attribute, value);
711 EXFUN (Fmake_char, 3);
713 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
714 Store character's ATTRIBUTES.
ATTRIBUTES is an alist whose elements are (ATTRIBUTE . VALUE) pairs.
The character to define is determined from ATTRIBUTES; each pair is
then stored with `put-char-attribute'.
An error is signaled if ATTRIBUTES is malformed.
718 Lisp_Object rest = attributes;
719 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
720 Lisp_Object character;
726 Lisp_Object cell = Fcar (rest);
730 signal_simple_error ("Invalid argument", attributes);
731 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
734 character = Fmake_char (ccs, Fcar (cell),
736 goto setup_attributes;
740 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
743 signal_simple_error ("Invalid argument", attributes);
745 character = make_char (XINT (code) + 0x100000);
746 goto setup_attributes;
750 else if (!INTP (code))
751 signal_simple_error ("Invalid argument", attributes);
753 character = make_char (XINT (code));
759 Lisp_Object cell = Fcar (rest);
762 signal_simple_error ("Invalid argument", attributes);
763 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
767 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
770 Lisp_Object Vutf_2000_version;
774 int leading_code_private_11;
777 Lisp_Object Qcharsetp;
779 /* Qdoc_string, Qdimension, Qchars defined in general.c */
780 Lisp_Object Qregistry, Qfinal, Qgraphic;
781 Lisp_Object Qdirection;
782 Lisp_Object Qreverse_direction_charset;
783 Lisp_Object Qleading_byte;
784 Lisp_Object Qshort_name, Qlong_name;
800 Qjapanese_jisx0208_1978,
803 Qjapanese_jisx0208_1990,
813 Qvietnamese_viscii_lower,
814 Qvietnamese_viscii_upper,
843 Lisp_Object Ql2r, Qr2l;
845 Lisp_Object Vcharset_hash_table;
848 static Charset_ID next_allocated_leading_byte;
850 static Charset_ID next_allocated_1_byte_leading_byte;
851 static Charset_ID next_allocated_2_byte_leading_byte;
854 /* Composite characters are characters constructed by overstriking two
855 or more regular characters.
857 1) The old Mule implementation involves storing composite characters
858 in a buffer as a tag followed by all of the actual characters
859 used to make up the composite character. I think this is a bad
860 idea; it greatly complicates code that wants to handle strings
861 one character at a time because it has to deal with the possibility
862 of great big ungainly characters. It's much more reasonable to
863 simply store an index into a table of composite characters.
865 2) The current implementation only allows for 16,384 separate
866 composite characters over the lifetime of the XEmacs process.
867 This could become a potential problem if the user
868 edited lots of different files that use composite characters.
869 Due to FSF bogosity, increasing the number of allowable
870 composite characters under Mule would decrease the number
871 of possible faces that can exist. Mule already has shrunk
872 this to 2048, and further shrinkage would become uncomfortable.
873 No such problems exist in XEmacs.
875 Composite characters could be represented as 0x80 C1 C2 C3,
876 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
877 for slightly under 2^20 (one million) composite characters
878 over the XEmacs process lifetime, and you only need to
879 increase the size of a Mule character from 19 to 21 bits.
880 Or you could use 0x80 C1 C2 C3 C4, allowing for about
881 85 million (slightly over 2^26) composite characters. */
884 /************************************************************************/
885 /* Basic Emchar functions */
886 /************************************************************************/
888 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
889 string in STR. Returns the number of bytes stored.
890 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encode character C at STR.
   The first group of branches writes C as a UTF-8-style sequence whose
   length is chosen from the magnitude of C: lead byte 0xc0 / 0xe0 /
   0xf0 / 0xf8 / 0xfc followed by continuation bytes carrying six bits
   each (low six bits OR 0x80).
   The trailing BREAKUP_CHAR / leading-byte lines belong to a different
   encoding path.  NOTE(review): presumably the two paths are selected
   by preprocessor conditionals that are not visible in this view. */
894 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: codes up to 0x7ff. */
909 else if ( c <= 0x7ff )
911 *p++ = (c >> 6) | 0xc0;
912 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: codes up to 0xffff. */
914 else if ( c <= 0xffff )
916 *p++ = (c >> 12) | 0xe0;
917 *p++ = ((c >> 6) & 0x3f) | 0x80;
918 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: codes up to 0x1fffff. */
920 else if ( c <= 0x1fffff )
922 *p++ = (c >> 18) | 0xf0;
923 *p++ = ((c >> 12) & 0x3f) | 0x80;
924 *p++ = ((c >> 6) & 0x3f) | 0x80;
925 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: codes up to 0x3ffffff. */
927 else if ( c <= 0x3ffffff )
929 *p++ = (c >> 24) | 0xf8;
930 *p++ = ((c >> 18) & 0x3f) | 0x80;
931 *p++ = ((c >> 12) & 0x3f) | 0x80;
932 *p++ = ((c >> 6) & 0x3f) | 0x80;
933 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
937 *p++ = (c >> 30) | 0xfc;
938 *p++ = ((c >> 24) & 0x3f) | 0x80;
939 *p++ = ((c >> 18) & 0x3f) | 0x80;
940 *p++ = ((c >> 12) & 0x3f) | 0x80;
941 *p++ = ((c >> 6) & 0x3f) | 0x80;
942 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte path: split C into charset and position codes; private
   leading bytes are preceded by their prefix byte. */
945 BREAKUP_CHAR (c, charset, c1, c2);
946 lb = CHAR_LEADING_BYTE (c);
947 if (LEADING_BYTE_PRIVATE_P (lb))
948 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
950 if (EQ (charset, Vcharset_control_1))
959 /* Return the first character from a Mule-encoded string in STR,
960 assuming it's non-ASCII. Do not call this directly.
961 Use the macro charptr_emchar() instead. */
/* Decode the character starting at STR.
   The first part classifies the lead byte B by thresholds 0xf8, 0xf0,
   0xe0 and 0xc0, then folds in LEN continuation bytes, six bits at a
   time (ch = (ch << 6) | (b & 0x3f)).
   The second part is a leading-byte decoder: Control-1 characters are
   the following byte minus 0x20, a prefix byte is handled specially,
   and the charset found through the leading byte decides whether one
   or two position bytes go into MAKE_CHAR.
   NOTE(review): the initial values of `ch' and `len' per branch, and
   the conditionals separating the two decoders, are not visible. */
964 non_ascii_charptr_emchar (CONST Bufbyte *str)
977 else if ( b >= 0xf8 )
982 else if ( b >= 0xf0 )
987 else if ( b >= 0xe0 )
992 else if ( b >= 0xc0 )
/* Accumulate six payload bits per continuation byte. */
1002 for( ; len > 0; len-- )
1005 ch = ( ch << 6 ) | ( b & 0x3f );
1009 Bufbyte i0 = *str, i1, i2 = 0;
1010 Lisp_Object charset;
1012 if (i0 == LEADING_BYTE_CONTROL_1)
1013 return (Emchar) (*++str - 0x20);
1015 if (LEADING_BYTE_PREFIX_P (i0))
1020 charset = CHARSET_BY_LEADING_BYTE (i0);
1021 if (XCHARSET_DIMENSION (charset) == 2)
1024 return MAKE_CHAR (charset, i1, i2);
1028 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1029 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII character code CH.
   CH is split into three fields with CHAR_FIELD1/2/3.  Two groups of
   checks are visible.  The first range-checks field 2 against the
   official/private bounds and, when field 3 is 0x20 or 0x7F, accepts
   the character only if the charset has 96 characters.  The second
   does the same starting from field 1 and, when composite characters
   are enabled, consults Vcomposite_char_char2string_hash_table.
   NOTE(review): the condition choosing between the two groups and the
   individual return statements are on lines outside this view. */
1033 non_ascii_valid_char_p (Emchar ch)
1037 /* Must have only lowest 19 bits set */
1041 f1 = CHAR_FIELD1 (ch);
1042 f2 = CHAR_FIELD2 (ch);
1043 f3 = CHAR_FIELD3 (ch);
1047 Lisp_Object charset;
1049 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1050 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1051 f2 > MAX_CHAR_FIELD2_PRIVATE)
1056 if (f3 != 0x20 && f3 != 0x7F)
1060 NOTE: This takes advantage of the fact that
1061 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1062 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1064 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1065 return (XCHARSET_CHARS (charset) == 96);
1069 Lisp_Object charset;
1071 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1072 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1073 f1 > MAX_CHAR_FIELD1_PRIVATE)
1075 if (f2 < 0x20 || f3 < 0x20)
1078 #ifdef ENABLE_COMPOSITE_CHARS
1079 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1081 if (UNBOUNDP (Fgethash (make_int (ch),
1082 Vcomposite_char_char2string_hash_table,
1087 #endif /* ENABLE_COMPOSITE_CHARS */
1089 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1092 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1094 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1097 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1099 return (XCHARSET_CHARS (charset) == 96);
1105 /************************************************************************/
1106 /* Basic string functions */
1107 /************************************************************************/
1109 /* Copy the character pointed to by PTR into STR, assuming it's
1110 non-ASCII. Do not call this directly. Use the macro
1111 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR and return the number
   of bytes it occupies (strptr + 1 - str).  The byte count comes from
   REP_BYTES_BY_FIRST_BYTE, and the switch falls through on purpose so
   that a character of N bytes executes the last N - 1 copy statements.
   NOTE(review): the switch reads the byte at STR, so the first byte
   must already have been copied there; that statement is not visible
   in this view. */
1114 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1116 Bufbyte *strptr = str;
1118 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1120 /* Notice fallthrough. */
1122 case 6: *++strptr = *ptr++;
1123 case 5: *++strptr = *ptr++;
1125 case 4: *++strptr = *ptr++;
1126 case 3: *++strptr = *ptr++;
1127 case 2: *++strptr = *ptr;
1132 return strptr + 1 - str;
1136 /************************************************************************/
1137 /* streams of Emchars */
1138 /************************************************************************/
1140 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1141 The functions below are not meant to be called directly; use
1142 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM whose first byte
   CH has already been read.  CH is stored in a local buffer; further
   bytes are fetched with Lstream_getc according to
   REP_BYTES_BY_FIRST_BYTE (CH), and the assembled sequence is decoded
   with charptr_emchar.
   NOTE(review): the case labels and the handling of a failed
   Lstream_getc are on lines outside this view. */
1145 Lstream_get_emchar_1 (Lstream *stream, int ch)
1147 Bufbyte str[MAX_EMCHAR_LEN];
1148 Bufbyte *strptr = str;
1150 str[0] = (Bufbyte) ch;
1151 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1153 /* Notice fallthrough. */
1156 ch = Lstream_getc (stream);
1158 *++strptr = (Bufbyte) ch;
1160 ch = Lstream_getc (stream);
1162 *++strptr = (Bufbyte) ch;
1165 ch = Lstream_getc (stream);
1167 *++strptr = (Bufbyte) ch;
1169 ch = Lstream_getc (stream);
1171 *++strptr = (Bufbyte) ch;
1173 ch = Lstream_getc (stream);
1175 *++strptr = (Bufbyte) ch;
1180 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, then pass the resulting LEN bytes to
   Lstream_write, whose result is returned. */
1184 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1186 Bufbyte str[MAX_EMCHAR_LEN];
1187 Bytecount len = set_charptr_emchar (str, ch);
1188 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar, then hand the LEN bytes to
   Lstream_unread. */
1192 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1194 Bufbyte str[MAX_EMCHAR_LEN];
1195 Bytecount len = set_charptr_emchar (str, ch);
1196 Lstream_unread (stream, str, len);
1200 /************************************************************************/
1201 /* charset object */
1202 /************************************************************************/
/* Mark method registered for charset objects: calls MARKOBJ on the
   Lisp-valued members short_name, long_name, doc_string, registry,
   ccl_program and decoding_table of OBJ.
   NOTE(review): the return value is on a line outside this view. */
1205 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1207 struct Lisp_Charset *cs = XCHARSET (obj);
1209 markobj (cs->short_name);
1210 markobj (cs->long_name);
1211 markobj (cs->doc_string);
1212 markobj (cs->registry);
1213 markobj (cs->ccl_program);
1215 markobj (cs->decoding_table);
/* Print method registered for charset objects.  Writes
   "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING TYPE DIRECTION
   cols=N gN final='C' reg=REGISTRY 0xUID>" to PRINTCHARFUN.  The
   string-valued parts go through print_internal, the formatted parts
   through a local buffer and write_c_string.
   NOTE(review): the condition guarding the "printing unreadable
   object" error and the declaration of `buf' are not visible. */
1221 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1223 struct Lisp_Charset *cs = XCHARSET (obj);
1227 error ("printing unreadable object #<charset %s 0x%x>",
1228 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1231 write_c_string ("#<charset ", printcharfun);
1232 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1233 write_c_string (" ", printcharfun);
1234 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1235 write_c_string (" ", printcharfun);
1236 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1237 write_c_string (" ", printcharfun);
1238 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1239 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1240 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1241 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1242 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1244 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1245 CHARSET_COLUMNS (cs),
1246 CHARSET_GRAPHIC (cs),
1247 CHARSET_FINAL (cs));
1248 write_c_string (buf, printcharfun);
1249 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1250 sprintf (buf, " 0x%x>", cs->header.uid);
1251 write_c_string (buf, printcharfun);
/* Memory description and lrecord implementation for "charset".
   Two description entries are visible: 7 Lisp_Object slots starting at
   `name' and 2 starting at `decoding_table'.  NOTE(review): presumably
   these are alternatives chosen by a preprocessor conditional that is
   not visible here.
   The implementation registers mark_charset and print_charset and
   passes 0 for the remaining three method slots. */
1254 static const struct lrecord_description charset_description[] = {
1255 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1257 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1262 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1263 mark_charset, print_charset, 0, 0, 0,
1264 charset_description,
1265 struct Lisp_Charset);
1267 /* Make a new charset. */
/* Allocate a charset object, fill in its fields from the arguments,
   and register it:
   - DIMENSION and CHARS are derived from TYPE;
   - REP_BYTES is derived from the dimension (1 for the ASCII id);
   - the object is entered in charset_by_attributes (by type and final
     byte, and in one variant also by direction), in
     charset_by_leading_byte, and in Vcharset_hash_table under NAME.
   NOTE(review): several statements appear in two variants here (for
   example the charset_by_attributes updates); presumably they are
   selected by preprocessor conditionals that are not visible.
   NOTE(review): in the visible lines the DECODING_TABLE argument is
   not used -- the field is set to Qnil; confirm that this is
   intended. */
1270 make_charset (Charset_ID id, Lisp_Object name,
1271 unsigned char type, unsigned char columns, unsigned char graphic,
1272 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1273 Lisp_Object long_name, Lisp_Object doc,
1275 Lisp_Object decoding_table,
1276 Emchar ucs_min, Emchar ucs_max,
1277 Emchar code_offset, unsigned char byte_offset)
1280 struct Lisp_Charset *cs =
1281 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1282 XSETCHARSET (obj, cs);
1284 CHARSET_ID (cs) = id;
1285 CHARSET_NAME (cs) = name;
1286 CHARSET_SHORT_NAME (cs) = short_name;
1287 CHARSET_LONG_NAME (cs) = long_name;
1288 CHARSET_DIRECTION (cs) = direction;
1289 CHARSET_TYPE (cs) = type;
1290 CHARSET_COLUMNS (cs) = columns;
1291 CHARSET_GRAPHIC (cs) = graphic;
1292 CHARSET_FINAL (cs) = final;
1293 CHARSET_DOC_STRING (cs) = doc;
1294 CHARSET_REGISTRY (cs) = reg;
1295 CHARSET_CCL_PROGRAM (cs) = Qnil;
1296 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1298 CHARSET_DECODING_TABLE(cs) = Qnil;
1299 CHARSET_UCS_MIN(cs) = ucs_min;
1300 CHARSET_UCS_MAX(cs) = ucs_max;
1301 CHARSET_CODE_OFFSET(cs) = code_offset;
1302 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension and characters-per-dimension from the type. */
1305 switch (CHARSET_TYPE (cs))
1307 case CHARSET_TYPE_94:
1308 CHARSET_DIMENSION (cs) = 1;
1309 CHARSET_CHARS (cs) = 94;
1311 case CHARSET_TYPE_96:
1312 CHARSET_DIMENSION (cs) = 1;
1313 CHARSET_CHARS (cs) = 96;
1315 case CHARSET_TYPE_94X94:
1316 CHARSET_DIMENSION (cs) = 2;
1317 CHARSET_CHARS (cs) = 94;
1319 case CHARSET_TYPE_96X96:
1320 CHARSET_DIMENSION (cs) = 2;
1321 CHARSET_CHARS (cs) = 96;
1324 case CHARSET_TYPE_128:
1325 CHARSET_DIMENSION (cs) = 1;
1326 CHARSET_CHARS (cs) = 128;
1328 case CHARSET_TYPE_128X128:
1329 CHARSET_DIMENSION (cs) = 2;
1330 CHARSET_CHARS (cs) = 128;
1332 case CHARSET_TYPE_256:
1333 CHARSET_DIMENSION (cs) = 1;
1334 CHARSET_CHARS (cs) = 256;
1336 case CHARSET_TYPE_256X256:
1337 CHARSET_DIMENSION (cs) = 2;
1338 CHARSET_CHARS (cs) = 256;
/* Bytes per character in the string representation: one for ASCII,
   otherwise the dimension plus one or two. */
1344 if (id == LEADING_BYTE_ASCII)
1345 CHARSET_REP_BYTES (cs) = 1;
1347 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1349 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1354 /* some charsets do not have final characters. This includes
1355 ASCII, Control-1, Composite, and the two faux private
1358 if (code_offset == 0)
1360 assert (NILP (charset_by_attributes[type][final]));
1361 charset_by_attributes[type][final] = obj;
1364 assert (NILP (charset_by_attributes[type][final][direction]));
1365 charset_by_attributes[type][final][direction] = obj;
1369 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1370 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1373 /* official leading byte */
1374 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1377 /* Some charsets are "faux" and don't have names or really exist at
1378 all except in the leading-byte table. */
1380 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of the given
   DIMENSION.  Each allocation counter is compared against its maximum
   before being post-incremented into `lb'.  When no leading byte is
   left an error "No more character sets free for this dimension" is
   raised with DIMENSION as its argument.
   NOTE(review): one variant uses a single counter and another uses
   separate 1-byte/2-byte counters; the conditionals selecting between
   them, and what happens when a counter exceeds its maximum, are on
   lines outside this view. */
1385 get_unallocated_leading_byte (int dimension)
1390 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1393 lb = next_allocated_leading_byte++;
1397 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1400 lb = next_allocated_1_byte_leading_byte++;
1404 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1407 lb = next_allocated_2_byte_leading_byte++;
1413 ("No more character sets free for this dimension",
1414 make_int (dimension));
/* Compute the code point of character CH in CHARSET as a list of
   integers, one per dimension of the charset.
   Case 1: CH lies within [UCS_MIN, UCS_MAX] of CHARSET.  Its offset
   D = CH - UCS_MIN + CODE_OFFSET is split into digits in base
   XCHARSET_CHARS, most significant first, and BYTE_OFFSET is added to
   each digit.
   Case 2: CHARSET has code offset 0.  CH is located relative to the
   MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96 blocks,
   with the block chosen by the charset's final byte (FINAL - '0');
   position bytes are biased by 33 for 94-character sets and by 32 for
   96-character sets.
   NOTE(review): the upper-bound halves of the range tests in case 2
   and the value returned when nothing matches are not visible. */
1421 range_charset_code_point (Lisp_Object charset, Emchar ch)
1425 if ((XCHARSET_UCS_MIN (charset) <= ch)
1426 && (ch <= XCHARSET_UCS_MAX (charset)))
1428 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1430 if (XCHARSET_DIMENSION (charset) == 1)
1431 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1432 else if (XCHARSET_DIMENSION (charset) == 2)
1433 return list2 (make_int (d / XCHARSET_CHARS (charset)
1434 + XCHARSET_BYTE_OFFSET (charset)),
1435 make_int (d % XCHARSET_CHARS (charset)
1436 + XCHARSET_BYTE_OFFSET (charset)));
1437 else if (XCHARSET_DIMENSION (charset) == 3)
1438 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1439 * XCHARSET_CHARS (charset))
1440 + XCHARSET_BYTE_OFFSET (charset)),
1441 make_int (d / XCHARSET_CHARS (charset)
1442 % XCHARSET_CHARS (charset)
1443 + XCHARSET_BYTE_OFFSET (charset)),
1444 make_int (d % XCHARSET_CHARS (charset)
1445 + XCHARSET_BYTE_OFFSET (charset)));
1446 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1447 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1448 * XCHARSET_CHARS (charset)
1449 * XCHARSET_CHARS (charset))
1450 + XCHARSET_BYTE_OFFSET (charset)),
1451 make_int (d / (XCHARSET_CHARS (charset)
1452 * XCHARSET_CHARS (charset))
1453 % XCHARSET_CHARS (charset)
1454 + XCHARSET_BYTE_OFFSET (charset)),
1455 make_int (d / XCHARSET_CHARS (charset)
1456 % XCHARSET_CHARS (charset)
1457 + XCHARSET_BYTE_OFFSET (charset)),
1458 make_int (d % XCHARSET_CHARS (charset)
1459 + XCHARSET_BYTE_OFFSET (charset)));
/* Charsets without a code offset: locate CH in the block reserved for
   the charset's type and final byte. */
1461 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1463 if (XCHARSET_DIMENSION (charset) == 1)
1465 if (XCHARSET_CHARS (charset) == 94)
1467 if (((d = ch - (MIN_CHAR_94
1468 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1470 return list1 (make_int (d + 33));
1472 else if (XCHARSET_CHARS (charset) == 96)
1474 if (((d = ch - (MIN_CHAR_96
1475 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1477 return list1 (make_int (d + 32));
1482 else if (XCHARSET_DIMENSION (charset) == 2)
1484 if (XCHARSET_CHARS (charset) == 94)
1486 if (((d = ch - (MIN_CHAR_94x94
1487 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1490 return list2 (make_int ((d / 94) + 33),
1491 make_int (d % 94 + 33));
1493 else if (XCHARSET_CHARS (charset) == 96)
1495 if (((d = ch - (MIN_CHAR_96x96
1496 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1499 return list2 (make_int ((d / 96) + 32),
1500 make_int (d % 96 + 32));
/* Decompose the built-in character C into a Lisp list whose first
   element is a charset object and whose remaining element(s) are the
   integer position code(s) of C in that charset.

   The charset is chosen purely from the numeric range C falls in:
   values below MIN_CHAR_OBS_94x94 are matched against ASCII, control-1,
   Latin-1 and the script ranges (Greek, Cyrillic, Hebrew, Thai,
   half-width Katakana) with ucs-bmp as the two-byte catch-all; larger
   values are matched against the obsolete 94x94 region, Daikanwa, and
   the generic 94 / 96 / 94x94 / 96x96 regions, where the charset is
   found with CHARSET_BY_ATTRIBUTES using a final byte derived from C.

   NOTE(review): the conditions guarding the control-1, Latin-1 and
   ucs-bmp returns, and whatever follows the last branch, are not
   visible in this view. */
1508 split_builtin_char (Emchar c)
1510 if (c < MIN_CHAR_OBS_94x94)
1512 if (c <= MAX_CHAR_BASIC_LATIN)
1514 return list2 (Vcharset_ascii, make_int (c));
1518 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1522 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1524 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1526 return list2 (Vcharset_greek_iso8859_7,
1527 make_int (c - MIN_CHAR_GREEK + 0x20));
1529 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1531 return list2 (Vcharset_cyrillic_iso8859_5,
1532 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1534 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1536 return list2 (Vcharset_hebrew_iso8859_8,
1537 make_int (c - MIN_CHAR_HEBREW + 0x20));
1539 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1541 return list2 (Vcharset_thai_tis620,
1542 make_int (c - MIN_CHAR_THAI + 0x20));
1544 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1545 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1547 return list2 (Vcharset_katakana_jisx0201,
1548 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
/* ucs-bmp: high byte and low byte of C become the two position codes. */
1552 return list3 (Vcharset_ucs_bmp,
1553 make_int (c >> 8), make_int (c & 0xff));
/* Obsolete 94x94 region: final bytes are counted from '@'. */
1556 else if (c <= MAX_CHAR_OBS_94x94)
1558 return list3 (CHARSET_BY_ATTRIBUTES
1559 (CHARSET_TYPE_94X94,
1560 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1561 CHARSET_LEFT_TO_RIGHT),
1562 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1563 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1565 else if (c <= MAX_CHAR_DAIKANWA)
1567 return list3 (Vcharset_ideograph_daikanwa,
1568 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1569 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
/* Generic regions: final bytes are counted from '0'. */
1571 else if (c <= MAX_CHAR_94)
1573 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1574 ((c - MIN_CHAR_94) / 94) + '0',
1575 CHARSET_LEFT_TO_RIGHT),
1576 make_int (((c - MIN_CHAR_94) % 94) + 33));
1578 else if (c <= MAX_CHAR_96)
1580 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1581 ((c - MIN_CHAR_96) / 96) + '0',
1582 CHARSET_LEFT_TO_RIGHT),
1583 make_int (((c - MIN_CHAR_96) % 96) + 32));
1585 else if (c <= MAX_CHAR_94x94)
1587 return list3 (CHARSET_BY_ATTRIBUTES
1588 (CHARSET_TYPE_94X94,
1589 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1590 CHARSET_LEFT_TO_RIGHT),
1591 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1592 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1594 else if (c <= MAX_CHAR_96x96)
1596 return list3 (CHARSET_BY_ATTRIBUTES
1597 (CHARSET_TYPE_96X96,
1598 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1599 CHARSET_LEFT_TO_RIGHT),
1600 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1601 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
/* Return the code point of character CH in CHARSET.

   The per-character attribute entry for CH is fetched from
   Vcharacter_attribute_table.  If that entry is non-nil and contains an
   association keyed by CHARSET (Fassq), the cdr of that association is
   returned.  Otherwise the result is computed from the charset's range
   properties by range_charset_code_point.  */
1610 charset_code_point (Lisp_Object charset, Emchar ch)
1612 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1614 if (!EQ (cdef, Qnil))
1616 Lisp_Object field = Fassq (charset, cdef);
1618 if (!EQ (field, Qnil))
1619 return Fcdr (field);
1621 return range_charset_code_point (charset, ch);
/* Backing store for the Lisp variable
   `default-coded-charset-priority-list' (declared with DEFVAR_LISP and
   initialized to Qnil in vars_of_mule_charset). */
1624 Lisp_Object Vdefault_coded_charset_priority_list;
1628 /************************************************************************/
1629 /* Basic charset Lisp functions */
1630 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when OBJECT satisfies CHARSETP
   and Qnil otherwise. */
1632 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1633 Return non-nil if OBJECT is a charset.
1637 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned
   unchanged.  Anything else must be a symbol (enforced by CHECK_SYMBOL)
   and is looked up in Vcharset_hash_table, with Qnil as the default
   when no entry exists. */
1640 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1641 Retrieve the charset of the given name.
1642 If CHARSET-OR-NAME is a charset object, it is simply returned.
1643 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1644 nil is returned. Otherwise the associated charset object is returned.
1648 if (CHARSETP (charset_or_name))
1649 return charset_or_name;
1651 CHECK_SYMBOL (charset_or_name);
1652 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset'.  Resolves NAME through Ffind_charset
   and signals the error "No such charset" (with NAME as the error
   datum) instead of returning nil for an unknown charset.  Most other
   primitives in this file use it to normalize their CHARSET argument. */
1655 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1656 Retrieve the charset of the given name.
1657 Same as `find-charset' except an error is signalled if there is no such
1658 charset instead of returning nil.
1662 Lisp_Object charset = Ffind_charset (name);
1665 signal_simple_error ("No such charset", name);
1669 /* We store the charsets in hash tables with the names as the key and the
1670 actual charset object as the value. Occasionally we need to use them
1671 in a list format. These routines provide us with that. */
/* Closure handed to add_charset_to_list_mapper while mapping over the
   charset hash table. */
1672 struct charset_list_closure
/* Address of the list being accumulated; the mapper conses new
   elements onto *charset_list. */
1674 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.

   KEY   - the hash key (not used by the visible code).
   VALUE - a charset object; its name is what gets collected.
   CHARSET_LIST_CLOSURE - a `struct charset_list_closure *' passed as
           void *; its charset_list member points at the accumulator.

   Pushes XCHARSET_NAME (VALUE) onto the front of the accumulator list. */
1678 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1679 void *charset_list_closure)
1681 /* This function can GC */
1682 struct charset_list_closure *chcl =
1683 (struct charset_list_closure*) charset_list_closure;
1684 Lisp_Object *charset_list = chcl->charset_list;
1686 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty (Qnil) list,
   GC-protects it with GCPRO1, and lets elisp_maphash call
   add_charset_to_list_mapper for every entry of Vcharset_hash_table;
   the mapper conses each charset's name onto the list, which is then
   returned.

   NOTE(review): the mapper collects XCHARSET_NAME of each VALUE, and
   Fdefine_charset_alias stores extra keys for an existing charset in
   the same table, so an aliased charset's name presumably appears once
   per key -- confirm whether duplicates are intended. */
1690 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1691 Return a list of the names of all defined charsets.
1695 Lisp_Object charset_list = Qnil;
1696 struct gcpro gcpro1;
1697 struct charset_list_closure charset_list_closure;
1699 GCPRO1 (charset_list);
1700 charset_list_closure.charset_list = &charset_list;
1701 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1702 &charset_list_closure);
1705 return charset_list;
/* Lisp primitive `charset-name'.  CHARSET is normalized with
   Fget_charset (which signals an error for an unknown charset) and the
   charset's name field is returned. */
1708 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1709 Return the name of the given charset.
1713 return XCHARSET_NAME (Fget_charset (charset));
1716 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1717 Define a new character set.
1718 This function is for use with Mule support.
1719 NAME is a symbol, the name by which the character set is normally referred.
1720 DOC-STRING is a string describing the character set.
1721 PROPS is a property list, describing the specific nature of the
1722 character set. Recognized properties are:
1724 'short-name Short version of the charset name (ex: Latin-1)
1725 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1726 'registry A regular expression matching the font registry field for
1728 'dimension Number of octets used to index a character in this charset.
1729 Either 1 or 2. Defaults to 1.
1730 'columns Number of columns used to display a character in this charset.
1731 Only used in TTY mode. (Under X, the actual width of a
1732 character can be derived from the font used to display the
1733 characters.) If unspecified, defaults to the dimension
1734 (this is almost always the correct value).
1735 'chars Number of characters in each dimension (94 or 96).
1736 Defaults to 94. Note that if the dimension is 2, the
1737 character set thus described is 94x94 or 96x96.
1738 'final Final byte of ISO 2022 escape sequence. Must be
1739 supplied. Each combination of (DIMENSION, CHARS) defines a
1740 separate namespace for final bytes. Note that ISO
1741 2022 restricts the final byte to the range
1742 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1743 dimension == 2. Note also that final bytes in the range
1744 0x30 - 0x3F are reserved for user-defined (not official)
1746 'graphic 0 (use left half of font on output) or 1 (use right half
1747 of font on output). Defaults to 0. For example, for
1748 a font whose registry is ISO8859-1, the left half
1749 (octets 0x20 - 0x7F) is the `ascii' character set, while
1750 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1751 character set. With 'graphic set to 0, the octets
1752 will have their high bit cleared; with it set to 1,
1753 the octets will have their high bit set.
1754 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1756 'ccl-program A compiled CCL program used to convert a character in
1757 this charset into an index into the font. This is in
1758 addition to the 'graphic property. The CCL program
1759 is passed the octets of the character, with the high
1760 bit cleared and set depending upon whether the value
1761 of the 'graphic property is 0 or 1.
1763 (name, doc_string, props))
1765 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1766 int direction = CHARSET_LEFT_TO_RIGHT;
1768 Lisp_Object registry = Qnil;
1769 Lisp_Object charset;
1770 Lisp_Object rest, keyword, value;
1771 Lisp_Object ccl_program = Qnil;
1772 Lisp_Object short_name = Qnil, long_name = Qnil;
1773 int byte_offset = -1;
1775 CHECK_SYMBOL (name);
1776 if (!NILP (doc_string))
1777 CHECK_STRING (doc_string);
1779 charset = Ffind_charset (name);
1780 if (!NILP (charset))
1781 signal_simple_error ("Cannot redefine existing charset", name);
1783 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1785 if (EQ (keyword, Qshort_name))
1787 CHECK_STRING (value);
1791 if (EQ (keyword, Qlong_name))
1793 CHECK_STRING (value);
1797 else if (EQ (keyword, Qdimension))
1800 dimension = XINT (value);
1801 if (dimension < 1 || dimension > 2)
1802 signal_simple_error ("Invalid value for 'dimension", value);
1805 else if (EQ (keyword, Qchars))
1808 chars = XINT (value);
1809 if (chars != 94 && chars != 96)
1810 signal_simple_error ("Invalid value for 'chars", value);
1813 else if (EQ (keyword, Qcolumns))
1816 columns = XINT (value);
1817 if (columns != 1 && columns != 2)
1818 signal_simple_error ("Invalid value for 'columns", value);
1821 else if (EQ (keyword, Qgraphic))
1824 graphic = XINT (value);
1826 if (graphic < 0 || graphic > 2)
1828 if (graphic < 0 || graphic > 1)
1830 signal_simple_error ("Invalid value for 'graphic", value);
1833 else if (EQ (keyword, Qregistry))
1835 CHECK_STRING (value);
1839 else if (EQ (keyword, Qdirection))
1841 if (EQ (value, Ql2r))
1842 direction = CHARSET_LEFT_TO_RIGHT;
1843 else if (EQ (value, Qr2l))
1844 direction = CHARSET_RIGHT_TO_LEFT;
1846 signal_simple_error ("Invalid value for 'direction", value);
1849 else if (EQ (keyword, Qfinal))
1851 CHECK_CHAR_COERCE_INT (value);
1852 final = XCHAR (value);
1853 if (final < '0' || final > '~')
1854 signal_simple_error ("Invalid value for 'final", value);
1857 else if (EQ (keyword, Qccl_program))
1859 CHECK_VECTOR (value);
1860 ccl_program = value;
1864 signal_simple_error ("Unrecognized property", keyword);
1868 error ("'final must be specified");
1869 if (dimension == 2 && final > 0x5F)
1871 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1875 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1877 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1879 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1880 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1882 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1884 id = get_unallocated_leading_byte (dimension);
1886 if (NILP (doc_string))
1887 doc_string = build_string ("");
1889 if (NILP (registry))
1890 registry = build_string ("");
1892 if (NILP (short_name))
1893 XSETSTRING (short_name, XSYMBOL (name)->name);
1895 if (NILP (long_name))
1896 long_name = doc_string;
1899 columns = dimension;
1901 if (byte_offset < 0)
1905 else if (chars == 96)
1911 charset = make_charset (id, name, type, columns, graphic,
1912 final, direction, short_name, long_name,
1913 doc_string, registry,
1914 Qnil, 0, 0, 0, byte_offset);
1915 if (!NILP (ccl_program))
1916 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   CHARSET is normalized with Fget_charset and must not already have a
   reverse-direction partner; NEW-NAME must be a symbol that does not
   name an existing charset.  The new charset copies type, columns,
   graphic, final byte, names, doc string, registry and the decoding
   table / UCS range / offsets of CHARSET, gets a freshly allocated
   leading byte, and uses the opposite direction.  The two charsets are
   then linked to each other through their reverse-direction fields. */
1920 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1922 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1923 NEW-NAME is the name of the new charset. Return the new charset.
1925 (charset, new_name))
1927 Lisp_Object new_charset = Qnil;
1928 int id, dimension, columns, graphic, final;
1929 int direction, type;
1930 Lisp_Object registry, doc_string, short_name, long_name;
1931 struct Lisp_Charset *cs;
1933 charset = Fget_charset (charset);
1934 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1935 signal_simple_error ("Charset already has reverse-direction charset",
1938 CHECK_SYMBOL (new_name);
1939 if (!NILP (Ffind_charset (new_name)))
1940 signal_simple_error ("Cannot redefine existing charset", new_name);
1942 cs = XCHARSET (charset);
1944 type = CHARSET_TYPE (cs);
1945 columns = CHARSET_COLUMNS (cs);
1946 dimension = CHARSET_DIMENSION (cs);
1947 id = get_unallocated_leading_byte (dimension);
1949 graphic = CHARSET_GRAPHIC (cs);
1950 final = CHARSET_FINAL (cs);
/* Flip the direction: default to right-to-left, switch to left-to-right
   when the source charset is itself right-to-left. */
1951 direction = CHARSET_RIGHT_TO_LEFT;
1952 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1953 direction = CHARSET_LEFT_TO_RIGHT;
1954 doc_string = CHARSET_DOC_STRING (cs);
1955 short_name = CHARSET_SHORT_NAME (cs);
1956 long_name = CHARSET_LONG_NAME (cs);
1957 registry = CHARSET_REGISTRY (cs);
1959 new_charset = make_charset (id, new_name, type, columns,
1960 graphic, final, direction, short_name, long_name,
1961 doc_string, registry,
1963 CHARSET_DECODING_TABLE(cs),
1964 CHARSET_UCS_MIN(cs),
1965 CHARSET_UCS_MAX(cs),
1966 CHARSET_CODE_OFFSET(cs),
1967 CHARSET_BYTE_OFFSET(cs)
/* Link the pair in both directions. */
1973 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1974 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol;
   CHARSET is normalized with Fget_charset.  The alias is registered by
   storing the charset object under ALIAS in Vcharset_hash_table, and
   the value of that Fputhash call is returned. */
1979 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1980 Define symbol ALIAS as an alias for CHARSET.
1984 CHECK_SYMBOL (alias);
1985 charset = Fget_charset (charset);
1986 return Fputhash (alias, charset, Vcharset_hash_table);
1989 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  Normalizes
   CHARSET with Fget_charset and returns the content of its
   reverse-direction field as stored (set by
   Fmake_reverse_direction_charset). */
1991 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1993 Return the reverse-direction charset parallel to CHARSET, if any.
1994 This is the charset with the same properties (in particular, the same
1995 dimension, number of characters per dimension, and final byte) as
1996 CHARSET but whose characters are displayed in the opposite direction.
2000 charset = Fget_charset (charset);
2001 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Validation visible here: DIMENSION must be an integer in 1..2, CHARS
   must be 94 or 96, FINAL must be a character in '0'..'~' (and at most
   0x5F when DIMENSION is 2), DIRECTION must be 'l2r, 'r2l or nil.  The
   charset type is derived from DIMENSION and CHARS exactly as in
   Fmake_charset, and the charset is looked up with
   CHARSET_BY_ATTRIBUTES -- in both directions (left-to-right first)
   when no DIRECTION was given, otherwise in the requested one.

   NOTE(review): the visible return statement yields XCHARSET_NAME of
   the charset found, i.e. its name rather than the charset object; the
   handling of the not-found case is not visible in this view. */
2005 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2006 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2007 If DIRECTION is omitted, both directions will be checked (left-to-right
2008 will be returned if character sets exist for both directions).
2010 (dimension, chars, final, direction))
2012 int dm, ch, fi, di = -1;
2014 Lisp_Object obj = Qnil;
2016 CHECK_INT (dimension);
2017 dm = XINT (dimension);
2018 if (dm < 1 || dm > 2)
2019 signal_simple_error ("Invalid value for DIMENSION", dimension);
2023 if (ch != 94 && ch != 96)
2024 signal_simple_error ("Invalid value for CHARS", chars);
2026 CHECK_CHAR_COERCE_INT (final);
2028 if (fi < '0' || fi > '~')
2029 signal_simple_error ("Invalid value for FINAL", final);
2031 if (EQ (direction, Ql2r))
2032 di = CHARSET_LEFT_TO_RIGHT;
2033 else if (EQ (direction, Qr2l))
2034 di = CHARSET_RIGHT_TO_LEFT;
2035 else if (!NILP (direction))
2036 signal_simple_error ("Invalid value for DIRECTION", direction);
2038 if (dm == 2 && fi > 0x5F)
2040 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2043 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2045 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2049 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2051 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2054 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2057 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': normalizes CHARSET with
   Fget_charset and returns its short-name field. */
2061 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2062 Return short name of CHARSET.
2066 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': normalizes CHARSET with
   Fget_charset and returns its long-name field. */
2069 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2070 Return long name of CHARSET.
2074 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': normalizes CHARSET with
   Fget_charset and returns its doc-string field. */
2077 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2078 Return description of CHARSET.
2082 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': normalizes CHARSET with
   Fget_charset and returns its dimension as a Lisp integer. */
2085 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2086 Return dimension of CHARSET.
2090 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   CHARSET is normalized with Fget_charset and PROP must be a symbol.
   PROP is compared (EQ) against each known property symbol in turn and
   the matching field of the charset is returned: integer fields are
   boxed with make_int, the final byte with make_char, the direction is
   mapped to 'l2r / 'r2l.  For 'reverse-direction-charset the visible
   return yields XCHARSET_NAME of the partner charset (the handling of a
   missing partner is not visible in this view).  Any other PROP signals
   "Unrecognized charset property name". */
2093 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2094 Return property PROP of CHARSET.
2095 Recognized properties are those listed in `make-charset', as well as
2096 'name and 'doc-string.
2100 struct Lisp_Charset *cs;
2102 charset = Fget_charset (charset);
2103 cs = XCHARSET (charset);
2105 CHECK_SYMBOL (prop);
2106 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2107 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2108 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2109 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2110 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2111 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2112 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2113 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2114 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2115 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2116 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2117 if (EQ (prop, Qdirection))
2118 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2119 if (EQ (prop, Qreverse_direction_charset))
2121 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2125 return XCHARSET_NAME (obj);
2127 signal_simple_error ("Unrecognized charset property name", prop);
2128 return Qnil; /* not reached */
/* Lisp primitive `charset-id': normalizes CHARSET with Fget_charset
   and returns its leading byte (XCHARSET_LEADING_BYTE) as a Lisp
   integer. */
2131 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2132 Return charset identification number of CHARSET.
2136 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2139 /* #### We need to figure out which properties we really want to allow to be set. */
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is normalized
   with Fget_charset, CCL-PROGRAM must be a vector (CHECK_VECTOR), and
   the vector is stored in the charset's ccl-program field. */
2142 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2143 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2145 (charset, ccl_program))
2147 charset = Fget_charset (charset);
2148 CHECK_VECTOR (ccl_program);
2149 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop cached font lookups for CHARSET.  For every device, the
   per-charset entry of the device's charset_font_cache is fetched
   (Qunbound meaning "no entry") and, if present, the hash table found
   there is emptied with Fclrhash.  Called from Fset_charset_registry
   after the registry has been changed. */
2154 invalidate_charset_font_caches (Lisp_Object charset)
2156 /* Invalidate font cache entries for charset on all devices. */
2157 Lisp_Object devcons, concons, hash_table;
2158 DEVICE_LOOP_NO_BREAK (devcons, concons)
2160 struct device *d = XDEVICE (XCAR (devcons));
2161 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2162 if (!UNBOUNDP (hash_table))
2163 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  CHARSET is normalized with
   Fget_charset and REGISTRY must be a string.  After the new registry
   is stored, the charset's font caches are invalidated on all devices
   and a change of the default face's font property is announced with
   face_property_was_changed. */
2167 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2168 Set the 'registry property of CHARSET to REGISTRY.
2170 (charset, registry))
2172 charset = Fget_charset (charset);
2173 CHECK_STRING (registry);
2174 XCHARSET_REGISTRY (charset) = registry;
2175 invalidate_charset_font_caches (charset);
2176 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': normalizes CHARSET with
   Fget_charset and returns its decoding table
   (XCHARSET_DECODING_TABLE) as stored. */
2181 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2182 Return mapping-table of CHARSET.
2186 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   TABLE must be nil or a vector; anything else signals
   Qwrong_type_argument.  A nil TABLE is simply stored as the decoding
   table.  A vector TABLE is length-checked against the charset's
   capacity, installed as the decoding table, and then walked according
   to the charset's dimension: each entry registers, via
   put_char_attribute, the position code(s) (index plus
   CHARSET_BYTE_OFFSET) for this charset.  In the two-dimensional walk
   an over-long row restores the previous decoding table before
   signalling args_out_of_range.

   NOTE(review): the outer length check compares against ccs_len, yet
   the args_out_of_range call reports CHARSET_CHARS; for the "ad-hoc
   ascii" case the two differ, so the reported limit presumably does not
   match the limit that was enforced -- confirm.
   NOTE(review): the tests deciding which entries are passed to
   put_char_attribute are not visible in this view. */
2189 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2190 Set mapping-table of CHARSET to TABLE.
2194 struct Lisp_Charset *cs;
2195 Lisp_Object old_table;
2198 charset = Fget_charset (charset);
2199 cs = XCHARSET (charset);
2201 if (EQ (table, Qnil))
2203 CHARSET_DECODING_TABLE(cs) = table;
2206 else if (VECTORP (table))
2210 /* ad-hoc method for `ascii' */
2211 if ((CHARSET_CHARS (cs) == 94) &&
2212 (CHARSET_BYTE_OFFSET (cs) != 33))
2213 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2215 ccs_len = CHARSET_CHARS (cs);
2217 if (XVECTOR_LENGTH (table) > ccs_len)
2218 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2219 old_table = CHARSET_DECODING_TABLE(cs);
2220 CHARSET_DECODING_TABLE(cs) = table;
2223 signal_error (Qwrong_type_argument,
2224 list2 (build_translated_string ("vector-or-nil-p"),
2226 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2228 switch (CHARSET_DIMENSION (cs))
2231 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2233 Lisp_Object c = XVECTOR_DATA(table)[i];
2238 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2242 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2244 Lisp_Object v = XVECTOR_DATA(table)[i];
2250 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2252 CHARSET_DECODING_TABLE(cs) = old_table;
2253 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2255 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2257 Lisp_Object c = XVECTOR_DATA(v)[j];
2260 put_char_attribute (c, charset,
2263 (i + CHARSET_BYTE_OFFSET (cs)),
2265 (j + CHARSET_BYTE_OFFSET (cs))));
2269 put_char_attribute (v, charset,
2271 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2280 /************************************************************************/
2281 /* Lisp primitives for working with characters */
2282 /************************************************************************/
/* Lisp primitive `make-char'.

   CHARSET is normalized with Fget_charset.  The admissible octet range
   [lowlim, highlim] depends on the charset: 0..127 for ascii, 0..31 for
   control-1, 0..255 when the charset has 256 chars per dimension,
   33..126 for 94-char sets and 32..127 otherwise.  An octet outside the
   range signals args_out_of_range_3.  For a one-dimensional charset the
   character is built from the first octet alone (a non-nil ARG2 is
   rejected with "Charset is of dimension one; second octet must be
   nil"); otherwise it is built from both octets, the second being
   masked with 0x7f before its range check.

   NOTE(review): the statements that extract and mask the first octet
   are not visible in this view. */
2284 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2285 Make a character from CHARSET and octets ARG1 and ARG2.
2286 ARG2 is required only for characters from two-dimensional charsets.
2287 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2288 character s with caron.
2290 (charset, arg1, arg2))
2292 struct Lisp_Charset *cs;
2294 int lowlim, highlim;
2296 charset = Fget_charset (charset);
2297 cs = XCHARSET (charset);
2299 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2300 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2302 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2304 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2305 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2308 /* It is useful (and safe, according to Olivier Galibert) to strip
2309 the 8th bit off ARG1 and ARG2 because it allows programmers to
2310 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2311 Latin 2 code of the character. */
2319 if (a1 < lowlim || a1 > highlim)
2320 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2322 if (CHARSET_DIMENSION (cs) == 1)
2326 ("Charset is of dimension one; second octet must be nil", arg2);
2327 return make_char (MAKE_CHAR (charset, a1, 0));
2336 a2 = XINT (arg2) & 0x7f;
2338 if (a2 < lowlim || a2 > highlim)
2339 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2341 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH is checked with
   CHECK_CHAR_COERCE_INT; the result is the NAME of the charset that
   CHAR_CHARSET reports for the character, not the charset object. */
2344 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2345 Return the character set of char CH.
2349 CHECK_CHAR_COERCE_INT (ch);
2351 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char'.

   Two bodies are visible below, presumably alternative preprocessor
   branches (the directives are not visible in this view):
   - one obtains a list from SPLIT_CHAR and, when its car is a charset
     object, returns a fresh list whose head is the charset's NAME
     followed by a copy of the position codes;
   - the other uses BREAKUP_CHAR to obtain the charset and octets c1/c2
     and builds a 3-element list for two-dimensional charsets or a
     2-element list otherwise, under GCPRO2 protection. */
2354 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2355 Return list of charset and one or two position-codes of CHAR.
2361 Lisp_Object charset;
2363 CHECK_CHAR_COERCE_INT (character);
2364 ret = SPLIT_CHAR (XCHAR (character));
2365 charset = Fcar (ret);
2366 if (CHARSETP (charset))
2367 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2371 /* This function can GC */
2372 struct gcpro gcpro1, gcpro2;
2373 Lisp_Object charset = Qnil;
2374 Lisp_Object rc = Qnil;
2377 GCPRO2 (charset, rc);
2378 CHECK_CHAR_COERCE_INT (character);
2380 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2382 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2384 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2388 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2396 #ifdef ENABLE_COMPOSITE_CHARS
2397 /************************************************************************/
2398 /* composite character functions */
2399 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below creates a new character in Vcharset_composite at the next free
   (row, column) position, records it in both the char->string and the
   string->char tables, and advances the position: the column is bumped
   and, on reaching 128, wraps to 32 while the row is incremented.  When
   the row counter has reached 128 the error "No more composite chars
   available" is signalled.

   NOTE(review): the test deciding when the allocation path runs, and
   the return statement, are not visible in this view. */
2402 lookup_composite_char (Bufbyte *str, int len)
2404 Lisp_Object lispstr = make_string (str, len);
2405 Lisp_Object ch = Fgethash (lispstr,
2406 Vcomposite_char_string2char_hash_table,
2412 if (composite_char_row_next >= 128)
2413 signal_simple_error ("No more composite chars available", lispstr);
2414 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2415 composite_char_col_next);
/* Register the new character in both directions. */
2416 Fputhash (make_char (emch), lispstr,
2417 Vcomposite_char_char2string_hash_table);
2418 Fputhash (lispstr, make_char (emch),
2419 Vcomposite_char_string2char_hash_table);
2420 composite_char_col_next++;
2421 if (composite_char_col_next >= 128)
2423 composite_char_col_next = 32;
2424 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The lookup is asserted to
   succeed, so CH must have been registered (see lookup_composite_char). */
2433 composite_char_string (Emchar ch)
2435 Lisp_Object str = Fgethash (make_char (ch),
2436 Vcomposite_char_char2string_hash_table,
2438 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char: STRING must be a
   Lisp string; its data and length are handed to lookup_composite_char
   and the resulting character is returned as a Lisp character.
   Declared with `xxDEFUN' rather than DEFUN. */
2442 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2443 Convert a string into a single composite character.
2444 The character is the result of overstriking all the characters in
2449 CHECK_STRING (string);
2450 return make_char (lookup_composite_char (XSTRING_DATA (string),
2451 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string.  Signals "Must be
   composite char" unless the character's leading byte is
   LEADING_BYTE_COMPOSITE; otherwise returns the string recorded for the
   character.  Declared with `xxDEFUN' rather than DEFUN. */
2454 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2455 Return a string of the characters comprising a composite character.
2463 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2464 signal_simple_error ("Must be composite char", ch);
2465 return composite_char_string (emch);
2467 #endif /* ENABLE_COMPOSITE_CHARS */
2470 /************************************************************************/
2471 /* initialization */
2472 /************************************************************************/
/* Symbol-table initialization for this file: registers every Lisp
   primitive defined here with DEFSUBR and interns, via defsymbol, the
   symbols used as property names, direction values, decomposition tags
   and charset names. */
2475 syms_of_mule_charset (void)
2477 DEFSUBR (Fcharsetp);
2478 DEFSUBR (Ffind_charset);
2479 DEFSUBR (Fget_charset);
2480 DEFSUBR (Fcharset_list);
2481 DEFSUBR (Fcharset_name);
2482 DEFSUBR (Fmake_charset);
2483 DEFSUBR (Fmake_reverse_direction_charset);
2484 /* DEFSUBR (Freverse_direction_charset); */
2485 DEFSUBR (Fdefine_charset_alias);
2486 DEFSUBR (Fcharset_from_attributes);
2487 DEFSUBR (Fcharset_short_name);
2488 DEFSUBR (Fcharset_long_name);
2489 DEFSUBR (Fcharset_description);
2490 DEFSUBR (Fcharset_dimension);
2491 DEFSUBR (Fcharset_property);
2492 DEFSUBR (Fcharset_id);
2493 DEFSUBR (Fset_charset_ccl_program);
2494 DEFSUBR (Fset_charset_registry);
2496 DEFSUBR (Fchar_attribute_alist);
2497 DEFSUBR (Fget_char_attribute);
2498 DEFSUBR (Fput_char_attribute);
2499 DEFSUBR (Fdefine_char);
2500 DEFSUBR (Fchar_variants);
2501 DEFSUBR (Fget_composite_char);
2502 DEFSUBR (Fcharset_mapping_table);
2503 DEFSUBR (Fset_charset_mapping_table);
2506 DEFSUBR (Fmake_char);
2507 DEFSUBR (Fchar_charset);
2508 DEFSUBR (Fsplit_char);
2510 #ifdef ENABLE_COMPOSITE_CHARS
2511 DEFSUBR (Fmake_composite_char);
2512 DEFSUBR (Fcomposite_char_string);
/* Property-name and direction symbols used by the charset primitives. */
2515 defsymbol (&Qcharsetp, "charsetp");
2516 defsymbol (&Qregistry, "registry");
2517 defsymbol (&Qfinal, "final");
2518 defsymbol (&Qgraphic, "graphic");
2519 defsymbol (&Qdirection, "direction");
2520 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2521 defsymbol (&Qshort_name, "short-name");
2522 defsymbol (&Qlong_name, "long-name");
2524 defsymbol (&Ql2r, "l2r");
2525 defsymbol (&Qr2l, "r2l");
2527 /* Charsets, compatible with FSF 20.3
2528 Naming convention is Script-Charset[-Edition] */
2529 defsymbol (&Qascii, "ascii");
2530 defsymbol (&Qcontrol_1, "control-1");
2531 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2532 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2533 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2534 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2535 defsymbol (&Qthai_tis620, "thai-tis620");
2536 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2537 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2538 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2539 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2540 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2541 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2542 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2543 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2544 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2545 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2546 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2547 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2548 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2549 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2550 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2552 defsymbol (&Q_ucs, "->ucs");
2553 defsymbol (&Q_decomposition, "->decomposition");
2554 defsymbol (&Qcompat, "compat");
2555 defsymbol (&Qisolated, "isolated");
2556 defsymbol (&Qinitial, "initial");
2557 defsymbol (&Qmedial, "medial");
/* NOTE(review): Qfinal is also passed to defsymbol earlier in this
   function with the same name "final"; confirm whether the two calls
   sit in mutually exclusive preprocessor branches or one is redundant. */
2558 defsymbol (&Qfinal, "final");
2559 defsymbol (&Qvertical, "vertical");
2560 defsymbol (&QnoBreak, "noBreak");
2561 defsymbol (&Qfraction, "fraction");
2562 defsymbol (&Qsuper, "super");
2563 defsymbol (&Qsub, "sub");
2564 defsymbol (&Qcircle, "circle");
2565 defsymbol (&Qsquare, "square");
2566 defsymbol (&Qwide, "wide");
2567 defsymbol (&Qnarrow, "narrow");
2568 defsymbol (&Qsmall, "small");
2569 defsymbol (&Qfont, "font");
2570 defsymbol (&Qucs, "ucs");
2571 defsymbol (&Qucs_bmp, "ucs-bmp");
2572 defsymbol (&Qlatin_viscii, "latin-viscii");
2573 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2574 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2575 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2576 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2577 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2578 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2579 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2580 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2581 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2582 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2583 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2584 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2585 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2586 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2587 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2588 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2589 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2590 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2591 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2592 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2593 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2594 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2595 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2596 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2597 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2598 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2599 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2601 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2602 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2604 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file.

   - Fills charset_by_leading_byte and charset_by_attributes with Qnil.
     Both a two-level and a three-level loop over charset_by_attributes
     are visible; presumably they are alternative preprocessor branches
     (the directives are not visible in this view).
   - Seeds the counters used to hand out private leading bytes.
   - Declares the Lisp variables `leading-code-private-11',
     `utf-2000-version' and `default-coded-charset-priority-list' and
     gives them their initial values.
   - Creates the character attribute, composition and variant tables
     with make_char_code_table (Qnil) and registers them with staticpro.

   NOTE(review): leading_code_private_11 is assigned the same value
   both before and after its DEFVAR_INT; the second assignment looks
   redundant -- confirm. */
2608 vars_of_mule_charset (void)
2615 /* Table of charsets indexed by leading byte. */
2616 for (i = 0; i < countof (charset_by_leading_byte); i++)
2617 charset_by_leading_byte[i] = Qnil;
2620 /* Table of charsets indexed by type/final-byte. */
2621 for (i = 0; i < countof (charset_by_attributes); i++)
2622 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2623 charset_by_attributes[i][j] = Qnil;
2625 /* Table of charsets indexed by type/final-byte/direction. */
2626 for (i = 0; i < countof (charset_by_attributes); i++)
2627 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2628 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2629 charset_by_attributes[i][j][k] = Qnil;
2633 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2635 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2636 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2640 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2641 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2642 Leading-code of private TYPE9N charset of column-width 1.
2644 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2648 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2649 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2650 Version number of UTF-2000.
2653 staticpro (&Vcharacter_attribute_table);
2654 Vcharacter_attribute_table = make_char_code_table (Qnil);
2656 staticpro (&Vcharacter_composition_table);
2657 Vcharacter_composition_table = make_char_code_table (Qnil);
2659 staticpro (&Vcharacter_variant_table);
2660 Vcharacter_variant_table = make_char_code_table (Qnil);
2662 Vdefault_coded_charset_priority_list = Qnil;
2663 DEFVAR_LISP ("default-coded-charset-priority-list",
2664 &Vdefault_coded_charset_priority_list /*
2665 Default order of preferred coded-character-sets.
2671 complex_vars_of_mule_charset (void)
2673 staticpro (&Vcharset_hash_table);
2674 Vcharset_hash_table =
2675 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2677 /* Predefined character sets. We store them into variables for
2682 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2683 CHARSET_TYPE_256X256, 1, 2, 0,
2684 CHARSET_LEFT_TO_RIGHT,
2685 build_string ("BMP"),
2686 build_string ("BMP"),
2687 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2688 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2689 Qnil, 0, 0xFFFF, 0, 0);
2691 # define MIN_CHAR_THAI 0
2692 # define MAX_CHAR_THAI 0
2693 # define MIN_CHAR_GREEK 0
2694 # define MAX_CHAR_GREEK 0
2695 # define MIN_CHAR_HEBREW 0
2696 # define MAX_CHAR_HEBREW 0
2697 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2698 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2699 # define MIN_CHAR_CYRILLIC 0
2700 # define MAX_CHAR_CYRILLIC 0
/* ASCII: a 94-character set with final byte 'B' covering codes
   0-0x7F.  The four strings appear to be short name, long name, doc
   string and font registry regexp (TODO confirm against make_charset);
   both names are plain "ASCII". */
2703 make_charset (LEADING_BYTE_ASCII, Qascii,
2704 CHARSET_TYPE_94, 1, 0, 'B',
2705 CHARSET_LEFT_TO_RIGHT,
2706 build_string ("ASCII"),
2707 build_string ("ASCII"),
2708 build_string ("ASCII (ISO646 IRV)"),
2709 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2710 Qnil, 0, 0x7F, 0, 0);
/* C1 control characters.  The doc string states the same range as the
   code range passed in the last argument line: 0x80-0x9F, i.e.
   128-159. */
2711 Vcharset_control_1 =
2712 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2713 CHARSET_TYPE_94, 1, 1, 0,
2714 CHARSET_LEFT_TO_RIGHT,
2715 build_string ("C1"),
2716 build_string ("Control characters"),
2717 build_string ("Control characters 128-159"),
2719 Qnil, 0x80, 0x9F, 0, 0);
2720 Vcharset_latin_iso8859_1 =
2721 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2722 CHARSET_TYPE_96, 1, 1, 'A',
2723 CHARSET_LEFT_TO_RIGHT,
2724 build_string ("Latin-1"),
2725 build_string ("ISO8859-1 (Latin-1)"),
2726 build_string ("ISO8859-1 (Latin-1)"),
2727 build_string ("iso8859-1"),
2728 Qnil, 0xA0, 0xFF, 0, 32);
2729 Vcharset_latin_iso8859_2 =
2730 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2731 CHARSET_TYPE_96, 1, 1, 'B',
2732 CHARSET_LEFT_TO_RIGHT,
2733 build_string ("Latin-2"),
2734 build_string ("ISO8859-2 (Latin-2)"),
2735 build_string ("ISO8859-2 (Latin-2)"),
2736 build_string ("iso8859-2"),
2738 Vcharset_latin_iso8859_3 =
2739 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2740 CHARSET_TYPE_96, 1, 1, 'C',
2741 CHARSET_LEFT_TO_RIGHT,
2742 build_string ("Latin-3"),
2743 build_string ("ISO8859-3 (Latin-3)"),
2744 build_string ("ISO8859-3 (Latin-3)"),
2745 build_string ("iso8859-3"),
2747 Vcharset_latin_iso8859_4 =
2748 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2749 CHARSET_TYPE_96, 1, 1, 'D',
2750 CHARSET_LEFT_TO_RIGHT,
2751 build_string ("Latin-4"),
2752 build_string ("ISO8859-4 (Latin-4)"),
2753 build_string ("ISO8859-4 (Latin-4)"),
2754 build_string ("iso8859-4"),
2756 Vcharset_thai_tis620 =
2757 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2758 CHARSET_TYPE_96, 1, 1, 'T',
2759 CHARSET_LEFT_TO_RIGHT,
2760 build_string ("TIS620"),
2761 build_string ("TIS620 (Thai)"),
2762 build_string ("TIS620.2529 (Thai)"),
2763 build_string ("tis620"),
2764 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2765 Vcharset_greek_iso8859_7 =
2766 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2767 CHARSET_TYPE_96, 1, 1, 'F',
2768 CHARSET_LEFT_TO_RIGHT,
2769 build_string ("ISO8859-7"),
2770 build_string ("ISO8859-7 (Greek)"),
2771 build_string ("ISO8859-7 (Greek)"),
2772 build_string ("iso8859-7"),
2773 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2774 Vcharset_arabic_iso8859_6 =
2775 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2776 CHARSET_TYPE_96, 1, 1, 'G',
2777 CHARSET_RIGHT_TO_LEFT,
2778 build_string ("ISO8859-6"),
2779 build_string ("ISO8859-6 (Arabic)"),
2780 build_string ("ISO8859-6 (Arabic)"),
2781 build_string ("iso8859-6"),
2783 Vcharset_hebrew_iso8859_8 =
2784 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2785 CHARSET_TYPE_96, 1, 1, 'H',
2786 CHARSET_RIGHT_TO_LEFT,
2787 build_string ("ISO8859-8"),
2788 build_string ("ISO8859-8 (Hebrew)"),
2789 build_string ("ISO8859-8 (Hebrew)"),
2790 build_string ("iso8859-8"),
2791 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2792 Vcharset_katakana_jisx0201 =
2793 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2794 CHARSET_TYPE_94, 1, 1, 'I',
2795 CHARSET_LEFT_TO_RIGHT,
2796 build_string ("JISX0201 Kana"),
2797 build_string ("JISX0201.1976 (Japanese Kana)"),
2798 build_string ("JISX0201.1976 Japanese Kana"),
2799 build_string ("jisx0201\\.1976"),
2801 MIN_CHAR_HALFWIDTH_KATAKANA,
2802 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2803 Vcharset_latin_jisx0201 =
2804 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2805 CHARSET_TYPE_94, 1, 0, 'J',
2806 CHARSET_LEFT_TO_RIGHT,
2807 build_string ("JISX0201 Roman"),
2808 build_string ("JISX0201.1976 (Japanese Roman)"),
2809 build_string ("JISX0201.1976 Japanese Roman"),
2810 build_string ("jisx0201\\.1976"),
2812 Vcharset_cyrillic_iso8859_5 =
2813 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2814 CHARSET_TYPE_96, 1, 1, 'L',
2815 CHARSET_LEFT_TO_RIGHT,
2816 build_string ("ISO8859-5"),
2817 build_string ("ISO8859-5 (Cyrillic)"),
2818 build_string ("ISO8859-5 (Cyrillic)"),
2819 build_string ("iso8859-5"),
2820 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2821 Vcharset_latin_iso8859_9 =
2822 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2823 CHARSET_TYPE_96, 1, 1, 'M',
2824 CHARSET_LEFT_TO_RIGHT,
2825 build_string ("Latin-5"),
2826 build_string ("ISO8859-9 (Latin-5)"),
2827 build_string ("ISO8859-9 (Latin-5)"),
2828 build_string ("iso8859-9"),
2830 Vcharset_japanese_jisx0208_1978 =
2831 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2832 CHARSET_TYPE_94X94, 2, 0, '@',
2833 CHARSET_LEFT_TO_RIGHT,
2834 build_string ("JIS X0208:1978"),
2835 build_string ("JIS X0208:1978 (Japanese)"),
2837 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2838 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB2312 (simplified Chinese): a 94x94 set with final byte 'A'.  The
   second name string carries no unbalanced parenthesis. */
2840 Vcharset_chinese_gb2312 =
2841 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2842 CHARSET_TYPE_94X94, 2, 0, 'A',
2843 CHARSET_LEFT_TO_RIGHT,
2844 build_string ("GB2312"),
2845 build_string ("GB2312"),
2846 build_string ("GB2312 Chinese simplified"),
2847 build_string ("gb2312"),
2849 Vcharset_japanese_jisx0208 =
2850 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2851 CHARSET_TYPE_94X94, 2, 0, 'B',
2852 CHARSET_LEFT_TO_RIGHT,
2853 build_string ("JISX0208"),
2854 build_string ("JIS X0208:1983 (Japanese)"),
2855 build_string ("JIS X0208:1983 Japanese Kanji"),
2856 build_string ("jisx0208\\.1983"),
2858 Vcharset_japanese_jisx0208_1990 =
2859 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2860 Qjapanese_jisx0208_1990,
2861 CHARSET_TYPE_94X94, 2, 0, 0,
2862 CHARSET_LEFT_TO_RIGHT,
2863 build_string ("JISX0208-1990"),
2864 build_string ("JIS X0208:1990 (Japanese)"),
2865 build_string ("JIS X0208:1990 Japanese Kanji"),
2866 build_string ("jisx0208\\.1990"),
2868 MIN_CHAR_JIS_X0208_1990,
2869 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* KSC5601 (Korean): a 94x94 set with final byte 'C'.  The second name
   string follows the "NAME (Language)" form used by the neighbouring
   charsets, with balanced parentheses. */
2870 Vcharset_korean_ksc5601 =
2871 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2872 CHARSET_TYPE_94X94, 2, 0, 'C',
2873 CHARSET_LEFT_TO_RIGHT,
2874 build_string ("KSC5601"),
2875 build_string ("KSC5601 (Korean)"),
2876 build_string ("KSC5601 Korean Hangul and Hanja"),
2877 build_string ("ksc5601"),
2879 Vcharset_japanese_jisx0212 =
2880 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2881 CHARSET_TYPE_94X94, 2, 0, 'D',
2882 CHARSET_LEFT_TO_RIGHT,
2883 build_string ("JISX0212"),
2884 build_string ("JISX0212 (Japanese)"),
2885 build_string ("JISX0212 Japanese Supplement"),
2886 build_string ("jisx0212"),
2889 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2890 Vcharset_chinese_cns11643_1 =
2891 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2892 CHARSET_TYPE_94X94, 2, 0, 'G',
2893 CHARSET_LEFT_TO_RIGHT,
2894 build_string ("CNS11643-1"),
2895 build_string ("CNS11643-1 (Chinese traditional)"),
2897 ("CNS 11643 Plane 1 Chinese traditional"),
2898 build_string (CHINESE_CNS_PLANE_RE("1")),
2900 Vcharset_chinese_cns11643_2 =
2901 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2902 CHARSET_TYPE_94X94, 2, 0, 'H',
2903 CHARSET_LEFT_TO_RIGHT,
2904 build_string ("CNS11643-2"),
2905 build_string ("CNS11643-2 (Chinese traditional)"),
2907 ("CNS 11643 Plane 2 Chinese traditional"),
2908 build_string (CHINESE_CNS_PLANE_RE("2")),
2911 Vcharset_latin_viscii_lower =
2912 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2913 CHARSET_TYPE_96, 1, 1, '1',
2914 CHARSET_LEFT_TO_RIGHT,
2915 build_string ("VISCII lower"),
2916 build_string ("VISCII lower (Vietnamese)"),
2917 build_string ("VISCII lower (Vietnamese)"),
2918 build_string ("MULEVISCII-LOWER"),
2920 Vcharset_latin_viscii_upper =
2921 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2922 CHARSET_TYPE_96, 1, 1, '2',
2923 CHARSET_LEFT_TO_RIGHT,
2924 build_string ("VISCII upper"),
2925 build_string ("VISCII upper (Vietnamese)"),
2926 build_string ("VISCII upper (Vietnamese)"),
2927 build_string ("MULEVISCII-UPPER"),
2929 Vcharset_latin_viscii =
2930 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2931 CHARSET_TYPE_256, 1, 2, 0,
2932 CHARSET_LEFT_TO_RIGHT,
2933 build_string ("VISCII"),
2934 build_string ("VISCII 1.1 (Vietnamese)"),
2935 build_string ("VISCII 1.1 (Vietnamese)"),
2936 build_string ("VISCII1\\.1"),
2938 Vcharset_ideograph_daikanwa =
2939 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2940 CHARSET_TYPE_256X256, 2, 2, 0,
2941 CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Daikanwa"),
2943 build_string ("Morohashi's Daikanwa"),
2944 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2945 build_string ("Daikanwa"),
2946 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2947 Vcharset_mojikyo_pj_1 =
2948 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
2949 CHARSET_TYPE_94X94, 2, 0, 0,
2950 CHARSET_LEFT_TO_RIGHT,
2951 build_string ("Mojikyo-PJ-1"),
2952 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
2954 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
2955 build_string ("jisx0208\\.Mojikyo-1$"),
2957 Vcharset_mojikyo_pj_2 =
2958 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
2959 CHARSET_TYPE_94X94, 2, 0, 0,
2960 CHARSET_LEFT_TO_RIGHT,
2961 build_string ("Mojikyo-PJ-2"),
2962 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
2964 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
2965 build_string ("jisx0208\\.Mojikyo-2$"),
2967 Vcharset_mojikyo_pj_3 =
2968 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
2969 CHARSET_TYPE_94X94, 2, 0, 0,
2970 CHARSET_LEFT_TO_RIGHT,
2971 build_string ("Mojikyo-PJ-3"),
2972 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
2974 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
2975 build_string ("jisx0208\\.Mojikyo-3$"),
2977 Vcharset_mojikyo_pj_4 =
2978 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
2979 CHARSET_TYPE_94X94, 2, 0, 0,
2980 CHARSET_LEFT_TO_RIGHT,
2981 build_string ("Mojikyo-PJ-4"),
2982 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
2984 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
2985 build_string ("jisx0208\\.Mojikyo-4$"),
2987 Vcharset_mojikyo_pj_5 =
2988 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
2989 CHARSET_TYPE_94X94, 2, 0, 0,
2990 CHARSET_LEFT_TO_RIGHT,
2991 build_string ("Mojikyo-PJ-5"),
2992 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
2994 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
2995 build_string ("jisx0208\\.Mojikyo-5$"),
2997 Vcharset_mojikyo_pj_6 =
2998 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
2999 CHARSET_TYPE_94X94, 2, 0, 0,
3000 CHARSET_LEFT_TO_RIGHT,
3001 build_string ("Mojikyo-PJ-6"),
3002 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3004 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3005 build_string ("jisx0208\\.Mojikyo-6$"),
3007 Vcharset_mojikyo_pj_7 =
3008 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3009 CHARSET_TYPE_94X94, 2, 0, 0,
3010 CHARSET_LEFT_TO_RIGHT,
3011 build_string ("Mojikyo-PJ-7"),
3012 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3014 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3015 build_string ("jisx0208\\.Mojikyo-7$"),
3017 Vcharset_mojikyo_pj_8 =
3018 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3019 CHARSET_TYPE_94X94, 2, 0, 0,
3020 CHARSET_LEFT_TO_RIGHT,
3021 build_string ("Mojikyo-PJ-8"),
3022 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3024 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3025 build_string ("jisx0208\\.Mojikyo-8$"),
3027 Vcharset_mojikyo_pj_9 =
3028 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3029 CHARSET_TYPE_94X94, 2, 0, 0,
3030 CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Mojikyo-PJ-9"),
3032 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3034 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3035 build_string ("jisx0208\\.Mojikyo-9$"),
3037 Vcharset_mojikyo_pj_10 =
3038 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3039 CHARSET_TYPE_94X94, 2, 0, 0,
3040 CHARSET_LEFT_TO_RIGHT,
3041 build_string ("Mojikyo-PJ-10"),
3042 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3044 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3045 build_string ("jisx0208\\.Mojikyo-10$"),
3047 Vcharset_mojikyo_pj_11 =
3048 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3049 CHARSET_TYPE_94X94, 2, 0, 0,
3050 CHARSET_LEFT_TO_RIGHT,
3051 build_string ("Mojikyo-PJ-11"),
3052 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3054 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3055 build_string ("jisx0208\\.Mojikyo-11$"),
3057 Vcharset_mojikyo_pj_12 =
3058 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3059 CHARSET_TYPE_94X94, 2, 0, 0,
3060 CHARSET_LEFT_TO_RIGHT,
3061 build_string ("Mojikyo-PJ-12"),
3062 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3064 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3065 build_string ("jisx0208\\.Mojikyo-12$"),
3067 Vcharset_mojikyo_pj_13 =
3068 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3069 CHARSET_TYPE_94X94, 2, 0, 0,
3070 CHARSET_LEFT_TO_RIGHT,
3071 build_string ("Mojikyo-PJ-13"),
3072 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3074 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3075 build_string ("jisx0208\\.Mojikyo-13$"),
3077 Vcharset_mojikyo_pj_14 =
3078 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3079 CHARSET_TYPE_94X94, 2, 0, 0,
3080 CHARSET_LEFT_TO_RIGHT,
3081 build_string ("Mojikyo-PJ-14"),
3082 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3084 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3085 build_string ("jisx0208\\.Mojikyo-14$"),
3087 Vcharset_mojikyo_pj_15 =
3088 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3089 CHARSET_TYPE_94X94, 2, 0, 0,
3090 CHARSET_LEFT_TO_RIGHT,
3091 build_string ("Mojikyo-PJ-15"),
3092 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3094 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3095 build_string ("jisx0208\\.Mojikyo-15$"),
3097 Vcharset_mojikyo_pj_16 =
3098 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3099 CHARSET_TYPE_94X94, 2, 0, 0,
3100 CHARSET_LEFT_TO_RIGHT,
3101 build_string ("Mojikyo-PJ-16"),
3102 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3104 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3105 build_string ("jisx0208\\.Mojikyo-16$"),
3107 Vcharset_mojikyo_pj_17 =
3108 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3109 CHARSET_TYPE_94X94, 2, 0, 0,
3110 CHARSET_LEFT_TO_RIGHT,
3111 build_string ("Mojikyo-PJ-17"),
3112 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3114 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3115 build_string ("jisx0208\\.Mojikyo-17$"),
3117 Vcharset_mojikyo_pj_18 =
3118 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3119 CHARSET_TYPE_94X94, 2, 0, 0,
3120 CHARSET_LEFT_TO_RIGHT,
3121 build_string ("Mojikyo-PJ-18"),
3122 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3124 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3125 build_string ("jisx0208\\.Mojikyo-18$"),
3127 Vcharset_mojikyo_pj_19 =
3128 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3129 CHARSET_TYPE_94X94, 2, 0, 0,
3130 CHARSET_LEFT_TO_RIGHT,
3131 build_string ("Mojikyo-PJ-19"),
3132 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3134 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3135 build_string ("jisx0208\\.Mojikyo-19$"),
3137 Vcharset_mojikyo_pj_20 =
3138 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3139 CHARSET_TYPE_94X94, 2, 0, 0,
3140 CHARSET_LEFT_TO_RIGHT,
3141 build_string ("Mojikyo-PJ-20"),
3142 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3144 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3145 build_string ("jisx0208\\.Mojikyo-20$"),
3147 Vcharset_mojikyo_pj_21 =
3148 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3149 CHARSET_TYPE_94X94, 2, 0, 0,
3150 CHARSET_LEFT_TO_RIGHT,
3151 build_string ("Mojikyo-PJ-21"),
3152 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3154 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3155 build_string ("jisx0208\\.Mojikyo-21$"),
3157 Vcharset_ethiopic_ucs =
3158 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3159 CHARSET_TYPE_256X256, 2, 2, 0,
3160 CHARSET_LEFT_TO_RIGHT,
3161 build_string ("Ethiopic (UCS)"),
3162 build_string ("Ethiopic (UCS)"),
3163 build_string ("Ethiopic of UCS"),
3164 build_string ("Ethiopic-Unicode"),
3165 Qnil, 0x1200, 0x137F, 0x1200, 0);
3167 Vcharset_chinese_big5_1 =
3168 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3169 CHARSET_TYPE_94X94, 2, 0, '0',
3170 CHARSET_LEFT_TO_RIGHT,
3171 build_string ("Big5"),
3172 build_string ("Big5 (Level-1)"),
3174 ("Big5 Level-1 Chinese traditional"),
3175 build_string ("big5"),
3177 Vcharset_chinese_big5_2 =
3178 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3179 CHARSET_TYPE_94X94, 2, 0, '1',
3180 CHARSET_LEFT_TO_RIGHT,
3181 build_string ("Big5"),
3182 build_string ("Big5 (Level-2)"),
3184 ("Big5 Level-2 Chinese traditional"),
3185 build_string ("big5"),
3188 #ifdef ENABLE_COMPOSITE_CHARS
3189 /* #### For simplicity, we put composite chars into a 96x96 charset.
3190 This is going to lead to problems because you can run out of
3191 room, esp. as we don't yet recycle numbers. */
3192 Vcharset_composite =
3193 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3194 CHARSET_TYPE_96X96, 2, 0, 0,
3195 CHARSET_LEFT_TO_RIGHT,
3196 build_string ("Composite"),
3197 build_string ("Composite characters"),
3198 build_string ("Composite characters"),
3201 composite_char_row_next = 32;
3202 composite_char_col_next = 32;
3204 Vcomposite_char_string2char_hash_table =
3205 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3206 Vcomposite_char_char2string_hash_table =
3207 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3208 staticpro (&Vcomposite_char_string2char_hash_table);
3209 staticpro (&Vcomposite_char_char2string_hash_table);
3210 #endif /* ENABLE_COMPOSITE_CHARS */