1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs_bmp;
64 Lisp_Object Vcharset_latin_viscii;
65 Lisp_Object Vcharset_latin_viscii_lower;
66 Lisp_Object Vcharset_latin_viscii_upper;
67 Lisp_Object Vcharset_ideograph_daikanwa;
68 Lisp_Object Vcharset_mojikyo_pj_1;
69 Lisp_Object Vcharset_mojikyo_pj_2;
70 Lisp_Object Vcharset_mojikyo_pj_3;
71 Lisp_Object Vcharset_mojikyo_pj_4;
72 Lisp_Object Vcharset_mojikyo_pj_5;
73 Lisp_Object Vcharset_mojikyo_pj_6;
74 Lisp_Object Vcharset_mojikyo_pj_7;
75 Lisp_Object Vcharset_mojikyo_pj_8;
76 Lisp_Object Vcharset_mojikyo_pj_9;
77 Lisp_Object Vcharset_mojikyo_pj_10;
78 Lisp_Object Vcharset_mojikyo_pj_11;
79 Lisp_Object Vcharset_mojikyo_pj_12;
80 Lisp_Object Vcharset_mojikyo_pj_13;
81 Lisp_Object Vcharset_mojikyo_pj_14;
82 Lisp_Object Vcharset_mojikyo_pj_15;
83 Lisp_Object Vcharset_mojikyo_pj_16;
84 Lisp_Object Vcharset_mojikyo_pj_17;
85 Lisp_Object Vcharset_mojikyo_pj_18;
86 Lisp_Object Vcharset_mojikyo_pj_19;
87 Lisp_Object Vcharset_mojikyo_pj_20;
88 Lisp_Object Vcharset_mojikyo_pj_21;
89 Lisp_Object Vcharset_ethiopic_ucs;
91 Lisp_Object Vcharset_chinese_big5_1;
92 Lisp_Object Vcharset_chinese_big5_2;
94 #ifdef ENABLE_COMPOSITE_CHARS
95 Lisp_Object Vcharset_composite;
97 /* Hash tables for composite chars. One maps string representing
98 composed chars to their equivalent chars; one goes the
100 Lisp_Object Vcomposite_char_char2string_hash_table;
101 Lisp_Object Vcomposite_char_string2char_hash_table;
103 static int composite_char_row_next;
104 static int composite_char_col_next;
106 #endif /* ENABLE_COMPOSITE_CHARS */
108 struct charset_lookup *chlook;
110 static const struct lrecord_description charset_lookup_description_1[] = {
111 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
120 static const struct struct_description charset_lookup_description = {
121 sizeof(struct charset_lookup),
122 charset_lookup_description_1
126 /* Table of number of bytes in the string representation of a character
127 indexed by the first byte of that representation.
129 rep_bytes_by_first_byte(c) is more efficient than the equivalent
130 canonical computation:
132 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
134 const Bytecount rep_bytes_by_first_byte[0xA0] =
135 { /* 0x00 - 0x7f are for straight ASCII */
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 /* 0x80 - 0x8f are for Dimension-1 official charsets */
146 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
148 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
150 /* 0x90 - 0x9d are for Dimension-2 official charsets */
151 /* 0x9e is for Dimension-1 private charsets */
152 /* 0x9f is for Dimension-2 private charsets */
153 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
160 mark_char_byte_table (Lisp_Object obj)
162 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
165 for (i = 0; i < 256; i++)
167 mark_object (cte->property[i]);
173 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
175 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
176 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
179 for (i = 0; i < 256; i++)
180 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
182 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
184 if (!char_byte_table_equal (cte1->property[i],
185 cte2->property[i], depth + 1))
192 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
198 char_byte_table_hash (Lisp_Object obj, int depth)
200 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
202 return internal_array_hash (cte->property, 256, depth);
205 static const struct lrecord_description char_byte_table_description[] = {
206 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
210 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
211 mark_char_byte_table,
212 internal_object_printer,
213 0, char_byte_table_equal,
214 char_byte_table_hash,
215 char_byte_table_description,
216 struct Lisp_Char_Byte_Table);
219 make_char_byte_table (Lisp_Object initval)
223 struct Lisp_Char_Byte_Table *cte =
224 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
225 &lrecord_char_byte_table);
227 for (i = 0; i < 256; i++)
228 cte->property[i] = initval;
230 XSETCHAR_BYTE_TABLE (obj, cte);
235 copy_char_byte_table (Lisp_Object entry)
237 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
240 struct Lisp_Char_Byte_Table *ctenew =
241 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
242 &lrecord_char_byte_table);
244 for (i = 0; i < 256; i++)
246 Lisp_Object new = cte->property[i];
247 if (CHAR_BYTE_TABLE_P (new))
248 ctenew->property[i] = copy_char_byte_table (new);
250 ctenew->property[i] = new;
253 XSETCHAR_BYTE_TABLE (obj, ctenew);
259 mark_char_code_table (Lisp_Object obj)
261 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
267 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
269 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
270 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
272 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
276 char_code_table_hash (Lisp_Object obj, int depth)
278 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
280 return char_code_table_hash (cte->table, depth + 1);
283 static const struct lrecord_description char_code_table_description[] = {
284 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
288 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
289 mark_char_code_table,
290 internal_object_printer,
291 0, char_code_table_equal,
292 char_code_table_hash,
293 char_code_table_description,
294 struct Lisp_Char_Code_Table);
297 make_char_code_table (Lisp_Object initval)
300 struct Lisp_Char_Code_Table *cte =
301 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
302 &lrecord_char_code_table);
304 cte->table = make_char_byte_table (initval);
306 XSETCHAR_CODE_TABLE (obj, cte);
311 copy_char_code_table (Lisp_Object entry)
313 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
315 struct Lisp_Char_Code_Table *ctenew =
316 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
317 &lrecord_char_code_table);
319 ctenew->table = copy_char_byte_table (cte->table);
320 XSETCHAR_CODE_TABLE (obj, ctenew);
326 get_char_code_table (Emchar ch, Lisp_Object table)
328 unsigned int code = ch;
329 struct Lisp_Char_Byte_Table* cpt
330 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
331 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
333 if (CHAR_BYTE_TABLE_P (ret))
334 cpt = XCHAR_BYTE_TABLE (ret);
338 ret = cpt->property [(unsigned char) (code >> 16)];
339 if (CHAR_BYTE_TABLE_P (ret))
340 cpt = XCHAR_BYTE_TABLE (ret);
344 ret = cpt->property [(unsigned char) (code >> 8)];
345 if (CHAR_BYTE_TABLE_P (ret))
346 cpt = XCHAR_BYTE_TABLE (ret);
350 return cpt->property [(unsigned char) code];
354 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
356 unsigned int code = ch;
357 struct Lisp_Char_Byte_Table* cpt1
358 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
359 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
361 if (CHAR_BYTE_TABLE_P (ret))
363 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
365 ret = cpt2->property[(unsigned char)(code >> 16)];
366 if (CHAR_BYTE_TABLE_P (ret))
368 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
370 ret = cpt3->property[(unsigned char)(code >> 8)];
371 if (CHAR_BYTE_TABLE_P (ret))
373 struct Lisp_Char_Byte_Table* cpt4
374 = XCHAR_BYTE_TABLE (ret);
376 cpt4->property[(unsigned char)code] = value;
378 else if (!EQ (ret, value))
380 Lisp_Object cpt4 = make_char_byte_table (ret);
382 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
383 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
386 else if (!EQ (ret, value))
388 Lisp_Object cpt3 = make_char_byte_table (ret);
389 Lisp_Object cpt4 = make_char_byte_table (ret);
391 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
392 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
394 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
397 else if (!EQ (ret, value))
399 Lisp_Object cpt2 = make_char_byte_table (ret);
400 Lisp_Object cpt3 = make_char_byte_table (ret);
401 Lisp_Object cpt4 = make_char_byte_table (ret);
403 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
404 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
405 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
406 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
411 Lisp_Object Vcharacter_attribute_table;
412 Lisp_Object Vcharacter_composition_table;
413 Lisp_Object Vcharacter_variant_table;
415 Lisp_Object Q_decomposition;
418 Lisp_Object Qisolated;
419 Lisp_Object Qinitial;
422 Lisp_Object Qvertical;
423 Lisp_Object QnoBreak;
424 Lisp_Object Qfraction;
435 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
441 else if (EQ (v, Qcompat))
443 else if (EQ (v, Qisolated))
445 else if (EQ (v, Qinitial))
447 else if (EQ (v, Qmedial))
449 else if (EQ (v, Qfinal))
451 else if (EQ (v, Qvertical))
453 else if (EQ (v, QnoBreak))
455 else if (EQ (v, Qfraction))
457 else if (EQ (v, Qsuper))
459 else if (EQ (v, Qsub))
461 else if (EQ (v, Qcircle))
463 else if (EQ (v, Qsquare))
465 else if (EQ (v, Qwide))
467 else if (EQ (v, Qnarrow))
469 else if (EQ (v, Qsmall))
471 else if (EQ (v, Qfont))
474 signal_simple_error (err_msg, err_arg);
477 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
478 Return character corresponding with list.
482 Lisp_Object table = Vcharacter_composition_table;
483 Lisp_Object rest = list;
487 Lisp_Object v = Fcar (rest);
489 Emchar c = to_char_code (v, "Invalid value for composition", list);
491 ret = get_char_code_table (c, table);
496 if (!CHAR_CODE_TABLE_P (ret))
501 else if (!CONSP (rest))
503 else if (CHAR_CODE_TABLE_P (ret))
506 signal_simple_error ("Invalid table is found with", list);
508 signal_simple_error ("Invalid value for composition", list);
511 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
512 Return variants of CHARACTER.
516 CHECK_CHAR (character);
517 return Fcopy_list (get_char_code_table (XCHAR (character),
518 Vcharacter_variant_table));
521 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
522 Return the alist of attributes of CHARACTER.
526 CHECK_CHAR (character);
527 return Fcopy_alist (get_char_code_table (XCHAR (character),
528 Vcharacter_attribute_table));
531 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
532 Return the value of CHARACTER's ATTRIBUTE.
534 (character, attribute))
539 CHECK_CHAR (character);
540 ret = get_char_code_table (XCHAR (character),
541 Vcharacter_attribute_table);
545 if (!NILP (ccs = Ffind_charset (attribute)))
548 return Fcdr (Fassq (attribute, ret));
552 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
555 Emchar char_code = XCHAR (character);
557 = get_char_code_table (char_code, Vcharacter_attribute_table);
560 cell = Fassq (attribute, ret);
564 ret = Fcons (Fcons (attribute, value), ret);
566 else if (!EQ (Fcdr (cell), value))
568 Fsetcdr (cell, value);
570 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
574 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
575 Store CHARACTER's ATTRIBUTE with VALUE.
577 (character, attribute, value))
581 CHECK_CHAR (character);
582 ccs = Ffind_charset (attribute);
585 Lisp_Object cpos, rest;
586 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
593 /* ad-hoc method for `ascii' */
594 if ((XCHARSET_CHARS (ccs) == 94) &&
595 (XCHARSET_BYTE_OFFSET (ccs) != 33))
596 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
598 ccs_len = XCHARSET_CHARS (ccs);
602 Lisp_Object ret = Fcar (value);
605 signal_simple_error ("Invalid value for coded-charset", value);
606 code_point = XINT (ret);
607 if (XCHARSET_GRAPHIC (ccs) == 1)
615 signal_simple_error ("Invalid value for coded-charset", value);
618 signal_simple_error ("Invalid value for coded-charset", value);
620 if (XCHARSET_GRAPHIC (ccs) == 1)
622 code_point = (code_point << 8) | i;
625 value = make_int (code_point);
627 else if (INTP (value))
629 if (XCHARSET_GRAPHIC (ccs) == 1)
630 value = make_int (XINT (value) & 0x7F7F7F7F);
633 signal_simple_error ("Invalid value for coded-charset", value);
636 cpos = Fget_char_attribute (character, attribute);
641 dim = XCHARSET_DIMENSION (ccs);
642 code_point = XINT (cpos);
646 i = ((code_point >> (8 * dim)) & 255)
647 - XCHARSET_BYTE_OFFSET (ccs);
648 nv = XVECTOR_DATA(v)[i];
654 XVECTOR_DATA(v)[i] = Qnil;
655 v = XCHARSET_DECODING_TABLE (ccs);
660 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
663 dim = XCHARSET_DIMENSION (ccs);
664 code_point = XINT (value);
669 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
670 nv = XVECTOR_DATA(v)[i];
674 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
680 XVECTOR_DATA(v)[i] = character;
682 else if (EQ (attribute, Q_decomposition))
684 Lisp_Object rest = value;
685 Lisp_Object table = Vcharacter_composition_table;
688 signal_simple_error ("Invalid value for ->decomposition",
693 Lisp_Object v = Fcar (rest);
696 = to_char_code (v, "Invalid value for ->decomposition", value);
701 put_char_code_table (c, character, table);
706 ntable = get_char_code_table (c, table);
707 if (!CHAR_CODE_TABLE_P (ntable))
709 ntable = make_char_code_table (Qnil);
710 put_char_code_table (c, ntable, table);
716 else if (EQ (attribute, Q_ucs))
722 signal_simple_error ("Invalid value for ->ucs", value);
726 ret = get_char_code_table (c, Vcharacter_variant_table);
727 if (NILP (Fmemq (character, ret)))
729 put_char_code_table (c, Fcons (character, ret),
730 Vcharacter_variant_table);
733 return put_char_attribute (character, attribute, value);
738 EXFUN (Fmake_char, 3);
740 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
741 Store character's ATTRIBUTES.
745 Lisp_Object rest = attributes;
746 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
747 Lisp_Object character;
753 Lisp_Object cell = Fcar (rest);
757 signal_simple_error ("Invalid argument", attributes);
758 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
761 character = Fmake_char (ccs, Fcar (cell),
763 goto setup_attributes;
767 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
770 signal_simple_error ("Invalid argument", attributes);
772 character = make_char (XINT (code) + 0x100000);
773 goto setup_attributes;
777 else if (!INTP (code))
778 signal_simple_error ("Invalid argument", attributes);
780 character = make_char (XINT (code));
786 Lisp_Object cell = Fcar (rest);
789 signal_simple_error ("Invalid argument", attributes);
790 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
794 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
797 Lisp_Object Vutf_2000_version;
801 int leading_code_private_11;
804 Lisp_Object Qcharsetp;
806 /* Qdoc_string, Qdimension, Qchars defined in general.c */
807 Lisp_Object Qregistry, Qfinal, Qgraphic;
808 Lisp_Object Qdirection;
809 Lisp_Object Qreverse_direction_charset;
810 Lisp_Object Qleading_byte;
811 Lisp_Object Qshort_name, Qlong_name;
827 Qjapanese_jisx0208_1978,
830 Qjapanese_jisx0208_1990,
840 Qvietnamese_viscii_lower,
841 Qvietnamese_viscii_upper,
870 Lisp_Object Ql2r, Qr2l;
872 Lisp_Object Vcharset_hash_table;
875 static Charset_ID next_allocated_leading_byte;
877 static Charset_ID next_allocated_1_byte_leading_byte;
878 static Charset_ID next_allocated_2_byte_leading_byte;
881 /* Composite characters are characters constructed by overstriking two
882 or more regular characters.
884 1) The old Mule implementation involves storing composite characters
885 in a buffer as a tag followed by all of the actual characters
886 used to make up the composite character. I think this is a bad
887 idea; it greatly complicates code that wants to handle strings
888 one character at a time because it has to deal with the possibility
889 of great big ungainly characters. It's much more reasonable to
890 simply store an index into a table of composite characters.
892 2) The current implementation only allows for 16,384 separate
893 composite characters over the lifetime of the XEmacs process.
894 This could become a potential problem if the user
895 edited lots of different files that use composite characters.
896 Due to FSF bogosity, increasing the number of allowable
897 composite characters under Mule would decrease the number
898 of possible faces that can exist. Mule already has shrunk
899 this to 2048, and further shrinkage would become uncomfortable.
900 No such problems exist in XEmacs.
902 Composite characters could be represented as 0x80 C1 C2 C3,
903 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
904 for slightly under 2^20 (one million) composite characters
905 over the XEmacs process lifetime, and you only need to
906 increase the size of a Mule character from 19 to 21 bits.
907 Or you could use 0x80 C1 C2 C3 C4, allowing for about
908 85 million (slightly over 2^26) composite characters. */
911 /************************************************************************/
912 /* Basic Emchar functions */
913 /************************************************************************/
915 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
916 string in STR. Returns the number of bytes stored.
917 Do not call this directly. Use the macro set_charptr_emchar() instead.
921 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
936 else if ( c <= 0x7ff )
938 *p++ = (c >> 6) | 0xc0;
939 *p++ = (c & 0x3f) | 0x80;
941 else if ( c <= 0xffff )
943 *p++ = (c >> 12) | 0xe0;
944 *p++ = ((c >> 6) & 0x3f) | 0x80;
945 *p++ = (c & 0x3f) | 0x80;
947 else if ( c <= 0x1fffff )
949 *p++ = (c >> 18) | 0xf0;
950 *p++ = ((c >> 12) & 0x3f) | 0x80;
951 *p++ = ((c >> 6) & 0x3f) | 0x80;
952 *p++ = (c & 0x3f) | 0x80;
954 else if ( c <= 0x3ffffff )
956 *p++ = (c >> 24) | 0xf8;
957 *p++ = ((c >> 18) & 0x3f) | 0x80;
958 *p++ = ((c >> 12) & 0x3f) | 0x80;
959 *p++ = ((c >> 6) & 0x3f) | 0x80;
960 *p++ = (c & 0x3f) | 0x80;
964 *p++ = (c >> 30) | 0xfc;
965 *p++ = ((c >> 24) & 0x3f) | 0x80;
966 *p++ = ((c >> 18) & 0x3f) | 0x80;
967 *p++ = ((c >> 12) & 0x3f) | 0x80;
968 *p++ = ((c >> 6) & 0x3f) | 0x80;
969 *p++ = (c & 0x3f) | 0x80;
972 BREAKUP_CHAR (c, charset, c1, c2);
973 lb = CHAR_LEADING_BYTE (c);
974 if (LEADING_BYTE_PRIVATE_P (lb))
975 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
977 if (EQ (charset, Vcharset_control_1))
986 /* Return the first character from a Mule-encoded string in STR,
987 assuming it's non-ASCII. Do not call this directly.
988 Use the macro charptr_emchar() instead. */
991 non_ascii_charptr_emchar (CONST Bufbyte *str)
1004 else if ( b >= 0xf8 )
1009 else if ( b >= 0xf0 )
1014 else if ( b >= 0xe0 )
1019 else if ( b >= 0xc0 )
1029 for( ; len > 0; len-- )
1032 ch = ( ch << 6 ) | ( b & 0x3f );
1036 Bufbyte i0 = *str, i1, i2 = 0;
1037 Lisp_Object charset;
1039 if (i0 == LEADING_BYTE_CONTROL_1)
1040 return (Emchar) (*++str - 0x20);
1042 if (LEADING_BYTE_PREFIX_P (i0))
1047 charset = CHARSET_BY_LEADING_BYTE (i0);
1048 if (XCHARSET_DIMENSION (charset) == 2)
1051 return MAKE_CHAR (charset, i1, i2);
1055 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1056 Do not call this directly. Use the macro valid_char_p() instead. */
1060 non_ascii_valid_char_p (Emchar ch)
1064 /* Must have only lowest 19 bits set */
1068 f1 = CHAR_FIELD1 (ch);
1069 f2 = CHAR_FIELD2 (ch);
1070 f3 = CHAR_FIELD3 (ch);
1074 Lisp_Object charset;
1076 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1077 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1078 f2 > MAX_CHAR_FIELD2_PRIVATE)
1083 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1084 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1088 NOTE: This takes advantage of the fact that
1089 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1090 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1092 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1093 if (EQ (charset, Qnil))
1095 return (XCHARSET_CHARS (charset) == 96);
1099 Lisp_Object charset;
1101 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1102 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1103 f1 > MAX_CHAR_FIELD1_PRIVATE)
1105 if (f2 < 0x20 || f3 < 0x20)
1108 #ifdef ENABLE_COMPOSITE_CHARS
1109 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1111 if (UNBOUNDP (Fgethash (make_int (ch),
1112 Vcomposite_char_char2string_hash_table,
1117 #endif /* ENABLE_COMPOSITE_CHARS */
1119 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1120 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1123 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1125 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1128 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1130 if (EQ (charset, Qnil))
1132 return (XCHARSET_CHARS (charset) == 96);
1138 /************************************************************************/
1139 /* Basic string functions */
1140 /************************************************************************/
1142 /* Copy the character pointed to by PTR into STR, assuming it's
1143 non-ASCII. Do not call this directly. Use the macro
1144 charptr_copy_char() instead. */
1147 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1149 Bufbyte *strptr = str;
1151 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1153 /* Notice fallthrough. */
1155 case 6: *++strptr = *ptr++;
1156 case 5: *++strptr = *ptr++;
1158 case 4: *++strptr = *ptr++;
1159 case 3: *++strptr = *ptr++;
1160 case 2: *++strptr = *ptr;
1165 return strptr + 1 - str;
1169 /************************************************************************/
1170 /* streams of Emchars */
1171 /************************************************************************/
1173 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1174 The functions below are not meant to be called directly; use
1175 the macros in insdel.h. */
1178 Lstream_get_emchar_1 (Lstream *stream, int ch)
1180 Bufbyte str[MAX_EMCHAR_LEN];
1181 Bufbyte *strptr = str;
1183 str[0] = (Bufbyte) ch;
1184 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1186 /* Notice fallthrough. */
1189 ch = Lstream_getc (stream);
1191 *++strptr = (Bufbyte) ch;
1193 ch = Lstream_getc (stream);
1195 *++strptr = (Bufbyte) ch;
1198 ch = Lstream_getc (stream);
1200 *++strptr = (Bufbyte) ch;
1202 ch = Lstream_getc (stream);
1204 *++strptr = (Bufbyte) ch;
1206 ch = Lstream_getc (stream);
1208 *++strptr = (Bufbyte) ch;
1213 return charptr_emchar (str);
1217 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1219 Bufbyte str[MAX_EMCHAR_LEN];
1220 Bytecount len = set_charptr_emchar (str, ch);
1221 return Lstream_write (stream, str, len);
1225 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1227 Bufbyte str[MAX_EMCHAR_LEN];
1228 Bytecount len = set_charptr_emchar (str, ch);
1229 Lstream_unread (stream, str, len);
1233 /************************************************************************/
1234 /* charset object */
1235 /************************************************************************/
1238 mark_charset (Lisp_Object obj)
1240 struct Lisp_Charset *cs = XCHARSET (obj);
1242 mark_object (cs->short_name);
1243 mark_object (cs->long_name);
1244 mark_object (cs->doc_string);
1245 mark_object (cs->registry);
1246 mark_object (cs->ccl_program);
1248 mark_object (cs->decoding_table);
1254 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1256 struct Lisp_Charset *cs = XCHARSET (obj);
1260 error ("printing unreadable object #<charset %s 0x%x>",
1261 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1264 write_c_string ("#<charset ", printcharfun);
1265 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1266 write_c_string (" ", printcharfun);
1267 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1268 write_c_string (" ", printcharfun);
1269 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1270 write_c_string (" ", printcharfun);
1271 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1272 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1273 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1274 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1275 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1277 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1278 CHARSET_COLUMNS (cs),
1279 CHARSET_GRAPHIC (cs),
1280 CHARSET_FINAL (cs));
1281 write_c_string (buf, printcharfun);
1282 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1283 sprintf (buf, " 0x%x>", cs->header.uid);
1284 write_c_string (buf, printcharfun);
1287 static const struct lrecord_description charset_description[] = {
1288 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1290 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1295 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1296 mark_charset, print_charset, 0, 0, 0,
1297 charset_description,
1298 struct Lisp_Charset);
1299 /* Make a new charset. */
1302 make_charset (Charset_ID id, Lisp_Object name,
1303 unsigned char type, unsigned char columns, unsigned char graphic,
1304 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1305 Lisp_Object long_name, Lisp_Object doc,
1307 Lisp_Object decoding_table,
1308 Emchar ucs_min, Emchar ucs_max,
1309 Emchar code_offset, unsigned char byte_offset)
1312 struct Lisp_Charset *cs =
1313 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1314 XSETCHARSET (obj, cs);
1316 CHARSET_ID (cs) = id;
1317 CHARSET_NAME (cs) = name;
1318 CHARSET_SHORT_NAME (cs) = short_name;
1319 CHARSET_LONG_NAME (cs) = long_name;
1320 CHARSET_DIRECTION (cs) = direction;
1321 CHARSET_TYPE (cs) = type;
1322 CHARSET_COLUMNS (cs) = columns;
1323 CHARSET_GRAPHIC (cs) = graphic;
1324 CHARSET_FINAL (cs) = final;
1325 CHARSET_DOC_STRING (cs) = doc;
1326 CHARSET_REGISTRY (cs) = reg;
1327 CHARSET_CCL_PROGRAM (cs) = Qnil;
1328 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1330 CHARSET_DECODING_TABLE(cs) = Qnil;
1331 CHARSET_UCS_MIN(cs) = ucs_min;
1332 CHARSET_UCS_MAX(cs) = ucs_max;
1333 CHARSET_CODE_OFFSET(cs) = code_offset;
1334 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1337 switch (CHARSET_TYPE (cs))
1339 case CHARSET_TYPE_94:
1340 CHARSET_DIMENSION (cs) = 1;
1341 CHARSET_CHARS (cs) = 94;
1343 case CHARSET_TYPE_96:
1344 CHARSET_DIMENSION (cs) = 1;
1345 CHARSET_CHARS (cs) = 96;
1347 case CHARSET_TYPE_94X94:
1348 CHARSET_DIMENSION (cs) = 2;
1349 CHARSET_CHARS (cs) = 94;
1351 case CHARSET_TYPE_96X96:
1352 CHARSET_DIMENSION (cs) = 2;
1353 CHARSET_CHARS (cs) = 96;
1356 case CHARSET_TYPE_128:
1357 CHARSET_DIMENSION (cs) = 1;
1358 CHARSET_CHARS (cs) = 128;
1360 case CHARSET_TYPE_128X128:
1361 CHARSET_DIMENSION (cs) = 2;
1362 CHARSET_CHARS (cs) = 128;
1364 case CHARSET_TYPE_256:
1365 CHARSET_DIMENSION (cs) = 1;
1366 CHARSET_CHARS (cs) = 256;
1368 case CHARSET_TYPE_256X256:
1369 CHARSET_DIMENSION (cs) = 2;
1370 CHARSET_CHARS (cs) = 256;
1376 if (id == LEADING_BYTE_ASCII)
1377 CHARSET_REP_BYTES (cs) = 1;
1379 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1381 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1386 /* some charsets do not have final characters. This includes
1387 ASCII, Control-1, Composite, and the two faux private
1390 if (code_offset == 0)
1392 assert (NILP (chlook->charset_by_attributes[type][final]));
1393 chlook->charset_by_attributes[type][final] = obj;
1396 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1397 chlook->charset_by_attributes[type][final][direction] = obj;
1401 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1402 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1404 /* Some charsets are "faux" and don't have names or really exist at
1405 all except in the leading-byte table. */
1407 Fputhash (name, obj, Vcharset_hash_table);
1412 get_unallocated_leading_byte (int dimension)
1417 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1420 lb = next_allocated_leading_byte++;
1424 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1427 lb = next_allocated_1_byte_leading_byte++;
1431 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1434 lb = next_allocated_2_byte_leading_byte++;
1440 ("No more character sets free for this dimension",
1441 make_int (dimension));
1448 range_charset_code_point (Lisp_Object charset, Emchar ch)
1452 if ((XCHARSET_UCS_MIN (charset) <= ch)
1453 && (ch <= XCHARSET_UCS_MAX (charset)))
1455 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1457 if (XCHARSET_DIMENSION (charset) == 1)
1458 return d + XCHARSET_BYTE_OFFSET (charset);
1459 else if (XCHARSET_DIMENSION (charset) == 2)
1461 ((d / XCHARSET_CHARS (charset)
1462 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1463 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1464 else if (XCHARSET_DIMENSION (charset) == 3)
1466 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1467 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1468 | ((d / XCHARSET_CHARS (charset)
1469 % XCHARSET_CHARS (charset)
1470 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1471 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1472 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1474 ((d / (XCHARSET_CHARS (charset)
1475 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1476 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1477 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1478 % XCHARSET_CHARS (charset)
1479 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1480 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1481 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1482 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1484 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1486 if (XCHARSET_DIMENSION (charset) == 1)
1488 if (XCHARSET_CHARS (charset) == 94)
1490 if (((d = ch - (MIN_CHAR_94
1491 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1495 else if (XCHARSET_CHARS (charset) == 96)
1497 if (((d = ch - (MIN_CHAR_96
1498 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1505 else if (XCHARSET_DIMENSION (charset) == 2)
1507 if (XCHARSET_CHARS (charset) == 94)
1509 if (((d = ch - (MIN_CHAR_94x94
1510 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1513 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1515 else if (XCHARSET_CHARS (charset) == 96)
1517 if (((d = ch - (MIN_CHAR_96x96
1518 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1521 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1531 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1533 if (c <= MAX_CHAR_BASIC_LATIN)
1535 *charset = Vcharset_ascii;
1540 *charset = Vcharset_control_1;
1545 *charset = Vcharset_latin_iso8859_1;
1548 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1550 *charset = Vcharset_greek_iso8859_7;
1551 return c - MIN_CHAR_GREEK + 0x20;
1554 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1556 *charset = Vcharset_cyrillic_iso8859_5;
1557 return c - MIN_CHAR_CYRILLIC + 0x20;
1560 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1562 *charset = Vcharset_hebrew_iso8859_8;
1563 return c - MIN_CHAR_HEBREW + 0x20;
1565 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1567 *charset = Vcharset_thai_tis620;
1568 return c - MIN_CHAR_THAI + 0x20;
1571 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1572 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1574 return list2 (Vcharset_katakana_jisx0201,
1575 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1578 else if (c <= MAX_CHAR_BMP)
1580 *charset = Vcharset_ucs_bmp;
1583 else if ((MIN_CHAR_DAIKANWA <= c) && (c <= MAX_CHAR_DAIKANWA))
1585 *charset = Vcharset_ideograph_daikanwa;
1586 return c - MIN_CHAR_DAIKANWA;
1588 else if (c <= MAX_CHAR_94)
1590 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1591 ((c - MIN_CHAR_94) / 94) + '0',
1592 CHARSET_LEFT_TO_RIGHT);
1593 return ((c - MIN_CHAR_94) % 94) + 33;
1595 else if (c <= MAX_CHAR_96)
1597 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1598 ((c - MIN_CHAR_96) / 96) + '0',
1599 CHARSET_LEFT_TO_RIGHT);
1600 return ((c - MIN_CHAR_96) % 96) + 32;
1602 else if (c <= MAX_CHAR_94x94)
1605 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1606 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1607 CHARSET_LEFT_TO_RIGHT);
1608 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1609 | (((c - MIN_CHAR_94x94) % 94) + 33);
1611 else if (c <= MAX_CHAR_96x96)
1614 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1615 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1616 CHARSET_LEFT_TO_RIGHT);
1617 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1618 | (((c - MIN_CHAR_96x96) % 96) + 32);
1626 Lisp_Object Vdefault_coded_charset_priority_list;
1630 /************************************************************************/
1631 /* Basic charset Lisp functions */
1632 /************************************************************************/
1634 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1635 Return non-nil if OBJECT is a charset.
1639 return CHARSETP (object) ? Qt : Qnil;
1642 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1643 Retrieve the charset of the given name.
1644 If CHARSET-OR-NAME is a charset object, it is simply returned.
1645 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1646 nil is returned. Otherwise the associated charset object is returned.
1650 if (CHARSETP (charset_or_name))
1651 return charset_or_name;
1653 CHECK_SYMBOL (charset_or_name);
1654 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1657 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1658 Retrieve the charset of the given name.
1659 Same as `find-charset' except an error is signalled if there is no such
1660 charset instead of returning nil.
1664 Lisp_Object charset = Ffind_charset (name);
1667 signal_simple_error ("No such charset", name);
1671 /* We store the charsets in hash tables with the names as the key and the
1672 actual charset object as the value. Occasionally we need to use them
1673 in a list format. These routines provide us with that. */
1674 struct charset_list_closure
1676 Lisp_Object *charset_list;
1680 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1681 void *charset_list_closure)
1683 /* This function can GC */
1684 struct charset_list_closure *chcl =
1685 (struct charset_list_closure*) charset_list_closure;
1686 Lisp_Object *charset_list = chcl->charset_list;
1688 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1692 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1693 Return a list of the names of all defined charsets.
1697 Lisp_Object charset_list = Qnil;
1698 struct gcpro gcpro1;
1699 struct charset_list_closure charset_list_closure;
1701 GCPRO1 (charset_list);
1702 charset_list_closure.charset_list = &charset_list;
1703 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1704 &charset_list_closure);
1707 return charset_list;
1710 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1711 Return the name of the given charset.
1715 return XCHARSET_NAME (Fget_charset (charset));
1718 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1719 Define a new character set.
1720 This function is for use with Mule support.
1721 NAME is a symbol, the name by which the character set is normally referred.
1722 DOC-STRING is a string describing the character set.
1723 PROPS is a property list, describing the specific nature of the
1724 character set. Recognized properties are:
1726 'short-name Short version of the charset name (ex: Latin-1)
1727 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1728 'registry A regular expression matching the font registry field for
1730 'dimension Number of octets used to index a character in this charset.
1731 Either 1 or 2. Defaults to 1.
1732 'columns Number of columns used to display a character in this charset.
1733 Only used in TTY mode. (Under X, the actual width of a
1734 character can be derived from the font used to display the
1735 characters.) If unspecified, defaults to the dimension
1736 (this is almost always the correct value).
1737 'chars Number of characters in each dimension (94 or 96).
1738 Defaults to 94. Note that if the dimension is 2, the
1739 character set thus described is 94x94 or 96x96.
1740 'final Final byte of ISO 2022 escape sequence. Must be
1741 supplied. Each combination of (DIMENSION, CHARS) defines a
1742 separate namespace for final bytes. Note that ISO
1743 2022 restricts the final byte to the range
1744 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1745 dimension == 2. Note also that final bytes in the range
1746 0x30 - 0x3F are reserved for user-defined (not official)
1748 'graphic 0 (use left half of font on output) or 1 (use right half
1749 of font on output). Defaults to 0. For example, for
1750 a font whose registry is ISO8859-1, the left half
1751 (octets 0x20 - 0x7F) is the `ascii' character set, while
1752 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1753 character set. With 'graphic set to 0, the octets
1754 will have their high bit cleared; with it set to 1,
1755 the octets will have their high bit set.
1756 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1758 'ccl-program A compiled CCL program used to convert a character in
1759 this charset into an index into the font. This is in
1760 addition to the 'graphic property. The CCL program
1761 is passed the octets of the character, with the high
1762 bit cleared and set depending upon whether the value
1763 of the 'graphic property is 0 or 1.
1765 (name, doc_string, props))
1767 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1768 int direction = CHARSET_LEFT_TO_RIGHT;
1770 Lisp_Object registry = Qnil;
1771 Lisp_Object charset;
1772 Lisp_Object rest, keyword, value;
1773 Lisp_Object ccl_program = Qnil;
1774 Lisp_Object short_name = Qnil, long_name = Qnil;
1775 int byte_offset = -1;
1777 CHECK_SYMBOL (name);
1778 if (!NILP (doc_string))
1779 CHECK_STRING (doc_string);
1781 charset = Ffind_charset (name);
1782 if (!NILP (charset))
1783 signal_simple_error ("Cannot redefine existing charset", name);
1785 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1787 if (EQ (keyword, Qshort_name))
1789 CHECK_STRING (value);
1793 if (EQ (keyword, Qlong_name))
1795 CHECK_STRING (value);
1799 else if (EQ (keyword, Qdimension))
1802 dimension = XINT (value);
1803 if (dimension < 1 || dimension > 2)
1804 signal_simple_error ("Invalid value for 'dimension", value);
1807 else if (EQ (keyword, Qchars))
1810 chars = XINT (value);
1811 if (chars != 94 && chars != 96)
1812 signal_simple_error ("Invalid value for 'chars", value);
1815 else if (EQ (keyword, Qcolumns))
1818 columns = XINT (value);
1819 if (columns != 1 && columns != 2)
1820 signal_simple_error ("Invalid value for 'columns", value);
1823 else if (EQ (keyword, Qgraphic))
1826 graphic = XINT (value);
1828 if (graphic < 0 || graphic > 2)
1830 if (graphic < 0 || graphic > 1)
1832 signal_simple_error ("Invalid value for 'graphic", value);
1835 else if (EQ (keyword, Qregistry))
1837 CHECK_STRING (value);
1841 else if (EQ (keyword, Qdirection))
1843 if (EQ (value, Ql2r))
1844 direction = CHARSET_LEFT_TO_RIGHT;
1845 else if (EQ (value, Qr2l))
1846 direction = CHARSET_RIGHT_TO_LEFT;
1848 signal_simple_error ("Invalid value for 'direction", value);
1851 else if (EQ (keyword, Qfinal))
1853 CHECK_CHAR_COERCE_INT (value);
1854 final = XCHAR (value);
1855 if (final < '0' || final > '~')
1856 signal_simple_error ("Invalid value for 'final", value);
1859 else if (EQ (keyword, Qccl_program))
1861 CHECK_VECTOR (value);
1862 ccl_program = value;
1866 signal_simple_error ("Unrecognized property", keyword);
1870 error ("'final must be specified");
1871 if (dimension == 2 && final > 0x5F)
1873 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1877 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1879 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1881 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1882 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1884 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1886 id = get_unallocated_leading_byte (dimension);
1888 if (NILP (doc_string))
1889 doc_string = build_string ("");
1891 if (NILP (registry))
1892 registry = build_string ("");
1894 if (NILP (short_name))
1895 XSETSTRING (short_name, XSYMBOL (name)->name);
1897 if (NILP (long_name))
1898 long_name = doc_string;
1901 columns = dimension;
1903 if (byte_offset < 0)
1907 else if (chars == 96)
1913 charset = make_charset (id, name, type, columns, graphic,
1914 final, direction, short_name, long_name,
1915 doc_string, registry,
1916 Qnil, 0, 0, 0, byte_offset);
1917 if (!NILP (ccl_program))
1918 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1922 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1924 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1925 NEW-NAME is the name of the new charset. Return the new charset.
1927 (charset, new_name))
1929 Lisp_Object new_charset = Qnil;
1930 int id, dimension, columns, graphic, final;
1931 int direction, type;
1932 Lisp_Object registry, doc_string, short_name, long_name;
1933 struct Lisp_Charset *cs;
1935 charset = Fget_charset (charset);
1936 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1937 signal_simple_error ("Charset already has reverse-direction charset",
1940 CHECK_SYMBOL (new_name);
1941 if (!NILP (Ffind_charset (new_name)))
1942 signal_simple_error ("Cannot redefine existing charset", new_name);
1944 cs = XCHARSET (charset);
1946 type = CHARSET_TYPE (cs);
1947 columns = CHARSET_COLUMNS (cs);
1948 dimension = CHARSET_DIMENSION (cs);
1949 id = get_unallocated_leading_byte (dimension);
1951 graphic = CHARSET_GRAPHIC (cs);
1952 final = CHARSET_FINAL (cs);
1953 direction = CHARSET_RIGHT_TO_LEFT;
1954 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1955 direction = CHARSET_LEFT_TO_RIGHT;
1956 doc_string = CHARSET_DOC_STRING (cs);
1957 short_name = CHARSET_SHORT_NAME (cs);
1958 long_name = CHARSET_LONG_NAME (cs);
1959 registry = CHARSET_REGISTRY (cs);
1961 new_charset = make_charset (id, new_name, type, columns,
1962 graphic, final, direction, short_name, long_name,
1963 doc_string, registry,
1965 CHARSET_DECODING_TABLE(cs),
1966 CHARSET_UCS_MIN(cs),
1967 CHARSET_UCS_MAX(cs),
1968 CHARSET_CODE_OFFSET(cs),
1969 CHARSET_BYTE_OFFSET(cs)
1975 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1976 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1981 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1982 Define symbol ALIAS as an alias for CHARSET.
1986 CHECK_SYMBOL (alias);
1987 charset = Fget_charset (charset);
1988 return Fputhash (alias, charset, Vcharset_hash_table);
1991 /* #### Reverse direction charsets not yet implemented. */
1993 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1995 Return the reverse-direction charset parallel to CHARSET, if any.
1996 This is the charset with the same properties (in particular, the same
1997 dimension, number of characters per dimension, and final byte) as
1998 CHARSET but whose characters are displayed in the opposite direction.
2002 charset = Fget_charset (charset);
2003 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2007 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2008 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2009 If DIRECTION is omitted, both directions will be checked (left-to-right
2010 will be returned if character sets exist for both directions).
2012 (dimension, chars, final, direction))
2014 int dm, ch, fi, di = -1;
2016 Lisp_Object obj = Qnil;
2018 CHECK_INT (dimension);
2019 dm = XINT (dimension);
2020 if (dm < 1 || dm > 2)
2021 signal_simple_error ("Invalid value for DIMENSION", dimension);
2025 if (ch != 94 && ch != 96)
2026 signal_simple_error ("Invalid value for CHARS", chars);
2028 CHECK_CHAR_COERCE_INT (final);
2030 if (fi < '0' || fi > '~')
2031 signal_simple_error ("Invalid value for FINAL", final);
2033 if (EQ (direction, Ql2r))
2034 di = CHARSET_LEFT_TO_RIGHT;
2035 else if (EQ (direction, Qr2l))
2036 di = CHARSET_RIGHT_TO_LEFT;
2037 else if (!NILP (direction))
2038 signal_simple_error ("Invalid value for DIRECTION", direction);
2040 if (dm == 2 && fi > 0x5F)
2042 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2045 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2047 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2051 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2053 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2056 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2059 return XCHARSET_NAME (obj);
2063 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2064 Return short name of CHARSET.
2068 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2071 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2072 Return long name of CHARSET.
2076 return XCHARSET_LONG_NAME (Fget_charset (charset));
2079 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2080 Return description of CHARSET.
2084 return XCHARSET_DOC_STRING (Fget_charset (charset));
2087 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2088 Return dimension of CHARSET.
2092 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2095 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2096 Return property PROP of CHARSET.
2097 Recognized properties are those listed in `make-charset', as well as
2098 'name and 'doc-string.
2102 struct Lisp_Charset *cs;
2104 charset = Fget_charset (charset);
2105 cs = XCHARSET (charset);
2107 CHECK_SYMBOL (prop);
2108 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2109 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2110 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2111 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2112 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2113 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2114 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2115 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2116 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2117 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2118 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2119 if (EQ (prop, Qdirection))
2120 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2121 if (EQ (prop, Qreverse_direction_charset))
2123 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2127 return XCHARSET_NAME (obj);
2129 signal_simple_error ("Unrecognized charset property name", prop);
2130 return Qnil; /* not reached */
2133 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2134 Return charset identification number of CHARSET.
2138 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2141 /* #### We need to figure out which properties we really want to
2144 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2145 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2147 (charset, ccl_program))
2149 charset = Fget_charset (charset);
2150 CHECK_VECTOR (ccl_program);
2151 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2156 invalidate_charset_font_caches (Lisp_Object charset)
2158 /* Invalidate font cache entries for charset on all devices. */
2159 Lisp_Object devcons, concons, hash_table;
2160 DEVICE_LOOP_NO_BREAK (devcons, concons)
2162 struct device *d = XDEVICE (XCAR (devcons));
2163 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2164 if (!UNBOUNDP (hash_table))
2165 Fclrhash (hash_table);
2169 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2170 Set the 'registry property of CHARSET to REGISTRY.
2172 (charset, registry))
2174 charset = Fget_charset (charset);
2175 CHECK_STRING (registry);
2176 XCHARSET_REGISTRY (charset) = registry;
2177 invalidate_charset_font_caches (charset);
2178 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2183 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2184 Return mapping-table of CHARSET.
2188 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2191 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2192 Set mapping-table of CHARSET to TABLE.
2196 struct Lisp_Charset *cs;
2197 Lisp_Object old_table;
2200 charset = Fget_charset (charset);
2201 cs = XCHARSET (charset);
2203 if (EQ (table, Qnil))
2205 CHARSET_DECODING_TABLE(cs) = table;
2208 else if (VECTORP (table))
2212 /* ad-hoc method for `ascii' */
2213 if ((CHARSET_CHARS (cs) == 94) &&
2214 (CHARSET_BYTE_OFFSET (cs) != 33))
2215 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2217 ccs_len = CHARSET_CHARS (cs);
2219 if (XVECTOR_LENGTH (table) > ccs_len)
2220 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2221 old_table = CHARSET_DECODING_TABLE(cs);
2222 CHARSET_DECODING_TABLE(cs) = table;
2225 signal_error (Qwrong_type_argument,
2226 list2 (build_translated_string ("vector-or-nil-p"),
2228 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2230 switch (CHARSET_DIMENSION (cs))
2233 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2235 Lisp_Object c = XVECTOR_DATA(table)[i];
2240 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2244 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2246 Lisp_Object v = XVECTOR_DATA(table)[i];
2252 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2254 CHARSET_DECODING_TABLE(cs) = old_table;
2255 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2257 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2259 Lisp_Object c = XVECTOR_DATA(v)[j];
2262 put_char_attribute (c, charset,
2265 (i + CHARSET_BYTE_OFFSET (cs)),
2267 (j + CHARSET_BYTE_OFFSET (cs))));
2271 put_char_attribute (v, charset,
2273 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2282 /************************************************************************/
2283 /* Lisp primitives for working with characters */
2284 /************************************************************************/
2286 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2287 Make a character from CHARSET and octets ARG1 and ARG2.
2288 ARG2 is required only for characters from two-dimensional charsets.
2289 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2290 character s with caron.
2292 (charset, arg1, arg2))
2294 struct Lisp_Charset *cs;
2296 int lowlim, highlim;
2298 charset = Fget_charset (charset);
2299 cs = XCHARSET (charset);
2301 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2302 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2304 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2306 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2307 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2310 /* It is useful (and safe, according to Olivier Galibert) to strip
2311 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2312 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2313 Latin 2 code of the character. */
2321 if (a1 < lowlim || a1 > highlim)
2322 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2324 if (CHARSET_DIMENSION (cs) == 1)
2328 ("Charset is of dimension one; second octet must be nil", arg2);
2329 return make_char (MAKE_CHAR (charset, a1, 0));
2338 a2 = XINT (arg2) & 0x7f;
2340 if (a2 < lowlim || a2 > highlim)
2341 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2343 return make_char (MAKE_CHAR (charset, a1, a2));
2346 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2347 Return the character set of char CH.
2351 CHECK_CHAR_COERCE_INT (ch);
2353 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2356 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2357 Return the octet numbered N (should be 0 or 1) of char CH.
2358 N defaults to 0 if omitted.
2362 Lisp_Object charset;
2365 CHECK_CHAR_COERCE_INT (ch);
2367 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2369 if (NILP (n) || EQ (n, Qzero))
2370 return make_int (octet0);
2371 else if (EQ (n, make_int (1)))
2372 return make_int (octet1);
2374 signal_simple_error ("Octet number must be 0 or 1", n);
2377 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2378 Return list of charset and one or two position-codes of CHAR.
2382 /* This function can GC */
2383 struct gcpro gcpro1, gcpro2;
2384 Lisp_Object charset = Qnil;
2385 Lisp_Object rc = Qnil;
2388 GCPRO2 (charset, rc);
2389 CHECK_CHAR_COERCE_INT (character);
2391 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2393 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2395 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2399 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2407 #ifdef ENABLE_COMPOSITE_CHARS
2408 /************************************************************************/
2409 /* composite character functions */
2410 /************************************************************************/
2413 lookup_composite_char (Bufbyte *str, int len)
2415 Lisp_Object lispstr = make_string (str, len);
2416 Lisp_Object ch = Fgethash (lispstr,
2417 Vcomposite_char_string2char_hash_table,
2423 if (composite_char_row_next >= 128)
2424 signal_simple_error ("No more composite chars available", lispstr);
2425 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2426 composite_char_col_next);
2427 Fputhash (make_char (emch), lispstr,
2428 Vcomposite_char_char2string_hash_table);
2429 Fputhash (lispstr, make_char (emch),
2430 Vcomposite_char_string2char_hash_table);
2431 composite_char_col_next++;
2432 if (composite_char_col_next >= 128)
2434 composite_char_col_next = 32;
2435 composite_char_row_next++;
2444 composite_char_string (Emchar ch)
2446 Lisp_Object str = Fgethash (make_char (ch),
2447 Vcomposite_char_char2string_hash_table,
2449 assert (!UNBOUNDP (str));
2453 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2454 Convert a string into a single composite character.
2455 The character is the result of overstriking all the characters in
2460 CHECK_STRING (string);
2461 return make_char (lookup_composite_char (XSTRING_DATA (string),
2462 XSTRING_LENGTH (string)));
2465 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2466 Return a string of the characters comprising a composite character.
2474 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2475 signal_simple_error ("Must be composite char", ch);
2476 return composite_char_string (emch);
2478 #endif /* ENABLE_COMPOSITE_CHARS */
2481 /************************************************************************/
2482 /* initialization */
2483 /************************************************************************/
2486 syms_of_mule_charset (void)
2488 DEFSUBR (Fcharsetp);
2489 DEFSUBR (Ffind_charset);
2490 DEFSUBR (Fget_charset);
2491 DEFSUBR (Fcharset_list);
2492 DEFSUBR (Fcharset_name);
2493 DEFSUBR (Fmake_charset);
2494 DEFSUBR (Fmake_reverse_direction_charset);
2495 /* DEFSUBR (Freverse_direction_charset); */
2496 DEFSUBR (Fdefine_charset_alias);
2497 DEFSUBR (Fcharset_from_attributes);
2498 DEFSUBR (Fcharset_short_name);
2499 DEFSUBR (Fcharset_long_name);
2500 DEFSUBR (Fcharset_description);
2501 DEFSUBR (Fcharset_dimension);
2502 DEFSUBR (Fcharset_property);
2503 DEFSUBR (Fcharset_id);
2504 DEFSUBR (Fset_charset_ccl_program);
2505 DEFSUBR (Fset_charset_registry);
2507 DEFSUBR (Fchar_attribute_alist);
2508 DEFSUBR (Fget_char_attribute);
2509 DEFSUBR (Fput_char_attribute);
2510 DEFSUBR (Fdefine_char);
2511 DEFSUBR (Fchar_variants);
2512 DEFSUBR (Fget_composite_char);
2513 DEFSUBR (Fcharset_mapping_table);
2514 DEFSUBR (Fset_charset_mapping_table);
2517 DEFSUBR (Fmake_char);
2518 DEFSUBR (Fchar_charset);
2519 DEFSUBR (Fchar_octet);
2520 DEFSUBR (Fsplit_char);
2522 #ifdef ENABLE_COMPOSITE_CHARS
2523 DEFSUBR (Fmake_composite_char);
2524 DEFSUBR (Fcomposite_char_string);
2527 defsymbol (&Qcharsetp, "charsetp");
2528 defsymbol (&Qregistry, "registry");
2529 defsymbol (&Qfinal, "final");
2530 defsymbol (&Qgraphic, "graphic");
2531 defsymbol (&Qdirection, "direction");
2532 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2533 defsymbol (&Qshort_name, "short-name");
2534 defsymbol (&Qlong_name, "long-name");
2536 defsymbol (&Ql2r, "l2r");
2537 defsymbol (&Qr2l, "r2l");
2539 /* Charsets, compatible with FSF 20.3
2540 Naming convention is Script-Charset[-Edition] */
2541 defsymbol (&Qascii, "ascii");
2542 defsymbol (&Qcontrol_1, "control-1");
2543 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2544 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2545 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2546 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2547 defsymbol (&Qthai_tis620, "thai-tis620");
2548 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2549 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2550 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2551 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2552 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2553 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2554 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2555 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2556 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2557 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2558 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2559 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2560 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2561 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2562 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2564 defsymbol (&Q_ucs, "->ucs");
2565 defsymbol (&Q_decomposition, "->decomposition");
2566 defsymbol (&Qcompat, "compat");
2567 defsymbol (&Qisolated, "isolated");
2568 defsymbol (&Qinitial, "initial");
2569 defsymbol (&Qmedial, "medial");
2570 defsymbol (&Qfinal, "final");
2571 defsymbol (&Qvertical, "vertical");
2572 defsymbol (&QnoBreak, "noBreak");
2573 defsymbol (&Qfraction, "fraction");
2574 defsymbol (&Qsuper, "super");
2575 defsymbol (&Qsub, "sub");
2576 defsymbol (&Qcircle, "circle");
2577 defsymbol (&Qsquare, "square");
2578 defsymbol (&Qwide, "wide");
2579 defsymbol (&Qnarrow, "narrow");
2580 defsymbol (&Qsmall, "small");
2581 defsymbol (&Qfont, "font");
2582 defsymbol (&Qucs, "ucs");
2583 defsymbol (&Qucs_bmp, "ucs-bmp");
2584 defsymbol (&Qlatin_viscii, "latin-viscii");
2585 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2586 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2587 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2588 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2589 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2590 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2591 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2592 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2593 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2594 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2595 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2596 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2597 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2598 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2599 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2600 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2601 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2602 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2603 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2604 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2605 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2606 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2607 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2608 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2609 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2610 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2611 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2613 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2614 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2616 defsymbol (&Qcomposite, "composite");
2620 vars_of_mule_charset (void)
2627 chlook = xnew (struct charset_lookup);
2628 dumpstruct (&chlook, &charset_lookup_description);
2630 /* Table of charsets indexed by leading byte. */
2631 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2632 chlook->charset_by_leading_byte[i] = Qnil;
2635 /* Table of charsets indexed by type/final-byte. */
2636 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2637 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2638 chlook->charset_by_attributes[i][j] = Qnil;
2640 /* Table of charsets indexed by type/final-byte/direction. */
2641 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2642 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2643 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2644 chlook->charset_by_attributes[i][j][k] = Qnil;
2648 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2650 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2651 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2655 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2656 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2657 Leading-code of private TYPE9N charset of column-width 1.
2659 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2663 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2664 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2665 Version number of UTF-2000.
2668 staticpro (&Vcharacter_attribute_table);
2669 Vcharacter_attribute_table = make_char_code_table (Qnil);
2671 staticpro (&Vcharacter_composition_table);
2672 Vcharacter_composition_table = make_char_code_table (Qnil);
2674 staticpro (&Vcharacter_variant_table);
2675 Vcharacter_variant_table = make_char_code_table (Qnil);
2677 Vdefault_coded_charset_priority_list = Qnil;
2678 DEFVAR_LISP ("default-coded-charset-priority-list",
2679 &Vdefault_coded_charset_priority_list /*
2680 Default order of preferred coded-character-sets.
2686 complex_vars_of_mule_charset (void)
2688 staticpro (&Vcharset_hash_table);
2689 Vcharset_hash_table =
2690 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2692 /* Predefined character sets. We store them into variables for
2696 staticpro (&Vcharset_ucs_bmp);
2698 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2699 CHARSET_TYPE_256X256, 1, 2, 0,
2700 CHARSET_LEFT_TO_RIGHT,
2701 build_string ("BMP"),
2702 build_string ("BMP"),
2703 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2704 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2705 Qnil, 0, 0xFFFF, 0, 0);
2707 # define MIN_CHAR_THAI 0
2708 # define MAX_CHAR_THAI 0
2709 # define MIN_CHAR_GREEK 0
2710 # define MAX_CHAR_GREEK 0
2711 # define MIN_CHAR_HEBREW 0
2712 # define MAX_CHAR_HEBREW 0
2713 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2714 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2715 # define MIN_CHAR_CYRILLIC 0
2716 # define MAX_CHAR_CYRILLIC 0
2718 staticpro (&Vcharset_ascii);
2720 make_charset (LEADING_BYTE_ASCII, Qascii,
2721 CHARSET_TYPE_94, 1, 0, 'B',
2722 CHARSET_LEFT_TO_RIGHT,
2723 build_string ("ASCII"),
2724 build_string ("ASCII)"),
2725 build_string ("ASCII (ISO646 IRV)"),
2726 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2727 Qnil, 0, 0x7F, 0, 0);
2728 staticpro (&Vcharset_control_1);
2729 Vcharset_control_1 =
2730 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2731 CHARSET_TYPE_94, 1, 1, 0,
2732 CHARSET_LEFT_TO_RIGHT,
2733 build_string ("C1"),
2734 build_string ("Control characters"),
2735 build_string ("Control characters 128-191"),
2737 Qnil, 0x80, 0x9F, 0, 0);
2738 staticpro (&Vcharset_latin_iso8859_1);
2739 Vcharset_latin_iso8859_1 =
2740 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2741 CHARSET_TYPE_96, 1, 1, 'A',
2742 CHARSET_LEFT_TO_RIGHT,
2743 build_string ("Latin-1"),
2744 build_string ("ISO8859-1 (Latin-1)"),
2745 build_string ("ISO8859-1 (Latin-1)"),
2746 build_string ("iso8859-1"),
2747 Qnil, 0xA0, 0xFF, 0, 32);
2748 staticpro (&Vcharset_latin_iso8859_2);
2749 Vcharset_latin_iso8859_2 =
2750 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2751 CHARSET_TYPE_96, 1, 1, 'B',
2752 CHARSET_LEFT_TO_RIGHT,
2753 build_string ("Latin-2"),
2754 build_string ("ISO8859-2 (Latin-2)"),
2755 build_string ("ISO8859-2 (Latin-2)"),
2756 build_string ("iso8859-2"),
2758 staticpro (&Vcharset_latin_iso8859_3);
2759 Vcharset_latin_iso8859_3 =
2760 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2761 CHARSET_TYPE_96, 1, 1, 'C',
2762 CHARSET_LEFT_TO_RIGHT,
2763 build_string ("Latin-3"),
2764 build_string ("ISO8859-3 (Latin-3)"),
2765 build_string ("ISO8859-3 (Latin-3)"),
2766 build_string ("iso8859-3"),
2768 staticpro (&Vcharset_latin_iso8859_4);
2769 Vcharset_latin_iso8859_4 =
2770 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2771 CHARSET_TYPE_96, 1, 1, 'D',
2772 CHARSET_LEFT_TO_RIGHT,
2773 build_string ("Latin-4"),
2774 build_string ("ISO8859-4 (Latin-4)"),
2775 build_string ("ISO8859-4 (Latin-4)"),
2776 build_string ("iso8859-4"),
2778 staticpro (&Vcharset_thai_tis620);
2779 Vcharset_thai_tis620 =
2780 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2781 CHARSET_TYPE_96, 1, 1, 'T',
2782 CHARSET_LEFT_TO_RIGHT,
2783 build_string ("TIS620"),
2784 build_string ("TIS620 (Thai)"),
2785 build_string ("TIS620.2529 (Thai)"),
2786 build_string ("tis620"),
2787 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2788 staticpro (&Vcharset_greek_iso8859_7);
2789 Vcharset_greek_iso8859_7 =
2790 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2791 CHARSET_TYPE_96, 1, 1, 'F',
2792 CHARSET_LEFT_TO_RIGHT,
2793 build_string ("ISO8859-7"),
2794 build_string ("ISO8859-7 (Greek)"),
2795 build_string ("ISO8859-7 (Greek)"),
2796 build_string ("iso8859-7"),
2797 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2798 staticpro (&Vcharset_arabic_iso8859_6);
2799 Vcharset_arabic_iso8859_6 =
2800 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2801 CHARSET_TYPE_96, 1, 1, 'G',
2802 CHARSET_RIGHT_TO_LEFT,
2803 build_string ("ISO8859-6"),
2804 build_string ("ISO8859-6 (Arabic)"),
2805 build_string ("ISO8859-6 (Arabic)"),
2806 build_string ("iso8859-6"),
2808 staticpro (&Vcharset_hebrew_iso8859_8);
2809 Vcharset_hebrew_iso8859_8 =
2810 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2811 CHARSET_TYPE_96, 1, 1, 'H',
2812 CHARSET_RIGHT_TO_LEFT,
2813 build_string ("ISO8859-8"),
2814 build_string ("ISO8859-8 (Hebrew)"),
2815 build_string ("ISO8859-8 (Hebrew)"),
2816 build_string ("iso8859-8"),
2817 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2818 staticpro (&Vcharset_katakana_jisx0201);
2819 Vcharset_katakana_jisx0201 =
2820 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2821 CHARSET_TYPE_94, 1, 1, 'I',
2822 CHARSET_LEFT_TO_RIGHT,
2823 build_string ("JISX0201 Kana"),
2824 build_string ("JISX0201.1976 (Japanese Kana)"),
2825 build_string ("JISX0201.1976 Japanese Kana"),
2826 build_string ("jisx0201\\.1976"),
2828 staticpro (&Vcharset_latin_jisx0201);
2829 Vcharset_latin_jisx0201 =
2830 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2831 CHARSET_TYPE_94, 1, 0, 'J',
2832 CHARSET_LEFT_TO_RIGHT,
2833 build_string ("JISX0201 Roman"),
2834 build_string ("JISX0201.1976 (Japanese Roman)"),
2835 build_string ("JISX0201.1976 Japanese Roman"),
2836 build_string ("jisx0201\\.1976"),
2838 staticpro (&Vcharset_cyrillic_iso8859_5);
2839 Vcharset_cyrillic_iso8859_5 =
2840 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2841 CHARSET_TYPE_96, 1, 1, 'L',
2842 CHARSET_LEFT_TO_RIGHT,
2843 build_string ("ISO8859-5"),
2844 build_string ("ISO8859-5 (Cyrillic)"),
2845 build_string ("ISO8859-5 (Cyrillic)"),
2846 build_string ("iso8859-5"),
2847 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2848 staticpro (&Vcharset_latin_iso8859_9);
2849 Vcharset_latin_iso8859_9 =
2850 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2851 CHARSET_TYPE_96, 1, 1, 'M',
2852 CHARSET_LEFT_TO_RIGHT,
2853 build_string ("Latin-5"),
2854 build_string ("ISO8859-9 (Latin-5)"),
2855 build_string ("ISO8859-9 (Latin-5)"),
2856 build_string ("iso8859-9"),
2858 staticpro (&Vcharset_japanese_jisx0208_1978);
2859 Vcharset_japanese_jisx0208_1978 =
2860 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2861 CHARSET_TYPE_94X94, 2, 0, '@',
2862 CHARSET_LEFT_TO_RIGHT,
2863 build_string ("JIS X0208:1978"),
2864 build_string ("JIS X0208:1978 (Japanese)"),
2866 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2867 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2869 staticpro (&Vcharset_chinese_gb2312);
2870 Vcharset_chinese_gb2312 =
2871 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2872 CHARSET_TYPE_94X94, 2, 0, 'A',
2873 CHARSET_LEFT_TO_RIGHT,
2874 build_string ("GB2312"),
2875 build_string ("GB2312)"),
2876 build_string ("GB2312 Chinese simplified"),
2877 build_string ("gb2312"),
2879 staticpro (&Vcharset_japanese_jisx0208);
2880 Vcharset_japanese_jisx0208 =
2881 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2882 CHARSET_TYPE_94X94, 2, 0, 'B',
2883 CHARSET_LEFT_TO_RIGHT,
2884 build_string ("JISX0208"),
2885 build_string ("JIS X0208:1983 (Japanese)"),
2886 build_string ("JIS X0208:1983 Japanese Kanji"),
2887 build_string ("jisx0208\\.1983"),
2890 staticpro (&Vcharset_japanese_jisx0208_1990);
2891 Vcharset_japanese_jisx0208_1990 =
2892 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2893 Qjapanese_jisx0208_1990,
2894 CHARSET_TYPE_94X94, 2, 0, 0,
2895 CHARSET_LEFT_TO_RIGHT,
2896 build_string ("JISX0208-1990"),
2897 build_string ("JIS X0208:1990 (Japanese)"),
2898 build_string ("JIS X0208:1990 Japanese Kanji"),
2899 build_string ("jisx0208\\.1990"),
2901 MIN_CHAR_JIS_X0208_1990,
2902 MAX_CHAR_JIS_X0208_1990, 0, 33);
2904 staticpro (&Vcharset_korean_ksc5601);
2905 Vcharset_korean_ksc5601 =
2906 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2907 CHARSET_TYPE_94X94, 2, 0, 'C',
2908 CHARSET_LEFT_TO_RIGHT,
2909 build_string ("KSC5601"),
2910 build_string ("KSC5601 (Korean"),
2911 build_string ("KSC5601 Korean Hangul and Hanja"),
2912 build_string ("ksc5601"),
2914 staticpro (&Vcharset_japanese_jisx0212);
2915 Vcharset_japanese_jisx0212 =
2916 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2917 CHARSET_TYPE_94X94, 2, 0, 'D',
2918 CHARSET_LEFT_TO_RIGHT,
2919 build_string ("JISX0212"),
2920 build_string ("JISX0212 (Japanese)"),
2921 build_string ("JISX0212 Japanese Supplement"),
2922 build_string ("jisx0212"),
2925 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2926 staticpro (&Vcharset_chinese_cns11643_1);
2927 Vcharset_chinese_cns11643_1 =
2928 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2929 CHARSET_TYPE_94X94, 2, 0, 'G',
2930 CHARSET_LEFT_TO_RIGHT,
2931 build_string ("CNS11643-1"),
2932 build_string ("CNS11643-1 (Chinese traditional)"),
2934 ("CNS 11643 Plane 1 Chinese traditional"),
2935 build_string (CHINESE_CNS_PLANE_RE("1")),
2937 staticpro (&Vcharset_chinese_cns11643_2);
2938 Vcharset_chinese_cns11643_2 =
2939 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2940 CHARSET_TYPE_94X94, 2, 0, 'H',
2941 CHARSET_LEFT_TO_RIGHT,
2942 build_string ("CNS11643-2"),
2943 build_string ("CNS11643-2 (Chinese traditional)"),
2945 ("CNS 11643 Plane 2 Chinese traditional"),
2946 build_string (CHINESE_CNS_PLANE_RE("2")),
2949 staticpro (&Vcharset_latin_viscii_lower);
2950 Vcharset_latin_viscii_lower =
2951 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2952 CHARSET_TYPE_96, 1, 1, '1',
2953 CHARSET_LEFT_TO_RIGHT,
2954 build_string ("VISCII lower"),
2955 build_string ("VISCII lower (Vietnamese)"),
2956 build_string ("VISCII lower (Vietnamese)"),
2957 build_string ("MULEVISCII-LOWER"),
2959 staticpro (&Vcharset_latin_viscii_upper);
2960 Vcharset_latin_viscii_upper =
2961 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2962 CHARSET_TYPE_96, 1, 1, '2',
2963 CHARSET_LEFT_TO_RIGHT,
2964 build_string ("VISCII upper"),
2965 build_string ("VISCII upper (Vietnamese)"),
2966 build_string ("VISCII upper (Vietnamese)"),
2967 build_string ("MULEVISCII-UPPER"),
2969 staticpro (&Vcharset_latin_viscii);
2970 Vcharset_latin_viscii =
2971 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2972 CHARSET_TYPE_256, 1, 2, 0,
2973 CHARSET_LEFT_TO_RIGHT,
2974 build_string ("VISCII"),
2975 build_string ("VISCII 1.1 (Vietnamese)"),
2976 build_string ("VISCII 1.1 (Vietnamese)"),
2977 build_string ("VISCII1\\.1"),
2979 staticpro (&Vcharset_ideograph_daikanwa);
2980 Vcharset_ideograph_daikanwa =
2981 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2982 CHARSET_TYPE_256X256, 2, 2, 0,
2983 CHARSET_LEFT_TO_RIGHT,
2984 build_string ("Daikanwa"),
2985 build_string ("Morohashi's Daikanwa"),
2986 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2987 build_string ("Daikanwa"),
2988 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2989 staticpro (&Vcharset_mojikyo_pj_1);
2990 Vcharset_mojikyo_pj_1 =
2991 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
2992 CHARSET_TYPE_94X94, 2, 0, 0,
2993 CHARSET_LEFT_TO_RIGHT,
2994 build_string ("Mojikyo-PJ-1"),
2995 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
2997 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
2998 build_string ("jisx0208\\.Mojikyo-1$"),
3000 staticpro (&Vcharset_mojikyo_pj_2);
3001 Vcharset_mojikyo_pj_2 =
3002 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
3003 CHARSET_TYPE_94X94, 2, 0, 0,
3004 CHARSET_LEFT_TO_RIGHT,
3005 build_string ("Mojikyo-PJ-2"),
3006 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3008 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3009 build_string ("jisx0208\\.Mojikyo-2$"),
3011 staticpro (&Vcharset_mojikyo_pj_3);
3012 Vcharset_mojikyo_pj_3 =
3013 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
3014 CHARSET_TYPE_94X94, 2, 0, 0,
3015 CHARSET_LEFT_TO_RIGHT,
3016 build_string ("Mojikyo-PJ-3"),
3017 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3019 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3020 build_string ("jisx0208\\.Mojikyo-3$"),
3022 staticpro (&Vcharset_mojikyo_pj_4);
3023 Vcharset_mojikyo_pj_4 =
3024 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
3025 CHARSET_TYPE_94X94, 2, 0, 0,
3026 CHARSET_LEFT_TO_RIGHT,
3027 build_string ("Mojikyo-PJ-4"),
3028 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3030 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3031 build_string ("jisx0208\\.Mojikyo-4$"),
3033 staticpro (&Vcharset_mojikyo_pj_5);
3034 Vcharset_mojikyo_pj_5 =
3035 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
3036 CHARSET_TYPE_94X94, 2, 0, 0,
3037 CHARSET_LEFT_TO_RIGHT,
3038 build_string ("Mojikyo-PJ-5"),
3039 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3041 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3042 build_string ("jisx0208\\.Mojikyo-5$"),
3044 staticpro (&Vcharset_mojikyo_pj_6);
3045 Vcharset_mojikyo_pj_6 =
3046 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
3047 CHARSET_TYPE_94X94, 2, 0, 0,
3048 CHARSET_LEFT_TO_RIGHT,
3049 build_string ("Mojikyo-PJ-6"),
3050 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3052 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3053 build_string ("jisx0208\\.Mojikyo-6$"),
3055 staticpro (&Vcharset_mojikyo_pj_7);
3056 Vcharset_mojikyo_pj_7 =
3057 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3058 CHARSET_TYPE_94X94, 2, 0, 0,
3059 CHARSET_LEFT_TO_RIGHT,
3060 build_string ("Mojikyo-PJ-7"),
3061 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3063 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3064 build_string ("jisx0208\\.Mojikyo-7$"),
3066 staticpro (&Vcharset_mojikyo_pj_8);
3067 Vcharset_mojikyo_pj_8 =
3068 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3069 CHARSET_TYPE_94X94, 2, 0, 0,
3070 CHARSET_LEFT_TO_RIGHT,
3071 build_string ("Mojikyo-PJ-8"),
3072 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3074 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3075 build_string ("jisx0208\\.Mojikyo-8$"),
3077 staticpro (&Vcharset_mojikyo_pj_9);
3078 Vcharset_mojikyo_pj_9 =
3079 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3080 CHARSET_TYPE_94X94, 2, 0, 0,
3081 CHARSET_LEFT_TO_RIGHT,
3082 build_string ("Mojikyo-PJ-9"),
3083 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3085 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3086 build_string ("jisx0208\\.Mojikyo-9$"),
3088 staticpro (&Vcharset_mojikyo_pj_10);
3089 Vcharset_mojikyo_pj_10 =
3090 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3091 CHARSET_TYPE_94X94, 2, 0, 0,
3092 CHARSET_LEFT_TO_RIGHT,
3093 build_string ("Mojikyo-PJ-10"),
3094 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3096 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3097 build_string ("jisx0208\\.Mojikyo-10$"),
3099 staticpro (&Vcharset_mojikyo_pj_11);
3100 Vcharset_mojikyo_pj_11 =
3101 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3102 CHARSET_TYPE_94X94, 2, 0, 0,
3103 CHARSET_LEFT_TO_RIGHT,
3104 build_string ("Mojikyo-PJ-11"),
3105 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3107 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3108 build_string ("jisx0208\\.Mojikyo-11$"),
3110 staticpro (&Vcharset_mojikyo_pj_12);
3111 Vcharset_mojikyo_pj_12 =
3112 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3113 CHARSET_TYPE_94X94, 2, 0, 0,
3114 CHARSET_LEFT_TO_RIGHT,
3115 build_string ("Mojikyo-PJ-12"),
3116 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3118 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3119 build_string ("jisx0208\\.Mojikyo-12$"),
3121 staticpro (&Vcharset_mojikyo_pj_13);
3122 Vcharset_mojikyo_pj_13 =
3123 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3124 CHARSET_TYPE_94X94, 2, 0, 0,
3125 CHARSET_LEFT_TO_RIGHT,
3126 build_string ("Mojikyo-PJ-13"),
3127 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3129 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3130 build_string ("jisx0208\\.Mojikyo-13$"),
3132 staticpro (&Vcharset_mojikyo_pj_14);
3133 Vcharset_mojikyo_pj_14 =
3134 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3135 CHARSET_TYPE_94X94, 2, 0, 0,
3136 CHARSET_LEFT_TO_RIGHT,
3137 build_string ("Mojikyo-PJ-14"),
3138 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3140 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3141 build_string ("jisx0208\\.Mojikyo-14$"),
3143 staticpro (&Vcharset_mojikyo_pj_15);
3144 Vcharset_mojikyo_pj_15 =
3145 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3146 CHARSET_TYPE_94X94, 2, 0, 0,
3147 CHARSET_LEFT_TO_RIGHT,
3148 build_string ("Mojikyo-PJ-15"),
3149 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3151 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3152 build_string ("jisx0208\\.Mojikyo-15$"),
3154 staticpro (&Vcharset_mojikyo_pj_16);
3155 Vcharset_mojikyo_pj_16 =
3156 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3157 CHARSET_TYPE_94X94, 2, 0, 0,
3158 CHARSET_LEFT_TO_RIGHT,
3159 build_string ("Mojikyo-PJ-16"),
3160 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3162 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3163 build_string ("jisx0208\\.Mojikyo-16$"),
3165 staticpro (&Vcharset_mojikyo_pj_17);
3166 Vcharset_mojikyo_pj_17 =
3167 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3168 CHARSET_TYPE_94X94, 2, 0, 0,
3169 CHARSET_LEFT_TO_RIGHT,
3170 build_string ("Mojikyo-PJ-17"),
3171 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3173 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3174 build_string ("jisx0208\\.Mojikyo-17$"),
3176 staticpro (&Vcharset_mojikyo_pj_18);
3177 Vcharset_mojikyo_pj_18 =
3178 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3179 CHARSET_TYPE_94X94, 2, 0, 0,
3180 CHARSET_LEFT_TO_RIGHT,
3181 build_string ("Mojikyo-PJ-18"),
3182 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3184 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3185 build_string ("jisx0208\\.Mojikyo-18$"),
3187 staticpro (&Vcharset_mojikyo_pj_19);
3188 Vcharset_mojikyo_pj_19 =
3189 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3190 CHARSET_TYPE_94X94, 2, 0, 0,
3191 CHARSET_LEFT_TO_RIGHT,
3192 build_string ("Mojikyo-PJ-19"),
3193 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3195 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3196 build_string ("jisx0208\\.Mojikyo-19$"),
3198 staticpro (&Vcharset_mojikyo_pj_20);
3199 Vcharset_mojikyo_pj_20 =
3200 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3201 CHARSET_TYPE_94X94, 2, 0, 0,
3202 CHARSET_LEFT_TO_RIGHT,
3203 build_string ("Mojikyo-PJ-20"),
3204 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3206 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3207 build_string ("jisx0208\\.Mojikyo-20$"),
3209 staticpro (&Vcharset_mojikyo_pj_21);
3210 Vcharset_mojikyo_pj_21 =
3211 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3212 CHARSET_TYPE_94X94, 2, 0, 0,
3213 CHARSET_LEFT_TO_RIGHT,
3214 build_string ("Mojikyo-PJ-21"),
3215 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3217 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3218 build_string ("jisx0208\\.Mojikyo-21$"),
3220 staticpro (&Vcharset_ethiopic_ucs);
3221 Vcharset_ethiopic_ucs =
3222 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3223 CHARSET_TYPE_256X256, 2, 2, 0,
3224 CHARSET_LEFT_TO_RIGHT,
3225 build_string ("Ethiopic (UCS)"),
3226 build_string ("Ethiopic (UCS)"),
3227 build_string ("Ethiopic of UCS"),
3228 build_string ("Ethiopic-Unicode"),
3229 Qnil, 0x1200, 0x137F, 0x1200, 0);
3231 staticpro (&Vcharset_chinese_big5_1);
3232 Vcharset_chinese_big5_1 =
3233 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3234 CHARSET_TYPE_94X94, 2, 0, '0',
3235 CHARSET_LEFT_TO_RIGHT,
3236 build_string ("Big5"),
3237 build_string ("Big5 (Level-1)"),
3239 ("Big5 Level-1 Chinese traditional"),
3240 build_string ("big5"),
3242 staticpro (&Vcharset_chinese_big5_2);
3243 Vcharset_chinese_big5_2 =
3244 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3245 CHARSET_TYPE_94X94, 2, 0, '1',
3246 CHARSET_LEFT_TO_RIGHT,
3247 build_string ("Big5"),
3248 build_string ("Big5 (Level-2)"),
3250 ("Big5 Level-2 Chinese traditional"),
3251 build_string ("big5"),
3254 #ifdef ENABLE_COMPOSITE_CHARS
3255 /* #### For simplicity, we put composite chars into a 96x96 charset.
3256 This is going to lead to problems because you can run out of
3257 room, esp. as we don't yet recycle numbers. */
3258 staticpro (&Vcharset_composite);
3259 Vcharset_composite =
3260 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3261 CHARSET_TYPE_96X96, 2, 0, 0,
3262 CHARSET_LEFT_TO_RIGHT,
3263 build_string ("Composite"),
3264 build_string ("Composite characters"),
3265 build_string ("Composite characters"),
3268 /* #### not dumped properly */
3269 composite_char_row_next = 32;
3270 composite_char_col_next = 32;
3272 Vcomposite_char_string2char_hash_table =
3273 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3274 Vcomposite_char_char2string_hash_table =
3275 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3276 staticpro (&Vcomposite_char_string2char_hash_table);
3277 staticpro (&Vcomposite_char_char2string_hash_table);
3278 #endif /* ENABLE_COMPOSITE_CHARS */