1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs_bmp;
64 Lisp_Object Vcharset_latin_viscii;
65 Lisp_Object Vcharset_latin_viscii_lower;
66 Lisp_Object Vcharset_latin_viscii_upper;
67 Lisp_Object Vcharset_ideograph_daikanwa;
68 Lisp_Object Vcharset_mojikyo_pj_1;
69 Lisp_Object Vcharset_mojikyo_pj_2;
70 Lisp_Object Vcharset_mojikyo_pj_3;
71 Lisp_Object Vcharset_mojikyo_pj_4;
72 Lisp_Object Vcharset_mojikyo_pj_5;
73 Lisp_Object Vcharset_mojikyo_pj_6;
74 Lisp_Object Vcharset_mojikyo_pj_7;
75 Lisp_Object Vcharset_mojikyo_pj_8;
76 Lisp_Object Vcharset_mojikyo_pj_9;
77 Lisp_Object Vcharset_mojikyo_pj_10;
78 Lisp_Object Vcharset_mojikyo_pj_11;
79 Lisp_Object Vcharset_mojikyo_pj_12;
80 Lisp_Object Vcharset_mojikyo_pj_13;
81 Lisp_Object Vcharset_mojikyo_pj_14;
82 Lisp_Object Vcharset_mojikyo_pj_15;
83 Lisp_Object Vcharset_mojikyo_pj_16;
84 Lisp_Object Vcharset_mojikyo_pj_17;
85 Lisp_Object Vcharset_mojikyo_pj_18;
86 Lisp_Object Vcharset_mojikyo_pj_19;
87 Lisp_Object Vcharset_mojikyo_pj_20;
88 Lisp_Object Vcharset_mojikyo_pj_21;
89 Lisp_Object Vcharset_ethiopic_ucs;
91 Lisp_Object Vcharset_chinese_big5_1;
92 Lisp_Object Vcharset_chinese_big5_2;
94 #ifdef ENABLE_COMPOSITE_CHARS
95 Lisp_Object Vcharset_composite;
97 /* Hash tables for composite chars. One maps string representing
98 composed chars to their equivalent chars; one goes the
100 Lisp_Object Vcomposite_char_char2string_hash_table;
101 Lisp_Object Vcomposite_char_string2char_hash_table;
103 static int composite_char_row_next;
104 static int composite_char_col_next;
106 #endif /* ENABLE_COMPOSITE_CHARS */
108 struct charset_lookup *chlook;
110 static const struct lrecord_description charset_lookup_description_1[] = {
111 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
120 static const struct struct_description charset_lookup_description = {
121 sizeof(struct charset_lookup),
122 charset_lookup_description_1
126 /* Table of number of bytes in the string representation of a character
127 indexed by the first byte of that representation.
129 rep_bytes_by_first_byte(c) is more efficient than the equivalent
130 canonical computation:
132 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
134 const Bytecount rep_bytes_by_first_byte[0xA0] =
135 { /* 0x00 - 0x7f are for straight ASCII */
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 /* 0x80 - 0x8f are for Dimension-1 official charsets */
146 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
148 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
150 /* 0x90 - 0x9d are for Dimension-2 official charsets */
151 /* 0x9e is for Dimension-1 private charsets */
152 /* 0x9f is for Dimension-2 private charsets */
153 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
160 mark_char_byte_table (Lisp_Object obj)
162 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
165 for (i = 0; i < 256; i++)
167 mark_object (cte->property[i]);
173 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
175 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
176 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
179 for (i = 0; i < 256; i++)
180 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
182 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
184 if (!char_byte_table_equal (cte1->property[i],
185 cte2->property[i], depth + 1))
192 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
198 char_byte_table_hash (Lisp_Object obj, int depth)
200 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
202 return internal_array_hash (cte->property, 256, depth);
205 static const struct lrecord_description char_byte_table_description[] = {
206 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
210 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
211 mark_char_byte_table,
212 internal_object_printer,
213 0, char_byte_table_equal,
214 char_byte_table_hash,
215 char_byte_table_description,
216 struct Lisp_Char_Byte_Table);
219 make_char_byte_table (Lisp_Object initval)
223 struct Lisp_Char_Byte_Table *cte =
224 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
225 &lrecord_char_byte_table);
227 for (i = 0; i < 256; i++)
228 cte->property[i] = initval;
230 XSETCHAR_BYTE_TABLE (obj, cte);
235 copy_char_byte_table (Lisp_Object entry)
237 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
240 struct Lisp_Char_Byte_Table *ctenew =
241 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
242 &lrecord_char_byte_table);
244 for (i = 0; i < 256; i++)
246 Lisp_Object new = cte->property[i];
247 if (CHAR_BYTE_TABLE_P (new))
248 ctenew->property[i] = copy_char_byte_table (new);
250 ctenew->property[i] = new;
253 XSETCHAR_BYTE_TABLE (obj, ctenew);
259 mark_char_code_table (Lisp_Object obj)
261 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
267 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
269 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
270 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
272 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
276 char_code_table_hash (Lisp_Object obj, int depth)
278 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
280 return char_code_table_hash (cte->table, depth + 1);
283 static const struct lrecord_description char_code_table_description[] = {
284 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
288 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
289 mark_char_code_table,
290 internal_object_printer,
291 0, char_code_table_equal,
292 char_code_table_hash,
293 char_code_table_description,
294 struct Lisp_Char_Code_Table);
297 make_char_code_table (Lisp_Object initval)
300 struct Lisp_Char_Code_Table *cte =
301 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
302 &lrecord_char_code_table);
304 cte->table = make_char_byte_table (initval);
306 XSETCHAR_CODE_TABLE (obj, cte);
311 copy_char_code_table (Lisp_Object entry)
313 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
315 struct Lisp_Char_Code_Table *ctenew =
316 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
317 &lrecord_char_code_table);
319 ctenew->table = copy_char_byte_table (cte->table);
320 XSETCHAR_CODE_TABLE (obj, ctenew);
326 get_char_code_table (Emchar ch, Lisp_Object table)
328 unsigned int code = ch;
329 struct Lisp_Char_Byte_Table* cpt
330 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
331 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
333 if (CHAR_BYTE_TABLE_P (ret))
334 cpt = XCHAR_BYTE_TABLE (ret);
338 ret = cpt->property [(unsigned char) (code >> 16)];
339 if (CHAR_BYTE_TABLE_P (ret))
340 cpt = XCHAR_BYTE_TABLE (ret);
344 ret = cpt->property [(unsigned char) (code >> 8)];
345 if (CHAR_BYTE_TABLE_P (ret))
346 cpt = XCHAR_BYTE_TABLE (ret);
350 return cpt->property [(unsigned char) code];
354 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
356 unsigned int code = ch;
357 struct Lisp_Char_Byte_Table* cpt1
358 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
359 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
361 if (CHAR_BYTE_TABLE_P (ret))
363 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
365 ret = cpt2->property[(unsigned char)(code >> 16)];
366 if (CHAR_BYTE_TABLE_P (ret))
368 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
370 ret = cpt3->property[(unsigned char)(code >> 8)];
371 if (CHAR_BYTE_TABLE_P (ret))
373 struct Lisp_Char_Byte_Table* cpt4
374 = XCHAR_BYTE_TABLE (ret);
376 cpt4->property[(unsigned char)code] = value;
378 else if (!EQ (ret, value))
380 Lisp_Object cpt4 = make_char_byte_table (ret);
382 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
383 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
386 else if (!EQ (ret, value))
388 Lisp_Object cpt3 = make_char_byte_table (ret);
389 Lisp_Object cpt4 = make_char_byte_table (ret);
391 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
392 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
394 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
397 else if (!EQ (ret, value))
399 Lisp_Object cpt2 = make_char_byte_table (ret);
400 Lisp_Object cpt3 = make_char_byte_table (ret);
401 Lisp_Object cpt4 = make_char_byte_table (ret);
403 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
404 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
405 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
406 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
411 Lisp_Object Vcharacter_attribute_table;
412 Lisp_Object Vcharacter_composition_table;
413 Lisp_Object Vcharacter_variant_table;
415 Lisp_Object Q_decomposition;
418 Lisp_Object Qisolated;
419 Lisp_Object Qinitial;
422 Lisp_Object Qvertical;
423 Lisp_Object QnoBreak;
424 Lisp_Object Qfraction;
435 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
441 else if (EQ (v, Qcompat))
443 else if (EQ (v, Qisolated))
445 else if (EQ (v, Qinitial))
447 else if (EQ (v, Qmedial))
449 else if (EQ (v, Qfinal))
451 else if (EQ (v, Qvertical))
453 else if (EQ (v, QnoBreak))
455 else if (EQ (v, Qfraction))
457 else if (EQ (v, Qsuper))
459 else if (EQ (v, Qsub))
461 else if (EQ (v, Qcircle))
463 else if (EQ (v, Qsquare))
465 else if (EQ (v, Qwide))
467 else if (EQ (v, Qnarrow))
469 else if (EQ (v, Qsmall))
471 else if (EQ (v, Qfont))
474 signal_simple_error (err_msg, err_arg);
477 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
478 Return character corresponding with list.
482 Lisp_Object table = Vcharacter_composition_table;
483 Lisp_Object rest = list;
487 Lisp_Object v = Fcar (rest);
489 Emchar c = to_char_code (v, "Invalid value for composition", list);
491 ret = get_char_code_table (c, table);
496 if (!CHAR_CODE_TABLE_P (ret))
501 else if (!CONSP (rest))
503 else if (CHAR_CODE_TABLE_P (ret))
506 signal_simple_error ("Invalid table is found with", list);
508 signal_simple_error ("Invalid value for composition", list);
511 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
512 Return variants of CHARACTER.
516 CHECK_CHAR (character);
517 return Fcopy_list (get_char_code_table (XCHAR (character),
518 Vcharacter_variant_table));
521 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
522 Return the alist of attributes of CHARACTER.
526 CHECK_CHAR (character);
527 return Fcopy_alist (get_char_code_table (XCHAR (character),
528 Vcharacter_attribute_table));
531 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
532 Return the value of CHARACTER's ATTRIBUTE.
534 (character, attribute))
539 CHECK_CHAR (character);
540 ret = get_char_code_table (XCHAR (character),
541 Vcharacter_attribute_table);
545 if (!NILP (ccs = Ffind_charset (attribute)))
548 return Fcdr (Fassq (attribute, ret));
552 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
555 Emchar char_code = XCHAR (character);
557 = get_char_code_table (char_code, Vcharacter_attribute_table);
560 cell = Fassq (attribute, ret);
564 ret = Fcons (Fcons (attribute, value), ret);
566 else if (!EQ (Fcdr (cell), value))
568 Fsetcdr (cell, value);
570 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
574 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
575 Store CHARACTER's ATTRIBUTE with VALUE.
577 (character, attribute, value))
581 CHECK_CHAR (character);
582 ccs = Ffind_charset (attribute);
585 Lisp_Object cpos, rest;
586 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
593 /* ad-hoc method for `ascii' */
594 if ((XCHARSET_CHARS (ccs) == 94) &&
595 (XCHARSET_BYTE_OFFSET (ccs) != 33))
596 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
598 ccs_len = XCHARSET_CHARS (ccs);
602 Lisp_Object ret = Fcar (value);
605 signal_simple_error ("Invalid value for coded-charset", value);
606 code_point = XINT (ret);
607 if (XCHARSET_GRAPHIC (ccs) == 1)
615 signal_simple_error ("Invalid value for coded-charset", value);
618 signal_simple_error ("Invalid value for coded-charset", value);
620 if (XCHARSET_GRAPHIC (ccs) == 1)
622 code_point = (code_point << 8) | i;
625 value = make_int (code_point);
627 else if (INTP (value))
629 if (XCHARSET_GRAPHIC (ccs) == 1)
630 value = make_int (XINT (value) & 0x7F7F7F7F);
633 signal_simple_error ("Invalid value for coded-charset", value);
636 cpos = Fget_char_attribute (character, attribute);
641 dim = XCHARSET_DIMENSION (ccs);
642 code_point = XINT (cpos);
646 i = ((code_point >> (8 * dim)) & 255)
647 - XCHARSET_BYTE_OFFSET (ccs);
648 nv = XVECTOR_DATA(v)[i];
654 XVECTOR_DATA(v)[i] = Qnil;
655 v = XCHARSET_DECODING_TABLE (ccs);
660 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
663 dim = XCHARSET_DIMENSION (ccs);
664 code_point = XINT (value);
669 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
670 nv = XVECTOR_DATA(v)[i];
674 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
680 XVECTOR_DATA(v)[i] = character;
682 else if (EQ (attribute, Q_decomposition))
684 Lisp_Object rest = value;
685 Lisp_Object table = Vcharacter_composition_table;
688 signal_simple_error ("Invalid value for ->decomposition",
693 Lisp_Object v = Fcar (rest);
696 = to_char_code (v, "Invalid value for ->decomposition", value);
701 put_char_code_table (c, character, table);
706 ntable = get_char_code_table (c, table);
707 if (!CHAR_CODE_TABLE_P (ntable))
709 ntable = make_char_code_table (Qnil);
710 put_char_code_table (c, ntable, table);
716 else if (EQ (attribute, Q_ucs))
722 signal_simple_error ("Invalid value for ->ucs", value);
726 ret = get_char_code_table (c, Vcharacter_variant_table);
727 if (NILP (Fmemq (character, ret)))
729 put_char_code_table (c, Fcons (character, ret),
730 Vcharacter_variant_table);
733 return put_char_attribute (character, attribute, value);
738 EXFUN (Fmake_char, 3);
740 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
741 Store character's ATTRIBUTES.
745 Lisp_Object rest = attributes;
746 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
747 Lisp_Object character;
753 Lisp_Object cell = Fcar (rest);
757 signal_simple_error ("Invalid argument", attributes);
758 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
761 character = Fmake_char (ccs, Fcar (cell),
763 goto setup_attributes;
767 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
770 signal_simple_error ("Invalid argument", attributes);
772 character = make_char (XINT (code) + 0x100000);
773 goto setup_attributes;
777 else if (!INTP (code))
778 signal_simple_error ("Invalid argument", attributes);
780 character = make_char (XINT (code));
786 Lisp_Object cell = Fcar (rest);
789 signal_simple_error ("Invalid argument", attributes);
790 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
794 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
797 Lisp_Object Vutf_2000_version;
801 int leading_code_private_11;
804 Lisp_Object Qcharsetp;
806 /* Qdoc_string, Qdimension, Qchars defined in general.c */
807 Lisp_Object Qregistry, Qfinal, Qgraphic;
808 Lisp_Object Qdirection;
809 Lisp_Object Qreverse_direction_charset;
810 Lisp_Object Qleading_byte;
811 Lisp_Object Qshort_name, Qlong_name;
827 Qjapanese_jisx0208_1978,
830 Qjapanese_jisx0208_1990,
840 Qvietnamese_viscii_lower,
841 Qvietnamese_viscii_upper,
870 Lisp_Object Ql2r, Qr2l;
872 Lisp_Object Vcharset_hash_table;
875 static Charset_ID next_allocated_leading_byte;
877 static Charset_ID next_allocated_1_byte_leading_byte;
878 static Charset_ID next_allocated_2_byte_leading_byte;
881 /* Composite characters are characters constructed by overstriking two
882 or more regular characters.
884 1) The old Mule implementation involves storing composite characters
885 in a buffer as a tag followed by all of the actual characters
886 used to make up the composite character. I think this is a bad
887 idea; it greatly complicates code that wants to handle strings
888 one character at a time because it has to deal with the possibility
889 of great big ungainly characters. It's much more reasonable to
890 simply store an index into a table of composite characters.
892 2) The current implementation only allows for 16,384 separate
893 composite characters over the lifetime of the XEmacs process.
894 This could become a potential problem if the user
895 edited lots of different files that use composite characters.
896 Due to FSF bogosity, increasing the number of allowable
897 composite characters under Mule would decrease the number
898 of possible faces that can exist. Mule already has shrunk
899 this to 2048, and further shrinkage would become uncomfortable.
900 No such problems exist in XEmacs.
902 Composite characters could be represented as 0x80 C1 C2 C3,
903 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
904 for slightly under 2^20 (one million) composite characters
905 over the XEmacs process lifetime, and you only need to
906 increase the size of a Mule character from 19 to 21 bits.
907 Or you could use 0x80 C1 C2 C3 C4, allowing for about
908 85 million (slightly over 2^26) composite characters. */
911 /************************************************************************/
912 /* Basic Emchar functions */
913 /************************************************************************/
915 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
916 string in STR. Returns the number of bytes stored.
917 Do not call this directly. Use the macro set_charptr_emchar() instead.
921 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
936 else if ( c <= 0x7ff )
938 *p++ = (c >> 6) | 0xc0;
939 *p++ = (c & 0x3f) | 0x80;
941 else if ( c <= 0xffff )
943 *p++ = (c >> 12) | 0xe0;
944 *p++ = ((c >> 6) & 0x3f) | 0x80;
945 *p++ = (c & 0x3f) | 0x80;
947 else if ( c <= 0x1fffff )
949 *p++ = (c >> 18) | 0xf0;
950 *p++ = ((c >> 12) & 0x3f) | 0x80;
951 *p++ = ((c >> 6) & 0x3f) | 0x80;
952 *p++ = (c & 0x3f) | 0x80;
954 else if ( c <= 0x3ffffff )
956 *p++ = (c >> 24) | 0xf8;
957 *p++ = ((c >> 18) & 0x3f) | 0x80;
958 *p++ = ((c >> 12) & 0x3f) | 0x80;
959 *p++ = ((c >> 6) & 0x3f) | 0x80;
960 *p++ = (c & 0x3f) | 0x80;
964 *p++ = (c >> 30) | 0xfc;
965 *p++ = ((c >> 24) & 0x3f) | 0x80;
966 *p++ = ((c >> 18) & 0x3f) | 0x80;
967 *p++ = ((c >> 12) & 0x3f) | 0x80;
968 *p++ = ((c >> 6) & 0x3f) | 0x80;
969 *p++ = (c & 0x3f) | 0x80;
972 BREAKUP_CHAR (c, charset, c1, c2);
973 lb = CHAR_LEADING_BYTE (c);
974 if (LEADING_BYTE_PRIVATE_P (lb))
975 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
977 if (EQ (charset, Vcharset_control_1))
986 /* Return the first character from a Mule-encoded string in STR,
987 assuming it's non-ASCII. Do not call this directly.
988 Use the macro charptr_emchar() instead. */
991 non_ascii_charptr_emchar (CONST Bufbyte *str)
1004 else if ( b >= 0xf8 )
1009 else if ( b >= 0xf0 )
1014 else if ( b >= 0xe0 )
1019 else if ( b >= 0xc0 )
1029 for( ; len > 0; len-- )
1032 ch = ( ch << 6 ) | ( b & 0x3f );
1036 Bufbyte i0 = *str, i1, i2 = 0;
1037 Lisp_Object charset;
1039 if (i0 == LEADING_BYTE_CONTROL_1)
1040 return (Emchar) (*++str - 0x20);
1042 if (LEADING_BYTE_PREFIX_P (i0))
1047 charset = CHARSET_BY_LEADING_BYTE (i0);
1048 if (XCHARSET_DIMENSION (charset) == 2)
1051 return MAKE_CHAR (charset, i1, i2);
1055 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1056 Do not call this directly. Use the macro valid_char_p() instead. */
1060 non_ascii_valid_char_p (Emchar ch)
1064 /* Must have only lowest 19 bits set */
1068 f1 = CHAR_FIELD1 (ch);
1069 f2 = CHAR_FIELD2 (ch);
1070 f3 = CHAR_FIELD3 (ch);
1074 Lisp_Object charset;
1076 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1077 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1078 f2 > MAX_CHAR_FIELD2_PRIVATE)
1083 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1084 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1088 NOTE: This takes advantage of the fact that
1089 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1090 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1092 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1093 if (EQ (charset, Qnil))
1095 return (XCHARSET_CHARS (charset) == 96);
1099 Lisp_Object charset;
1101 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1102 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1103 f1 > MAX_CHAR_FIELD1_PRIVATE)
1105 if (f2 < 0x20 || f3 < 0x20)
1108 #ifdef ENABLE_COMPOSITE_CHARS
1109 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1111 if (UNBOUNDP (Fgethash (make_int (ch),
1112 Vcomposite_char_char2string_hash_table,
1117 #endif /* ENABLE_COMPOSITE_CHARS */
1119 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1120 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1123 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1125 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1128 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1130 if (EQ (charset, Qnil))
1132 return (XCHARSET_CHARS (charset) == 96);
1138 /************************************************************************/
1139 /* Basic string functions */
1140 /************************************************************************/
1142 /* Copy the character pointed to by PTR into STR, assuming it's
1143 non-ASCII. Do not call this directly. Use the macro
1144 charptr_copy_char() instead. */
1147 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1149 Bufbyte *strptr = str;
1151 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1153 /* Notice fallthrough. */
1155 case 6: *++strptr = *ptr++;
1156 case 5: *++strptr = *ptr++;
1158 case 4: *++strptr = *ptr++;
1159 case 3: *++strptr = *ptr++;
1160 case 2: *++strptr = *ptr;
1165 return strptr + 1 - str;
1169 /************************************************************************/
1170 /* streams of Emchars */
1171 /************************************************************************/
1173 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1174 The functions below are not meant to be called directly; use
1175 the macros in insdel.h. */
1178 Lstream_get_emchar_1 (Lstream *stream, int ch)
1180 Bufbyte str[MAX_EMCHAR_LEN];
1181 Bufbyte *strptr = str;
1183 str[0] = (Bufbyte) ch;
1184 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1186 /* Notice fallthrough. */
1189 ch = Lstream_getc (stream);
1191 *++strptr = (Bufbyte) ch;
1193 ch = Lstream_getc (stream);
1195 *++strptr = (Bufbyte) ch;
1198 ch = Lstream_getc (stream);
1200 *++strptr = (Bufbyte) ch;
1202 ch = Lstream_getc (stream);
1204 *++strptr = (Bufbyte) ch;
1206 ch = Lstream_getc (stream);
1208 *++strptr = (Bufbyte) ch;
1213 return charptr_emchar (str);
1217 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1219 Bufbyte str[MAX_EMCHAR_LEN];
1220 Bytecount len = set_charptr_emchar (str, ch);
1221 return Lstream_write (stream, str, len);
1225 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1227 Bufbyte str[MAX_EMCHAR_LEN];
1228 Bytecount len = set_charptr_emchar (str, ch);
1229 Lstream_unread (stream, str, len);
1233 /************************************************************************/
1234 /* charset object */
1235 /************************************************************************/
1238 mark_charset (Lisp_Object obj)
1240 struct Lisp_Charset *cs = XCHARSET (obj);
1242 mark_object (cs->short_name);
1243 mark_object (cs->long_name);
1244 mark_object (cs->doc_string);
1245 mark_object (cs->registry);
1246 mark_object (cs->ccl_program);
1248 mark_object (cs->decoding_table);
1254 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1256 struct Lisp_Charset *cs = XCHARSET (obj);
1260 error ("printing unreadable object #<charset %s 0x%x>",
1261 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1264 write_c_string ("#<charset ", printcharfun);
1265 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1266 write_c_string (" ", printcharfun);
1267 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1268 write_c_string (" ", printcharfun);
1269 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1270 write_c_string (" ", printcharfun);
1271 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1272 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1273 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1274 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1275 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1277 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1278 CHARSET_COLUMNS (cs),
1279 CHARSET_GRAPHIC (cs),
1280 CHARSET_FINAL (cs));
1281 write_c_string (buf, printcharfun);
1282 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1283 sprintf (buf, " 0x%x>", cs->header.uid);
1284 write_c_string (buf, printcharfun);
1287 static const struct lrecord_description charset_description[] = {
1288 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1290 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1295 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1296 mark_charset, print_charset, 0, 0, 0,
1297 charset_description,
1298 struct Lisp_Charset);
1299 /* Make a new charset. */
1302 make_charset (Charset_ID id, Lisp_Object name,
1303 unsigned char type, unsigned char columns, unsigned char graphic,
1304 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1305 Lisp_Object long_name, Lisp_Object doc,
1307 Lisp_Object decoding_table,
1308 Emchar ucs_min, Emchar ucs_max,
1309 Emchar code_offset, unsigned char byte_offset)
1312 struct Lisp_Charset *cs =
1313 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1314 XSETCHARSET (obj, cs);
1316 CHARSET_ID (cs) = id;
1317 CHARSET_NAME (cs) = name;
1318 CHARSET_SHORT_NAME (cs) = short_name;
1319 CHARSET_LONG_NAME (cs) = long_name;
1320 CHARSET_DIRECTION (cs) = direction;
1321 CHARSET_TYPE (cs) = type;
1322 CHARSET_COLUMNS (cs) = columns;
1323 CHARSET_GRAPHIC (cs) = graphic;
1324 CHARSET_FINAL (cs) = final;
1325 CHARSET_DOC_STRING (cs) = doc;
1326 CHARSET_REGISTRY (cs) = reg;
1327 CHARSET_CCL_PROGRAM (cs) = Qnil;
1328 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1330 CHARSET_DECODING_TABLE(cs) = Qnil;
1331 CHARSET_UCS_MIN(cs) = ucs_min;
1332 CHARSET_UCS_MAX(cs) = ucs_max;
1333 CHARSET_CODE_OFFSET(cs) = code_offset;
1334 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1337 switch (CHARSET_TYPE (cs))
1339 case CHARSET_TYPE_94:
1340 CHARSET_DIMENSION (cs) = 1;
1341 CHARSET_CHARS (cs) = 94;
1343 case CHARSET_TYPE_96:
1344 CHARSET_DIMENSION (cs) = 1;
1345 CHARSET_CHARS (cs) = 96;
1347 case CHARSET_TYPE_94X94:
1348 CHARSET_DIMENSION (cs) = 2;
1349 CHARSET_CHARS (cs) = 94;
1351 case CHARSET_TYPE_96X96:
1352 CHARSET_DIMENSION (cs) = 2;
1353 CHARSET_CHARS (cs) = 96;
1356 case CHARSET_TYPE_128:
1357 CHARSET_DIMENSION (cs) = 1;
1358 CHARSET_CHARS (cs) = 128;
1360 case CHARSET_TYPE_128X128:
1361 CHARSET_DIMENSION (cs) = 2;
1362 CHARSET_CHARS (cs) = 128;
1364 case CHARSET_TYPE_256:
1365 CHARSET_DIMENSION (cs) = 1;
1366 CHARSET_CHARS (cs) = 256;
1368 case CHARSET_TYPE_256X256:
1369 CHARSET_DIMENSION (cs) = 2;
1370 CHARSET_CHARS (cs) = 256;
1376 if (id == LEADING_BYTE_ASCII)
1377 CHARSET_REP_BYTES (cs) = 1;
1379 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1381 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1386 /* some charsets do not have final characters. This includes
1387 ASCII, Control-1, Composite, and the two faux private
1390 if (code_offset == 0)
1392 assert (NILP (chlook->charset_by_attributes[type][final]));
1393 chlook->charset_by_attributes[type][final] = obj;
1396 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1397 chlook->charset_by_attributes[type][final][direction] = obj;
1401 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1402 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1404 /* Some charsets are "faux" and don't have names or really exist at
1405 all except in the leading-byte table. */
1407 Fputhash (name, obj, Vcharset_hash_table);
1412 get_unallocated_leading_byte (int dimension)
1417 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1420 lb = next_allocated_leading_byte++;
1424 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1427 lb = next_allocated_1_byte_leading_byte++;
1431 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1434 lb = next_allocated_2_byte_leading_byte++;
1440 ("No more character sets free for this dimension",
1441 make_int (dimension));
1448 range_charset_code_point (Lisp_Object charset, Emchar ch)
1452 if ((XCHARSET_UCS_MIN (charset) <= ch)
1453 && (ch <= XCHARSET_UCS_MAX (charset)))
1455 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1457 if (XCHARSET_DIMENSION (charset) == 1)
1458 return d + XCHARSET_BYTE_OFFSET (charset);
1459 else if (XCHARSET_DIMENSION (charset) == 2)
1461 ((d / XCHARSET_CHARS (charset)
1462 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1463 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1464 else if (XCHARSET_DIMENSION (charset) == 3)
1466 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1467 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1468 | ((d / XCHARSET_CHARS (charset)
1469 % XCHARSET_CHARS (charset)
1470 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1471 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1472 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1474 ((d / (XCHARSET_CHARS (charset)
1475 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1476 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1477 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1478 % XCHARSET_CHARS (charset)
1479 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1480 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1481 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1482 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1484 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1486 if (XCHARSET_DIMENSION (charset) == 1)
1488 if (XCHARSET_CHARS (charset) == 94)
1490 if (((d = ch - (MIN_CHAR_94
1491 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1495 else if (XCHARSET_CHARS (charset) == 96)
1497 if (((d = ch - (MIN_CHAR_96
1498 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1505 else if (XCHARSET_DIMENSION (charset) == 2)
1507 if (XCHARSET_CHARS (charset) == 94)
1509 if (((d = ch - (MIN_CHAR_94x94
1510 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1513 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1515 else if (XCHARSET_CHARS (charset) == 96)
1517 if (((d = ch - (MIN_CHAR_96x96
1518 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1521 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1531 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1533 if (c <= MAX_CHAR_BASIC_LATIN)
1535 *charset = Vcharset_ascii;
1540 *charset = Vcharset_control_1;
1545 *charset = Vcharset_latin_iso8859_1;
1548 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1550 *charset = Vcharset_greek_iso8859_7;
1551 return c - MIN_CHAR_GREEK + 0x20;
1554 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1556 *charset = Vcharset_cyrillic_iso8859_5;
1557 return c - MIN_CHAR_CYRILLIC + 0x20;
1560 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1562 *charset = Vcharset_hebrew_iso8859_8;
1563 return c - MIN_CHAR_HEBREW + 0x20;
1565 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1567 *charset = Vcharset_thai_tis620;
1568 return c - MIN_CHAR_THAI + 0x20;
1571 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1572 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1574 return list2 (Vcharset_katakana_jisx0201,
1575 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1578 else if (c <= MAX_CHAR_BMP)
1580 *charset = Vcharset_ucs_bmp;
1583 else if ((MIN_CHAR_DAIKANWA <= c) && (c <= MAX_CHAR_DAIKANWA))
1585 *charset = Vcharset_ideograph_daikanwa;
1586 return c - MIN_CHAR_DAIKANWA;
1588 else if (c <= MAX_CHAR_94)
1590 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1591 ((c - MIN_CHAR_94) / 94) + '0',
1592 CHARSET_LEFT_TO_RIGHT);
1593 return ((c - MIN_CHAR_94) % 94) + 33;
1595 else if (c <= MAX_CHAR_96)
1597 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1598 ((c - MIN_CHAR_96) / 96) + '0',
1599 CHARSET_LEFT_TO_RIGHT);
1600 return ((c - MIN_CHAR_96) % 96) + 32;
1602 else if (c <= MAX_CHAR_94x94)
1605 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1606 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1607 CHARSET_LEFT_TO_RIGHT);
1608 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1609 | (((c - MIN_CHAR_94x94) % 94) + 33);
1611 else if (c <= MAX_CHAR_96x96)
1614 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1615 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1616 CHARSET_LEFT_TO_RIGHT);
1617 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1618 | (((c - MIN_CHAR_96x96) % 96) + 32);
1627 charset_code_point (Lisp_Object charset, Emchar ch)
1629 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1633 Lisp_Object field = Fassq (charset, cdef);
1636 return XINT (Fcdr (field));
1638 return range_charset_code_point (charset, ch);
1641 Lisp_Object Vdefault_coded_charset_priority_list;
1645 /************************************************************************/
1646 /* Basic charset Lisp functions */
1647 /************************************************************************/
1649 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1650 Return non-nil if OBJECT is a charset.
1654 return CHARSETP (object) ? Qt : Qnil;
1657 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1658 Retrieve the charset of the given name.
1659 If CHARSET-OR-NAME is a charset object, it is simply returned.
1660 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1661 nil is returned. Otherwise the associated charset object is returned.
1665 if (CHARSETP (charset_or_name))
1666 return charset_or_name;
1668 CHECK_SYMBOL (charset_or_name);
1669 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1672 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1673 Retrieve the charset of the given name.
1674 Same as `find-charset' except an error is signalled if there is no such
1675 charset instead of returning nil.
1679 Lisp_Object charset = Ffind_charset (name);
1682 signal_simple_error ("No such charset", name);
1686 /* We store the charsets in hash tables with the names as the key and the
1687 actual charset object as the value. Occasionally we need to use them
1688 in a list format. These routines provide us with that. */
1689 struct charset_list_closure
1691 Lisp_Object *charset_list;
1695 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1696 void *charset_list_closure)
1698 /* This function can GC */
1699 struct charset_list_closure *chcl =
1700 (struct charset_list_closure*) charset_list_closure;
1701 Lisp_Object *charset_list = chcl->charset_list;
1703 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1707 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1708 Return a list of the names of all defined charsets.
1712 Lisp_Object charset_list = Qnil;
1713 struct gcpro gcpro1;
1714 struct charset_list_closure charset_list_closure;
1716 GCPRO1 (charset_list);
1717 charset_list_closure.charset_list = &charset_list;
1718 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1719 &charset_list_closure);
1722 return charset_list;
1725 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1726 Return the name of the given charset.
1730 return XCHARSET_NAME (Fget_charset (charset));
1733 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1734 Define a new character set.
1735 This function is for use with Mule support.
1736 NAME is a symbol, the name by which the character set is normally referred.
1737 DOC-STRING is a string describing the character set.
1738 PROPS is a property list, describing the specific nature of the
1739 character set. Recognized properties are:
1741 'short-name Short version of the charset name (ex: Latin-1)
1742 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1743 'registry A regular expression matching the font registry field for
1745 'dimension Number of octets used to index a character in this charset.
1746 Either 1 or 2. Defaults to 1.
1747 'columns Number of columns used to display a character in this charset.
1748 Only used in TTY mode. (Under X, the actual width of a
1749 character can be derived from the font used to display the
1750 characters.) If unspecified, defaults to the dimension
1751 (this is almost always the correct value).
1752 'chars Number of characters in each dimension (94 or 96).
1753 Defaults to 94. Note that if the dimension is 2, the
1754 character set thus described is 94x94 or 96x96.
1755 'final Final byte of ISO 2022 escape sequence. Must be
1756 supplied. Each combination of (DIMENSION, CHARS) defines a
1757 separate namespace for final bytes. Note that ISO
1758 2022 restricts the final byte to the range
1759 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1760 dimension == 2. Note also that final bytes in the range
1761 0x30 - 0x3F are reserved for user-defined (not official)
1763 'graphic 0 (use left half of font on output) or 1 (use right half
1764 of font on output). Defaults to 0. For example, for
1765 a font whose registry is ISO8859-1, the left half
1766 (octets 0x20 - 0x7F) is the `ascii' character set, while
1767 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1768 character set. With 'graphic set to 0, the octets
1769 will have their high bit cleared; with it set to 1,
1770 the octets will have their high bit set.
1771 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1773 'ccl-program A compiled CCL program used to convert a character in
1774 this charset into an index into the font. This is in
1775 addition to the 'graphic property. The CCL program
1776 is passed the octets of the character, with the high
1777 bit cleared and set depending upon whether the value
1778 of the 'graphic property is 0 or 1.
1780 (name, doc_string, props))
1782 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1783 int direction = CHARSET_LEFT_TO_RIGHT;
1785 Lisp_Object registry = Qnil;
1786 Lisp_Object charset;
1787 Lisp_Object rest, keyword, value;
1788 Lisp_Object ccl_program = Qnil;
1789 Lisp_Object short_name = Qnil, long_name = Qnil;
1790 int byte_offset = -1;
1792 CHECK_SYMBOL (name);
1793 if (!NILP (doc_string))
1794 CHECK_STRING (doc_string);
1796 charset = Ffind_charset (name);
1797 if (!NILP (charset))
1798 signal_simple_error ("Cannot redefine existing charset", name);
1800 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1802 if (EQ (keyword, Qshort_name))
1804 CHECK_STRING (value);
1808 if (EQ (keyword, Qlong_name))
1810 CHECK_STRING (value);
1814 else if (EQ (keyword, Qdimension))
1817 dimension = XINT (value);
1818 if (dimension < 1 || dimension > 2)
1819 signal_simple_error ("Invalid value for 'dimension", value);
1822 else if (EQ (keyword, Qchars))
1825 chars = XINT (value);
1826 if (chars != 94 && chars != 96)
1827 signal_simple_error ("Invalid value for 'chars", value);
1830 else if (EQ (keyword, Qcolumns))
1833 columns = XINT (value);
1834 if (columns != 1 && columns != 2)
1835 signal_simple_error ("Invalid value for 'columns", value);
1838 else if (EQ (keyword, Qgraphic))
1841 graphic = XINT (value);
1843 if (graphic < 0 || graphic > 2)
1845 if (graphic < 0 || graphic > 1)
1847 signal_simple_error ("Invalid value for 'graphic", value);
1850 else if (EQ (keyword, Qregistry))
1852 CHECK_STRING (value);
1856 else if (EQ (keyword, Qdirection))
1858 if (EQ (value, Ql2r))
1859 direction = CHARSET_LEFT_TO_RIGHT;
1860 else if (EQ (value, Qr2l))
1861 direction = CHARSET_RIGHT_TO_LEFT;
1863 signal_simple_error ("Invalid value for 'direction", value);
1866 else if (EQ (keyword, Qfinal))
1868 CHECK_CHAR_COERCE_INT (value);
1869 final = XCHAR (value);
1870 if (final < '0' || final > '~')
1871 signal_simple_error ("Invalid value for 'final", value);
1874 else if (EQ (keyword, Qccl_program))
1876 CHECK_VECTOR (value);
1877 ccl_program = value;
1881 signal_simple_error ("Unrecognized property", keyword);
1885 error ("'final must be specified");
1886 if (dimension == 2 && final > 0x5F)
1888 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1892 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1894 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1896 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1897 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1899 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1901 id = get_unallocated_leading_byte (dimension);
1903 if (NILP (doc_string))
1904 doc_string = build_string ("");
1906 if (NILP (registry))
1907 registry = build_string ("");
1909 if (NILP (short_name))
1910 XSETSTRING (short_name, XSYMBOL (name)->name);
1912 if (NILP (long_name))
1913 long_name = doc_string;
1916 columns = dimension;
1918 if (byte_offset < 0)
1922 else if (chars == 96)
1928 charset = make_charset (id, name, type, columns, graphic,
1929 final, direction, short_name, long_name,
1930 doc_string, registry,
1931 Qnil, 0, 0, 0, byte_offset);
1932 if (!NILP (ccl_program))
1933 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1937 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1939 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1940 NEW-NAME is the name of the new charset. Return the new charset.
1942 (charset, new_name))
1944 Lisp_Object new_charset = Qnil;
1945 int id, dimension, columns, graphic, final;
1946 int direction, type;
1947 Lisp_Object registry, doc_string, short_name, long_name;
1948 struct Lisp_Charset *cs;
1950 charset = Fget_charset (charset);
1951 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1952 signal_simple_error ("Charset already has reverse-direction charset",
1955 CHECK_SYMBOL (new_name);
1956 if (!NILP (Ffind_charset (new_name)))
1957 signal_simple_error ("Cannot redefine existing charset", new_name);
1959 cs = XCHARSET (charset);
1961 type = CHARSET_TYPE (cs);
1962 columns = CHARSET_COLUMNS (cs);
1963 dimension = CHARSET_DIMENSION (cs);
1964 id = get_unallocated_leading_byte (dimension);
1966 graphic = CHARSET_GRAPHIC (cs);
1967 final = CHARSET_FINAL (cs);
1968 direction = CHARSET_RIGHT_TO_LEFT;
1969 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1970 direction = CHARSET_LEFT_TO_RIGHT;
1971 doc_string = CHARSET_DOC_STRING (cs);
1972 short_name = CHARSET_SHORT_NAME (cs);
1973 long_name = CHARSET_LONG_NAME (cs);
1974 registry = CHARSET_REGISTRY (cs);
1976 new_charset = make_charset (id, new_name, type, columns,
1977 graphic, final, direction, short_name, long_name,
1978 doc_string, registry,
1980 CHARSET_DECODING_TABLE(cs),
1981 CHARSET_UCS_MIN(cs),
1982 CHARSET_UCS_MAX(cs),
1983 CHARSET_CODE_OFFSET(cs),
1984 CHARSET_BYTE_OFFSET(cs)
1990 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1991 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1996 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1997 Define symbol ALIAS as an alias for CHARSET.
2001 CHECK_SYMBOL (alias);
2002 charset = Fget_charset (charset);
2003 return Fputhash (alias, charset, Vcharset_hash_table);
2006 /* #### Reverse direction charsets not yet implemented. */
2008 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2010 Return the reverse-direction charset parallel to CHARSET, if any.
2011 This is the charset with the same properties (in particular, the same
2012 dimension, number of characters per dimension, and final byte) as
2013 CHARSET but whose characters are displayed in the opposite direction.
2017 charset = Fget_charset (charset);
2018 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2022 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2023 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2024 If DIRECTION is omitted, both directions will be checked (left-to-right
2025 will be returned if character sets exist for both directions).
2027 (dimension, chars, final, direction))
2029 int dm, ch, fi, di = -1;
2031 Lisp_Object obj = Qnil;
2033 CHECK_INT (dimension);
2034 dm = XINT (dimension);
2035 if (dm < 1 || dm > 2)
2036 signal_simple_error ("Invalid value for DIMENSION", dimension);
2040 if (ch != 94 && ch != 96)
2041 signal_simple_error ("Invalid value for CHARS", chars);
2043 CHECK_CHAR_COERCE_INT (final);
2045 if (fi < '0' || fi > '~')
2046 signal_simple_error ("Invalid value for FINAL", final);
2048 if (EQ (direction, Ql2r))
2049 di = CHARSET_LEFT_TO_RIGHT;
2050 else if (EQ (direction, Qr2l))
2051 di = CHARSET_RIGHT_TO_LEFT;
2052 else if (!NILP (direction))
2053 signal_simple_error ("Invalid value for DIRECTION", direction);
2055 if (dm == 2 && fi > 0x5F)
2057 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2060 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2062 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2066 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2068 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2071 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2074 return XCHARSET_NAME (obj);
2078 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2079 Return short name of CHARSET.
2083 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2086 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2087 Return long name of CHARSET.
2091 return XCHARSET_LONG_NAME (Fget_charset (charset));
2094 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2095 Return description of CHARSET.
2099 return XCHARSET_DOC_STRING (Fget_charset (charset));
2102 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2103 Return dimension of CHARSET.
2107 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2110 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2111 Return property PROP of CHARSET.
2112 Recognized properties are those listed in `make-charset', as well as
2113 'name and 'doc-string.
2117 struct Lisp_Charset *cs;
2119 charset = Fget_charset (charset);
2120 cs = XCHARSET (charset);
2122 CHECK_SYMBOL (prop);
2123 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2124 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2125 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2126 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2127 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2128 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2129 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2130 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2131 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2132 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2133 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2134 if (EQ (prop, Qdirection))
2135 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2136 if (EQ (prop, Qreverse_direction_charset))
2138 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2142 return XCHARSET_NAME (obj);
2144 signal_simple_error ("Unrecognized charset property name", prop);
2145 return Qnil; /* not reached */
2148 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2149 Return charset identification number of CHARSET.
2153 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2156 /* #### We need to figure out which properties we really want to
2159 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2160 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2162 (charset, ccl_program))
2164 charset = Fget_charset (charset);
2165 CHECK_VECTOR (ccl_program);
2166 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2171 invalidate_charset_font_caches (Lisp_Object charset)
2173 /* Invalidate font cache entries for charset on all devices. */
2174 Lisp_Object devcons, concons, hash_table;
2175 DEVICE_LOOP_NO_BREAK (devcons, concons)
2177 struct device *d = XDEVICE (XCAR (devcons));
2178 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2179 if (!UNBOUNDP (hash_table))
2180 Fclrhash (hash_table);
2184 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2185 Set the 'registry property of CHARSET to REGISTRY.
2187 (charset, registry))
2189 charset = Fget_charset (charset);
2190 CHECK_STRING (registry);
2191 XCHARSET_REGISTRY (charset) = registry;
2192 invalidate_charset_font_caches (charset);
2193 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2198 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2199 Return mapping-table of CHARSET.
2203 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2206 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2207 Set mapping-table of CHARSET to TABLE.
2211 struct Lisp_Charset *cs;
2212 Lisp_Object old_table;
2215 charset = Fget_charset (charset);
2216 cs = XCHARSET (charset);
2218 if (EQ (table, Qnil))
2220 CHARSET_DECODING_TABLE(cs) = table;
2223 else if (VECTORP (table))
2227 /* ad-hoc method for `ascii' */
2228 if ((CHARSET_CHARS (cs) == 94) &&
2229 (CHARSET_BYTE_OFFSET (cs) != 33))
2230 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2232 ccs_len = CHARSET_CHARS (cs);
2234 if (XVECTOR_LENGTH (table) > ccs_len)
2235 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2236 old_table = CHARSET_DECODING_TABLE(cs);
2237 CHARSET_DECODING_TABLE(cs) = table;
2240 signal_error (Qwrong_type_argument,
2241 list2 (build_translated_string ("vector-or-nil-p"),
2243 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2245 switch (CHARSET_DIMENSION (cs))
2248 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2250 Lisp_Object c = XVECTOR_DATA(table)[i];
2255 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2259 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2261 Lisp_Object v = XVECTOR_DATA(table)[i];
2267 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2269 CHARSET_DECODING_TABLE(cs) = old_table;
2270 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2272 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2274 Lisp_Object c = XVECTOR_DATA(v)[j];
2277 put_char_attribute (c, charset,
2280 (i + CHARSET_BYTE_OFFSET (cs)),
2282 (j + CHARSET_BYTE_OFFSET (cs))));
2286 put_char_attribute (v, charset,
2288 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2297 /************************************************************************/
2298 /* Lisp primitives for working with characters */
2299 /************************************************************************/
2301 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2302 Make a character from CHARSET and octets ARG1 and ARG2.
2303 ARG2 is required only for characters from two-dimensional charsets.
2304 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2305 character s with caron.
2307 (charset, arg1, arg2))
2309 struct Lisp_Charset *cs;
2311 int lowlim, highlim;
2313 charset = Fget_charset (charset);
2314 cs = XCHARSET (charset);
2316 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2317 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2319 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2321 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2322 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2325 /* It is useful (and safe, according to Olivier Galibert) to strip
2326 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2327 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2328 Latin 2 code of the character. */
2336 if (a1 < lowlim || a1 > highlim)
2337 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2339 if (CHARSET_DIMENSION (cs) == 1)
2343 ("Charset is of dimension one; second octet must be nil", arg2);
2344 return make_char (MAKE_CHAR (charset, a1, 0));
2353 a2 = XINT (arg2) & 0x7f;
2355 if (a2 < lowlim || a2 > highlim)
2356 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2358 return make_char (MAKE_CHAR (charset, a1, a2));
2361 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2362 Return the character set of char CH.
2366 CHECK_CHAR_COERCE_INT (ch);
2368 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2371 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2372 Return the octet numbered N (should be 0 or 1) of char CH.
2373 N defaults to 0 if omitted.
2377 Lisp_Object charset;
2380 CHECK_CHAR_COERCE_INT (ch);
2382 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2384 if (NILP (n) || EQ (n, Qzero))
2385 return make_int (octet0);
2386 else if (EQ (n, make_int (1)))
2387 return make_int (octet1);
2389 signal_simple_error ("Octet number must be 0 or 1", n);
2392 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2393 Return list of charset and one or two position-codes of CHAR.
2397 /* This function can GC */
2398 struct gcpro gcpro1, gcpro2;
2399 Lisp_Object charset = Qnil;
2400 Lisp_Object rc = Qnil;
2403 GCPRO2 (charset, rc);
2404 CHECK_CHAR_COERCE_INT (character);
2406 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2408 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2410 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2414 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2422 #ifdef ENABLE_COMPOSITE_CHARS
2423 /************************************************************************/
2424 /* composite character functions */
2425 /************************************************************************/
2428 lookup_composite_char (Bufbyte *str, int len)
2430 Lisp_Object lispstr = make_string (str, len);
2431 Lisp_Object ch = Fgethash (lispstr,
2432 Vcomposite_char_string2char_hash_table,
2438 if (composite_char_row_next >= 128)
2439 signal_simple_error ("No more composite chars available", lispstr);
2440 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2441 composite_char_col_next);
2442 Fputhash (make_char (emch), lispstr,
2443 Vcomposite_char_char2string_hash_table);
2444 Fputhash (lispstr, make_char (emch),
2445 Vcomposite_char_string2char_hash_table);
2446 composite_char_col_next++;
2447 if (composite_char_col_next >= 128)
2449 composite_char_col_next = 32;
2450 composite_char_row_next++;
2459 composite_char_string (Emchar ch)
2461 Lisp_Object str = Fgethash (make_char (ch),
2462 Vcomposite_char_char2string_hash_table,
2464 assert (!UNBOUNDP (str));
2468 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2469 Convert a string into a single composite character.
2470 The character is the result of overstriking all the characters in
2475 CHECK_STRING (string);
2476 return make_char (lookup_composite_char (XSTRING_DATA (string),
2477 XSTRING_LENGTH (string)));
2480 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2481 Return a string of the characters comprising a composite character.
2489 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2490 signal_simple_error ("Must be composite char", ch);
2491 return composite_char_string (emch);
2493 #endif /* ENABLE_COMPOSITE_CHARS */
2496 /************************************************************************/
2497 /* initialization */
2498 /************************************************************************/
2501 syms_of_mule_charset (void)
2503 DEFSUBR (Fcharsetp);
2504 DEFSUBR (Ffind_charset);
2505 DEFSUBR (Fget_charset);
2506 DEFSUBR (Fcharset_list);
2507 DEFSUBR (Fcharset_name);
2508 DEFSUBR (Fmake_charset);
2509 DEFSUBR (Fmake_reverse_direction_charset);
2510 /* DEFSUBR (Freverse_direction_charset); */
2511 DEFSUBR (Fdefine_charset_alias);
2512 DEFSUBR (Fcharset_from_attributes);
2513 DEFSUBR (Fcharset_short_name);
2514 DEFSUBR (Fcharset_long_name);
2515 DEFSUBR (Fcharset_description);
2516 DEFSUBR (Fcharset_dimension);
2517 DEFSUBR (Fcharset_property);
2518 DEFSUBR (Fcharset_id);
2519 DEFSUBR (Fset_charset_ccl_program);
2520 DEFSUBR (Fset_charset_registry);
2522 DEFSUBR (Fchar_attribute_alist);
2523 DEFSUBR (Fget_char_attribute);
2524 DEFSUBR (Fput_char_attribute);
2525 DEFSUBR (Fdefine_char);
2526 DEFSUBR (Fchar_variants);
2527 DEFSUBR (Fget_composite_char);
2528 DEFSUBR (Fcharset_mapping_table);
2529 DEFSUBR (Fset_charset_mapping_table);
2532 DEFSUBR (Fmake_char);
2533 DEFSUBR (Fchar_charset);
2534 DEFSUBR (Fchar_octet);
2535 DEFSUBR (Fsplit_char);
2537 #ifdef ENABLE_COMPOSITE_CHARS
2538 DEFSUBR (Fmake_composite_char);
2539 DEFSUBR (Fcomposite_char_string);
2542 defsymbol (&Qcharsetp, "charsetp");
2543 defsymbol (&Qregistry, "registry");
2544 defsymbol (&Qfinal, "final");
2545 defsymbol (&Qgraphic, "graphic");
2546 defsymbol (&Qdirection, "direction");
2547 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2548 defsymbol (&Qshort_name, "short-name");
2549 defsymbol (&Qlong_name, "long-name");
2551 defsymbol (&Ql2r, "l2r");
2552 defsymbol (&Qr2l, "r2l");
2554 /* Charsets, compatible with FSF 20.3
2555 Naming convention is Script-Charset[-Edition] */
2556 defsymbol (&Qascii, "ascii");
2557 defsymbol (&Qcontrol_1, "control-1");
2558 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2559 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2560 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2561 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2562 defsymbol (&Qthai_tis620, "thai-tis620");
2563 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2564 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2565 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2566 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2567 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2568 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2569 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2570 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2571 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2572 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2573 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2574 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2575 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2576 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2577 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2579 defsymbol (&Q_ucs, "->ucs");
2580 defsymbol (&Q_decomposition, "->decomposition");
2581 defsymbol (&Qcompat, "compat");
2582 defsymbol (&Qisolated, "isolated");
2583 defsymbol (&Qinitial, "initial");
2584 defsymbol (&Qmedial, "medial");
2585 defsymbol (&Qfinal, "final");
2586 defsymbol (&Qvertical, "vertical");
2587 defsymbol (&QnoBreak, "noBreak");
2588 defsymbol (&Qfraction, "fraction");
2589 defsymbol (&Qsuper, "super");
2590 defsymbol (&Qsub, "sub");
2591 defsymbol (&Qcircle, "circle");
2592 defsymbol (&Qsquare, "square");
2593 defsymbol (&Qwide, "wide");
2594 defsymbol (&Qnarrow, "narrow");
2595 defsymbol (&Qsmall, "small");
2596 defsymbol (&Qfont, "font");
2597 defsymbol (&Qucs, "ucs");
2598 defsymbol (&Qucs_bmp, "ucs-bmp");
2599 defsymbol (&Qlatin_viscii, "latin-viscii");
2600 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2601 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2602 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2603 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2604 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2605 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2606 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2607 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2608 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2609 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2610 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2611 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2612 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2613 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2614 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2615 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2616 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2617 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2618 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2619 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2620 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2621 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2622 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2623 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2624 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2625 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2626 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2628 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2629 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2631 defsymbol (&Qcomposite, "composite");
2635 vars_of_mule_charset (void)
2642 chlook = xnew (struct charset_lookup);
2643 dumpstruct (&chlook, &charset_lookup_description);
2645 /* Table of charsets indexed by leading byte. */
2646 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2647 chlook->charset_by_leading_byte[i] = Qnil;
2650 /* Table of charsets indexed by type/final-byte. */
2651 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2652 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2653 chlook->charset_by_attributes[i][j] = Qnil;
2655 /* Table of charsets indexed by type/final-byte/direction. */
2656 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2657 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2658 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2659 chlook->charset_by_attributes[i][j][k] = Qnil;
2663 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2665 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2666 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2670 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2671 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2672 Leading-code of private TYPE9N charset of column-width 1.
2674 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2678 Vutf_2000_version = build_string("0.13 (Takaida)");
2679 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2680 Version number of UTF-2000.
2683 staticpro (&Vcharacter_attribute_table);
2684 Vcharacter_attribute_table = make_char_code_table (Qnil);
2686 staticpro (&Vcharacter_composition_table);
2687 Vcharacter_composition_table = make_char_code_table (Qnil);
2689 staticpro (&Vcharacter_variant_table);
2690 Vcharacter_variant_table = make_char_code_table (Qnil);
2692 Vdefault_coded_charset_priority_list = Qnil;
2693 DEFVAR_LISP ("default-coded-charset-priority-list",
2694 &Vdefault_coded_charset_priority_list /*
2695 Default order of preferred coded-character-sets.
2701 complex_vars_of_mule_charset (void)
2703 staticpro (&Vcharset_hash_table);
2704 Vcharset_hash_table =
2705 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2707 /* Predefined character sets. We store them into variables for
2711 staticpro (&Vcharset_ucs_bmp);
2713 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2714 CHARSET_TYPE_256X256, 1, 2, 0,
2715 CHARSET_LEFT_TO_RIGHT,
2716 build_string ("BMP"),
2717 build_string ("BMP"),
2718 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2719 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2720 Qnil, 0, 0xFFFF, 0, 0);
2722 # define MIN_CHAR_THAI 0
2723 # define MAX_CHAR_THAI 0
2724 # define MIN_CHAR_GREEK 0
2725 # define MAX_CHAR_GREEK 0
2726 # define MIN_CHAR_HEBREW 0
2727 # define MAX_CHAR_HEBREW 0
2728 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2729 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2730 # define MIN_CHAR_CYRILLIC 0
2731 # define MAX_CHAR_CYRILLIC 0
2733 staticpro (&Vcharset_ascii);
2735 make_charset (LEADING_BYTE_ASCII, Qascii,
2736 CHARSET_TYPE_94, 1, 0, 'B',
2737 CHARSET_LEFT_TO_RIGHT,
2738 build_string ("ASCII"),
2739 build_string ("ASCII)"),
2740 build_string ("ASCII (ISO646 IRV)"),
2741 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2742 Qnil, 0, 0x7F, 0, 0);
2743 staticpro (&Vcharset_control_1);
2744 Vcharset_control_1 =
2745 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2746 CHARSET_TYPE_94, 1, 1, 0,
2747 CHARSET_LEFT_TO_RIGHT,
2748 build_string ("C1"),
2749 build_string ("Control characters"),
2750 build_string ("Control characters 128-191"),
2752 Qnil, 0x80, 0x9F, 0, 0);
2753 staticpro (&Vcharset_latin_iso8859_1);
2754 Vcharset_latin_iso8859_1 =
2755 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2756 CHARSET_TYPE_96, 1, 1, 'A',
2757 CHARSET_LEFT_TO_RIGHT,
2758 build_string ("Latin-1"),
2759 build_string ("ISO8859-1 (Latin-1)"),
2760 build_string ("ISO8859-1 (Latin-1)"),
2761 build_string ("iso8859-1"),
2762 Qnil, 0xA0, 0xFF, 0, 32);
2763 staticpro (&Vcharset_latin_iso8859_2);
2764 Vcharset_latin_iso8859_2 =
2765 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2766 CHARSET_TYPE_96, 1, 1, 'B',
2767 CHARSET_LEFT_TO_RIGHT,
2768 build_string ("Latin-2"),
2769 build_string ("ISO8859-2 (Latin-2)"),
2770 build_string ("ISO8859-2 (Latin-2)"),
2771 build_string ("iso8859-2"),
2773 staticpro (&Vcharset_latin_iso8859_3);
2774 Vcharset_latin_iso8859_3 =
2775 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2776 CHARSET_TYPE_96, 1, 1, 'C',
2777 CHARSET_LEFT_TO_RIGHT,
2778 build_string ("Latin-3"),
2779 build_string ("ISO8859-3 (Latin-3)"),
2780 build_string ("ISO8859-3 (Latin-3)"),
2781 build_string ("iso8859-3"),
2783 staticpro (&Vcharset_latin_iso8859_4);
2784 Vcharset_latin_iso8859_4 =
2785 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2786 CHARSET_TYPE_96, 1, 1, 'D',
2787 CHARSET_LEFT_TO_RIGHT,
2788 build_string ("Latin-4"),
2789 build_string ("ISO8859-4 (Latin-4)"),
2790 build_string ("ISO8859-4 (Latin-4)"),
2791 build_string ("iso8859-4"),
2793 staticpro (&Vcharset_thai_tis620);
2794 Vcharset_thai_tis620 =
2795 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2796 CHARSET_TYPE_96, 1, 1, 'T',
2797 CHARSET_LEFT_TO_RIGHT,
2798 build_string ("TIS620"),
2799 build_string ("TIS620 (Thai)"),
2800 build_string ("TIS620.2529 (Thai)"),
2801 build_string ("tis620"),
2802 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2803 staticpro (&Vcharset_greek_iso8859_7);
2804 Vcharset_greek_iso8859_7 =
2805 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2806 CHARSET_TYPE_96, 1, 1, 'F',
2807 CHARSET_LEFT_TO_RIGHT,
2808 build_string ("ISO8859-7"),
2809 build_string ("ISO8859-7 (Greek)"),
2810 build_string ("ISO8859-7 (Greek)"),
2811 build_string ("iso8859-7"),
2812 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2813 staticpro (&Vcharset_arabic_iso8859_6);
2814 Vcharset_arabic_iso8859_6 =
2815 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2816 CHARSET_TYPE_96, 1, 1, 'G',
2817 CHARSET_RIGHT_TO_LEFT,
2818 build_string ("ISO8859-6"),
2819 build_string ("ISO8859-6 (Arabic)"),
2820 build_string ("ISO8859-6 (Arabic)"),
2821 build_string ("iso8859-6"),
2823 staticpro (&Vcharset_hebrew_iso8859_8);
2824 Vcharset_hebrew_iso8859_8 =
2825 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2826 CHARSET_TYPE_96, 1, 1, 'H',
2827 CHARSET_RIGHT_TO_LEFT,
2828 build_string ("ISO8859-8"),
2829 build_string ("ISO8859-8 (Hebrew)"),
2830 build_string ("ISO8859-8 (Hebrew)"),
2831 build_string ("iso8859-8"),
2832 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2833 staticpro (&Vcharset_katakana_jisx0201);
2834 Vcharset_katakana_jisx0201 =
2835 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2836 CHARSET_TYPE_94, 1, 1, 'I',
2837 CHARSET_LEFT_TO_RIGHT,
2838 build_string ("JISX0201 Kana"),
2839 build_string ("JISX0201.1976 (Japanese Kana)"),
2840 build_string ("JISX0201.1976 Japanese Kana"),
2841 build_string ("jisx0201\\.1976"),
2843 staticpro (&Vcharset_latin_jisx0201);
2844 Vcharset_latin_jisx0201 =
2845 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2846 CHARSET_TYPE_94, 1, 0, 'J',
2847 CHARSET_LEFT_TO_RIGHT,
2848 build_string ("JISX0201 Roman"),
2849 build_string ("JISX0201.1976 (Japanese Roman)"),
2850 build_string ("JISX0201.1976 Japanese Roman"),
2851 build_string ("jisx0201\\.1976"),
2853 staticpro (&Vcharset_cyrillic_iso8859_5);
2854 Vcharset_cyrillic_iso8859_5 =
2855 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2856 CHARSET_TYPE_96, 1, 1, 'L',
2857 CHARSET_LEFT_TO_RIGHT,
2858 build_string ("ISO8859-5"),
2859 build_string ("ISO8859-5 (Cyrillic)"),
2860 build_string ("ISO8859-5 (Cyrillic)"),
2861 build_string ("iso8859-5"),
2862 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2863 staticpro (&Vcharset_latin_iso8859_9);
2864 Vcharset_latin_iso8859_9 =
2865 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2866 CHARSET_TYPE_96, 1, 1, 'M',
2867 CHARSET_LEFT_TO_RIGHT,
2868 build_string ("Latin-5"),
2869 build_string ("ISO8859-9 (Latin-5)"),
2870 build_string ("ISO8859-9 (Latin-5)"),
2871 build_string ("iso8859-9"),
2873 staticpro (&Vcharset_japanese_jisx0208_1978);
2874 Vcharset_japanese_jisx0208_1978 =
2875 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2876 CHARSET_TYPE_94X94, 2, 0, '@',
2877 CHARSET_LEFT_TO_RIGHT,
2878 build_string ("JIS X0208:1978"),
2879 build_string ("JIS X0208:1978 (Japanese)"),
2881 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2882 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2884 staticpro (&Vcharset_chinese_gb2312);
2885 Vcharset_chinese_gb2312 =
2886 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2887 CHARSET_TYPE_94X94, 2, 0, 'A',
2888 CHARSET_LEFT_TO_RIGHT,
2889 build_string ("GB2312"),
2890 build_string ("GB2312)"),
2891 build_string ("GB2312 Chinese simplified"),
2892 build_string ("gb2312"),
2894 staticpro (&Vcharset_japanese_jisx0208);
2895 Vcharset_japanese_jisx0208 =
2896 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2897 CHARSET_TYPE_94X94, 2, 0, 'B',
2898 CHARSET_LEFT_TO_RIGHT,
2899 build_string ("JISX0208"),
2900 build_string ("JIS X0208:1983 (Japanese)"),
2901 build_string ("JIS X0208:1983 Japanese Kanji"),
2902 build_string ("jisx0208\\.1983"),
2905 staticpro (&Vcharset_japanese_jisx0208_1990);
2906 Vcharset_japanese_jisx0208_1990 =
2907 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2908 Qjapanese_jisx0208_1990,
2909 CHARSET_TYPE_94X94, 2, 0, 0,
2910 CHARSET_LEFT_TO_RIGHT,
2911 build_string ("JISX0208-1990"),
2912 build_string ("JIS X0208:1990 (Japanese)"),
2913 build_string ("JIS X0208:1990 Japanese Kanji"),
2914 build_string ("jisx0208\\.1990"),
2916 MIN_CHAR_JIS_X0208_1990,
2917 MAX_CHAR_JIS_X0208_1990, 0, 33);
2919 staticpro (&Vcharset_korean_ksc5601);
2920 Vcharset_korean_ksc5601 =
2921 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2922 CHARSET_TYPE_94X94, 2, 0, 'C',
2923 CHARSET_LEFT_TO_RIGHT,
2924 build_string ("KSC5601"),
2925 build_string ("KSC5601 (Korean"),
2926 build_string ("KSC5601 Korean Hangul and Hanja"),
2927 build_string ("ksc5601"),
2929 staticpro (&Vcharset_japanese_jisx0212);
2930 Vcharset_japanese_jisx0212 =
2931 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2932 CHARSET_TYPE_94X94, 2, 0, 'D',
2933 CHARSET_LEFT_TO_RIGHT,
2934 build_string ("JISX0212"),
2935 build_string ("JISX0212 (Japanese)"),
2936 build_string ("JISX0212 Japanese Supplement"),
2937 build_string ("jisx0212"),
2940 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2941 staticpro (&Vcharset_chinese_cns11643_1);
2942 Vcharset_chinese_cns11643_1 =
2943 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2944 CHARSET_TYPE_94X94, 2, 0, 'G',
2945 CHARSET_LEFT_TO_RIGHT,
2946 build_string ("CNS11643-1"),
2947 build_string ("CNS11643-1 (Chinese traditional)"),
2949 ("CNS 11643 Plane 1 Chinese traditional"),
2950 build_string (CHINESE_CNS_PLANE_RE("1")),
2952 staticpro (&Vcharset_chinese_cns11643_2);
2953 Vcharset_chinese_cns11643_2 =
2954 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2955 CHARSET_TYPE_94X94, 2, 0, 'H',
2956 CHARSET_LEFT_TO_RIGHT,
2957 build_string ("CNS11643-2"),
2958 build_string ("CNS11643-2 (Chinese traditional)"),
2960 ("CNS 11643 Plane 2 Chinese traditional"),
2961 build_string (CHINESE_CNS_PLANE_RE("2")),
2964 staticpro (&Vcharset_latin_viscii_lower);
2965 Vcharset_latin_viscii_lower =
2966 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2967 CHARSET_TYPE_96, 1, 1, '1',
2968 CHARSET_LEFT_TO_RIGHT,
2969 build_string ("VISCII lower"),
2970 build_string ("VISCII lower (Vietnamese)"),
2971 build_string ("VISCII lower (Vietnamese)"),
2972 build_string ("MULEVISCII-LOWER"),
2974 staticpro (&Vcharset_latin_viscii_upper);
2975 Vcharset_latin_viscii_upper =
2976 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2977 CHARSET_TYPE_96, 1, 1, '2',
2978 CHARSET_LEFT_TO_RIGHT,
2979 build_string ("VISCII upper"),
2980 build_string ("VISCII upper (Vietnamese)"),
2981 build_string ("VISCII upper (Vietnamese)"),
2982 build_string ("MULEVISCII-UPPER"),
2984 staticpro (&Vcharset_latin_viscii);
2985 Vcharset_latin_viscii =
2986 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2987 CHARSET_TYPE_256, 1, 2, 0,
2988 CHARSET_LEFT_TO_RIGHT,
2989 build_string ("VISCII"),
2990 build_string ("VISCII 1.1 (Vietnamese)"),
2991 build_string ("VISCII 1.1 (Vietnamese)"),
2992 build_string ("VISCII1\\.1"),
2994 staticpro (&Vcharset_ideograph_daikanwa);
2995 Vcharset_ideograph_daikanwa =
2996 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2997 CHARSET_TYPE_256X256, 2, 2, 0,
2998 CHARSET_LEFT_TO_RIGHT,
2999 build_string ("Daikanwa"),
3000 build_string ("Morohashi's Daikanwa"),
3001 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3002 build_string ("Daikanwa"),
3003 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3004 staticpro (&Vcharset_mojikyo_pj_1);
3005 Vcharset_mojikyo_pj_1 =
3006 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
3007 CHARSET_TYPE_94X94, 2, 0, 0,
3008 CHARSET_LEFT_TO_RIGHT,
3009 build_string ("Mojikyo-PJ-1"),
3010 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3012 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3013 build_string ("jisx0208\\.Mojikyo-1$"),
3015 staticpro (&Vcharset_mojikyo_pj_2);
3016 Vcharset_mojikyo_pj_2 =
3017 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
3018 CHARSET_TYPE_94X94, 2, 0, 0,
3019 CHARSET_LEFT_TO_RIGHT,
3020 build_string ("Mojikyo-PJ-2"),
3021 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3023 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3024 build_string ("jisx0208\\.Mojikyo-2$"),
3026 staticpro (&Vcharset_mojikyo_pj_3);
3027 Vcharset_mojikyo_pj_3 =
3028 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
3029 CHARSET_TYPE_94X94, 2, 0, 0,
3030 CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Mojikyo-PJ-3"),
3032 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3034 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3035 build_string ("jisx0208\\.Mojikyo-3$"),
3037 staticpro (&Vcharset_mojikyo_pj_4);
3038 Vcharset_mojikyo_pj_4 =
3039 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
3040 CHARSET_TYPE_94X94, 2, 0, 0,
3041 CHARSET_LEFT_TO_RIGHT,
3042 build_string ("Mojikyo-PJ-4"),
3043 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3045 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3046 build_string ("jisx0208\\.Mojikyo-4$"),
3048 staticpro (&Vcharset_mojikyo_pj_5);
3049 Vcharset_mojikyo_pj_5 =
3050 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
3051 CHARSET_TYPE_94X94, 2, 0, 0,
3052 CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Mojikyo-PJ-5"),
3054 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3056 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3057 build_string ("jisx0208\\.Mojikyo-5$"),
3059 staticpro (&Vcharset_mojikyo_pj_6);
3060 Vcharset_mojikyo_pj_6 =
3061 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
3062 CHARSET_TYPE_94X94, 2, 0, 0,
3063 CHARSET_LEFT_TO_RIGHT,
3064 build_string ("Mojikyo-PJ-6"),
3065 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3067 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3068 build_string ("jisx0208\\.Mojikyo-6$"),
3070 staticpro (&Vcharset_mojikyo_pj_7);
3071 Vcharset_mojikyo_pj_7 =
3072 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3073 CHARSET_TYPE_94X94, 2, 0, 0,
3074 CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Mojikyo-PJ-7"),
3076 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3078 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3079 build_string ("jisx0208\\.Mojikyo-7$"),
3081 staticpro (&Vcharset_mojikyo_pj_8);
3082 Vcharset_mojikyo_pj_8 =
3083 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3084 CHARSET_TYPE_94X94, 2, 0, 0,
3085 CHARSET_LEFT_TO_RIGHT,
3086 build_string ("Mojikyo-PJ-8"),
3087 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3089 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3090 build_string ("jisx0208\\.Mojikyo-8$"),
3092 staticpro (&Vcharset_mojikyo_pj_9);
3093 Vcharset_mojikyo_pj_9 =
3094 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3095 CHARSET_TYPE_94X94, 2, 0, 0,
3096 CHARSET_LEFT_TO_RIGHT,
3097 build_string ("Mojikyo-PJ-9"),
3098 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3100 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3101 build_string ("jisx0208\\.Mojikyo-9$"),
3103 staticpro (&Vcharset_mojikyo_pj_10);
3104 Vcharset_mojikyo_pj_10 =
3105 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3106 CHARSET_TYPE_94X94, 2, 0, 0,
3107 CHARSET_LEFT_TO_RIGHT,
3108 build_string ("Mojikyo-PJ-10"),
3109 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3111 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3112 build_string ("jisx0208\\.Mojikyo-10$"),
3114 staticpro (&Vcharset_mojikyo_pj_11);
3115 Vcharset_mojikyo_pj_11 =
3116 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3117 CHARSET_TYPE_94X94, 2, 0, 0,
3118 CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Mojikyo-PJ-11"),
3120 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3122 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3123 build_string ("jisx0208\\.Mojikyo-11$"),
3125 staticpro (&Vcharset_mojikyo_pj_12);
3126 Vcharset_mojikyo_pj_12 =
3127 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3128 CHARSET_TYPE_94X94, 2, 0, 0,
3129 CHARSET_LEFT_TO_RIGHT,
3130 build_string ("Mojikyo-PJ-12"),
3131 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3133 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3134 build_string ("jisx0208\\.Mojikyo-12$"),
3136 staticpro (&Vcharset_mojikyo_pj_13);
3137 Vcharset_mojikyo_pj_13 =
3138 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3139 CHARSET_TYPE_94X94, 2, 0, 0,
3140 CHARSET_LEFT_TO_RIGHT,
3141 build_string ("Mojikyo-PJ-13"),
3142 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3144 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3145 build_string ("jisx0208\\.Mojikyo-13$"),
3147 staticpro (&Vcharset_mojikyo_pj_14);
3148 Vcharset_mojikyo_pj_14 =
3149 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3150 CHARSET_TYPE_94X94, 2, 0, 0,
3151 CHARSET_LEFT_TO_RIGHT,
3152 build_string ("Mojikyo-PJ-14"),
3153 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3155 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3156 build_string ("jisx0208\\.Mojikyo-14$"),
3158 staticpro (&Vcharset_mojikyo_pj_15);
3159 Vcharset_mojikyo_pj_15 =
3160 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3161 CHARSET_TYPE_94X94, 2, 0, 0,
3162 CHARSET_LEFT_TO_RIGHT,
3163 build_string ("Mojikyo-PJ-15"),
3164 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3166 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3167 build_string ("jisx0208\\.Mojikyo-15$"),
3169 staticpro (&Vcharset_mojikyo_pj_16);
3170 Vcharset_mojikyo_pj_16 =
3171 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3172 CHARSET_TYPE_94X94, 2, 0, 0,
3173 CHARSET_LEFT_TO_RIGHT,
3174 build_string ("Mojikyo-PJ-16"),
3175 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3177 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3178 build_string ("jisx0208\\.Mojikyo-16$"),
3180 staticpro (&Vcharset_mojikyo_pj_17);
3181 Vcharset_mojikyo_pj_17 =
3182 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3183 CHARSET_TYPE_94X94, 2, 0, 0,
3184 CHARSET_LEFT_TO_RIGHT,
3185 build_string ("Mojikyo-PJ-17"),
3186 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3188 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3189 build_string ("jisx0208\\.Mojikyo-17$"),
3191 staticpro (&Vcharset_mojikyo_pj_18);
3192 Vcharset_mojikyo_pj_18 =
3193 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3194 CHARSET_TYPE_94X94, 2, 0, 0,
3195 CHARSET_LEFT_TO_RIGHT,
3196 build_string ("Mojikyo-PJ-18"),
3197 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3199 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3200 build_string ("jisx0208\\.Mojikyo-18$"),
3202 staticpro (&Vcharset_mojikyo_pj_19);
3203 Vcharset_mojikyo_pj_19 =
3204 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3205 CHARSET_TYPE_94X94, 2, 0, 0,
3206 CHARSET_LEFT_TO_RIGHT,
3207 build_string ("Mojikyo-PJ-19"),
3208 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3210 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3211 build_string ("jisx0208\\.Mojikyo-19$"),
3213 staticpro (&Vcharset_mojikyo_pj_20);
3214 Vcharset_mojikyo_pj_20 =
3215 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3216 CHARSET_TYPE_94X94, 2, 0, 0,
3217 CHARSET_LEFT_TO_RIGHT,
3218 build_string ("Mojikyo-PJ-20"),
3219 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3221 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3222 build_string ("jisx0208\\.Mojikyo-20$"),
3224 staticpro (&Vcharset_mojikyo_pj_21);
3225 Vcharset_mojikyo_pj_21 =
3226 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3227 CHARSET_TYPE_94X94, 2, 0, 0,
3228 CHARSET_LEFT_TO_RIGHT,
3229 build_string ("Mojikyo-PJ-21"),
3230 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3232 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3233 build_string ("jisx0208\\.Mojikyo-21$"),
3235 staticpro (&Vcharset_ethiopic_ucs);
3236 Vcharset_ethiopic_ucs =
3237 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3238 CHARSET_TYPE_256X256, 2, 2, 0,
3239 CHARSET_LEFT_TO_RIGHT,
3240 build_string ("Ethiopic (UCS)"),
3241 build_string ("Ethiopic (UCS)"),
3242 build_string ("Ethiopic of UCS"),
3243 build_string ("Ethiopic-Unicode"),
3244 Qnil, 0x1200, 0x137F, 0x1200, 0);
3246 staticpro (&Vcharset_chinese_big5_1);
3247 Vcharset_chinese_big5_1 =
3248 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3249 CHARSET_TYPE_94X94, 2, 0, '0',
3250 CHARSET_LEFT_TO_RIGHT,
3251 build_string ("Big5"),
3252 build_string ("Big5 (Level-1)"),
3254 ("Big5 Level-1 Chinese traditional"),
3255 build_string ("big5"),
3257 staticpro (&Vcharset_chinese_big5_2);
3258 Vcharset_chinese_big5_2 =
3259 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3260 CHARSET_TYPE_94X94, 2, 0, '1',
3261 CHARSET_LEFT_TO_RIGHT,
3262 build_string ("Big5"),
3263 build_string ("Big5 (Level-2)"),
3265 ("Big5 Level-2 Chinese traditional"),
3266 build_string ("big5"),
3269 #ifdef ENABLE_COMPOSITE_CHARS
3270 /* #### For simplicity, we put composite chars into a 96x96 charset.
3271 This is going to lead to problems because you can run out of
3272 room, esp. as we don't yet recycle numbers. */
3273 staticpro (&Vcharset_composite);
3274 Vcharset_composite =
3275 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3276 CHARSET_TYPE_96X96, 2, 0, 0,
3277 CHARSET_LEFT_TO_RIGHT,
3278 build_string ("Composite"),
3279 build_string ("Composite characters"),
3280 build_string ("Composite characters"),
3283 /* #### not dumped properly */
3284 composite_char_row_next = 32;
3285 composite_char_col_next = 32;
3287 Vcomposite_char_string2char_hash_table =
3288 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3289 Vcomposite_char_char2string_hash_table =
3290 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3291 staticpro (&Vcomposite_char_string2char_hash_table);
3292 staticpro (&Vcomposite_char_char2string_hash_table);
3293 #endif /* ENABLE_COMPOSITE_CHARS */