1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
/* Global Lisp_Object handles, one per built-in charset.  Only the
   declarations appear here; nothing in this stretch of the file assigns
   them. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_japanese_jisx0208_1990;
57 Lisp_Object Vcharset_korean_ksc5601;
58 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2;
/* The charsets from here down to Vcharset_ethiopic_ucs are not part of
   the classic Mule set above.  NOTE(review): presumably they are guarded
   by a preprocessor conditional that this listing does not show --
   confirm against the full file. */
62 Lisp_Object Vcharset_ucs_bmp;
63 Lisp_Object Vcharset_latin_viscii;
64 Lisp_Object Vcharset_latin_viscii_lower;
65 Lisp_Object Vcharset_latin_viscii_upper;
66 Lisp_Object Vcharset_ideograph_daikanwa;
67 Lisp_Object Vcharset_mojikyo_pj_1;
68 Lisp_Object Vcharset_mojikyo_pj_2;
69 Lisp_Object Vcharset_mojikyo_pj_3;
70 Lisp_Object Vcharset_mojikyo_pj_4;
71 Lisp_Object Vcharset_mojikyo_pj_5;
72 Lisp_Object Vcharset_mojikyo_pj_6;
73 Lisp_Object Vcharset_mojikyo_pj_7;
74 Lisp_Object Vcharset_mojikyo_pj_8;
75 Lisp_Object Vcharset_mojikyo_pj_9;
76 Lisp_Object Vcharset_mojikyo_pj_10;
77 Lisp_Object Vcharset_mojikyo_pj_11;
78 Lisp_Object Vcharset_mojikyo_pj_12;
79 Lisp_Object Vcharset_mojikyo_pj_13;
80 Lisp_Object Vcharset_mojikyo_pj_14;
81 Lisp_Object Vcharset_mojikyo_pj_15;
82 Lisp_Object Vcharset_mojikyo_pj_16;
83 Lisp_Object Vcharset_mojikyo_pj_17;
84 Lisp_Object Vcharset_mojikyo_pj_18;
85 Lisp_Object Vcharset_mojikyo_pj_19;
86 Lisp_Object Vcharset_mojikyo_pj_20;
87 Lisp_Object Vcharset_mojikyo_pj_21;
88 Lisp_Object Vcharset_ethiopic_ucs;
90 Lisp_Object Vcharset_chinese_big5_1;
91 Lisp_Object Vcharset_chinese_big5_2;
93 #ifdef ENABLE_COMPOSITE_CHARS
94 Lisp_Object Vcharset_composite;
96 /* Hash tables for composite chars. One maps strings representing
97 composed chars to their equivalent chars; one goes the
98 other way. */
99 Lisp_Object Vcomposite_char_char2string_hash_table;
100 Lisp_Object Vcomposite_char_string2char_hash_table;
102 static int composite_char_row_next;
103 static int composite_char_col_next;
105 #endif /* ENABLE_COMPOSITE_CHARS */
/* Pointer to the charset lookup structure.  make_charset, further down,
   stores new charsets into its charset_by_leading_byte and
   charset_by_attributes members. */
107 struct charset_lookup *chlook;
/* Field description for struct charset_lookup.  The one entry visible
   here names charset_by_leading_byte as a Lisp object field; the rest of
   the initializer is not visible in this listing. */
109 static const struct lrecord_description charset_lookup_description_1[] = {
110 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
/* Pairs the size of struct charset_lookup with the field description
   above. */
119 static const struct struct_description charset_lookup_description = {
120 sizeof(struct charset_lookup),
121 charset_lookup_description_1
125 /* Table of number of bytes in the string representation of a character
126 indexed by the first byte of that representation.
128 rep_bytes_by_first_byte(c) is more efficient than the equivalent
129 canonical computation:
131 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
133 const Bytecount rep_bytes_by_first_byte[0xA0] =
134 { /* 0x00 - 0x7f are for straight ASCII */
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow this heading.  Together with the
   eight ASCII rows and the final 0x90 - 0x9f row that makes 176
   initializers for a 160-element (0xA0) array.  Presumably the two rows
   are alternatives selected by a preprocessor conditional that this
   listing does not show -- confirm against the full file.  The first row
   differs from the second only in its last entry (3 instead of 2). */
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
149 /* 0x90 - 0x9d are for Dimension-2 official charsets */
150 /* 0x9e is for Dimension-1 private charsets */
151 /* 0x9f is for Dimension-2 private charsets */
152 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method registered for the char-byte-table lrecord (see the
   DEFINE_LRECORD_IMPLEMENTATION below): passes each of the 256 property
   slots of OBJ to mark_object.  The return statement is not visible in
   this listing. */
159 mark_char_byte_table (Lisp_Object obj)
161 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
164 for (i = 0; i < 256; i++)
166 mark_object (cte->property[i]);
/* Equality method for char-byte-table objects.  Walks the 256 slots of
   OBJ1 and OBJ2 in parallel.  A slot that holds a char-byte-table in both
   objects is compared by recursing with DEPTH + 1; the internal_equal
   call presumably covers slots where the first table holds an ordinary
   value (the else lines and the return statements are not visible in
   this listing -- confirm). */
172 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
174 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
175 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
178 for (i = 0; i < 256; i++)
179 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
181 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
183 if (!char_byte_table_equal (cte1->property[i],
184 cte2->property[i], depth + 1))
191 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-table objects: hashes the 256-element
   property array of OBJ with internal_array_hash, passing DEPTH through
   unchanged. */
197 char_byte_table_hash (Lisp_Object obj, int depth)
199 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
201 return internal_array_hash (cte->property, 256, depth);
/* Field description for struct Lisp_Char_Byte_Table: 256 Lisp objects
   starting at the property member. */
204 static const struct lrecord_description char_byte_table_description[] = {
205 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Registers the char-byte-table lrecord type with the mark, equal and
   hash methods defined above and the generic internal_object_printer. */
209 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
210 mark_char_byte_table,
211 internal_object_printer,
212 0, char_byte_table_equal,
213 char_byte_table_hash,
214 char_byte_table_description,
215 struct Lisp_Char_Byte_Table);
/* Allocates a new char-byte-table whose 256 slots all hold INITVAL and
   wraps it in a Lisp_Object via XSETCHAR_BYTE_TABLE.  The declaration of
   obj and the return statement are not visible in this listing;
   presumably obj is what gets returned. */
218 make_char_byte_table (Lisp_Object initval)
222 struct Lisp_Char_Byte_Table *cte =
223 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
224 &lrecord_char_byte_table);
226 for (i = 0; i < 256; i++)
227 cte->property[i] = initval;
229 XSETCHAR_BYTE_TABLE (obj, cte);
/* Returns a copy of the char-byte-table ENTRY.  Slots that themselves
   hold a char-byte-table are copied recursively; the assignment of the
   slot value as is presumably sits in an else branch whose keyword line
   is not visible here, in which case all other values are shared between
   the original and the copy. */
234 copy_char_byte_table (Lisp_Object entry)
236 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
239 struct Lisp_Char_Byte_Table *ctenew =
240 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
241 &lrecord_char_byte_table);
243 for (i = 0; i < 256; i++)
245 Lisp_Object new = cte->property[i];
246 if (CHAR_BYTE_TABLE_P (new))
247 ctenew->property[i] = copy_char_byte_table (new);
249 ctenew->property[i] = new;
252 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark method registered for the char-code-table lrecord.  Only the
   unwrapping of OBJ is visible in this listing; presumably the missing
   line hands cte->table to the marker -- confirm against the full
   file. */
258 mark_char_code_table (Lisp_Object obj)
260 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality method for char-code-table objects: two char-code-tables are
   equal when their underlying char-byte-tables are, so the comparison is
   delegated to char_byte_table_equal with DEPTH + 1. */
266 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
268 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
269 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
271 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash method for char-code-table objects.  A char-code-table only wraps
   a char-byte-table (make_char_code_table fills cte->table through
   make_char_byte_table), so the hash is delegated to
   char_byte_table_hash with DEPTH + 1, in the same way that
   char_code_table_equal delegates to char_byte_table_equal.
   The earlier code called char_code_table_hash on cte->table, that is,
   it called itself on an object of the wrong type and never
   terminated. */
275 char_code_table_hash (Lisp_Object obj, int depth)
277 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
279 return char_byte_table_hash (cte->table, depth + 1);
/* Field description for struct Lisp_Char_Code_Table: a single Lisp
   object, the table member. */
282 static const struct lrecord_description char_code_table_description[] = {
283 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
/* Registers the char-code-table lrecord type with the mark, equal and
   hash methods defined above and the generic internal_object_printer. */
287 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
288 mark_char_code_table,
289 internal_object_printer,
290 0, char_code_table_equal,
291 char_code_table_hash,
292 char_code_table_description,
293 struct Lisp_Char_Code_Table);
/* Allocates a new char-code-table whose underlying char-byte-table has
   every slot set to INITVAL, and wraps it in a Lisp_Object via
   XSETCHAR_CODE_TABLE.  The return statement is not visible in this
   listing; presumably obj is returned. */
296 make_char_code_table (Lisp_Object initval)
299 struct Lisp_Char_Code_Table *cte =
300 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
301 &lrecord_char_code_table);
303 cte->table = make_char_byte_table (initval);
305 XSETCHAR_CODE_TABLE (obj, cte);
/* Returns a copy of the char-code-table ENTRY.  The underlying
   char-byte-table is duplicated with copy_char_byte_table, so nested
   sub-tables are copied as well.  The return statement is not visible in
   this listing. */
310 copy_char_code_table (Lisp_Object entry)
312 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
314 struct Lisp_Char_Code_Table *ctenew =
315 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
316 &lrecord_char_code_table);
318 ctenew->table = copy_char_byte_table (cte->table);
319 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Looks up the value stored for character CH in the char-code-table
   TABLE.  The table is a tree of up to four levels of 256-slot
   char-byte-tables, indexed by the four bytes of the character code from
   the most significant byte down.  At each level the code descends when
   the slot holds another char-byte-table.  Presumably a slot holding any
   other value is returned at once as the value for the whole range (the
   else lines are not visible in this listing -- confirm). */
325 get_char_code_table (Emchar ch, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
/* Level 1: bits 24-31 of the code. */
330 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
333 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 2: bits 16-23. */
337 ret = cpt->property [(unsigned char) (code >> 16)];
338 if (CHAR_BYTE_TABLE_P (ret))
339 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 3: bits 8-15. */
343 ret = cpt->property [(unsigned char) (code >> 8)];
344 if (CHAR_BYTE_TABLE_P (ret))
345 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 4: the low byte selects the final slot. */
349 return cpt->property [(unsigned char) code];
/* Stores VALUE for character CH in the char-code-table TABLE.
   The code walks the same four-level tree as get_char_code_table.  When
   it reaches a slot that is not a sub-table and is not already EQ to
   VALUE, it creates the missing lower levels.  Each new sub-table is
   initialized with the old slot value, so every other character covered
   by that slot keeps its previous value, and VALUE is written into the
   leaf slot for CH.  When the slot is already EQ to VALUE no branch is
   taken in the visible code, so nothing changes. */
353 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
355 unsigned int code = ch;
356 struct Lisp_Char_Byte_Table* cpt1
357 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
358 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
360 if (CHAR_BYTE_TABLE_P (ret))
362 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
364 ret = cpt2->property[(unsigned char)(code >> 16)];
365 if (CHAR_BYTE_TABLE_P (ret))
367 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
369 ret = cpt3->property[(unsigned char)(code >> 8)];
370 if (CHAR_BYTE_TABLE_P (ret))
372 struct Lisp_Char_Byte_Table* cpt4
373 = XCHAR_BYTE_TABLE (ret);
/* All four levels already exist: overwrite the leaf slot. */
375 cpt4->property[(unsigned char)code] = value;
377 else if (!EQ (ret, value))
/* Third level holds a plain value: split it into a new leaf table. */
379 Lisp_Object cpt4 = make_char_byte_table (ret);
381 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
382 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
385 else if (!EQ (ret, value))
/* Second level holds a plain value: create levels three and four.
   The right-hand side of the cpt3 assignment below is on a line that is
   not visible in this listing (presumably cpt4). */
387 Lisp_Object cpt3 = make_char_byte_table (ret);
388 Lisp_Object cpt4 = make_char_byte_table (ret);
390 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
391 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
393 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
396 else if (!EQ (ret, value))
/* Top level holds a plain value: create levels two, three and four. */
398 Lisp_Object cpt2 = make_char_byte_table (ret);
399 Lisp_Object cpt3 = make_char_byte_table (ret);
400 Lisp_Object cpt4 = make_char_byte_table (ret);
402 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
403 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
404 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
405 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
/* Per-character tables.  The primitives below read and write them through
   get_char_code_table and put_char_code_table: the attribute table holds
   an alist per character, the composition table is walked by
   get-composite-char, and the variant table holds a list of characters
   per code. */
410 Lisp_Object Vcharacter_attribute_table;
411 Lisp_Object Vcharacter_composition_table;
412 Lisp_Object Vcharacter_variant_table;
/* Symbols tested with EQ in put-char-attribute (Q_decomposition) and in
   to_char_code (the rest).  Several sibling declarations that
   to_char_code also uses, such as Qcompat and Qmedial, are not visible
   in this listing. */
414 Lisp_Object Q_decomposition;
417 Lisp_Object Qisolated;
418 Lisp_Object Qinitial;
421 Lisp_Object Qvertical;
422 Lisp_Object QnoBreak;
423 Lisp_Object Qfraction;
/* Converts V into an Emchar code.  Callers use the result as a key into
   the composition table (see get-composite-char and the ->decomposition
   branch of put-char-attribute).
   The visible chain tests V with EQ against the tag symbols Qcompat
   through Qfont.  The first test of the chain and the code produced for
   each tag are on lines not visible in this listing.  When nothing
   matches, signal_simple_error is called with ERR_MSG and ERR_ARG. */
434 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
440 else if (EQ (v, Qcompat))
442 else if (EQ (v, Qisolated))
444 else if (EQ (v, Qinitial))
446 else if (EQ (v, Qmedial))
448 else if (EQ (v, Qfinal))
450 else if (EQ (v, Qvertical))
452 else if (EQ (v, QnoBreak))
454 else if (EQ (v, Qfraction))
456 else if (EQ (v, Qsuper))
458 else if (EQ (v, Qsub))
460 else if (EQ (v, Qcircle))
462 else if (EQ (v, Qsquare))
464 else if (EQ (v, Qwide))
466 else if (EQ (v, Qnarrow))
468 else if (EQ (v, Qsmall))
470 else if (EQ (v, Qfont))
473 signal_simple_error (err_msg, err_arg);
/* Lisp primitive get-composite-char (one argument, LIST).
   Starting from Vcharacter_composition_table, each element of LIST is
   converted with to_char_code and looked up with get_char_code_table in
   the current table.  The visible tests distinguish whether the result
   is itself a char-code-table and whether more list elements remain; the
   statements attached to them (descending, returning) are not visible in
   this listing.  Errors are reported through signal_simple_error.
   NOTE(review): the closing delimiter of the doc string and the
   parameter list line are missing from this listing. */
476 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
477 Return character corresponding with list.
481 Lisp_Object table = Vcharacter_composition_table;
482 Lisp_Object rest = list;
486 Lisp_Object v = Fcar (rest);
488 Emchar c = to_char_code (v, "Invalid value for composition", list);
490 ret = get_char_code_table (c, table);
495 if (!CHAR_CODE_TABLE_P (ret))
500 else if (!CONSP (rest))
502 else if (CHAR_CODE_TABLE_P (ret))
505 signal_simple_error ("Invalid table is found with", list);
507 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive char-variants (one argument, CHARACTER).
   Checks that CHARACTER is a character, then returns a fresh copy
   (Fcopy_list) of the list stored for it in Vcharacter_variant_table, so
   callers cannot modify the stored list structure. */
510 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
511 Return variants of CHARACTER.
515 CHECK_CHAR (character);
516 return Fcopy_list (get_char_code_table (XCHAR (character),
517 Vcharacter_variant_table));
/* Lisp primitive char-attribute-alist (one argument, CHARACTER).
   Checks that CHARACTER is a character, then returns a copy (Fcopy_alist)
   of the alist stored for it in Vcharacter_attribute_table. */
520 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
521 Return the alist of attributes of CHARACTER.
525 CHECK_CHAR (character);
526 return Fcopy_alist (get_char_code_table (XCHAR (character),
527 Vcharacter_attribute_table));
/* Lisp primitive get-char-attribute (CHARACTER, ATTRIBUTE).
   Fetches the attribute alist of CHARACTER from
   Vcharacter_attribute_table and returns the cdr of the entry whose key
   is ATTRIBUTE (Fassq, so keys are compared with eq).
   ATTRIBUTE is first passed to Ffind_charset; the statement executed
   when that succeeds is not visible in this listing (presumably
   ATTRIBUTE is replaced by the charset object -- confirm). */
530 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
531 Return the value of CHARACTER's ATTRIBUTE.
533 (character, attribute))
538 CHECK_CHAR (character);
539 ret = get_char_code_table (XCHAR (character),
540 Vcharacter_attribute_table);
544 if (!NILP (ccs = Ffind_charset (attribute)))
547 return Fcdr (Fassq (attribute, ret));
/* Sets ATTRIBUTE to VALUE in the attribute alist of CHARACTER.
   The alist is fetched from Vcharacter_attribute_table and searched with
   Fassq.  A new (ATTRIBUTE . VALUE) pair is consed onto the front,
   presumably when no entry exists (the condition line is not visible
   here); an existing entry whose cdr is not EQ to VALUE is updated in
   place with Fsetcdr.  The alist is written back with
   put_char_code_table.  The remaining parameters and the return
   statement are not visible in this listing. */
551 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
554 Emchar char_code = XCHAR (character);
556 = get_char_code_table (char_code, Vcharacter_attribute_table);
559 cell = Fassq (attribute, ret);
563 ret = Fcons (Fcons (attribute, value), ret);
565 else if (!EQ (Fcdr (cell), value))
567 Fsetcdr (cell, value);
569 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Lisp primitive put-char-attribute (CHARACTER, ATTRIBUTE, VALUE).
   Three kinds of ATTRIBUTE get extra processing before the generic
   put_char_attribute call at the end:
   - a charset (Ffind_charset succeeds): VALUE is a list of code point
     bytes, and the decoding table of the charset is updated so that the
     code point maps back to CHARACTER;
   - Q_decomposition: CHARACTER is registered in
     Vcharacter_composition_table under the sequence given by VALUE;
   - Q_ucs: CHARACTER is added to the variant list of the given code.
   Many condition and declaration lines of this function are not visible
   in this listing. */
573 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
574 Store CHARACTER's ATTRIBUTE with VALUE.
576 (character, attribute, value))
580 CHECK_CHAR (character);
581 ccs = Ffind_charset (attribute);
585 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
590 /* ad-hoc method for `ascii' */
591 if ((XCHARSET_CHARS (ccs) == 94) &&
592 (XCHARSET_BYTE_OFFSET (ccs) != 33))
593 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
595 ccs_len = XCHARSET_CHARS (ccs);
598 signal_simple_error ("Invalid value for coded-charset",
/* Look up the code point previously stored for CHARACTER under this
   charset and clear its slot in the decoding table. */
602 rest = Fget_char_attribute (character, attribute);
609 Lisp_Object ei = Fcar (rest);
611 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
612 nv = XVECTOR_DATA(v)[i];
619 XVECTOR_DATA(v)[i] = Qnil;
620 v = XCHARSET_DECODING_TABLE (ccs);
/* Create the top-level decoding vector, presumably only when the charset
   has none yet (the condition line is not visible here). */
625 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
/* For charsets whose graphic value is 1, VALUE is copied first because
   its elements are rewritten with Fsetcar below. */
628 if (XCHARSET_GRAPHIC (ccs) == 1)
629 value = Fcopy_list (value);
634 Lisp_Object ei = Fcar (rest);
637 signal_simple_error ("Invalid value for coded-charset", value);
639 if ((i < 0) || (255 < i))
640 signal_simple_error ("Invalid value for coded-charset", value);
641 if (XCHARSET_GRAPHIC (ccs) == 1)
644 Fsetcar (rest, make_int (i));
/* Convert the byte into an index into the decoding vector. */
646 i -= XCHARSET_BYTE_OFFSET (ccs);
647 nv = XVECTOR_DATA(v)[i];
/* Allocate a nested vector for the next byte of the code point. */
653 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
/* Final byte: the slot now maps the code point back to CHARACTER. */
660 XVECTOR_DATA(v)[i] = character;
662 else if (EQ (attribute, Q_decomposition))
664 Lisp_Object rest = value;
665 Lisp_Object table = Vcharacter_composition_table;
668 signal_simple_error ("Invalid value for ->decomposition",
673 Lisp_Object v = Fcar (rest);
676 = to_char_code (v, "Invalid value for ->decomposition", value);
/* Presumably executed for the last element of the sequence: store
   CHARACTER as the composition result. */
681 put_char_code_table (c, character, table);
/* Otherwise fetch the nested table for this element, creating an empty
   one if the slot does not hold a char-code-table yet. */
686 ntable = get_char_code_table (c, table);
687 if (!CHAR_CODE_TABLE_P (ntable))
689 ntable = make_char_code_table (Qnil);
690 put_char_code_table (c, ntable, table);
696 else if (EQ (attribute, Q_ucs))
702 signal_simple_error ("Invalid value for ->ucs", value);
/* Add CHARACTER to the variant list of code c unless Fmemq finds it
   there already. */
706 ret = get_char_code_table (c, Vcharacter_variant_table);
707 if (NILP (Fmemq (character, ret)))
709 put_char_code_table (c, Fcons (character, ret),
710 Vcharacter_variant_table);
713 return put_char_attribute (character, attribute, value);
/* Lisp primitive define-char (one argument, ATTRIBUTES, an alist).
   First a character object is determined:
   - from the Qucs entry of ATTRIBUTES when that is an integer;
   - otherwise, it appears, from the first entry whose key names a charset
     (via Fmake_char);
   - otherwise from the Q_ucs entry, offset by 0x100000.
   Then, from the setup_attributes label on, every (KEY . VALUE) pair of
   ATTRIBUTES is stored with Fput_char_attribute.  The last visible line
   reads the resulting attribute alist back; presumably that is the return
   value.  The exact order of the tests cannot be confirmed because many
   lines, including the doc string terminator, are missing from this
   listing.  Fmake_char is declared with EXFUN because it is defined later
   in the file than this use. */
718 EXFUN (Fmake_char, 3);
720 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
721 Store character's ATTRIBUTES.
725 Lisp_Object rest = attributes;
726 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
727 Lisp_Object character;
733 Lisp_Object cell = Fcar (rest);
737 signal_simple_error ("Invalid argument", attributes);
738 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
741 character = Fmake_char (ccs, Fcar (cell),
743 goto setup_attributes;
747 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
750 signal_simple_error ("Invalid argument", attributes);
752 character = make_char (XINT (code) + 0x100000);
753 goto setup_attributes;
757 else if (!INTP (code))
758 signal_simple_error ("Invalid argument", attributes);
760 character = make_char (XINT (code));
766 Lisp_Object cell = Fcar (rest);
769 signal_simple_error ("Invalid argument", attributes);
770 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
774 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Miscellaneous globals and symbols used by the charset code.  Several
   declaration lines are missing from this listing; the four bare
   identifiers ending in commas below are fragments of a longer
   declaration list whose beginning and end are not visible. */
777 Lisp_Object Vutf_2000_version;
781 int leading_code_private_11;
784 Lisp_Object Qcharsetp;
786 /* Qdoc_string, Qdimension, Qchars defined in general.c */
787 Lisp_Object Qregistry, Qfinal, Qgraphic;
788 Lisp_Object Qdirection;
789 Lisp_Object Qreverse_direction_charset;
790 Lisp_Object Qleading_byte;
791 Lisp_Object Qshort_name, Qlong_name;
807 Qjapanese_jisx0208_1978,
810 Qjapanese_jisx0208_1990,
820 Qvietnamese_viscii_lower,
821 Qvietnamese_viscii_upper,
850 Lisp_Object Ql2r, Qr2l;
/* Hash table in which make_charset registers each charset under its
   name (via Fputhash). */
852 Lisp_Object Vcharset_hash_table;
/* Next leading bytes to hand out; read and incremented by
   get_unallocated_leading_byte.  NOTE(review): the single counter and
   the per-dimension pair are presumably alternatives under a
   preprocessor conditional that this listing does not show. */
855 static Charset_ID next_allocated_leading_byte;
857 static Charset_ID next_allocated_1_byte_leading_byte;
858 static Charset_ID next_allocated_2_byte_leading_byte;
861 /* Composite characters are characters constructed by overstriking two
862 or more regular characters.
864 1) The old Mule implementation involves storing composite characters
865 in a buffer as a tag followed by all of the actual characters
866 used to make up the composite character. I think this is a bad
867 idea; it greatly complicates code that wants to handle strings
868 one character at a time because it has to deal with the possibility
869 of great big ungainly characters. It's much more reasonable to
870 simply store an index into a table of composite characters.
872 2) The current implementation only allows for 16,384 separate
873 composite characters over the lifetime of the XEmacs process.
874 This could become a potential problem if the user
875 edited lots of different files that use composite characters.
876 Due to FSF bogosity, increasing the number of allowable
877 composite characters under Mule would decrease the number
878 of possible faces that can exist. Mule already has shrunk
879 this to 2048, and further shrinkage would become uncomfortable.
880 No such problems exist in XEmacs.
882 Composite characters could be represented as 0x80 C1 C2 C3,
883 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
884 for slightly under 2^20 (one million) composite characters
885 over the XEmacs process lifetime, and you only need to
886 increase the size of a Mule character from 19 to 21 bits.
887 Or you could use 0x80 C1 C2 C3 C4, allowing for about
888 85 million (slightly over 2^26) composite characters. */
891 /************************************************************************/
892 /* Basic Emchar functions */
893 /************************************************************************/
895 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
896 string in STR. Returns the number of bytes stored.
897 Do not call this directly. Use the macro set_charptr_emchar() instead. */
901 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Multi-byte encoding in the UTF-8 layout: the lead byte carries a
   length marker (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc) plus the top bits of C,
   and every following byte carries six more bits ORed with 0x80.  The
   first branch of this chain is not visible in this listing. */
/* Up to 0x7ff: two bytes. */
916 else if ( c <= 0x7ff )
918 *p++ = (c >> 6) | 0xc0;
919 *p++ = (c & 0x3f) | 0x80;
/* Up to 0xffff: three bytes. */
921 else if ( c <= 0xffff )
923 *p++ = (c >> 12) | 0xe0;
924 *p++ = ((c >> 6) & 0x3f) | 0x80;
925 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x1fffff: four bytes. */
927 else if ( c <= 0x1fffff )
929 *p++ = (c >> 18) | 0xf0;
930 *p++ = ((c >> 12) & 0x3f) | 0x80;
931 *p++ = ((c >> 6) & 0x3f) | 0x80;
932 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x3ffffff: five bytes. */
934 else if ( c <= 0x3ffffff )
936 *p++ = (c >> 24) | 0xf8;
937 *p++ = ((c >> 18) & 0x3f) | 0x80;
938 *p++ = ((c >> 12) & 0x3f) | 0x80;
939 *p++ = ((c >> 6) & 0x3f) | 0x80;
940 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: six bytes. */
944 *p++ = (c >> 30) | 0xfc;
945 *p++ = ((c >> 24) & 0x3f) | 0x80;
946 *p++ = ((c >> 18) & 0x3f) | 0x80;
947 *p++ = ((c >> 12) & 0x3f) | 0x80;
948 *p++ = ((c >> 6) & 0x3f) | 0x80;
949 *p++ = (c & 0x3f) | 0x80;
/* NOTE(review): from here on the code splits C into charset and
   position codes and emits a leading byte, with an extra prefix byte for
   private leading bytes.  This looks like a second, alternative
   implementation, presumably selected by a preprocessor conditional that
   this listing does not show -- confirm against the full file. */
952 BREAKUP_CHAR (c, charset, c1, c2);
953 lb = CHAR_LEADING_BYTE (c);
954 if (LEADING_BYTE_PRIVATE_P (lb))
955 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
957 if (EQ (charset, Vcharset_control_1))
966 /* Return the first character from a Mule-encoded string in STR,
967 assuming it's non-ASCII. Do not call this directly.
968 Use the macro charptr_emchar() instead. */
971 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* The lead byte b is classified by its high bits (0xf8, 0xf0, 0xe0, 0xc0
   and presumably 0xfc on a line not visible here), which appears to fix
   the number of continuation bytes in len; the assignments themselves
   are not visible in this listing. */
984 else if ( b >= 0xf8 )
989 else if ( b >= 0xf0 )
994 else if ( b >= 0xe0 )
999 else if ( b >= 0xc0 )
/* Each remaining byte contributes its low six bits to the result. */
1009 for( ; len > 0; len-- )
1012 ch = ( ch << 6 ) | ( b & 0x3f );
/* NOTE(review): the code below decodes the leading-byte representation
   instead (charset looked up from the leading byte, then one or two
   position bytes).  Presumably it is the alternative branch of a
   preprocessor conditional that this listing does not show. */
1016 Bufbyte i0 = *str, i1, i2 = 0;
1017 Lisp_Object charset;
/* Control-1 characters: the byte after the leading byte minus 0x20. */
1019 if (i0 == LEADING_BYTE_CONTROL_1)
1020 return (Emchar) (*++str - 0x20);
1022 if (LEADING_BYTE_PREFIX_P (i0))
1027 charset = CHARSET_BY_LEADING_BYTE (i0);
1028 if (XCHARSET_DIMENSION (charset) == 2)
1031 return MAKE_CHAR (charset, i1, i2);
1035 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1036 Do not call this directly. Use the macro valid_char_p() instead. */
1040 non_ascii_valid_char_p (Emchar ch)
1044 /* Must have only lowest 19 bits set */
/* Split CH into its three fields.  Judging by the checks below, f1 is
   tested for two-dimensional charsets and f2 for one-dimensional ones;
   the condition separating the two cases is not visible here. */
1048 f1 = CHAR_FIELD1 (ch);
1049 f2 = CHAR_FIELD2 (ch);
1050 f3 = CHAR_FIELD3 (ch);
1054 Lisp_Object charset;
/* One-dimensional case: f2 must lie in the official or the private
   range. */
1056 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1057 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1058 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position codes other than 0x20 and 0x7F in a non-private charset need
   no further check; the result statement is not visible here. */
1063 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1064 f2 <= MAX_CHAR_FIELD2_PRIVATE))
/* NOTE(review): the next three lines are comment text whose opening and
   closing delimiters are missing from this listing. */
1068 NOTE: This takes advantage of the fact that
1069 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1070 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
/* 0x20 and 0x7F are only acceptable in charsets with 96 characters. */
1072 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1073 if (EQ (charset, Qnil))
1075 return (XCHARSET_CHARS (charset) == 96);
1079 Lisp_Object charset;
/* Two-dimensional case: same range check, applied to f1. */
1081 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1082 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1083 f1 > MAX_CHAR_FIELD1_PRIVATE)
1085 if (f2 < 0x20 || f3 < 0x20)
1088 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the char-to-string hash
   table. */
1089 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1091 if (UNBOUNDP (Fgethash (make_int (ch),
1092 Vcomposite_char_char2string_hash_table,
1097 #endif /* ENABLE_COMPOSITE_CHARS */
1099 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1100 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Here the official and private offsets are applied separately. */
1103 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1105 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1108 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1110 if (EQ (charset, Qnil))
1112 return (XCHARSET_CHARS (charset) == 96);
1118 /************************************************************************/
1119 /* Basic string functions */
1120 /************************************************************************/
1122 /* Copy the character pointed to by PTR into STR, assuming it's
1123 non-ASCII. Do not call this directly. Use the macro
1124 charptr_copy_char() instead. */
/* Returns the number of bytes written to STR (strptr + 1 - str).
   NOTE(review): the switch reads *strptr, a byte of the destination.
   Presumably a statement that first stores the leading byte from PTR
   into *strptr sits on a line not visible in this listing -- confirm
   against the full file.  The break and default lines of the switch are
   not visible either. */
1127 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1129 Bufbyte *strptr = str;
1131 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1133 /* Notice fallthrough. */
/* A character of N bytes enters at case N and copies the N - 1 bytes
   that follow the leading byte. */
1135 case 6: *++strptr = *ptr++;
1136 case 5: *++strptr = *ptr++;
1138 case 4: *++strptr = *ptr++;
1139 case 3: *++strptr = *ptr++;
1140 case 2: *++strptr = *ptr;
1145 return strptr + 1 - str;
1149 /************************************************************************/
1150 /* streams of Emchars */
1151 /************************************************************************/
1153 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1154 The functions below are not meant to be called directly; use
1155 the macros in insdel.h. */
/* Completes the read of a multi-byte character from STREAM.
   CH is the first byte, already read by the caller.  It is stored in a
   local buffer, and the remaining bytes (how many is derived from
   REP_BYTES_BY_FIRST_BYTE (CH)) are fetched one at a time with
   Lstream_getc and appended.  The assembled bytes are then decoded with
   charptr_emchar.  The case labels and any handling of a failed
   Lstream_getc are on lines not visible in this listing. */
1158 Lstream_get_emchar_1 (Lstream *stream, int ch)
1160 Bufbyte str[MAX_EMCHAR_LEN];
1161 Bufbyte *strptr = str;
1163 str[0] = (Bufbyte) ch;
1164 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1166 /* Notice fallthrough. */
1169 ch = Lstream_getc (stream);
1171 *++strptr = (Bufbyte) ch;
1173 ch = Lstream_getc (stream);
1175 *++strptr = (Bufbyte) ch;
1178 ch = Lstream_getc (stream);
1180 *++strptr = (Bufbyte) ch;
1182 ch = Lstream_getc (stream);
1184 *++strptr = (Bufbyte) ch;
1186 ch = Lstream_getc (stream);
1188 *++strptr = (Bufbyte) ch;
1193 return charptr_emchar (str);
/* Writes the character CH to STREAM: encodes it into a local buffer with
   set_charptr_emchar and passes the resulting bytes to Lstream_write,
   whose result is returned. */
1197 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1199 Bufbyte str[MAX_EMCHAR_LEN];
1200 Bytecount len = set_charptr_emchar (str, ch);
1201 return Lstream_write (stream, str, len);
/* Pushes the character CH back onto STREAM: encodes it into a local
   buffer with set_charptr_emchar and hands the bytes to
   Lstream_unread. */
1205 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1207 Bufbyte str[MAX_EMCHAR_LEN];
1208 Bytecount len = set_charptr_emchar (str, ch);
1209 Lstream_unread (stream, str, len);
1213 /************************************************************************/
1214 /* charset object */
1215 /************************************************************************/
/* Mark method registered for the charset lrecord: passes the Lisp object
   members of the charset (short name, long name, doc string, registry,
   CCL program, decoding table) to mark_object.  The return statement,
   which presumably hands back one more member for marking, is not
   visible in this listing. */
1218 mark_charset (Lisp_Object obj)
1220 struct Lisp_Charset *cs = XCHARSET (obj);
1222 mark_object (cs->short_name);
1223 mark_object (cs->long_name);
1224 mark_object (cs->doc_string);
1225 mark_object (cs->registry);
1226 mark_object (cs->ccl_program);
1228 mark_object (cs->decoding_table);
/* Print method registered for the charset lrecord.
   Writes the name, short name, long name and doc string of the charset
   to PRINTCHARFUN, followed by a formatted summary (type, direction,
   columns, graphic, final byte), the registry and the uid of the
   object.  The error call at the top is presumably guarded by a
   readable-printing check on a line not visible in this listing; the
   declaration of buf and the last alternative of the type expression
   are not visible either. */
1234 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1236 struct Lisp_Charset *cs = XCHARSET (obj);
1240 error ("printing unreadable object #<charset %s 0x%x>",
1241 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1244 write_c_string ("#<charset ", printcharfun);
1245 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1246 write_c_string (" ", printcharfun);
1247 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1248 write_c_string (" ", printcharfun);
1249 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1250 write_c_string (" ", printcharfun);
1251 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1252 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1253 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1254 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1255 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1257 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1258 CHARSET_COLUMNS (cs),
1259 CHARSET_GRAPHIC (cs),
1260 CHARSET_FINAL (cs));
1261 write_c_string (buf, printcharfun);
1262 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1263 sprintf (buf, " 0x%x>", cs->header.uid);
1264 write_c_string (buf, printcharfun);
/* Field description for struct Lisp_Charset: seven Lisp objects starting
   at the name member and two starting at decoding_table. */
1267 static const struct lrecord_description charset_description[] = {
1268 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1270 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Registers the charset lrecord type with mark_charset and print_charset;
   the three 0 arguments leave the remaining method slots empty. */
1275 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1276 mark_charset, print_charset, 0, 0, 0,
1277 charset_description,
1278 struct Lisp_Charset);
1279 /* Make a new charset. */
/* Allocates a charset object, fills in its fields from the arguments,
   derives dimension, character count and representation length from
   TYPE, and registers the object in the lookup tables of chlook and in
   Vcharset_hash_table.  The decoding_table parameter is accepted, but
   the visible code sets the decoding table of the new charset to Qnil.
   One parameter line (the one declaring reg) and the return statement
   are not visible in this listing. */
1282 make_charset (Charset_ID id, Lisp_Object name,
1283 unsigned char type, unsigned char columns, unsigned char graphic,
1284 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1285 Lisp_Object long_name, Lisp_Object doc,
1287 Lisp_Object decoding_table,
1288 Emchar ucs_min, Emchar ucs_max,
1289 Emchar code_offset, unsigned char byte_offset)
1292 struct Lisp_Charset *cs =
1293 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1294 XSETCHARSET (obj, cs);
1296 CHARSET_ID (cs) = id;
1297 CHARSET_NAME (cs) = name;
1298 CHARSET_SHORT_NAME (cs) = short_name;
1299 CHARSET_LONG_NAME (cs) = long_name;
1300 CHARSET_DIRECTION (cs) = direction;
1301 CHARSET_TYPE (cs) = type;
1302 CHARSET_COLUMNS (cs) = columns;
1303 CHARSET_GRAPHIC (cs) = graphic;
1304 CHARSET_FINAL (cs) = final;
1305 CHARSET_DOC_STRING (cs) = doc;
1306 CHARSET_REGISTRY (cs) = reg;
1307 CHARSET_CCL_PROGRAM (cs) = Qnil;
1308 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1310 CHARSET_DECODING_TABLE(cs) = Qnil;
1311 CHARSET_UCS_MIN(cs) = ucs_min;
1312 CHARSET_UCS_MAX(cs) = ucs_max;
1313 CHARSET_CODE_OFFSET(cs) = code_offset;
1314 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension (1 or 2) and characters per dimension (94, 96, 128
   or 256) from the charset type.  The break statements are not visible
   in this listing. */
1317 switch (CHARSET_TYPE (cs))
1319 case CHARSET_TYPE_94:
1320 CHARSET_DIMENSION (cs) = 1;
1321 CHARSET_CHARS (cs) = 94;
1323 case CHARSET_TYPE_96:
1324 CHARSET_DIMENSION (cs) = 1;
1325 CHARSET_CHARS (cs) = 96;
1327 case CHARSET_TYPE_94X94:
1328 CHARSET_DIMENSION (cs) = 2;
1329 CHARSET_CHARS (cs) = 94;
1331 case CHARSET_TYPE_96X96:
1332 CHARSET_DIMENSION (cs) = 2;
1333 CHARSET_CHARS (cs) = 96;
1336 case CHARSET_TYPE_128:
1337 CHARSET_DIMENSION (cs) = 1;
1338 CHARSET_CHARS (cs) = 128;
1340 case CHARSET_TYPE_128X128:
1341 CHARSET_DIMENSION (cs) = 2;
1342 CHARSET_CHARS (cs) = 128;
1344 case CHARSET_TYPE_256:
1345 CHARSET_DIMENSION (cs) = 1;
1346 CHARSET_CHARS (cs) = 256;
1348 case CHARSET_TYPE_256X256:
1349 CHARSET_DIMENSION (cs) = 2;
1350 CHARSET_CHARS (cs) = 256;
/* Bytes per character in the internal representation: 1 for ASCII,
   otherwise the dimension plus 1 or plus 2.  The conditions choosing
   between the last two are not visible in this listing. */
1356 if (id == LEADING_BYTE_ASCII)
1357 CHARSET_REP_BYTES (cs) = 1;
1359 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1361 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
/* NOTE(review): the comment that starts on the next line has lost its
   last line and closing delimiter in this listing.  The statements after
   it register the charset in the by-attributes and by-leading-byte
   tables of chlook, asserting that each slot was empty.  The two
   differently shaped by-attributes accesses are presumably alternatives
   under a preprocessor conditional that is not shown. */
1366 /* some charsets do not have final characters. This includes
1367 ASCII, Control-1, Composite, and the two faux private
1370 if (code_offset == 0)
1372 assert (NILP (chlook->charset_by_attributes[type][final]));
1373 chlook->charset_by_attributes[type][final] = obj;
1376 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1377 chlook->charset_by_attributes[type][final][direction] = obj;
1381 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1382 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1384 /* Some charsets are "faux" and don't have names or really exist at
1385 all except in the leading-byte table. */
1387 Fputhash (name, obj, Vcharset_hash_table);
/* Hands out the next free private leading byte for a charset of the
   given DIMENSION.
   Each visible counter is compared with its maximum before being
   post-incremented into lb; the statement executed when the maximum has
   been passed is not visible in this listing.  The trailing arguments
   belong to an error call (function name not visible) that reports
   DIMENSION when no leading byte is left.  The single counter and the
   per-dimension counters are presumably alternatives under a
   preprocessor conditional that this listing does not show. */
1392 get_unallocated_leading_byte (int dimension)
1397 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1400 lb = next_allocated_leading_byte++;
1404 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1407 lb = next_allocated_1_byte_leading_byte++;
1411 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1414 lb = next_allocated_2_byte_leading_byte++;
1420 ("No more character sets free for this dimension",
1421 make_int (dimension));
/* Computes the code point of character CH in CHARSET arithmetically and
   returns it as a list of integers, one per dimension.
   Two cases are visible:
   - CH lies inside the UCS range of the charset (ucs_min to ucs_max);
   - the charset has code offset 0, in which case CH is placed in the
     fixed MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or MIN_CHAR_96x96
     blocks, selected by the final byte of the charset.
   The upper-bound halves of the block tests and the value returned when
   nothing matches are on lines not visible in this listing. */
1428 range_charset_code_point (Lisp_Object charset, Emchar ch)
1432 if ((XCHARSET_UCS_MIN (charset) <= ch)
1433 && (ch <= XCHARSET_UCS_MAX (charset)))
/* d is the zero-based position of CH inside the charset. */
1435 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
/* Write d in base XCHARSET_CHARS, one digit per dimension, adding the
   byte offset of the charset to every digit. */
1437 if (XCHARSET_DIMENSION (charset) == 1)
1438 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1439 else if (XCHARSET_DIMENSION (charset) == 2)
1440 return list2 (make_int (d / XCHARSET_CHARS (charset)
1441 + XCHARSET_BYTE_OFFSET (charset)),
1442 make_int (d % XCHARSET_CHARS (charset)
1443 + XCHARSET_BYTE_OFFSET (charset)));
1444 else if (XCHARSET_DIMENSION (charset) == 3)
1445 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1446 * XCHARSET_CHARS (charset))
1447 + XCHARSET_BYTE_OFFSET (charset)),
1448 make_int (d / XCHARSET_CHARS (charset)
1449 % XCHARSET_CHARS (charset)
1450 + XCHARSET_BYTE_OFFSET (charset)),
1451 make_int (d % XCHARSET_CHARS (charset)
1452 + XCHARSET_BYTE_OFFSET (charset)));
1453 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1454 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1455 * XCHARSET_CHARS (charset)
1456 * XCHARSET_CHARS (charset))
1457 + XCHARSET_BYTE_OFFSET (charset)),
1458 make_int (d / (XCHARSET_CHARS (charset)
1459 * XCHARSET_CHARS (charset))
1460 % XCHARSET_CHARS (charset)
1461 + XCHARSET_BYTE_OFFSET (charset)),
1462 make_int (d / XCHARSET_CHARS (charset)
1463 % XCHARSET_CHARS (charset)
1464 + XCHARSET_BYTE_OFFSET (charset)),
1465 make_int (d % XCHARSET_CHARS (charset)
1466 + XCHARSET_BYTE_OFFSET (charset)));
1468 else if (XCHARSET_CODE_OFFSET (charset) == 0)
/* Fixed blocks: the final byte minus the character 0 selects the block
   of this charset.  Position codes start at 33 for 94-character sets
   and at 32 for 96-character sets. */
1470 if (XCHARSET_DIMENSION (charset) == 1)
1472 if (XCHARSET_CHARS (charset) == 94)
1474 if (((d = ch - (MIN_CHAR_94
1475 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1477 return list1 (make_int (d + 33));
1479 else if (XCHARSET_CHARS (charset) == 96)
1481 if (((d = ch - (MIN_CHAR_96
1482 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1484 return list1 (make_int (d + 32));
1489 else if (XCHARSET_DIMENSION (charset) == 2)
1491 if (XCHARSET_CHARS (charset) == 94)
1493 if (((d = ch - (MIN_CHAR_94x94
1494 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1497 return list2 (make_int ((d / 94) + 33),
1498 make_int (d % 94 + 33));
1500 else if (XCHARSET_CHARS (charset) == 96)
1502 if (((d = ch - (MIN_CHAR_96x96
1503 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1506 return list2 (make_int ((d / 96) + 32),
1507 make_int (d % 96 + 32));
/* Break character C into a list whose first element is a charset and
   whose remaining elements are C's position codes, using only the
   fixed built-in character ranges tested below (no per-character
   attribute lookup is made in the visible code).

   The tests run in order, so the first matching range wins.
   NOTE(review): the conditions guarding the control-1 and Latin-1
   returns, and the final fall-through, are not visible in this
   excerpt.  */
1515 split_builtin_char (Emchar c)
1517 if (c <= MAX_CHAR_BASIC_LATIN)
1519 return list2 (Vcharset_ascii, make_int (c));
1523 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1527 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
/* Script-specific ranges: position code is the offset into the range
   plus 0x20 (plus 33 for half-width katakana).  */
1529 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1531 return list2 (Vcharset_greek_iso8859_7,
1532 make_int (c - MIN_CHAR_GREEK + 0x20));
1534 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1536 return list2 (Vcharset_cyrillic_iso8859_5,
1537 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1539 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1541 return list2 (Vcharset_hebrew_iso8859_8,
1542 make_int (c - MIN_CHAR_HEBREW + 0x20));
1544 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1546 return list2 (Vcharset_thai_tis620,
1547 make_int (c - MIN_CHAR_THAI + 0x20));
1549 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1550 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1552 return list2 (Vcharset_katakana_jisx0201,
1553 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
/* Anything else up to MAX_CHAR_BMP: high byte, then low byte.  */
1555 else if (c <= MAX_CHAR_BMP)
1557 return list3 (Vcharset_ucs_bmp,
1558 make_int (c >> 8), make_int (c & 0xff));
1560 else if ((MIN_CHAR_DAIKANWA <= c) && (c <= MAX_CHAR_DAIKANWA))
1562 return list3 (Vcharset_ideograph_daikanwa,
1563 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1564 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
/* Generic 94 / 96 / 94x94 / 96x96 areas: the charset is found by
   CHARSET_BY_ATTRIBUTES from the type and a final byte computed as
   (slot index + '0'); the remainder gives the position code(s).  */
1566 else if (c <= MAX_CHAR_94)
1568 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1569 ((c - MIN_CHAR_94) / 94) + '0',
1570 CHARSET_LEFT_TO_RIGHT),
1571 make_int (((c - MIN_CHAR_94) % 94) + 33));
1573 else if (c <= MAX_CHAR_96)
1575 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1576 ((c - MIN_CHAR_96) / 96) + '0',
1577 CHARSET_LEFT_TO_RIGHT),
1578 make_int (((c - MIN_CHAR_96) % 96) + 32));
1580 else if (c <= MAX_CHAR_94x94)
1582 return list3 (CHARSET_BY_ATTRIBUTES
1583 (CHARSET_TYPE_94X94,
1584 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1585 CHARSET_LEFT_TO_RIGHT),
1586 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1587 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1589 else if (c <= MAX_CHAR_96x96)
1591 return list3 (CHARSET_BY_ATTRIBUTES
1592 (CHARSET_TYPE_96X96,
1593 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1594 CHARSET_LEFT_TO_RIGHT),
1595 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1596 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
/* Return the code point of CH in CHARSET.
   CH's entry in Vcharacter_attribute_table is consulted first: if it is
   non-nil and holds an association for CHARSET (Fassq), the cdr of that
   association is returned.  Otherwise the result is computed by
   range_charset_code_point.  */
1605 charset_code_point (Lisp_Object charset, Emchar ch)
1607 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1609 if (!EQ (cdef, Qnil))
1611 Lisp_Object field = Fassq (charset, cdef);
1613 if (!EQ (field, Qnil))
1614 return Fcdr (field);
1616 return range_charset_code_point (charset, ch);
/* Exposed to Lisp as `default-coded-charset-priority-list' by the
   DEFVAR_LISP in vars_of_mule_charset, which also sets it to Qnil.  */
1619 Lisp_Object Vdefault_coded_charset_priority_list;
1623 /************************************************************************/
1624 /* Basic charset Lisp functions */
1625 /************************************************************************/
/* Lisp predicate `charsetp': Qt when CHARSETP holds for OBJECT,
   otherwise Qnil.  */
1627 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1628 Return non-nil if OBJECT is a charset.
1632 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned as-is;
   anything else must be a symbol (CHECK_SYMBOL) and is looked up in
   Vcharset_hash_table, with Qnil as the not-found value.  */
1635 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1636 Retrieve the charset of the given name.
1637 If CHARSET-OR-NAME is a charset object, it is simply returned.
1638 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1639 nil is returned. Otherwise the associated charset object is returned.
1643 if (CHARSETP (charset_or_name))
1644 return charset_or_name;
1646 CHECK_SYMBOL (charset_or_name);
1647 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset'.  Resolves NAME through Ffind_charset
   and signals "No such charset", with NAME as the error datum, when
   there is no such charset.  */
1650 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1651 Retrieve the charset of the given name.
1652 Same as `find-charset' except an error is signalled if there is no such
1653 charset instead of returning nil.
1657 Lisp_Object charset = Ffind_charset (name);
1660 signal_simple_error ("No such charset", name);
1664 /* We store the charsets in hash tables with the names as the key and the
1665 actual charset object as the value. Occasionally we need to use them
1666 in a list format. These routines provide us with that. */
/* Closure handed to add_charset_to_list_mapper through elisp_maphash;
   charset_list points at the Lisp list being accumulated.  */
1667 struct charset_list_closure
1669 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  VALUE is a charset;
   its name is consed onto the front of the list that the closure
   points at.  KEY is not used in the visible code.  */
1673 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1674 void *charset_list_closure)
1676 /* This function can GC */
1677 struct charset_list_closure *chcl =
1678 (struct charset_list_closure*) charset_list_closure;
1679 Lisp_Object *charset_list = chcl->charset_list;
1681 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Walks Vcharset_hash_table with
   elisp_maphash, letting add_charset_to_list_mapper accumulate charset
   names into a GCPRO-protected local list, and returns that list.
   NOTE(review): Fdefine_charset_alias stores alias keys in the same
   hash table, so a charset with aliases would appear to contribute its
   name once per key -- confirm whether duplicates are intended.  */
1685 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1686 Return a list of the names of all defined charsets.
1690 Lisp_Object charset_list = Qnil;
1691 struct gcpro gcpro1;
1692 struct charset_list_closure charset_list_closure;
1694 GCPRO1 (charset_list);
1695 charset_list_closure.charset_list = &charset_list;
1696 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1697 &charset_list_closure);
1700 return charset_list;
/* Lisp primitive `charset-name'.  CHARSET is resolved with
   Fget_charset (so an unknown charset signals an error there) and its
   name slot is returned.  */
1703 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1704 Return the name of the given charset.
1708 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.  Validates NAME, DOC-STRING and each
   PROPS entry, rejects a NAME or a DIMENSION/CHARS/FINAL combination
   that is already taken, fills in defaults for the optional
   properties, and creates the charset with make_charset; a supplied
   'ccl-program is stored on the new charset afterwards.

   The property tests form a single if / else-if chain that ends in the
   "Unrecognized property" error.  The 'long-name test has to be part
   of that chain: written as a free-standing `if' it starts a second
   chain, so a 'short-name entry that was already handled falls through
   to the final error.  */
1711 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1712 Define a new character set.
1713 This function is for use with Mule support.
1714 NAME is a symbol, the name by which the character set is normally referred.
1715 DOC-STRING is a string describing the character set.
1716 PROPS is a property list, describing the specific nature of the
1717 character set. Recognized properties are:
1719 'short-name Short version of the charset name (ex: Latin-1)
1720 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1721 'registry A regular expression matching the font registry field for
1723 'dimension Number of octets used to index a character in this charset.
1724 Either 1 or 2. Defaults to 1.
1725 'columns Number of columns used to display a character in this charset.
1726 Only used in TTY mode. (Under X, the actual width of a
1727 character can be derived from the font used to display the
1728 characters.) If unspecified, defaults to the dimension
1729 (this is almost always the correct value).
1730 'chars Number of characters in each dimension (94 or 96).
1731 Defaults to 94. Note that if the dimension is 2, the
1732 character set thus described is 94x94 or 96x96.
1733 'final Final byte of ISO 2022 escape sequence. Must be
1734 supplied. Each combination of (DIMENSION, CHARS) defines a
1735 separate namespace for final bytes. Note that ISO
1736 2022 restricts the final byte to the range
1737 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1738 dimension == 2. Note also that final bytes in the range
1739 0x30 - 0x3F are reserved for user-defined (not official)
1741 'graphic 0 (use left half of font on output) or 1 (use right half
1742 of font on output). Defaults to 0. For example, for
1743 a font whose registry is ISO8859-1, the left half
1744 (octets 0x20 - 0x7F) is the `ascii' character set, while
1745 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1746 character set. With 'graphic set to 0, the octets
1747 will have their high bit cleared; with it set to 1,
1748 the octets will have their high bit set.
1749 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1751 'ccl-program A compiled CCL program used to convert a character in
1752 this charset into an index into the font. This is in
1753 addition to the 'graphic property. The CCL program
1754 is passed the octets of the character, with the high
1755 bit cleared and set depending upon whether the value
1756 of the 'graphic property is 0 or 1.
1758 (name, doc_string, props))
1760 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1761 int direction = CHARSET_LEFT_TO_RIGHT;
1763 Lisp_Object registry = Qnil;
1764 Lisp_Object charset;
1765 Lisp_Object rest, keyword, value;
1766 Lisp_Object ccl_program = Qnil;
1767 Lisp_Object short_name = Qnil, long_name = Qnil;
1768 int byte_offset = -1;
1770 CHECK_SYMBOL (name);
1771 if (!NILP (doc_string))
1772 CHECK_STRING (doc_string);
1774 charset = Ffind_charset (name);
1775 if (!NILP (charset))
1776 signal_simple_error ("Cannot redefine existing charset", name);
1778 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1780 if (EQ (keyword, Qshort_name))
1782 CHECK_STRING (value);
1786 else if (EQ (keyword, Qlong_name))
1788 CHECK_STRING (value);
1792 else if (EQ (keyword, Qdimension))
1795 dimension = XINT (value);
1796 if (dimension < 1 || dimension > 2)
1797 signal_simple_error ("Invalid value for 'dimension", value);
1800 else if (EQ (keyword, Qchars))
1803 chars = XINT (value);
1804 if (chars != 94 && chars != 96)
1805 signal_simple_error ("Invalid value for 'chars", value);
1808 else if (EQ (keyword, Qcolumns))
1811 columns = XINT (value);
1812 if (columns != 1 && columns != 2)
1813 signal_simple_error ("Invalid value for 'columns", value);
1816 else if (EQ (keyword, Qgraphic))
1819 graphic = XINT (value);
1821 if (graphic < 0 || graphic > 2)
1823 if (graphic < 0 || graphic > 1)
1825 signal_simple_error ("Invalid value for 'graphic", value);
1828 else if (EQ (keyword, Qregistry))
1830 CHECK_STRING (value);
1834 else if (EQ (keyword, Qdirection))
1836 if (EQ (value, Ql2r))
1837 direction = CHARSET_LEFT_TO_RIGHT;
1838 else if (EQ (value, Qr2l))
1839 direction = CHARSET_RIGHT_TO_LEFT;
1841 signal_simple_error ("Invalid value for 'direction", value);
1844 else if (EQ (keyword, Qfinal))
1846 CHECK_CHAR_COERCE_INT (value);
1847 final = XCHAR (value);
1848 if (final < '0' || final > '~')
1849 signal_simple_error ("Invalid value for 'final", value);
1852 else if (EQ (keyword, Qccl_program))
1854 CHECK_VECTOR (value);
1855 ccl_program = value;
1859 signal_simple_error ("Unrecognized property", keyword);
1863 error ("'final must be specified");
1864 if (dimension == 2 && final > 0x5F)
1866 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1870 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1872 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1874 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1875 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1877 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1879 id = get_unallocated_leading_byte (dimension);
1881 if (NILP (doc_string))
1882 doc_string = build_string ("");
1884 if (NILP (registry))
1885 registry = build_string ("");
1887 if (NILP (short_name))
1888 XSETSTRING (short_name, XSYMBOL (name)->name);
1890 if (NILP (long_name))
1891 long_name = doc_string;
1894 columns = dimension;
1896 if (byte_offset < 0)
1900 else if (chars == 96)
1906 charset = make_charset (id, name, type, columns, graphic,
1907 final, direction, short_name, long_name,
1908 doc_string, registry,
1909 Qnil, 0, 0, 0, byte_offset);
1910 if (!NILP (ccl_program))
1911 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Copies CHARSET's
   type, columns, dimension, graphic, final byte, names, doc string and
   registry (plus, in the visible make_charset call, its decoding
   table, UCS range, code offset and byte offset) into a new charset
   called NEW-NAME, with the direction flipped and a fresh id obtained
   from get_unallocated_leading_byte.  The two charsets are then linked
   to each other through their reverse-direction-charset slots.
   Signals an error if CHARSET already has a reverse-direction charset
   or if NEW-NAME already names a charset.  */
1915 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1917 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1918 NEW-NAME is the name of the new charset. Return the new charset.
1920 (charset, new_name))
1922 Lisp_Object new_charset = Qnil;
1923 int id, dimension, columns, graphic, final;
1924 int direction, type;
1925 Lisp_Object registry, doc_string, short_name, long_name;
1926 struct Lisp_Charset *cs;
1928 charset = Fget_charset (charset);
1929 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1930 signal_simple_error ("Charset already has reverse-direction charset",
1933 CHECK_SYMBOL (new_name);
1934 if (!NILP (Ffind_charset (new_name)))
1935 signal_simple_error ("Cannot redefine existing charset", new_name);
1937 cs = XCHARSET (charset);
1939 type = CHARSET_TYPE (cs);
1940 columns = CHARSET_COLUMNS (cs);
1941 dimension = CHARSET_DIMENSION (cs);
1942 id = get_unallocated_leading_byte (dimension);
1944 graphic = CHARSET_GRAPHIC (cs);
1945 final = CHARSET_FINAL (cs);
1946 direction = CHARSET_RIGHT_TO_LEFT;
1947 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1948 direction = CHARSET_LEFT_TO_RIGHT;
1949 doc_string = CHARSET_DOC_STRING (cs);
1950 short_name = CHARSET_SHORT_NAME (cs);
1951 long_name = CHARSET_LONG_NAME (cs);
1952 registry = CHARSET_REGISTRY (cs);
1954 new_charset = make_charset (id, new_name, type, columns,
1955 graphic, final, direction, short_name, long_name,
1956 doc_string, registry,
1958 CHARSET_DECODING_TABLE(cs),
1959 CHARSET_UCS_MIN(cs),
1960 CHARSET_UCS_MAX(cs),
1961 CHARSET_CODE_OFFSET(cs),
1962 CHARSET_BYTE_OFFSET(cs)
1968 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1969 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol and
   CHARSET must resolve through Fget_charset; the charset object is
   then stored in Vcharset_hash_table under ALIAS, and the result of
   Fputhash is returned.  */
1974 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1975 Define symbol ALIAS as an alias for CHARSET.
1979 CHECK_SYMBOL (alias);
1980 charset = Fget_charset (charset);
1981 return Fputhash (alias, charset, Vcharset_hash_table);
1984 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  Resolves CHARSET
   with Fget_charset and returns the contents of its
   reverse-direction-charset slot unchanged.  */
1986 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1988 Return the reverse-direction charset parallel to CHARSET, if any.
1989 This is the charset with the same properties (in particular, the same
1990 dimension, number of characters per dimension, and final byte) as
1991 CHARSET but whose characters are displayed in the opposite direction.
1995 charset = Fget_charset (charset);
1996 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION (1 or
   2), CHARS (94 or 96), FINAL ('0' through '~', and at most 0x5F when
   DIMENSION is 2) and DIRECTION (l2r, r2l or nil), derives the charset
   type from DIMENSION and CHARS, and looks the charset up with
   CHARSET_BY_ATTRIBUTES.  With no DIRECTION given, lookups for both
   directions are present.  The visible return hands back the charset's
   name (XCHARSET_NAME), not the charset object.
   NOTE(review): what is returned when no charset matches is not
   visible in this excerpt.  */
2000 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2001 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2002 If DIRECTION is omitted, both directions will be checked (left-to-right
2003 will be returned if character sets exist for both directions).
2005 (dimension, chars, final, direction))
2007 int dm, ch, fi, di = -1;
2009 Lisp_Object obj = Qnil;
2011 CHECK_INT (dimension);
2012 dm = XINT (dimension);
2013 if (dm < 1 || dm > 2)
2014 signal_simple_error ("Invalid value for DIMENSION", dimension);
2018 if (ch != 94 && ch != 96)
2019 signal_simple_error ("Invalid value for CHARS", chars);
2021 CHECK_CHAR_COERCE_INT (final);
2023 if (fi < '0' || fi > '~')
2024 signal_simple_error ("Invalid value for FINAL", final);
2026 if (EQ (direction, Ql2r))
2027 di = CHARSET_LEFT_TO_RIGHT;
2028 else if (EQ (direction, Qr2l))
2029 di = CHARSET_RIGHT_TO_LEFT;
2030 else if (!NILP (direction))
2031 signal_simple_error ("Invalid value for DIRECTION", direction);
2033 if (dm == 2 && fi > 0x5F)
2035 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2038 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2040 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2044 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2046 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2049 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2052 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.  CHARSET is resolved with
   Fget_charset and its short-name slot is returned.  */
2056 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2057 Return short name of CHARSET.
2061 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.  CHARSET is resolved with
   Fget_charset and its long-name slot is returned.  */
2064 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2065 Return long name of CHARSET.
2069 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.  CHARSET is resolved with
   Fget_charset and its doc-string slot is returned.  */
2072 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2073 Return description of CHARSET.
2077 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.  CHARSET is resolved with
   Fget_charset and its dimension is returned as a Lisp integer.  */
2080 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2081 Return dimension of CHARSET.
2085 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  Resolves CHARSET with
   Fget_charset, requires PROP to be a symbol, and dispatches on PROP:
   integer-valued properties are wrapped with make_int, the final byte
   with make_char, the direction is mapped to Ql2r / Qr2l, and the
   reverse-direction charset is reported by name.  An unknown PROP
   signals "Unrecognized charset property name".
   NOTE(review): how a missing reverse-direction charset is handled is
   not visible in this excerpt.  */
2088 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2089 Return property PROP of CHARSET.
2090 Recognized properties are those listed in `make-charset', as well as
2091 'name and 'doc-string.
2095 struct Lisp_Charset *cs;
2097 charset = Fget_charset (charset);
2098 cs = XCHARSET (charset);
2100 CHECK_SYMBOL (prop);
2101 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2102 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2103 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2104 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2105 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2106 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2107 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2108 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2109 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2110 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2111 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2112 if (EQ (prop, Qdirection))
2113 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2114 if (EQ (prop, Qreverse_direction_charset))
2116 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2120 return XCHARSET_NAME (obj);
2122 signal_simple_error ("Unrecognized charset property name", prop);
2123 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.  CHARSET is resolved with Fget_charset
   and the value of XCHARSET_LEADING_BYTE is returned as a Lisp
   integer.  */
2126 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2127 Return charset identification number of CHARSET.
2131 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2134 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is resolved with
   Fget_charset, CCL-PROGRAM must be a vector (CHECK_VECTOR), and the
   vector is stored in the charset's ccl-program slot.  */
2137 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2138 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2140 (charset, ccl_program))
2142 charset = Fget_charset (charset);
2143 CHECK_VECTOR (ccl_program);
2144 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look CHARSET up in
   that device's charset_font_cache and, if an entry is present (the
   lookup does not yield Qunbound), empty it with Fclrhash.  */
2149 invalidate_charset_font_caches (Lisp_Object charset)
2151 /* Invalidate font cache entries for charset on all devices. */
2152 Lisp_Object devcons, concons, hash_table;
2153 DEVICE_LOOP_NO_BREAK (devcons, concons)
2155 struct device *d = XDEVICE (XCAR (devcons));
2156 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2157 if (!UNBOUNDP (hash_table))
2158 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  Stores REGISTRY (which must
   be a string) in CHARSET's registry slot, clears the per-device font
   caches for that charset via invalidate_charset_font_caches, and
   calls face_property_was_changed for the default face's font
   property (presumably so that fonts are looked up again -- confirm
   against the face code).  */
2162 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2163 Set the 'registry property of CHARSET to REGISTRY.
2165 (charset, registry))
2167 charset = Fget_charset (charset);
2168 CHECK_STRING (registry);
2169 XCHARSET_REGISTRY (charset) = registry;
2170 invalidate_charset_font_caches (charset);
2171 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.  CHARSET is resolved with
   Fget_charset and its decoding-table slot is returned.  */
2176 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2177 Return mapping-table of CHARSET.
2181 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  TABLE must be nil or a
   vector; anything else signals a wrong-type-argument error.  A vector
   longer than the charset allows is rejected with args_out_of_range.
   The limit is CHARSET_CHARS, except that a 94-char set whose byte
   offset is not 33 may use up to 128 - byte offset entries (the
   "ad-hoc method for `ascii'").  The out-of-range error reports that
   effective limit (ccs_len) rather than CHARSET_CHARS, so that the
   datum matches the test that failed.

   After the table is installed, entries are recorded with
   put_char_attribute under a code point built from the entry's index
   plus CHARSET_BYTE_OFFSET (one index for dimension 1, two for a
   nested vector in dimension 2).  If a nested vector is too long, the
   previous decoding table is restored before the error is signalled.
   NOTE(review): the per-entry type checks are not visible in this
   excerpt.  */
2184 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2185 Set mapping-table of CHARSET to TABLE.
2189 struct Lisp_Charset *cs;
2190 Lisp_Object old_table;
2193 charset = Fget_charset (charset);
2194 cs = XCHARSET (charset);
2196 if (EQ (table, Qnil))
2198 CHARSET_DECODING_TABLE(cs) = table;
2201 else if (VECTORP (table))
2205 /* ad-hoc method for `ascii' */
2206 if ((CHARSET_CHARS (cs) == 94) &&
2207 (CHARSET_BYTE_OFFSET (cs) != 33))
2208 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2210 ccs_len = CHARSET_CHARS (cs);
2212 if (XVECTOR_LENGTH (table) > ccs_len)
2213 args_out_of_range (table, make_int (ccs_len));
2214 old_table = CHARSET_DECODING_TABLE(cs);
2215 CHARSET_DECODING_TABLE(cs) = table;
2218 signal_error (Qwrong_type_argument,
2219 list2 (build_translated_string ("vector-or-nil-p"),
2221 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2223 switch (CHARSET_DIMENSION (cs))
2226 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2228 Lisp_Object c = XVECTOR_DATA(table)[i];
2233 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2237 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2239 Lisp_Object v = XVECTOR_DATA(table)[i];
2245 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2247 CHARSET_DECODING_TABLE(cs) = old_table;
2248 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2250 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2252 Lisp_Object c = XVECTOR_DATA(v)[j];
2255 put_char_attribute (c, charset,
2258 (i + CHARSET_BYTE_OFFSET (cs)),
2260 (j + CHARSET_BYTE_OFFSET (cs))));
2264 put_char_attribute (v, charset,
2266 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2275 /************************************************************************/
2276 /* Lisp primitives for working with characters */
2277 /************************************************************************/
/* Lisp primitive `make-char'.  Picks the legal octet range from the
   charset (0-127 for ascii, 0-31 for control-1, 0-255 for 256-char
   sets, 33-126 for 94-char sets, 32-127 otherwise) and range-checks
   the first octet.  For a one-dimensional charset the character is
   built with MAKE_CHAR (charset, a1, 0), and a non-nil ARG2 is
   rejected.  Otherwise the second octet is masked to 7 bits,
   range-checked against the same limits, and the character is built
   from both octets.  */
2279 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2280 Make a character from CHARSET and octets ARG1 and ARG2.
2281 ARG2 is required only for characters from two-dimensional charsets.
2282 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2283 character s with caron.
2285 (charset, arg1, arg2))
2287 struct Lisp_Charset *cs;
2289 int lowlim, highlim;
2291 charset = Fget_charset (charset);
2292 cs = XCHARSET (charset);
2294 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2295 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2297 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2299 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2300 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2303 /* It is useful (and safe, according to Olivier Galibert) to strip
2304 the 8th bit off ARG1 and ARG2 because it allows programmers to
2305 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2306 Latin 2 code of the character. */
2314 if (a1 < lowlim || a1 > highlim)
2315 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2317 if (CHARSET_DIMENSION (cs) == 1)
2321 ("Charset is of dimension one; second octet must be nil", arg2);
2322 return make_char (MAKE_CHAR (charset, a1, 0));
2331 a2 = XINT (arg2) & 0x7f;
2333 if (a2 < lowlim || a2 > highlim)
2334 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2336 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH is checked/coerced with
   CHECK_CHAR_COERCE_INT; the result is the name of the charset that
   CHAR_CHARSET reports for it, not the charset object itself.  */
2339 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2340 Return the character set of char CH.
2344 CHECK_CHAR_COERCE_INT (ch);
2346 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `char-octet'.  Splits CH with BREAKUP_CHAR and returns
   octet 0 when N is nil or 0, octet 1 when N is 1; any other N signals
   "Octet number must be 0 or 1".  */
2349 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2350 Return the octet numbered N (should be 0 or 1) of char CH.
2351 N defaults to 0 if omitted.
2355 Lisp_Object charset;
2358 CHECK_CHAR_COERCE_INT (ch);
2360 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2362 if (NILP (n) || EQ (n, Qzero))
2363 return make_int (octet0);
2364 else if (EQ (n, make_int (1)))
2365 return make_int (octet1);
2367 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  Two implementations are visible in this
   excerpt: one built on SPLIT_CHAR, which replaces the leading charset
   object by its name and copies the position codes, and one built on
   BREAKUP_CHAR, which returns (NAME C1 C2) for two-dimensional
   charsets and (NAME C1) otherwise.
   NOTE(review): presumably these are alternatives selected by a
   preprocessor conditional that is not visible here -- confirm.  */
2370 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2371 Return list of charset and one or two position-codes of CHAR.
2377 Lisp_Object charset;
2379 CHECK_CHAR_COERCE_INT (character);
2380 ret = SPLIT_CHAR (XCHAR (character));
2381 charset = Fcar (ret);
2382 if (CHARSETP (charset))
2383 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2387 /* This function can GC */
2388 struct gcpro gcpro1, gcpro2;
2389 Lisp_Object charset = Qnil;
2390 Lisp_Object rc = Qnil;
2393 GCPRO2 (charset, rc);
2394 CHECK_CHAR_COERCE_INT (character);
2396 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2398 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2400 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2404 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2413 #ifdef ENABLE_COMPOSITE_CHARS
2414 /************************************************************************/
2415 /* composite character functions */
2416 /************************************************************************/
/* Return the composite character standing for the LEN bytes at STR.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   builds a new character in Vcharset_composite from
   composite_char_row_next / composite_char_col_next, records it in
   both the char->string and string->char hash tables, and advances the
   column counter, wrapping to column 32 of the next row once the
   column reaches 128.  When the row counter has reached 128, "No more
   composite chars available" is signalled.
   NOTE(review): the test that separates the cache-hit path from the
   allocation path is not visible in this excerpt.  */
2419 lookup_composite_char (Bufbyte *str, int len)
2421 Lisp_Object lispstr = make_string (str, len);
2422 Lisp_Object ch = Fgethash (lispstr,
2423 Vcomposite_char_string2char_hash_table,
2429 if (composite_char_row_next >= 128)
2430 signal_simple_error ("No more composite chars available", lispstr);
2431 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2432 composite_char_col_next);
2433 Fputhash (make_char (emch), lispstr,
2434 Vcomposite_char_char2string_hash_table);
2435 Fputhash (lispstr, make_char (emch),
2436 Vcomposite_char_string2char_hash_table);
2437 composite_char_col_next++;
2438 if (composite_char_col_next >= 128)
2440 composite_char_col_next = 32;
2441 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table; asserts that the lookup
   result is not unbound.  */
2450 composite_char_string (Emchar ch)
2452 Lisp_Object str = Fgethash (make_char (ch),
2453 Vcomposite_char_char2string_hash_table,
2455 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN).  STRING must be a Lisp
   string; its data and length are passed to lookup_composite_char and
   the resulting character is returned as a Lisp character.  */
2459 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2460 Convert a string into a single composite character.
2461 The character is the result of overstriking all the characters in
2466 CHECK_STRING (string);
2467 return make_char (lookup_composite_char (XSTRING_DATA (string),
2468 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN).  Signals "Must be
   composite char" unless the character's leading byte is
   LEADING_BYTE_COMPOSITE; otherwise returns the string obtained from
   composite_char_string.  */
2471 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2472 Return a string of the characters comprising a composite character.
2480 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2481 signal_simple_error ("Must be composite char", ch);
2482 return composite_char_string (emch);
2484 #endif /* ENABLE_COMPOSITE_CHARS */
2487 /************************************************************************/
2488 /* initialization */
2489 /************************************************************************/
2492 syms_of_mule_charset (void)
2494 DEFSUBR (Fcharsetp);
2495 DEFSUBR (Ffind_charset);
2496 DEFSUBR (Fget_charset);
2497 DEFSUBR (Fcharset_list);
2498 DEFSUBR (Fcharset_name);
2499 DEFSUBR (Fmake_charset);
2500 DEFSUBR (Fmake_reverse_direction_charset);
2501 /* DEFSUBR (Freverse_direction_charset); */
2502 DEFSUBR (Fdefine_charset_alias);
2503 DEFSUBR (Fcharset_from_attributes);
2504 DEFSUBR (Fcharset_short_name);
2505 DEFSUBR (Fcharset_long_name);
2506 DEFSUBR (Fcharset_description);
2507 DEFSUBR (Fcharset_dimension);
2508 DEFSUBR (Fcharset_property);
2509 DEFSUBR (Fcharset_id);
2510 DEFSUBR (Fset_charset_ccl_program);
2511 DEFSUBR (Fset_charset_registry);
2513 DEFSUBR (Fchar_attribute_alist);
2514 DEFSUBR (Fget_char_attribute);
2515 DEFSUBR (Fput_char_attribute);
2516 DEFSUBR (Fdefine_char);
2517 DEFSUBR (Fchar_variants);
2518 DEFSUBR (Fget_composite_char);
2519 DEFSUBR (Fcharset_mapping_table);
2520 DEFSUBR (Fset_charset_mapping_table);
2523 DEFSUBR (Fmake_char);
2524 DEFSUBR (Fchar_charset);
2525 DEFSUBR (Fchar_octet);
2526 DEFSUBR (Fsplit_char);
2528 #ifdef ENABLE_COMPOSITE_CHARS
2529 DEFSUBR (Fmake_composite_char);
2530 DEFSUBR (Fcomposite_char_string);
2533 defsymbol (&Qcharsetp, "charsetp");
2534 defsymbol (&Qregistry, "registry");
2535 defsymbol (&Qfinal, "final");
2536 defsymbol (&Qgraphic, "graphic");
2537 defsymbol (&Qdirection, "direction");
2538 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2539 defsymbol (&Qshort_name, "short-name");
2540 defsymbol (&Qlong_name, "long-name");
2542 defsymbol (&Ql2r, "l2r");
2543 defsymbol (&Qr2l, "r2l");
2545 /* Charsets, compatible with FSF 20.3
2546 Naming convention is Script-Charset[-Edition] */
2547 defsymbol (&Qascii, "ascii");
2548 defsymbol (&Qcontrol_1, "control-1");
2549 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2550 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2551 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2552 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2553 defsymbol (&Qthai_tis620, "thai-tis620");
2554 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2555 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2556 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2557 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2558 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2559 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2560 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2561 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2562 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2563 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2564 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2565 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2566 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2567 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2568 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2570 defsymbol (&Q_ucs, "->ucs");
2571 defsymbol (&Q_decomposition, "->decomposition");
2572 defsymbol (&Qcompat, "compat");
2573 defsymbol (&Qisolated, "isolated");
2574 defsymbol (&Qinitial, "initial");
2575 defsymbol (&Qmedial, "medial");
2576 defsymbol (&Qfinal, "final");
2577 defsymbol (&Qvertical, "vertical");
2578 defsymbol (&QnoBreak, "noBreak");
2579 defsymbol (&Qfraction, "fraction");
2580 defsymbol (&Qsuper, "super");
2581 defsymbol (&Qsub, "sub");
2582 defsymbol (&Qcircle, "circle");
2583 defsymbol (&Qsquare, "square");
2584 defsymbol (&Qwide, "wide");
2585 defsymbol (&Qnarrow, "narrow");
2586 defsymbol (&Qsmall, "small");
2587 defsymbol (&Qfont, "font");
2588 defsymbol (&Qucs, "ucs");
2589 defsymbol (&Qucs_bmp, "ucs-bmp");
2590 defsymbol (&Qlatin_viscii, "latin-viscii");
2591 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2592 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2593 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2594 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2595 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2596 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2597 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2598 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2599 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2600 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2601 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2602 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2603 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2604 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2605 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2606 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2607 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2608 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2609 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2610 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2611 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2612 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2613 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2614 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2615 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2616 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2617 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2619 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2620 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2622 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file: allocates the charset lookup
   structure (xnew) and hands it to dumpstruct, clears its lookup
   tables to Qnil, seeds the leading-byte allocation counters, and
   defines and initializes the Lisp-visible variables and the
   character code tables below.
   NOTE(review): several of the initializations below look like
   alternatives (for instance the two-level and three-level
   charset_by_attributes loops); the preprocessor conditionals that
   select between them are not visible in this excerpt.  */
2626 vars_of_mule_charset (void)
2633 chlook = xnew (struct charset_lookup);
2634 dumpstruct (&chlook, &charset_lookup_description);
2636 /* Table of charsets indexed by leading byte. */
2637 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2638 chlook->charset_by_leading_byte[i] = Qnil;
2641 /* Table of charsets indexed by type/final-byte. */
2642 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2643 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2644 chlook->charset_by_attributes[i][j] = Qnil;
2646 /* Table of charsets indexed by type/final-byte/direction. */
2647 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2648 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2649 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2650 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Starting points for handing out leading bytes to new charsets.  */
2654 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2656 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2657 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* The assignment of PRE_LEADING_BYTE_PRIVATE_1 appears both before and
   after the DEFVAR_INT below.  */
2661 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2662 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2663 Leading-code of private TYPE9N charset of column-width 1.
2665 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2669 Vutf_2000_version = build_string("0.13 (Takaida)");
2670 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2671 Version number of UTF-2000.
2674 staticpro (&Vcharacter_attribute_table);
2675 Vcharacter_attribute_table = make_char_code_table (Qnil);
2677 staticpro (&Vcharacter_composition_table);
2678 Vcharacter_composition_table = make_char_code_table (Qnil);
2680 staticpro (&Vcharacter_variant_table);
2681 Vcharacter_variant_table = make_char_code_table (Qnil);
2683 Vdefault_coded_charset_priority_list = Qnil;
2684 DEFVAR_LISP ("default-coded-charset-priority-list",
2685 &Vdefault_coded_charset_priority_list /*
2686 Default order of preferred coded-character-sets.
2692 complex_vars_of_mule_charset (void)
2694 staticpro (&Vcharset_hash_table);
2695 Vcharset_hash_table =
2696 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2698 /* Predefined character sets. We store them into variables for
2702 staticpro (&Vcharset_ucs_bmp);
2704 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2705 CHARSET_TYPE_256X256, 1, 2, 0,
2706 CHARSET_LEFT_TO_RIGHT,
2707 build_string ("BMP"),
2708 build_string ("BMP"),
2709 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2710 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2711 Qnil, 0, 0xFFFF, 0, 0);
2713 # define MIN_CHAR_THAI 0
2714 # define MAX_CHAR_THAI 0
2715 # define MIN_CHAR_GREEK 0
2716 # define MAX_CHAR_GREEK 0
2717 # define MIN_CHAR_HEBREW 0
2718 # define MAX_CHAR_HEBREW 0
2719 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2720 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2721 # define MIN_CHAR_CYRILLIC 0
2722 # define MAX_CHAR_CYRILLIC 0
/* ASCII: staticpro the variable and build the charset object with
   make_charset.  The three strings after the direction argument go
   from shortest label to longest description, as in the sibling
   registrations; the second one is plain "ASCII" with no stray
   parenthesis.  */
2724 staticpro (&Vcharset_ascii);
2726 make_charset (LEADING_BYTE_ASCII, Qascii,
2727 CHARSET_TYPE_94, 1, 0, 'B',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("ASCII"),
2730 build_string ("ASCII"),
2731 build_string ("ASCII (ISO646 IRV)"),
2732 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2733 Qnil, 0, 0x7F, 0, 0);
/* C1 control characters: staticpro the variable and build the charset
   object with make_charset.  The description gives the range 128-159
   so that it agrees with the 0x80, 0x9F values passed at the end of
   this call (presumably the minimum and maximum code points -- confirm
   against make_charset's parameter list).  */
2734 staticpro (&Vcharset_control_1);
2735 Vcharset_control_1 =
2736 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2737 CHARSET_TYPE_94, 1, 1, 0,
2738 CHARSET_LEFT_TO_RIGHT,
2739 build_string ("C1"),
2740 build_string ("Control characters"),
2741 build_string ("Control characters 128-159"),
2743 Qnil, 0x80, 0x9F, 0, 0);
2744 staticpro (&Vcharset_latin_iso8859_1);
2745 Vcharset_latin_iso8859_1 =
2746 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2747 CHARSET_TYPE_96, 1, 1, 'A',
2748 CHARSET_LEFT_TO_RIGHT,
2749 build_string ("Latin-1"),
2750 build_string ("ISO8859-1 (Latin-1)"),
2751 build_string ("ISO8859-1 (Latin-1)"),
2752 build_string ("iso8859-1"),
2753 Qnil, 0xA0, 0xFF, 0, 32);
2754 staticpro (&Vcharset_latin_iso8859_2);
2755 Vcharset_latin_iso8859_2 =
2756 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2757 CHARSET_TYPE_96, 1, 1, 'B',
2758 CHARSET_LEFT_TO_RIGHT,
2759 build_string ("Latin-2"),
2760 build_string ("ISO8859-2 (Latin-2)"),
2761 build_string ("ISO8859-2 (Latin-2)"),
2762 build_string ("iso8859-2"),
2764 staticpro (&Vcharset_latin_iso8859_3);
2765 Vcharset_latin_iso8859_3 =
2766 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2767 CHARSET_TYPE_96, 1, 1, 'C',
2768 CHARSET_LEFT_TO_RIGHT,
2769 build_string ("Latin-3"),
2770 build_string ("ISO8859-3 (Latin-3)"),
2771 build_string ("ISO8859-3 (Latin-3)"),
2772 build_string ("iso8859-3"),
2774 staticpro (&Vcharset_latin_iso8859_4);
2775 Vcharset_latin_iso8859_4 =
2776 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2777 CHARSET_TYPE_96, 1, 1, 'D',
2778 CHARSET_LEFT_TO_RIGHT,
2779 build_string ("Latin-4"),
2780 build_string ("ISO8859-4 (Latin-4)"),
2781 build_string ("ISO8859-4 (Latin-4)"),
2782 build_string ("iso8859-4"),
2784 staticpro (&Vcharset_thai_tis620);
2785 Vcharset_thai_tis620 =
2786 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2787 CHARSET_TYPE_96, 1, 1, 'T',
2788 CHARSET_LEFT_TO_RIGHT,
2789 build_string ("TIS620"),
2790 build_string ("TIS620 (Thai)"),
2791 build_string ("TIS620.2529 (Thai)"),
2792 build_string ("tis620"),
2793 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2794 staticpro (&Vcharset_greek_iso8859_7);
2795 Vcharset_greek_iso8859_7 =
2796 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2797 CHARSET_TYPE_96, 1, 1, 'F',
2798 CHARSET_LEFT_TO_RIGHT,
2799 build_string ("ISO8859-7"),
2800 build_string ("ISO8859-7 (Greek)"),
2801 build_string ("ISO8859-7 (Greek)"),
2802 build_string ("iso8859-7"),
2803 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2804 staticpro (&Vcharset_arabic_iso8859_6);
2805 Vcharset_arabic_iso8859_6 =
2806 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2807 CHARSET_TYPE_96, 1, 1, 'G',
2808 CHARSET_RIGHT_TO_LEFT,
2809 build_string ("ISO8859-6"),
2810 build_string ("ISO8859-6 (Arabic)"),
2811 build_string ("ISO8859-6 (Arabic)"),
2812 build_string ("iso8859-6"),
2814 staticpro (&Vcharset_hebrew_iso8859_8);
2815 Vcharset_hebrew_iso8859_8 =
2816 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2817 CHARSET_TYPE_96, 1, 1, 'H',
2818 CHARSET_RIGHT_TO_LEFT,
2819 build_string ("ISO8859-8"),
2820 build_string ("ISO8859-8 (Hebrew)"),
2821 build_string ("ISO8859-8 (Hebrew)"),
2822 build_string ("iso8859-8"),
2823 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2824 staticpro (&Vcharset_katakana_jisx0201);
2825 Vcharset_katakana_jisx0201 =
2826 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2827 CHARSET_TYPE_94, 1, 1, 'I',
2828 CHARSET_LEFT_TO_RIGHT,
2829 build_string ("JISX0201 Kana"),
2830 build_string ("JISX0201.1976 (Japanese Kana)"),
2831 build_string ("JISX0201.1976 Japanese Kana"),
2832 build_string ("jisx0201\\.1976"),
2834 staticpro (&Vcharset_latin_jisx0201);
2835 Vcharset_latin_jisx0201 =
2836 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2837 CHARSET_TYPE_94, 1, 0, 'J',
2838 CHARSET_LEFT_TO_RIGHT,
2839 build_string ("JISX0201 Roman"),
2840 build_string ("JISX0201.1976 (Japanese Roman)"),
2841 build_string ("JISX0201.1976 Japanese Roman"),
2842 build_string ("jisx0201\\.1976"),
2844 staticpro (&Vcharset_cyrillic_iso8859_5);
2845 Vcharset_cyrillic_iso8859_5 =
2846 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2847 CHARSET_TYPE_96, 1, 1, 'L',
2848 CHARSET_LEFT_TO_RIGHT,
2849 build_string ("ISO8859-5"),
2850 build_string ("ISO8859-5 (Cyrillic)"),
2851 build_string ("ISO8859-5 (Cyrillic)"),
2852 build_string ("iso8859-5"),
2853 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2854 staticpro (&Vcharset_latin_iso8859_9);
2855 Vcharset_latin_iso8859_9 =
2856 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2857 CHARSET_TYPE_96, 1, 1, 'M',
2858 CHARSET_LEFT_TO_RIGHT,
2859 build_string ("Latin-5"),
2860 build_string ("ISO8859-9 (Latin-5)"),
2861 build_string ("ISO8859-9 (Latin-5)"),
2862 build_string ("iso8859-9"),
2864 staticpro (&Vcharset_japanese_jisx0208_1978);
2865 Vcharset_japanese_jisx0208_1978 =
2866 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2867 CHARSET_TYPE_94X94, 2, 0, '@',
2868 CHARSET_LEFT_TO_RIGHT,
2869 build_string ("JIS X0208:1978"),
2870 build_string ("JIS X0208:1978 (Japanese)"),
2872 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2873 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB2312: staticpro the variable and build the charset object with
   make_charset.  The second name string is "GB2312" with balanced
   punctuation.  NOTE(review): sibling charsets use a "NAME (Language)"
   form for this argument; extend the label if that form is wanted
   here too.  */
2875 staticpro (&Vcharset_chinese_gb2312);
2876 Vcharset_chinese_gb2312 =
2877 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2878 CHARSET_TYPE_94X94, 2, 0, 'A',
2879 CHARSET_LEFT_TO_RIGHT,
2880 build_string ("GB2312"),
2881 build_string ("GB2312"),
2882 build_string ("GB2312 Chinese simplified"),
2883 build_string ("gb2312"),
2885 staticpro (&Vcharset_japanese_jisx0208);
2886 Vcharset_japanese_jisx0208 =
2887 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2888 CHARSET_TYPE_94X94, 2, 0, 'B',
2889 CHARSET_LEFT_TO_RIGHT,
2890 build_string ("JISX0208"),
2891 build_string ("JIS X0208:1983 (Japanese)"),
2892 build_string ("JIS X0208:1983 Japanese Kanji"),
2893 build_string ("jisx0208\\.1983"),
2895 staticpro (&Vcharset_japanese_jisx0208_1990);
2896 Vcharset_japanese_jisx0208_1990 =
2897 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2898 Qjapanese_jisx0208_1990,
2899 CHARSET_TYPE_94X94, 2, 0, 0,
2900 CHARSET_LEFT_TO_RIGHT,
2901 build_string ("JISX0208-1990"),
2902 build_string ("JIS X0208:1990 (Japanese)"),
2903 build_string ("JIS X0208:1990 Japanese Kanji"),
2904 build_string ("jisx0208\\.1990"),
2906 MIN_CHAR_JIS_X0208_1990,
2907 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* KSC5601: staticpro the variable and build the charset object with
   make_charset.  The second name string follows the "NAME (Language)"
   form used by the sibling registrations, with the parenthesis
   closed.  */
2908 staticpro (&Vcharset_korean_ksc5601);
2909 Vcharset_korean_ksc5601 =
2910 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2911 CHARSET_TYPE_94X94, 2, 0, 'C',
2912 CHARSET_LEFT_TO_RIGHT,
2913 build_string ("KSC5601"),
2914 build_string ("KSC5601 (Korean)"),
2915 build_string ("KSC5601 Korean Hangul and Hanja"),
2916 build_string ("ksc5601"),
2918 staticpro (&Vcharset_japanese_jisx0212);
2919 Vcharset_japanese_jisx0212 =
2920 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2921 CHARSET_TYPE_94X94, 2, 0, 'D',
2922 CHARSET_LEFT_TO_RIGHT,
2923 build_string ("JISX0212"),
2924 build_string ("JISX0212 (Japanese)"),
2925 build_string ("JISX0212 Japanese Supplement"),
2926 build_string ("jisx0212"),
2929 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2930 staticpro (&Vcharset_chinese_cns11643_1);
2931 Vcharset_chinese_cns11643_1 =
2932 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2933 CHARSET_TYPE_94X94, 2, 0, 'G',
2934 CHARSET_LEFT_TO_RIGHT,
2935 build_string ("CNS11643-1"),
2936 build_string ("CNS11643-1 (Chinese traditional)"),
2938 ("CNS 11643 Plane 1 Chinese traditional"),
2939 build_string (CHINESE_CNS_PLANE_RE("1")),
2941 staticpro (&Vcharset_chinese_cns11643_2);
2942 Vcharset_chinese_cns11643_2 =
2943 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2944 CHARSET_TYPE_94X94, 2, 0, 'H',
2945 CHARSET_LEFT_TO_RIGHT,
2946 build_string ("CNS11643-2"),
2947 build_string ("CNS11643-2 (Chinese traditional)"),
2949 ("CNS 11643 Plane 2 Chinese traditional"),
2950 build_string (CHINESE_CNS_PLANE_RE("2")),
2953 staticpro (&Vcharset_latin_viscii_lower);
2954 Vcharset_latin_viscii_lower =
2955 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2956 CHARSET_TYPE_96, 1, 1, '1',
2957 CHARSET_LEFT_TO_RIGHT,
2958 build_string ("VISCII lower"),
2959 build_string ("VISCII lower (Vietnamese)"),
2960 build_string ("VISCII lower (Vietnamese)"),
2961 build_string ("MULEVISCII-LOWER"),
2963 staticpro (&Vcharset_latin_viscii_upper);
2964 Vcharset_latin_viscii_upper =
2965 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2966 CHARSET_TYPE_96, 1, 1, '2',
2967 CHARSET_LEFT_TO_RIGHT,
2968 build_string ("VISCII upper"),
2969 build_string ("VISCII upper (Vietnamese)"),
2970 build_string ("VISCII upper (Vietnamese)"),
2971 build_string ("MULEVISCII-UPPER"),
2973 staticpro (&Vcharset_latin_viscii);
2974 Vcharset_latin_viscii =
2975 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2976 CHARSET_TYPE_256, 1, 2, 0,
2977 CHARSET_LEFT_TO_RIGHT,
2978 build_string ("VISCII"),
2979 build_string ("VISCII 1.1 (Vietnamese)"),
2980 build_string ("VISCII 1.1 (Vietnamese)"),
2981 build_string ("VISCII1\\.1"),
2983 staticpro (&Vcharset_ideograph_daikanwa);
2984 Vcharset_ideograph_daikanwa =
2985 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2986 CHARSET_TYPE_256X256, 2, 2, 0,
2987 CHARSET_LEFT_TO_RIGHT,
2988 build_string ("Daikanwa"),
2989 build_string ("Morohashi's Daikanwa"),
2990 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2991 build_string ("Daikanwa"),
2992 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2993 staticpro (&Vcharset_mojikyo_pj_1);
2994 Vcharset_mojikyo_pj_1 =
2995 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
2996 CHARSET_TYPE_94X94, 2, 0, 0,
2997 CHARSET_LEFT_TO_RIGHT,
2998 build_string ("Mojikyo-PJ-1"),
2999 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3001 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3002 build_string ("jisx0208\\.Mojikyo-1$"),
3004 staticpro (&Vcharset_mojikyo_pj_2);
3005 Vcharset_mojikyo_pj_2 =
3006 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
3007 CHARSET_TYPE_94X94, 2, 0, 0,
3008 CHARSET_LEFT_TO_RIGHT,
3009 build_string ("Mojikyo-PJ-2"),
3010 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3012 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3013 build_string ("jisx0208\\.Mojikyo-2$"),
3015 staticpro (&Vcharset_mojikyo_pj_3);
3016 Vcharset_mojikyo_pj_3 =
3017 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
3018 CHARSET_TYPE_94X94, 2, 0, 0,
3019 CHARSET_LEFT_TO_RIGHT,
3020 build_string ("Mojikyo-PJ-3"),
3021 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3023 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3024 build_string ("jisx0208\\.Mojikyo-3$"),
3026 staticpro (&Vcharset_mojikyo_pj_4);
3027 Vcharset_mojikyo_pj_4 =
3028 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
3029 CHARSET_TYPE_94X94, 2, 0, 0,
3030 CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Mojikyo-PJ-4"),
3032 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3034 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3035 build_string ("jisx0208\\.Mojikyo-4$"),
3037 staticpro (&Vcharset_mojikyo_pj_5);
3038 Vcharset_mojikyo_pj_5 =
3039 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
3040 CHARSET_TYPE_94X94, 2, 0, 0,
3041 CHARSET_LEFT_TO_RIGHT,
3042 build_string ("Mojikyo-PJ-5"),
3043 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3045 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3046 build_string ("jisx0208\\.Mojikyo-5$"),
3048 staticpro (&Vcharset_mojikyo_pj_6);
3049 Vcharset_mojikyo_pj_6 =
3050 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
3051 CHARSET_TYPE_94X94, 2, 0, 0,
3052 CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Mojikyo-PJ-6"),
3054 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3056 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3057 build_string ("jisx0208\\.Mojikyo-6$"),
3059 staticpro (&Vcharset_mojikyo_pj_7);
3060 Vcharset_mojikyo_pj_7 =
3061 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3062 CHARSET_TYPE_94X94, 2, 0, 0,
3063 CHARSET_LEFT_TO_RIGHT,
3064 build_string ("Mojikyo-PJ-7"),
3065 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3067 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3068 build_string ("jisx0208\\.Mojikyo-7$"),
3070 staticpro (&Vcharset_mojikyo_pj_8);
3071 Vcharset_mojikyo_pj_8 =
3072 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3073 CHARSET_TYPE_94X94, 2, 0, 0,
3074 CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Mojikyo-PJ-8"),
3076 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3078 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3079 build_string ("jisx0208\\.Mojikyo-8$"),
3081 staticpro (&Vcharset_mojikyo_pj_9);
3082 Vcharset_mojikyo_pj_9 =
3083 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3084 CHARSET_TYPE_94X94, 2, 0, 0,
3085 CHARSET_LEFT_TO_RIGHT,
3086 build_string ("Mojikyo-PJ-9"),
3087 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3089 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3090 build_string ("jisx0208\\.Mojikyo-9$"),
3092 staticpro (&Vcharset_mojikyo_pj_10);
3093 Vcharset_mojikyo_pj_10 =
3094 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3095 CHARSET_TYPE_94X94, 2, 0, 0,
3096 CHARSET_LEFT_TO_RIGHT,
3097 build_string ("Mojikyo-PJ-10"),
3098 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3100 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3101 build_string ("jisx0208\\.Mojikyo-10$"),
3103 staticpro (&Vcharset_mojikyo_pj_11);
3104 Vcharset_mojikyo_pj_11 =
3105 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3106 CHARSET_TYPE_94X94, 2, 0, 0,
3107 CHARSET_LEFT_TO_RIGHT,
3108 build_string ("Mojikyo-PJ-11"),
3109 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3111 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3112 build_string ("jisx0208\\.Mojikyo-11$"),
3114 staticpro (&Vcharset_mojikyo_pj_12);
3115 Vcharset_mojikyo_pj_12 =
3116 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3117 CHARSET_TYPE_94X94, 2, 0, 0,
3118 CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Mojikyo-PJ-12"),
3120 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3122 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3123 build_string ("jisx0208\\.Mojikyo-12$"),
3125 staticpro (&Vcharset_mojikyo_pj_13);
3126 Vcharset_mojikyo_pj_13 =
3127 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3128 CHARSET_TYPE_94X94, 2, 0, 0,
3129 CHARSET_LEFT_TO_RIGHT,
3130 build_string ("Mojikyo-PJ-13"),
3131 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3133 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3134 build_string ("jisx0208\\.Mojikyo-13$"),
3136 staticpro (&Vcharset_mojikyo_pj_14);
3137 Vcharset_mojikyo_pj_14 =
3138 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3139 CHARSET_TYPE_94X94, 2, 0, 0,
3140 CHARSET_LEFT_TO_RIGHT,
3141 build_string ("Mojikyo-PJ-14"),
3142 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3144 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3145 build_string ("jisx0208\\.Mojikyo-14$"),
3147 staticpro (&Vcharset_mojikyo_pj_15);
3148 Vcharset_mojikyo_pj_15 =
3149 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3150 CHARSET_TYPE_94X94, 2, 0, 0,
3151 CHARSET_LEFT_TO_RIGHT,
3152 build_string ("Mojikyo-PJ-15"),
3153 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3155 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3156 build_string ("jisx0208\\.Mojikyo-15$"),
3158 staticpro (&Vcharset_mojikyo_pj_16);
3159 Vcharset_mojikyo_pj_16 =
3160 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3161 CHARSET_TYPE_94X94, 2, 0, 0,
3162 CHARSET_LEFT_TO_RIGHT,
3163 build_string ("Mojikyo-PJ-16"),
3164 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3166 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3167 build_string ("jisx0208\\.Mojikyo-16$"),
3169 staticpro (&Vcharset_mojikyo_pj_17);
3170 Vcharset_mojikyo_pj_17 =
3171 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3172 CHARSET_TYPE_94X94, 2, 0, 0,
3173 CHARSET_LEFT_TO_RIGHT,
3174 build_string ("Mojikyo-PJ-17"),
3175 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3177 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3178 build_string ("jisx0208\\.Mojikyo-17$"),
3180 staticpro (&Vcharset_mojikyo_pj_18);
3181 Vcharset_mojikyo_pj_18 =
3182 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3183 CHARSET_TYPE_94X94, 2, 0, 0,
3184 CHARSET_LEFT_TO_RIGHT,
3185 build_string ("Mojikyo-PJ-18"),
3186 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3188 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3189 build_string ("jisx0208\\.Mojikyo-18$"),
3191 staticpro (&Vcharset_mojikyo_pj_19);
3192 Vcharset_mojikyo_pj_19 =
3193 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3194 CHARSET_TYPE_94X94, 2, 0, 0,
3195 CHARSET_LEFT_TO_RIGHT,
3196 build_string ("Mojikyo-PJ-19"),
3197 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3199 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3200 build_string ("jisx0208\\.Mojikyo-19$"),
3202 staticpro (&Vcharset_mojikyo_pj_20);
3203 Vcharset_mojikyo_pj_20 =
3204 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3205 CHARSET_TYPE_94X94, 2, 0, 0,
3206 CHARSET_LEFT_TO_RIGHT,
3207 build_string ("Mojikyo-PJ-20"),
3208 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3210 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3211 build_string ("jisx0208\\.Mojikyo-20$"),
3213 staticpro (&Vcharset_mojikyo_pj_21);
3214 Vcharset_mojikyo_pj_21 =
3215 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3216 CHARSET_TYPE_94X94, 2, 0, 0,
3217 CHARSET_LEFT_TO_RIGHT,
3218 build_string ("Mojikyo-PJ-21"),
3219 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3221 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3222 build_string ("jisx0208\\.Mojikyo-21$"),
3224 staticpro (&Vcharset_ethiopic_ucs);
3225 Vcharset_ethiopic_ucs =
3226 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3227 CHARSET_TYPE_256X256, 2, 2, 0,
3228 CHARSET_LEFT_TO_RIGHT,
3229 build_string ("Ethiopic (UCS)"),
3230 build_string ("Ethiopic (UCS)"),
3231 build_string ("Ethiopic of UCS"),
3232 build_string ("Ethiopic-Unicode"),
3233 Qnil, 0x1200, 0x137F, 0x1200, 0);
3235 staticpro (&Vcharset_chinese_big5_1);
3236 Vcharset_chinese_big5_1 =
3237 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3238 CHARSET_TYPE_94X94, 2, 0, '0',
3239 CHARSET_LEFT_TO_RIGHT,
3240 build_string ("Big5"),
3241 build_string ("Big5 (Level-1)"),
3243 ("Big5 Level-1 Chinese traditional"),
3244 build_string ("big5"),
3246 staticpro (&Vcharset_chinese_big5_2);
3247 Vcharset_chinese_big5_2 =
3248 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3249 CHARSET_TYPE_94X94, 2, 0, '1',
3250 CHARSET_LEFT_TO_RIGHT,
3251 build_string ("Big5"),
3252 build_string ("Big5 (Level-2)"),
3254 ("Big5 Level-2 Chinese traditional"),
3255 build_string ("big5"),
3258 #ifdef ENABLE_COMPOSITE_CHARS
3259 /* #### For simplicity, we put composite chars into a 96x96 charset.
3260 This is going to lead to problems because you can run out of
3261 room, esp. as we don't yet recycle numbers. */
3262 staticpro (&Vcharset_composite);
3263 Vcharset_composite =
3264 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3265 CHARSET_TYPE_96X96, 2, 0, 0,
3266 CHARSET_LEFT_TO_RIGHT,
3267 build_string ("Composite"),
3268 build_string ("Composite characters"),
3269 build_string ("Composite characters"),
3272 /* #### not dumped properly */
3273 composite_char_row_next = 32;
3274 composite_char_col_next = 32;
3276 Vcomposite_char_string2char_hash_table =
3277 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3278 Vcomposite_char_char2string_hash_table =
3279 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3280 staticpro (&Vcomposite_char_string2char_hash_table);
3281 staticpro (&Vcomposite_char_char2string_hash_table);
3282 #endif /* ENABLE_COMPOSITE_CHARS */