1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_japanese_jisx0208_1990;
57 Lisp_Object Vcharset_korean_ksc5601;
58 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2;
62 Lisp_Object Vcharset_ucs_bmp;
63 Lisp_Object Vcharset_latin_viscii;
64 Lisp_Object Vcharset_latin_viscii_lower;
65 Lisp_Object Vcharset_latin_viscii_upper;
66 Lisp_Object Vcharset_ideograph_daikanwa;
67 Lisp_Object Vcharset_mojikyo_pj_1;
68 Lisp_Object Vcharset_mojikyo_pj_2;
69 Lisp_Object Vcharset_mojikyo_pj_3;
70 Lisp_Object Vcharset_mojikyo_pj_4;
71 Lisp_Object Vcharset_mojikyo_pj_5;
72 Lisp_Object Vcharset_mojikyo_pj_6;
73 Lisp_Object Vcharset_mojikyo_pj_7;
74 Lisp_Object Vcharset_mojikyo_pj_8;
75 Lisp_Object Vcharset_mojikyo_pj_9;
76 Lisp_Object Vcharset_mojikyo_pj_10;
77 Lisp_Object Vcharset_mojikyo_pj_11;
78 Lisp_Object Vcharset_mojikyo_pj_12;
79 Lisp_Object Vcharset_mojikyo_pj_13;
80 Lisp_Object Vcharset_mojikyo_pj_14;
81 Lisp_Object Vcharset_mojikyo_pj_15;
82 Lisp_Object Vcharset_mojikyo_pj_16;
83 Lisp_Object Vcharset_mojikyo_pj_17;
84 Lisp_Object Vcharset_mojikyo_pj_18;
85 Lisp_Object Vcharset_mojikyo_pj_19;
86 Lisp_Object Vcharset_mojikyo_pj_20;
87 Lisp_Object Vcharset_mojikyo_pj_21;
88 Lisp_Object Vcharset_ethiopic_ucs;
90 Lisp_Object Vcharset_chinese_big5_1;
91 Lisp_Object Vcharset_chinese_big5_2;
93 #ifdef ENABLE_COMPOSITE_CHARS
94 Lisp_Object Vcharset_composite;
96 /* Hash tables for composite chars. One maps string representing
97 composed chars to their equivalent chars; one goes the
99 Lisp_Object Vcomposite_char_char2string_hash_table;
100 Lisp_Object Vcomposite_char_string2char_hash_table;
102 static int composite_char_row_next;
103 static int composite_char_col_next;
105 #endif /* ENABLE_COMPOSITE_CHARS */
107 struct charset_lookup *chlook;
/* Memory-layout description of struct charset_lookup.  The visible entry
   covers Lisp_Object slots starting at the charset_by_leading_byte member;
   the element count and any further entries are elided from this listing. */
109 static const struct lrecord_description charset_lookup_description_1[] = {
110 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
/* Pairs the size of struct charset_lookup with the entry list above. */
119 static const struct struct_description charset_lookup_description = {
120 sizeof(struct charset_lookup),
121 charset_lookup_description_1
125 /* Table of number of bytes in the string representation of a character
126 indexed by the first byte of that representation.
128 rep_bytes_by_first_byte(c) is more efficient than the equivalent
129 canonical computation:
131 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* The array is declared with 0xA0 entries, one per possible first byte
   0x00-0x9f.
   NOTE(review): two alternative rows for 0x80-0x8f appear below (one ending
   in 3, one all 2s); the conditional that selects between them is not
   visible in this listing -- confirm against the full file. */
133 Bytecount rep_bytes_by_first_byte[0xA0] =
134 { /* 0x00 - 0x7f are for straight ASCII */
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 /* 0x80 - 0x8f are for Dimension-1 official charsets */
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
149 /* 0x90 - 0x9d are for Dimension-2 official charsets */
150 /* 0x9e is for Dimension-1 private charsets */
151 /* 0x9f is for Dimension-2 private charsets */
152 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark routine registered for the char-byte-table lrecord type: hands each
   of the table's 256 property slots to mark_object.  The declaration of i
   and the return statement are not visible in this listing. */
159 mark_char_byte_table (Lisp_Object obj)
161 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
164 for (i = 0; i < 256; i++)
166 mark_object (cte->property[i]);
/* Equality predicate registered for char-byte-table objects.  Walks the 256
   property slots of OBJ1 and OBJ2 in parallel.  Where both slots hold nested
   char-byte-tables the comparison recurses; slots are otherwise compared
   with internal_equal.  DEPTH is passed on incremented by one.  The else
   branches and return statements are elided from this listing. */
172 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
174 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
175 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
178 for (i = 0; i < 256; i++)
179 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
181 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
/* Both slots are sub-tables: compare them recursively. */
183 if (!char_byte_table_equal (cte1->property[i],
184 cte2->property[i], depth + 1))
/* Generic comparison of the two slot values. */
191 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash function registered for char-byte-table objects: hashes the
   256-element property array with internal_array_hash, passing DEPTH
   through unchanged. */
197 char_byte_table_hash (Lisp_Object obj, int depth)
199 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
201 return internal_array_hash (cte->property, 256, depth);
/* Layout description for struct Lisp_Char_Byte_Table: 256 Lisp_Object slots
   starting at the property member. */
204 static const struct lrecord_description char_byte_table_description[] = {
205 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Registers the "char-byte-table" lrecord type with the mark, equal, hash
   and description entries defined in this file and the generic
   internal_object_printer.  The literal 0 argument is presumably an unused
   (finalizer) slot -- TODO confirm against the macro definition. */
209 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
210 mark_char_byte_table,
211 internal_object_printer,
212 0, char_byte_table_equal,
213 char_byte_table_hash,
214 char_byte_table_description,
215 struct Lisp_Char_Byte_Table);
/* Allocates a new char-byte-table record and sets every one of its 256
   property slots to INITVAL.  The new record is wrapped into obj with
   XSETCHAR_BYTE_TABLE; the declarations of obj and i and the return
   statement are elided from this listing. */
218 make_char_byte_table (Lisp_Object initval)
222 struct Lisp_Char_Byte_Table *cte =
223 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
224 &lrecord_char_byte_table);
226 for (i = 0; i < 256; i++)
227 cte->property[i] = initval;
229 XSETCHAR_BYTE_TABLE (obj, cte);
/* Makes a copy of the char-byte-table ENTRY.  Slots that hold nested
   char-byte-tables are copied recursively; every other slot value is
   assigned to the new table as-is (the `else' keyword separating the two
   assignments is elided from this listing). */
234 copy_char_byte_table (Lisp_Object entry)
236 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
239 struct Lisp_Char_Byte_Table *ctenew =
240 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
241 &lrecord_char_byte_table);
243 for (i = 0; i < 256; i++)
245 Lisp_Object new = cte->property[i];
246 if (CHAR_BYTE_TABLE_P (new))
/* Nested table: recurse so the copy does not share sub-tables. */
247 ctenew->property[i] = copy_char_byte_table (new);
249 ctenew->property[i] = new;
252 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark routine registered for the char-code-table lrecord type.  Only the
   extraction of the underlying struct is visible in this listing; what is
   marked or returned afterwards is elided (presumably the `table' slot --
   TODO confirm). */
258 mark_char_code_table (Lisp_Object obj)
260 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality predicate registered for char-code-table objects: two tables are
   equal when their underlying `table' members compare equal under
   char_byte_table_equal.  DEPTH is passed on incremented by one. */
266 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
268 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
269 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
271 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash function registered for char-code-table objects.  The `table' slot
   of a char-code-table holds a char-byte-table (see make_char_code_table),
   so hashing delegates to char_byte_table_hash, mirroring the way
   char_code_table_equal delegates to char_byte_table_equal.  This function
   must not call itself on cte->table: that would re-enter it with an object
   of the wrong type.  DEPTH is passed on incremented by one. */
275 char_code_table_hash (Lisp_Object obj, int depth)
277 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
279 return char_byte_table_hash (cte->table, depth + 1);
/* Layout description for struct Lisp_Char_Code_Table: a single Lisp_Object
   slot, the table member. */
282 static const struct lrecord_description char_code_table_description[] = {
283 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
/* Registers the "char-code-table" lrecord type with the mark, equal, hash
   and description entries defined in this file and the generic
   internal_object_printer.  The literal 0 argument is presumably an unused
   (finalizer) slot -- TODO confirm against the macro definition. */
287 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
288 mark_char_code_table,
289 internal_object_printer,
290 0, char_code_table_equal,
291 char_code_table_hash,
292 char_code_table_description,
293 struct Lisp_Char_Code_Table);
/* Allocates a new char-code-table whose `table' slot is a fresh
   char-byte-table with every entry set to INITVAL.  The record is wrapped
   into obj with XSETCHAR_CODE_TABLE; the declaration of obj and the return
   statement are elided from this listing. */
296 make_char_code_table (Lisp_Object initval)
299 struct Lisp_Char_Code_Table *cte =
300 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
301 &lrecord_char_code_table);
303 cte->table = make_char_byte_table (initval);
305 XSETCHAR_CODE_TABLE (obj, cte);
/* Makes a copy of the char-code-table ENTRY.  The underlying
   char-byte-table is duplicated with copy_char_byte_table, so nested
   tables are not shared with the original.  The declaration of obj and the
   return statement are elided from this listing. */
310 copy_char_code_table (Lisp_Object entry)
312 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
314 struct Lisp_Char_Code_Table *ctenew =
315 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
316 &lrecord_char_code_table);
318 ctenew->table = copy_char_byte_table (cte->table);
319 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Looks up the value stored for character CH in the char-code-table TABLE.
   The character code is split into four bytes, most significant first, and
   each byte indexes one level of nested char-byte-tables.  While the slot
   found is itself a char-byte-table the lookup descends into it; the
   handling of a non-table slot at the first three levels is elided from
   this listing (presumably the slot value is returned directly -- TODO
   confirm).  At the last level the slot indexed by the low byte is
   returned. */
325 get_char_code_table (Emchar ch, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
/* Level 1: bits 24-31 of the code. */
330 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
333 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 2: bits 16-23. */
337 ret = cpt->property [(unsigned char) (code >> 16)];
338 if (CHAR_BYTE_TABLE_P (ret))
339 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 3: bits 8-15. */
343 ret = cpt->property [(unsigned char) (code >> 8)];
344 if (CHAR_BYTE_TABLE_P (ret))
345 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 4: bits 0-7. */
349 return cpt->property [(unsigned char) code];
/* Stores VALUE for character CH in the char-code-table TABLE.
   The code is split into four bytes, most significant first, each indexing
   one level of nested char-byte-tables.  Existing sub-tables are followed.
   Where a slot on the path holds a plain value that is not EQ to VALUE, the
   missing lower levels are created with make_char_byte_table, each filled
   with the old slot value so that all other codes covered by that slot keep
   it; VALUE is then written at the leaf and the new tables are linked in
   from the bottom up.  When the plain value is already EQ to VALUE nothing
   is changed. */
353 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
355 unsigned int code = ch;
356 struct Lisp_Char_Byte_Table* cpt1
357 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
358 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
360 if (CHAR_BYTE_TABLE_P (ret))
362 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
364 ret = cpt2->property[(unsigned char)(code >> 16)];
365 if (CHAR_BYTE_TABLE_P (ret))
367 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
369 ret = cpt3->property[(unsigned char)(code >> 8)];
370 if (CHAR_BYTE_TABLE_P (ret))
372 struct Lisp_Char_Byte_Table* cpt4
373 = XCHAR_BYTE_TABLE (ret);
/* All four levels already exist: overwrite the leaf slot. */
375 cpt4->property[(unsigned char)code] = value;
377 else if (!EQ (ret, value))
/* Level 4 missing: create it, pre-filled with the old level-3 value. */
379 Lisp_Object cpt4 = make_char_byte_table (ret);
381 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
382 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
385 else if (!EQ (ret, value))
/* Levels 3 and 4 missing: create both, pre-filled with the old value. */
387 Lisp_Object cpt3 = make_char_byte_table (ret);
388 Lisp_Object cpt4 = make_char_byte_table (ret);
390 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
391 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
393 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
396 else if (!EQ (ret, value))
/* Levels 2, 3 and 4 missing: create all three, pre-filled with the old
   value, and hook the chain into the top-level table. */
398 Lisp_Object cpt2 = make_char_byte_table (ret);
399 Lisp_Object cpt3 = make_char_byte_table (ret);
400 Lisp_Object cpt4 = make_char_byte_table (ret);
402 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
403 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
404 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
405 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
410 Lisp_Object Vcharacter_attribute_table;
411 Lisp_Object Vcharacter_composition_table;
412 Lisp_Object Vcharacter_variant_table;
414 Lisp_Object Q_decomposition;
417 Lisp_Object Qisolated;
418 Lisp_Object Qinitial;
421 Lisp_Object Qvertical;
422 Lisp_Object QnoBreak;
423 Lisp_Object Qfraction;
/* Converts the Lisp value V into a character code for use as a key in the
   composition tables.  V is tested with EQ against a fixed series of tag
   symbols (Qcompat, Qisolated, Qinitial, ... Qfont); the code returned for
   each tag, and the handling that precedes the first `else if', are elided
   from this listing.  If nothing matches, signal_simple_error is called
   with ERR_MSG and ERR_ARG. */
434 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
440 else if (EQ (v, Qcompat))
442 else if (EQ (v, Qisolated))
444 else if (EQ (v, Qinitial))
446 else if (EQ (v, Qmedial))
448 else if (EQ (v, Qfinal))
450 else if (EQ (v, Qvertical))
452 else if (EQ (v, QnoBreak))
454 else if (EQ (v, Qfraction))
456 else if (EQ (v, Qsuper))
458 else if (EQ (v, Qsub))
460 else if (EQ (v, Qcircle))
462 else if (EQ (v, Qsquare))
464 else if (EQ (v, Qwide))
466 else if (EQ (v, Qnarrow))
468 else if (EQ (v, Qsmall))
470 else if (EQ (v, Qfont))
/* No tag matched: report the caller-supplied error. */
473 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char' (exactly one argument).
   Starting from Vcharacter_composition_table, each element of the list is
   converted with to_char_code and looked up with get_char_code_table in
   the current table.  The loop structure, the descent into the table found
   and the value returned are elided from this listing.  Failures are
   reported with signal_simple_error, using the messages "Invalid table is
   found with" and "Invalid value for composition". */
476 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
477 Return character corresponding with list.
481 Lisp_Object table = Vcharacter_composition_table;
482 Lisp_Object rest = list;
486 Lisp_Object v = Fcar (rest);
488 Emchar c = to_char_code (v, "Invalid value for composition", list);
490 ret = get_char_code_table (c, table);
495 if (!CHAR_CODE_TABLE_P (ret))
500 else if (!CONSP (rest))
502 else if (CHAR_CODE_TABLE_P (ret))
505 signal_simple_error ("Invalid table is found with", list);
507 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants' (exactly one argument).  After
   CHECK_CHAR on the argument, returns a fresh copy (Fcopy_list) of the
   entry stored for that character in Vcharacter_variant_table, so callers
   cannot modify the stored list through the result. */
510 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
511 Return variants of CHARACTER.
515 CHECK_CHAR (character);
516 return Fcopy_list (get_char_code_table (XCHAR (character),
517 Vcharacter_variant_table));
/* Lisp primitive `char-attribute-alist' (exactly one argument).  After
   CHECK_CHAR on the argument, returns a copy made with Fcopy_alist of the
   entry stored for that character in Vcharacter_attribute_table. */
520 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
521 Return the alist of attributes of CHARACTER.
525 CHECK_CHAR (character);
526 return Fcopy_alist (get_char_code_table (XCHAR (character),
527 Vcharacter_attribute_table));
/* Lisp primitive `get-char-attribute' (exactly two arguments).
   Fetches CHARACTER's entry from Vcharacter_attribute_table.  When
   ATTRIBUTE names a charset (Ffind_charset returns non-nil) a separate
   path is taken whose body is elided from this listing; otherwise the
   result is the cdr of the Fassq match for ATTRIBUTE in that entry. */
530 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
531 Return the value of CHARACTER's ATTRIBUTE.
533 (character, attribute))
538 CHECK_CHAR (character);
539 ret = get_char_code_table (XCHAR (character),
540 Vcharacter_attribute_table);
544 if (!NILP (ccs = Ffind_charset (attribute)))
547 return Fcdr (Fassq (attribute, ret));
/* Records ATTRIBUTE for CHARACTER in Vcharacter_attribute_table (the
   remaining parameter, presumably the value to store, is on an elided
   line).  The character's current entry is fetched and searched with
   Fassq.  Two outcomes are visible: a new (attribute . value) pair is
   consed onto the front of the entry (presumably when no cell was found --
   the test is elided), or an existing cell whose cdr is not EQ to value is
   updated in place with Fsetcdr.  The entry is written back with
   put_char_code_table; the return statement is elided. */
551 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
554 Emchar char_code = XCHAR (character);
556 = get_char_code_table (char_code, Vcharacter_attribute_table);
559 cell = Fassq (attribute, ret);
563 ret = Fcons (Fcons (attribute, value), ret);
565 else if (!EQ (Fcdr (cell), value))
567 Fsetcdr (cell, value);
569 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute' (exactly three arguments).
   Besides the generic store performed by put_char_attribute at the end,
   three kinds of ATTRIBUTE get extra bookkeeping:
     - a charset name (Ffind_charset): the charset's decoding table is
       updated so that the code point given by VALUE maps to CHARACTER;
     - Q_decomposition: CHARACTER is registered in
       Vcharacter_composition_table under the sequence given by VALUE;
     - Q_ucs: CHARACTER is added to the variant list kept in
       Vcharacter_variant_table.
   Many lines of this function (declarations, loop headers, braces and
   several conditions) are elided from this listing. */
573 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
574 Store CHARACTER's ATTRIBUTE with VALUE.
576 (character, attribute, value))
580 CHECK_CHAR (character);
581 ccs = Ffind_charset (attribute);
585 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
590 /* ad-hoc method for `ascii' */
591 if ((XCHARSET_CHARS (ccs) == 94) &&
592 (XCHARSET_BYTE_OFFSET (ccs) != 33))
593 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
595 ccs_len = XCHARSET_CHARS (ccs);
598 signal_simple_error ("Invalid value for coded-charset",
/* Look at the code point currently recorded for this character in the
   charset and clear its decoding-table slot (the surrounding loop and
   conditions are elided). */
602 rest = Fget_char_attribute (character, attribute);
609 Lisp_Object ei = Fcar (rest);
611 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
612 nv = XVECTOR_DATA(v)[i];
619 XVECTOR_DATA(v)[i] = Qnil;
620 v = XCHARSET_DECODING_TABLE (ccs);
/* Create the top-level decoding vector (the guarding condition is
   elided). */
625 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
/* For charsets with graphic == 1 VALUE is copied first, because its
   elements are rewritten below with Fsetcar. */
628 if (XCHARSET_GRAPHIC (ccs) == 1)
629 value = Fcopy_list (value);
634 Lisp_Object ei = Fcar (rest);
637 signal_simple_error ("Invalid value for coded-charset", value);
/* Each code-point element must fit in one byte. */
639 if ((i < 0) || (255 < i))
640 signal_simple_error ("Invalid value for coded-charset", value);
641 if (XCHARSET_GRAPHIC (ccs) == 1)
644 Fsetcar (rest, make_int (i));
646 i -= XCHARSET_BYTE_OFFSET (ccs);
647 nv = XVECTOR_DATA(v)[i];
/* Create a nested vector for the next level of the decoding table. */
653 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
/* Final level: the slot maps the code point to CHARACTER. */
660 XVECTOR_DATA(v)[i] = character;
/* ->decomposition: walk VALUE, converting each element with to_char_code
   and descending through (or creating) nested char-code-tables under
   Vcharacter_composition_table. */
662 else if (EQ (attribute, Q_decomposition))
664 Lisp_Object rest = value;
665 Lisp_Object table = Vcharacter_composition_table;
668 signal_simple_error ("Invalid value for ->decomposition",
673 Lisp_Object v = Fcar (rest);
676 = to_char_code (v, "Invalid value for ->decomposition", value);
681 put_char_code_table (c, character, table);
686 ntable = get_char_code_table (c, table);
687 if (!CHAR_CODE_TABLE_P (ntable))
689 ntable = make_char_code_table (Qnil);
690 put_char_code_table (c, ntable, table);
/* ->ucs: add CHARACTER to the variant list stored for code c, unless
   Fmemq already finds it there. */
696 else if (EQ (attribute, Q_ucs))
702 signal_simple_error ("Invalid value for ->ucs", value);
706 ret = get_char_code_table (c, Vcharacter_variant_table);
707 if (NILP (Fmemq (character, ret)))
709 put_char_code_table (c, Fcons (character, ret),
710 Vcharacter_variant_table);
/* In every case the attribute itself is stored on the character. */
713 return put_char_attribute (character, attribute, value);
718 EXFUN (Fmake_char, 3);
/* Lisp primitive `define-char' (exactly one argument, the alist
   ATTRIBUTES).
   The character to define is determined in one of these ways; the
   conditions choosing between them are partly elided from this listing:
     - the first attribute whose key names a charset (Ffind_charset) is
       turned into a character with Fmake_char;
     - otherwise, a Q_ucs entry yields make_char (code + 0x100000);
     - otherwise the integer stored under Qucs is used directly.
   Non-cons elements and non-integer codes are rejected with "Invalid
   argument".  After the setup_attributes label every (key . value) pair is
   applied with Fput_char_attribute, and finally the character's entry in
   Vcharacter_attribute_table is looked up (presumably as the return value
   -- TODO confirm). */
720 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
721 Store character's ATTRIBUTES.
725 Lisp_Object rest = attributes;
726 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
727 Lisp_Object character;
733 Lisp_Object cell = Fcar (rest);
737 signal_simple_error ("Invalid argument", attributes);
738 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
741 character = Fmake_char (ccs, Fcar (cell),
743 goto setup_attributes;
747 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
750 signal_simple_error ("Invalid argument", attributes);
752 character = make_char (XINT (code) + 0x100000);
753 goto setup_attributes;
757 else if (!INTP (code))
758 signal_simple_error ("Invalid argument", attributes);
760 character = make_char (XINT (code));
766 Lisp_Object cell = Fcar (rest);
769 signal_simple_error ("Invalid argument", attributes);
770 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
774 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
777 Lisp_Object Vutf_2000_version;
781 int leading_code_private_11;
784 Lisp_Object Qcharsetp;
786 /* Qdoc_string, Qdimension, Qchars defined in general.c */
787 Lisp_Object Qregistry, Qfinal, Qgraphic;
788 Lisp_Object Qdirection;
789 Lisp_Object Qreverse_direction_charset;
790 Lisp_Object Qleading_byte;
791 Lisp_Object Qshort_name, Qlong_name;
807 Qjapanese_jisx0208_1978,
810 Qjapanese_jisx0208_1990,
820 Qvietnamese_viscii_lower,
821 Qvietnamese_viscii_upper,
850 Lisp_Object Ql2r, Qr2l;
852 Lisp_Object Vcharset_hash_table;
855 static Charset_ID next_allocated_leading_byte;
857 static Charset_ID next_allocated_1_byte_leading_byte;
858 static Charset_ID next_allocated_2_byte_leading_byte;
861 /* Composite characters are characters constructed by overstriking two
862 or more regular characters.
864 1) The old Mule implementation involves storing composite characters
865 in a buffer as a tag followed by all of the actual characters
866 used to make up the composite character. I think this is a bad
867 idea; it greatly complicates code that wants to handle strings
868 one character at a time because it has to deal with the possibility
869 of great big ungainly characters. It's much more reasonable to
870 simply store an index into a table of composite characters.
872 2) The current implementation only allows for 16,384 separate
873 composite characters over the lifetime of the XEmacs process.
874 This could become a potential problem if the user
875 edited lots of different files that use composite characters.
876 Due to FSF bogosity, increasing the number of allowable
877 composite characters under Mule would decrease the number
878 of possible faces that can exist. Mule already has shrunk
879 this to 2048, and further shrinkage would become uncomfortable.
880 No such problems exist in XEmacs.
882 Composite characters could be represented as 0x80 C1 C2 C3,
883 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
884 for slightly under 2^20 (one million) composite characters
885 over the XEmacs process lifetime, and you only need to
886 increase the size of a Mule character from 19 to 21 bits.
887 Or you could use 0x80 C1 C2 C3 C4, allowing for about
888 85 million (slightly over 2^26) composite characters. */
891 /************************************************************************/
892 /* Basic Emchar functions */
893 /************************************************************************/
/* NOTE(review): two alternative encoders are visible in the body below and
   the preprocessor conditional that selects between them is elided from
   this listing.
   The first writes a UTF-8-style sequence whose length depends on the
   magnitude of C: thresholds 0x7ff, 0xffff, 0x1fffff and 0x3ffffff select
   2, 3, 4 and 5 bytes, and anything larger takes 6.  The lead byte carries
   the prefix 0xc0 / 0xe0 / 0xf0 / 0xf8 / 0xfc and every following byte
   carries six payload bits ORed with 0x80.
   The second splits C with BREAKUP_CHAR, takes its leading byte with
   CHAR_LEADING_BYTE and, for private leading bytes, first emits
   PRIVATE_LEADING_BYTE_PREFIX; the remaining stores are elided. */
895 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
896 string in STR. Returns the number of bytes stored.
897 Do not call this directly. Use the macro set_charptr_emchar() instead.
901 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
916 else if ( c <= 0x7ff )
918 *p++ = (c >> 6) | 0xc0;
919 *p++ = (c & 0x3f) | 0x80;
921 else if ( c <= 0xffff )
923 *p++ = (c >> 12) | 0xe0;
924 *p++ = ((c >> 6) & 0x3f) | 0x80;
925 *p++ = (c & 0x3f) | 0x80;
927 else if ( c <= 0x1fffff )
929 *p++ = (c >> 18) | 0xf0;
930 *p++ = ((c >> 12) & 0x3f) | 0x80;
931 *p++ = ((c >> 6) & 0x3f) | 0x80;
932 *p++ = (c & 0x3f) | 0x80;
934 else if ( c <= 0x3ffffff )
936 *p++ = (c >> 24) | 0xf8;
937 *p++ = ((c >> 18) & 0x3f) | 0x80;
938 *p++ = ((c >> 12) & 0x3f) | 0x80;
939 *p++ = ((c >> 6) & 0x3f) | 0x80;
940 *p++ = (c & 0x3f) | 0x80;
944 *p++ = (c >> 30) | 0xfc;
945 *p++ = ((c >> 24) & 0x3f) | 0x80;
946 *p++ = ((c >> 18) & 0x3f) | 0x80;
947 *p++ = ((c >> 12) & 0x3f) | 0x80;
948 *p++ = ((c >> 6) & 0x3f) | 0x80;
949 *p++ = (c & 0x3f) | 0x80;
952 BREAKUP_CHAR (c, charset, c1, c2);
953 lb = CHAR_LEADING_BYTE (c);
954 if (LEADING_BYTE_PRIVATE_P (lb))
955 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
957 if (EQ (charset, Vcharset_control_1))
966 /* Return the first character from a Mule-encoded string in STR,
967 assuming it's non-ASCII. Do not call this directly.
968 Use the macro charptr_emchar() instead. */
/* NOTE(review): two alternative decoders are visible below; the
   preprocessor conditional selecting between them is elided from this
   listing.
   The first classifies the lead byte b by the thresholds 0xf8, 0xf0, 0xe0
   and 0xc0 (the values set in each branch are elided), then loops len
   times shifting ch left by six bits and ORing in the low six bits of the
   next byte.
   The second is leading-byte based: LEADING_BYTE_CONTROL_1 yields the next
   byte minus 0x20; otherwise the charset is found with
   CHARSET_BY_LEADING_BYTE and the result is built with MAKE_CHAR, reading
   a second position byte when the charset's dimension is 2. */
971 non_ascii_charptr_emchar (CONST Bufbyte *str)
984 else if ( b >= 0xf8 )
989 else if ( b >= 0xf0 )
994 else if ( b >= 0xe0 )
999 else if ( b >= 0xc0 )
1009 for( ; len > 0; len-- )
1012 ch = ( ch << 6 ) | ( b & 0x3f );
1016 Bufbyte i0 = *str, i1, i2 = 0;
1017 Lisp_Object charset;
1019 if (i0 == LEADING_BYTE_CONTROL_1)
1020 return (Emchar) (*++str - 0x20);
1022 if (LEADING_BYTE_PREFIX_P (i0))
1027 charset = CHARSET_BY_LEADING_BYTE (i0);
1028 if (XCHARSET_DIMENSION (charset) == 2)
1031 return MAKE_CHAR (charset, i1, i2);
1035 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1036 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3.  Two cases are
   visible below: one validates field 2 against the official and private
   FIELD2 ranges, the other validates field 1 against the FIELD1 ranges.
   The test that chooses between them and all early `return' statements
   are elided from this listing.  In both cases the charset is finally
   looked up by leading byte; a Qnil result is tested for, and the visible
   final return is whether the charset has 96 characters per dimension. */
1040 non_ascii_valid_char_p (Emchar ch)
1044 /* Must have only lowest 19 bits set */
1048 f1 = CHAR_FIELD1 (ch);
1049 f2 = CHAR_FIELD2 (ch);
1050 f3 = CHAR_FIELD3 (ch);
1054 Lisp_Object charset;
/* Field 2 must lie in either the official or the private range. */
1056 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1057 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1058 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position bytes other than 0x20 and 0x7F in an official charset take an
   elided branch here. */
1063 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1064 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1068 NOTE: This takes advantage of the fact that
1069 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1070 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1072 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1073 if (EQ (charset, Qnil))
1075 return (XCHARSET_CHARS (charset) == 96);
1079 Lisp_Object charset;
/* Field 1 must lie in either the official or the private range. */
1081 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1082 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1083 f1 > MAX_CHAR_FIELD1_PRIVATE)
1085 if (f2 < 0x20 || f3 < 0x20)
1088 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the char->string hash table. */
1089 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1091 if (UNBOUNDP (Fgethash (make_int (ch),
1092 Vcomposite_char_char2string_hash_table,
1097 #endif /* ENABLE_COMPOSITE_CHARS */
1099 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1100 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private field-1 values use different leading-byte
   offsets. */
1103 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1105 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1108 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1110 if (EQ (charset, Qnil))
1112 return (XCHARSET_CHARS (charset) == 96);
1118 /************************************************************************/
1119 /* Basic string functions */
1120 /************************************************************************/
1122 /* Copy the character pointed to by PTR into STR, assuming it's
1123 non-ASCII. Do not call this directly. Use the macro
1124 charptr_copy_char() instead. */
/* The switch dispatches on the representation length of the first byte
   already stored at STR (the statement that copies that first byte is
   elided from this listing) and falls through the cases so that exactly
   the remaining bytes are copied.  The value returned is the number of
   bytes written, computed as strptr + 1 - str. */
1127 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1129 Bufbyte *strptr = str;
1131 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1133 /* Notice fallthrough. */
1135 case 6: *++strptr = *ptr++;
1136 case 5: *++strptr = *ptr++;
1138 case 4: *++strptr = *ptr++;
1139 case 3: *++strptr = *ptr++;
1140 case 2: *++strptr = *ptr;
1145 return strptr + 1 - str;
1149 /************************************************************************/
1150 /* streams of Emchars */
1151 /************************************************************************/
1153 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1154 The functions below are not meant to be called directly; use
1155 the macros in insdel.h. */
/* Completes the reading of a multi-byte character from STREAM whose first
   byte CH has already been read.  CH is stored in a local buffer of
   MAX_EMCHAR_LEN bytes, the remaining bytes are fetched one at a time with
   Lstream_getc in a fall-through switch keyed on the representation length
   of the first byte, and the assembled bytes are decoded with
   charptr_emchar.  The case labels and whatever checks follow each
   Lstream_getc call are elided from this listing. */
1158 Lstream_get_emchar_1 (Lstream *stream, int ch)
1160 Bufbyte str[MAX_EMCHAR_LEN];
1161 Bufbyte *strptr = str;
1163 str[0] = (Bufbyte) ch;
1164 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1166 /* Notice fallthrough. */
1169 ch = Lstream_getc (stream);
1171 *++strptr = (Bufbyte) ch;
1173 ch = Lstream_getc (stream);
1175 *++strptr = (Bufbyte) ch;
1178 ch = Lstream_getc (stream);
1180 *++strptr = (Bufbyte) ch;
1182 ch = Lstream_getc (stream);
1184 *++strptr = (Bufbyte) ch;
1186 ch = Lstream_getc (stream);
1188 *++strptr = (Bufbyte) ch;
1193 return charptr_emchar (str);
/* Writes the character CH to STREAM: CH is encoded into a local buffer
   with set_charptr_emchar and the resulting bytes are passed to
   Lstream_write, whose result is returned. */
1197 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1199 Bufbyte str[MAX_EMCHAR_LEN];
1200 Bytecount len = set_charptr_emchar (str, ch);
1201 return Lstream_write (stream, str, len);
/* Pushes the character CH back onto STREAM: CH is encoded into a local
   buffer with set_charptr_emchar and the resulting bytes are handed to
   Lstream_unread. */
1205 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1207 Bufbyte str[MAX_EMCHAR_LEN];
1208 Bytecount len = set_charptr_emchar (str, ch);
1209 Lstream_unread (stream, str, len);
1213 /************************************************************************/
1214 /* charset object */
1215 /************************************************************************/
/* Mark routine registered for the charset lrecord type: calls mark_object
   on the short name, long name, doc string, registry, CCL program and
   decoding table of the charset.  The return statement (and any slot
   handed back to the caller) is elided from this listing. */
1218 mark_charset (Lisp_Object obj)
1220 struct Lisp_Charset *cs = XCHARSET (obj);
1222 mark_object (cs->short_name);
1223 mark_object (cs->long_name);
1224 mark_object (cs->doc_string);
1225 mark_object (cs->registry);
1226 mark_object (cs->ccl_program);
1228 mark_object (cs->decoding_table);
/* Print routine registered for the charset lrecord type.  Output has the
   form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING TYPE DIR cols=N gN
       final='C' reg=REGISTRY 0xUID>
   and is written to PRINTCHARFUN.  An error "printing unreadable object"
   is raised under a condition elided from this listing (presumably when
   readable printing is requested -- TODO confirm).
   NOTE(review): the declaration of buf is elided; both sprintf calls are
   unbounded, so confirm that the buffer is large enough for the longest
   possible expansion. */
1234 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1236 struct Lisp_Charset *cs = XCHARSET (obj);
1240 error ("printing unreadable object #<charset %s 0x%x>",
1241 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1244 write_c_string ("#<charset ", printcharfun);
1245 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1246 write_c_string (" ", printcharfun);
1247 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1248 write_c_string (" ", printcharfun);
1249 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1250 write_c_string (" ", printcharfun);
1251 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Type, direction, column count, graphic register and final byte. */
1252 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1253 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1254 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1255 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1257 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1258 CHARSET_COLUMNS (cs),
1259 CHARSET_GRAPHIC (cs),
1260 CHARSET_FINAL (cs));
1261 write_c_string (buf, printcharfun);
1262 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close with the object's uid taken from the record header. */
1263 sprintf (buf, " 0x%x>", cs->header.uid);
1264 write_c_string (buf, printcharfun);
/* Layout description for struct Lisp_Charset.  Two entries are visible:
   one for 7 Lisp_Object slots starting at the name member and one for 2
   Lisp_Object slots starting at decoding_table.
   NOTE(review): these look like alternatives chosen by an elided
   preprocessor conditional -- confirm against the full file. */
1267 static const struct lrecord_description charset_description[] = {
1268 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1270 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Registers the "charset" lrecord type with its mark and print routines;
   the three literal 0 arguments leave the remaining method slots unset. */
1275 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1276 mark_charset, print_charset, 0, 0, 0,
1277 charset_description,
1278 struct Lisp_Charset);
1280 /* Make a new charset. */
/* Allocates a Lisp_Charset record, copies the arguments into its fields
   and derives the dimension and characters-per-dimension from TYPE.  The
   charset is then registered in the lookup tables of chlook (by attributes
   and by leading byte), in rep_bytes_by_first_byte and, under its NAME, in
   Vcharset_hash_table.
   The decoding table slot is initialised to Qnil here rather than from the
   DECODING_TABLE argument.
   NOTE(review): several statements below appear in two alternative forms
   (CHARSET_REP_BYTES as dimension + 1 or dimension + 2,
   charset_by_attributes indexed with or without direction); the
   conditionals selecting between them are elided from this listing. */
1283 make_charset (Charset_ID id, Lisp_Object name,
1284 unsigned char type, unsigned char columns, unsigned char graphic,
1285 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1286 Lisp_Object long_name, Lisp_Object doc,
1288 Lisp_Object decoding_table,
1289 Emchar ucs_min, Emchar ucs_max,
1290 Emchar code_offset, unsigned char byte_offset)
1293 struct Lisp_Charset *cs =
1294 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1295 XSETCHARSET (obj, cs);
1297 CHARSET_ID (cs) = id;
1298 CHARSET_NAME (cs) = name;
1299 CHARSET_SHORT_NAME (cs) = short_name;
1300 CHARSET_LONG_NAME (cs) = long_name;
1301 CHARSET_DIRECTION (cs) = direction;
1302 CHARSET_TYPE (cs) = type;
1303 CHARSET_COLUMNS (cs) = columns;
1304 CHARSET_GRAPHIC (cs) = graphic;
1305 CHARSET_FINAL (cs) = final;
1306 CHARSET_DOC_STRING (cs) = doc;
1307 CHARSET_REGISTRY (cs) = reg;
1308 CHARSET_CCL_PROGRAM (cs) = Qnil;
1309 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1311 CHARSET_DECODING_TABLE(cs) = Qnil;
1312 CHARSET_UCS_MIN(cs) = ucs_min;
1313 CHARSET_UCS_MAX(cs) = ucs_max;
1314 CHARSET_CODE_OFFSET(cs) = code_offset;
1315 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension and characters-per-dimension from the charset type. */
1318 switch (CHARSET_TYPE (cs))
1320 case CHARSET_TYPE_94:
1321 CHARSET_DIMENSION (cs) = 1;
1322 CHARSET_CHARS (cs) = 94;
1324 case CHARSET_TYPE_96:
1325 CHARSET_DIMENSION (cs) = 1;
1326 CHARSET_CHARS (cs) = 96;
1328 case CHARSET_TYPE_94X94:
1329 CHARSET_DIMENSION (cs) = 2;
1330 CHARSET_CHARS (cs) = 94;
1332 case CHARSET_TYPE_96X96:
1333 CHARSET_DIMENSION (cs) = 2;
1334 CHARSET_CHARS (cs) = 96;
1337 case CHARSET_TYPE_128:
1338 CHARSET_DIMENSION (cs) = 1;
1339 CHARSET_CHARS (cs) = 128;
1341 case CHARSET_TYPE_128X128:
1342 CHARSET_DIMENSION (cs) = 2;
1343 CHARSET_CHARS (cs) = 128;
1345 case CHARSET_TYPE_256:
1346 CHARSET_DIMENSION (cs) = 1;
1347 CHARSET_CHARS (cs) = 256;
1349 case CHARSET_TYPE_256X256:
1350 CHARSET_DIMENSION (cs) = 2;
1351 CHARSET_CHARS (cs) = 256;
/* Bytes in the string representation: 1 for ASCII, otherwise the
   dimension plus the number of leading bytes. */
1357 if (id == LEADING_BYTE_ASCII)
1358 CHARSET_REP_BYTES (cs) = 1;
1360 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1362 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1367 /* some charsets do not have final characters. This includes
1368 ASCII, Control-1, Composite, and the two faux private
1371 if (code_offset == 0)
1373 assert (NILP (chlook->charset_by_attributes[type][final]));
1374 chlook->charset_by_attributes[type][final] = obj;
1377 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1378 chlook->charset_by_attributes[type][final][direction] = obj;
1382 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1383 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1386 /* official leading byte */
1387 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1390 /* Some charsets are "faux" and don't have names or really exist at
1391 all except in the leading-byte table. */
1393 Fputhash (name, obj, Vcharset_hash_table);
/* Hands out the next free leading byte for a new charset of the given
   DIMENSION by post-incrementing one of the next_allocated_* counters,
   after comparing that counter with the corresponding MAX_LEADING_BYTE_*
   limit.  When no leading byte is available, an error "No more character
   sets free for this dimension" is raised with DIMENSION as its argument.
   NOTE(review): a single-counter form and a per-dimension (1-byte /
   2-byte) form are both visible; the conditionals selecting between them,
   the action taken when a limit is exceeded, and the return statement are
   elided from this listing. */
1398 get_unallocated_leading_byte (int dimension)
1403 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1406 lb = next_allocated_leading_byte++;
1410 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1413 lb = next_allocated_1_byte_leading_byte++;
1417 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1420 lb = next_allocated_2_byte_leading_byte++;
1426 ("No more character sets free for this dimension",
1427 make_int (dimension));
/* Computes the code point of character CH within CHARSET as a list of
   integers, one per dimension.
   Case 1: CH lies within [XCHARSET_UCS_MIN, XCHARSET_UCS_MAX].  Its
   distance from UCS_MIN plus the charset's code offset gives d, which is
   split into 1 to 4 digits in base XCHARSET_CHARS; each digit has the
   charset's byte offset added.
   Case 2: the charset's code offset is 0.  d is measured from the start of
   the built-in 94, 96, 94x94 or 96x96 block selected by the charset's
   final byte (relative to '0'); digits are offset by 33 for 94-character
   sets and by 32 for 96-character sets.
   The upper-bound halves of the range tests in case 2 and the value
   returned when nothing matches are elided from this listing. */
1434 range_charset_code_point (Lisp_Object charset, Emchar ch)
1438 if ((XCHARSET_UCS_MIN (charset) <= ch)
1439 && (ch <= XCHARSET_UCS_MAX (charset)))
1441 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1443 if (XCHARSET_DIMENSION (charset) == 1)
1444 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1445 else if (XCHARSET_DIMENSION (charset) == 2)
1446 return list2 (make_int (d / XCHARSET_CHARS (charset)
1447 + XCHARSET_BYTE_OFFSET (charset)),
1448 make_int (d % XCHARSET_CHARS (charset)
1449 + XCHARSET_BYTE_OFFSET (charset)));
1450 else if (XCHARSET_DIMENSION (charset) == 3)
1451 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1452 * XCHARSET_CHARS (charset))
1453 + XCHARSET_BYTE_OFFSET (charset)),
1454 make_int (d / XCHARSET_CHARS (charset)
1455 % XCHARSET_CHARS (charset)
1456 + XCHARSET_BYTE_OFFSET (charset)),
1457 make_int (d % XCHARSET_CHARS (charset)
1458 + XCHARSET_BYTE_OFFSET (charset)));
1459 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1460 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1461 * XCHARSET_CHARS (charset)
1462 * XCHARSET_CHARS (charset))
1463 + XCHARSET_BYTE_OFFSET (charset)),
1464 make_int (d / (XCHARSET_CHARS (charset)
1465 * XCHARSET_CHARS (charset))
1466 % XCHARSET_CHARS (charset)
1467 + XCHARSET_BYTE_OFFSET (charset)),
1468 make_int (d / XCHARSET_CHARS (charset)
1469 % XCHARSET_CHARS (charset)
1470 + XCHARSET_BYTE_OFFSET (charset)),
1471 make_int (d % XCHARSET_CHARS (charset)
1472 + XCHARSET_BYTE_OFFSET (charset)));
/* Charsets without a code offset live in the built-in blocks, which are
   indexed by final byte. */
1474 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1476 if (XCHARSET_DIMENSION (charset) == 1)
1478 if (XCHARSET_CHARS (charset) == 94)
1480 if (((d = ch - (MIN_CHAR_94
1481 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1483 return list1 (make_int (d + 33));
1485 else if (XCHARSET_CHARS (charset) == 96)
1487 if (((d = ch - (MIN_CHAR_96
1488 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1490 return list1 (make_int (d + 32));
1495 else if (XCHARSET_DIMENSION (charset) == 2)
1497 if (XCHARSET_CHARS (charset) == 94)
1499 if (((d = ch - (MIN_CHAR_94x94
1500 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1503 return list2 (make_int ((d / 94) + 33),
1504 make_int (d % 94 + 33));
1506 else if (XCHARSET_CHARS (charset) == 96)
1508 if (((d = ch - (MIN_CHAR_96x96
1509 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1512 return list2 (make_int ((d / 96) + 32),
1513 make_int (d % 96 + 32));
/* Split character C into a charset and position codes using only the
   hard-coded character ranges tested below.

   Every visible return builds a Lisp list whose first element is a
   charset object and whose remaining one or two elements are integer
   position codes.  The ranges are tested in sequence, so an earlier
   match takes precedence over a later one:

     - up to MAX_CHAR_BASIC_LATIN: `ascii', code C itself;
     - control-1 and Latin-1: code C & 0x7F;
     - Greek, Cyrillic, Hebrew, Thai: offset from the range start + 0x20;
     - half-width katakana: offset from the range start + 33;
     - up to MAX_CHAR_BMP: `ucs-bmp', high byte and low byte of C;
     - Daikanwa: high and low byte of the offset from MIN_CHAR_DAIKANWA;
     - the 94, 96, 94x94 and 96x96 blocks: the charset is looked up with
       CHARSET_BY_ATTRIBUTES using a final byte derived from the block
       index plus '0', always left-to-right.

   NOTE(review): the conditions guarding the control-1 and Latin-1
   returns, the return type, and whatever is returned when no range
   matches are not visible in this excerpt. */
1521 split_builtin_char (Emchar c)
1523 if (c <= MAX_CHAR_BASIC_LATIN)
1525 return list2 (Vcharset_ascii, make_int (c));
1529 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1533 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1535 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1537 return list2 (Vcharset_greek_iso8859_7,
1538 make_int (c - MIN_CHAR_GREEK + 0x20));
1540 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1542 return list2 (Vcharset_cyrillic_iso8859_5,
1543 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1545 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1547 return list2 (Vcharset_hebrew_iso8859_8,
1548 make_int (c - MIN_CHAR_HEBREW + 0x20));
1550 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1552 return list2 (Vcharset_thai_tis620,
1553 make_int (c - MIN_CHAR_THAI + 0x20));
1555 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1556 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1558 return list2 (Vcharset_katakana_jisx0201,
1559 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1561 else if (c <= MAX_CHAR_BMP)
1563 return list3 (Vcharset_ucs_bmp,
1564 make_int (c >> 8), make_int (c & 0xff));
1566 else if ((MIN_CHAR_DAIKANWA <= c) && (c <= MAX_CHAR_DAIKANWA))
1568 return list3 (Vcharset_ideograph_daikanwa,
1569 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1570 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
/* From here on only an upper bound is tested; the lower bound is implied
   by the preceding branches having failed. */
1572 else if (c <= MAX_CHAR_94)
1574 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1575 ((c - MIN_CHAR_94) / 94) + '0',
1576 CHARSET_LEFT_TO_RIGHT),
1577 make_int (((c - MIN_CHAR_94) % 94) + 33));
1579 else if (c <= MAX_CHAR_96)
1581 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1582 ((c - MIN_CHAR_96) / 96) + '0',
1583 CHARSET_LEFT_TO_RIGHT),
1584 make_int (((c - MIN_CHAR_96) % 96) + 32));
1586 else if (c <= MAX_CHAR_94x94)
1588 return list3 (CHARSET_BY_ATTRIBUTES
1589 (CHARSET_TYPE_94X94,
1590 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1591 CHARSET_LEFT_TO_RIGHT),
1592 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1593 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1595 else if (c <= MAX_CHAR_96x96)
1597 return list3 (CHARSET_BY_ATTRIBUTES
1598 (CHARSET_TYPE_96X96,
1599 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1600 CHARSET_LEFT_TO_RIGHT),
1601 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1602 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
/* Return the code point of character CH in CHARSET.

   The per-character entry for CH in Vcharacter_attribute_table is
   consulted first; when it is non-nil it is searched with Fassq for an
   element keyed by CHARSET, and the cdr of that element is returned.
   When there is no such element the result is computed from the
   charset's range properties by range_charset_code_point.  */
1611 charset_code_point (Lisp_Object charset, Emchar ch)
1613 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1615 if (!EQ (cdef, Qnil))
1617 Lisp_Object field = Fassq (charset, cdef);
1619 if (!EQ (field, Qnil))
1620 return Fcdr (field);
/* No explicit attribute for this charset: fall back to the range rules. */
1622 return range_charset_code_point (charset, ch);
1625 Lisp_Object Vdefault_coded_charset_priority_list;
1629 /************************************************************************/
1630 /* Basic charset Lisp functions */
1631 /************************************************************************/
1633 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1634 Return non-nil if OBJECT is a charset.
1638 return CHARSETP (object) ? Qt : Qnil;
1641 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1642 Retrieve the charset of the given name.
1643 If CHARSET-OR-NAME is a charset object, it is simply returned.
1644 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1645 nil is returned. Otherwise the associated charset object is returned.
1649 if (CHARSETP (charset_or_name))
1650 return charset_or_name;
1652 CHECK_SYMBOL (charset_or_name);
1653 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1656 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1657 Retrieve the charset of the given name.
1658 Same as `find-charset' except an error is signalled if there is no such
1659 charset instead of returning nil.
1663 Lisp_Object charset = Ffind_charset (name);
1666 signal_simple_error ("No such charset", name);
1670 /* We store the charsets in hash tables with the names as the key and the
1671 actual charset object as the value. Occasionally we need to use them
1672 in a list format. These routines provide us with that. */
/* Closure handed through elisp_maphash to add_charset_to_list_mapper. */
1673 struct charset_list_closure
/* Address of the Lisp list being accumulated; the mapper conses new
   elements onto the front of the list stored here. */
1675 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.

   VALUE is a charset object from the charset hash table; its name is
   pushed onto the front of the list addressed by the closure.
   CHARSET_LIST_CLOSURE is an untyped pointer to a
   struct charset_list_closure.  KEY is not used in the visible lines.

   NOTE(review): the return type and return statement are not visible
   in this excerpt. */
1679 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1680 void *charset_list_closure)
1682 /* This function can GC */
1683 struct charset_list_closure *chcl =
1684 (struct charset_list_closure*) charset_list_closure;
1685 Lisp_Object *charset_list = chcl->charset_list;
1687 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1691 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1692 Return a list of the names of all defined charsets.
1696 Lisp_Object charset_list = Qnil;
1697 struct gcpro gcpro1;
1698 struct charset_list_closure charset_list_closure;
1700 GCPRO1 (charset_list);
1701 charset_list_closure.charset_list = &charset_list;
1702 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1703 &charset_list_closure);
1706 return charset_list;
1709 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1710 Return the name of the given charset.
1714 return XCHARSET_NAME (Fget_charset (charset));
1717 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1718 Define a new character set.
1719 This function is for use with Mule support.
1720 NAME is a symbol, the name by which the character set is normally referred.
1721 DOC-STRING is a string describing the character set.
1722 PROPS is a property list, describing the specific nature of the
1723 character set. Recognized properties are:
1725 'short-name Short version of the charset name (ex: Latin-1)
1726 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1727 'registry A regular expression matching the font registry field for
1729 'dimension Number of octets used to index a character in this charset.
1730 Either 1 or 2. Defaults to 1.
1731 'columns Number of columns used to display a character in this charset.
1732 Only used in TTY mode. (Under X, the actual width of a
1733 character can be derived from the font used to display the
1734 characters.) If unspecified, defaults to the dimension
1735 (this is almost always the correct value).
1736 'chars Number of characters in each dimension (94 or 96).
1737 Defaults to 94. Note that if the dimension is 2, the
1738 character set thus described is 94x94 or 96x96.
1739 'final Final byte of ISO 2022 escape sequence. Must be
1740 supplied. Each combination of (DIMENSION, CHARS) defines a
1741 separate namespace for final bytes. Note that ISO
1742 2022 restricts the final byte to the range
1743 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1744 dimension == 2. Note also that final bytes in the range
1745 0x30 - 0x3F are reserved for user-defined (not official)
1747 'graphic 0 (use left half of font on output) or 1 (use right half
1748 of font on output). Defaults to 0. For example, for
1749 a font whose registry is ISO8859-1, the left half
1750 (octets 0x20 - 0x7F) is the `ascii' character set, while
1751 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1752 character set. With 'graphic set to 0, the octets
1753 will have their high bit cleared; with it set to 1,
1754 the octets will have their high bit set.
1755 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1757 'ccl-program A compiled CCL program used to convert a character in
1758 this charset into an index into the font. This is in
1759 addition to the 'graphic property. The CCL program
1760 is passed the octets of the character, with the high
1761 bit cleared and set depending upon whether the value
1762 of the 'graphic property is 0 or 1.
1764 (name, doc_string, props))
1766 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1767 int direction = CHARSET_LEFT_TO_RIGHT;
1769 Lisp_Object registry = Qnil;
1770 Lisp_Object charset;
1771 Lisp_Object rest, keyword, value;
1772 Lisp_Object ccl_program = Qnil;
1773 Lisp_Object short_name = Qnil, long_name = Qnil;
1774 int byte_offset = -1;
1776 CHECK_SYMBOL (name);
1777 if (!NILP (doc_string))
1778 CHECK_STRING (doc_string);
1780 charset = Ffind_charset (name);
1781 if (!NILP (charset))
1782 signal_simple_error ("Cannot redefine existing charset", name);
1784 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1786 if (EQ (keyword, Qshort_name))
1788 CHECK_STRING (value);
1792 if (EQ (keyword, Qlong_name))
1794 CHECK_STRING (value);
1798 else if (EQ (keyword, Qdimension))
1801 dimension = XINT (value);
1802 if (dimension < 1 || dimension > 2)
1803 signal_simple_error ("Invalid value for 'dimension", value);
1806 else if (EQ (keyword, Qchars))
1809 chars = XINT (value);
1810 if (chars != 94 && chars != 96)
1811 signal_simple_error ("Invalid value for 'chars", value);
1814 else if (EQ (keyword, Qcolumns))
1817 columns = XINT (value);
1818 if (columns != 1 && columns != 2)
1819 signal_simple_error ("Invalid value for 'columns", value);
1822 else if (EQ (keyword, Qgraphic))
1825 graphic = XINT (value);
1827 if (graphic < 0 || graphic > 2)
1829 if (graphic < 0 || graphic > 1)
1831 signal_simple_error ("Invalid value for 'graphic", value);
1834 else if (EQ (keyword, Qregistry))
1836 CHECK_STRING (value);
1840 else if (EQ (keyword, Qdirection))
1842 if (EQ (value, Ql2r))
1843 direction = CHARSET_LEFT_TO_RIGHT;
1844 else if (EQ (value, Qr2l))
1845 direction = CHARSET_RIGHT_TO_LEFT;
1847 signal_simple_error ("Invalid value for 'direction", value);
1850 else if (EQ (keyword, Qfinal))
1852 CHECK_CHAR_COERCE_INT (value);
1853 final = XCHAR (value);
1854 if (final < '0' || final > '~')
1855 signal_simple_error ("Invalid value for 'final", value);
1858 else if (EQ (keyword, Qccl_program))
1860 CHECK_VECTOR (value);
1861 ccl_program = value;
1865 signal_simple_error ("Unrecognized property", keyword);
1869 error ("'final must be specified");
1870 if (dimension == 2 && final > 0x5F)
1872 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1876 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1878 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1880 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1881 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1883 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1885 id = get_unallocated_leading_byte (dimension);
1887 if (NILP (doc_string))
1888 doc_string = build_string ("");
1890 if (NILP (registry))
1891 registry = build_string ("");
1893 if (NILP (short_name))
1894 XSETSTRING (short_name, XSYMBOL (name)->name);
1896 if (NILP (long_name))
1897 long_name = doc_string;
1900 columns = dimension;
1902 if (byte_offset < 0)
1906 else if (chars == 96)
1912 charset = make_charset (id, name, type, columns, graphic,
1913 final, direction, short_name, long_name,
1914 doc_string, registry,
1915 Qnil, 0, 0, 0, byte_offset);
1916 if (!NILP (ccl_program))
1917 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1921 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1923 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1924 NEW-NAME is the name of the new charset. Return the new charset.
1926 (charset, new_name))
1928 Lisp_Object new_charset = Qnil;
1929 int id, dimension, columns, graphic, final;
1930 int direction, type;
1931 Lisp_Object registry, doc_string, short_name, long_name;
1932 struct Lisp_Charset *cs;
1934 charset = Fget_charset (charset);
1935 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1936 signal_simple_error ("Charset already has reverse-direction charset",
1939 CHECK_SYMBOL (new_name);
1940 if (!NILP (Ffind_charset (new_name)))
1941 signal_simple_error ("Cannot redefine existing charset", new_name);
1943 cs = XCHARSET (charset);
1945 type = CHARSET_TYPE (cs);
1946 columns = CHARSET_COLUMNS (cs);
1947 dimension = CHARSET_DIMENSION (cs);
1948 id = get_unallocated_leading_byte (dimension);
1950 graphic = CHARSET_GRAPHIC (cs);
1951 final = CHARSET_FINAL (cs);
1952 direction = CHARSET_RIGHT_TO_LEFT;
1953 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1954 direction = CHARSET_LEFT_TO_RIGHT;
1955 doc_string = CHARSET_DOC_STRING (cs);
1956 short_name = CHARSET_SHORT_NAME (cs);
1957 long_name = CHARSET_LONG_NAME (cs);
1958 registry = CHARSET_REGISTRY (cs);
1960 new_charset = make_charset (id, new_name, type, columns,
1961 graphic, final, direction, short_name, long_name,
1962 doc_string, registry,
1964 CHARSET_DECODING_TABLE(cs),
1965 CHARSET_UCS_MIN(cs),
1966 CHARSET_UCS_MAX(cs),
1967 CHARSET_CODE_OFFSET(cs),
1968 CHARSET_BYTE_OFFSET(cs)
1974 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1975 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1980 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1981 Define symbol ALIAS as an alias for CHARSET.
1985 CHECK_SYMBOL (alias);
1986 charset = Fget_charset (charset);
1987 return Fputhash (alias, charset, Vcharset_hash_table);
1990 /* #### Reverse direction charsets not yet implemented. */
1992 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1994 Return the reverse-direction charset parallel to CHARSET, if any.
1995 This is the charset with the same properties (in particular, the same
1996 dimension, number of characters per dimension, and final byte) as
1997 CHARSET but whose characters are displayed in the opposite direction.
2001 charset = Fget_charset (charset);
2002 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2006 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2007 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2008 If DIRECTION is omitted, both directions will be checked (left-to-right
2009 will be returned if character sets exist for both directions).
2011 (dimension, chars, final, direction))
2013 int dm, ch, fi, di = -1;
2015 Lisp_Object obj = Qnil;
2017 CHECK_INT (dimension);
2018 dm = XINT (dimension);
2019 if (dm < 1 || dm > 2)
2020 signal_simple_error ("Invalid value for DIMENSION", dimension);
2024 if (ch != 94 && ch != 96)
2025 signal_simple_error ("Invalid value for CHARS", chars);
2027 CHECK_CHAR_COERCE_INT (final);
2029 if (fi < '0' || fi > '~')
2030 signal_simple_error ("Invalid value for FINAL", final);
2032 if (EQ (direction, Ql2r))
2033 di = CHARSET_LEFT_TO_RIGHT;
2034 else if (EQ (direction, Qr2l))
2035 di = CHARSET_RIGHT_TO_LEFT;
2036 else if (!NILP (direction))
2037 signal_simple_error ("Invalid value for DIRECTION", direction);
2039 if (dm == 2 && fi > 0x5F)
2041 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2044 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2046 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2050 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2052 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2055 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2058 return XCHARSET_NAME (obj);
2062 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2063 Return short name of CHARSET.
2067 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2070 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2071 Return long name of CHARSET.
2075 return XCHARSET_LONG_NAME (Fget_charset (charset));
2078 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2079 Return description of CHARSET.
2083 return XCHARSET_DOC_STRING (Fget_charset (charset));
2086 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2087 Return dimension of CHARSET.
2091 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2094 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2095 Return property PROP of CHARSET.
2096 Recognized properties are those listed in `make-charset', as well as
2097 'name and 'doc-string.
2101 struct Lisp_Charset *cs;
2103 charset = Fget_charset (charset);
2104 cs = XCHARSET (charset);
2106 CHECK_SYMBOL (prop);
2107 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2108 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2109 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2110 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2111 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2112 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2113 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2114 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2115 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2116 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2117 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2118 if (EQ (prop, Qdirection))
2119 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2120 if (EQ (prop, Qreverse_direction_charset))
2122 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2126 return XCHARSET_NAME (obj);
2128 signal_simple_error ("Unrecognized charset property name", prop);
2129 return Qnil; /* not reached */
2132 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2133 Return charset identification number of CHARSET.
2137 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2140 /* #### We need to figure out which properties we really want to
2143 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2144 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2146 (charset, ccl_program))
2148 charset = Fget_charset (charset);
2149 CHECK_VECTOR (ccl_program);
2150 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2155 invalidate_charset_font_caches (Lisp_Object charset)
2157 /* Invalidate font cache entries for charset on all devices. */
2158 Lisp_Object devcons, concons, hash_table;
2159 DEVICE_LOOP_NO_BREAK (devcons, concons)
2161 struct device *d = XDEVICE (XCAR (devcons));
/* Look CHARSET up in this device's charset_font_cache; Qunbound is the
   default so that "no entry" can be told apart from any stored value. */
2162 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
/* An entry exists: empty that hash table in place. */
2163 if (!UNBOUNDP (hash_table))
2164 Fclrhash (hash_table);
2168 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2169 Set the 'registry property of CHARSET to REGISTRY.
2171 (charset, registry))
2173 charset = Fget_charset (charset);
2174 CHECK_STRING (registry);
2175 XCHARSET_REGISTRY (charset) = registry;
2176 invalidate_charset_font_caches (charset);
2177 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2182 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2183 Return mapping-table of CHARSET.
2187 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2190 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2191 Set mapping-table of CHARSET to TABLE.
2195 struct Lisp_Charset *cs;
2196 Lisp_Object old_table;
2199 charset = Fget_charset (charset);
2200 cs = XCHARSET (charset);
2202 if (EQ (table, Qnil))
2204 CHARSET_DECODING_TABLE(cs) = table;
2207 else if (VECTORP (table))
2211 /* ad-hoc method for `ascii' */
2212 if ((CHARSET_CHARS (cs) == 94) &&
2213 (CHARSET_BYTE_OFFSET (cs) != 33))
2214 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2216 ccs_len = CHARSET_CHARS (cs);
/* Reject a table longer than the charset can index.
   NOTE(review): the limit reported in the error is CHARSET_CHARS, while
   the limit actually tested is ccs_len; the two differ in the `ascii'
   case above -- confirm this is intended. */
2218 if (XVECTOR_LENGTH (table) > ccs_len)
2219 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
/* Keep the previous table so that the row-length check in the
   nested-vector loop below can put it back before signalling. */
2220 old_table = CHARSET_DECODING_TABLE(cs);
2221 CHARSET_DECODING_TABLE(cs) = table;
2224 signal_error (Qwrong_type_argument,
2225 list2 (build_translated_string ("vector-or-nil-p"),
2227 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
/* Walk the installed table; the visible calls hand each entry to
   put_char_attribute together with its position codes (vector index
   plus CHARSET_BYTE_OFFSET).  The `case' labels of this switch are not
   visible in this excerpt. */
2229 switch (CHARSET_DIMENSION (cs))
2232 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2234 Lisp_Object c = XVECTOR_DATA(table)[i];
2239 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2243 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2245 Lisp_Object v = XVECTOR_DATA(table)[i];
/* A row longer than CHARSET_CHARS is an error: restore the old table
   first, then signal. */
2251 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2253 CHARSET_DECODING_TABLE(cs) = old_table;
2254 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2256 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2258 Lisp_Object c = XVECTOR_DATA(v)[j];
2261 put_char_attribute (c, charset,
2264 (i + CHARSET_BYTE_OFFSET (cs)),
2266 (j + CHARSET_BYTE_OFFSET (cs))));
2270 put_char_attribute (v, charset,
2272 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2281 /************************************************************************/
2282 /* Lisp primitives for working with characters */
2283 /************************************************************************/
2285 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2286 Make a character from CHARSET and octets ARG1 and ARG2.
2287 ARG2 is required only for characters from two-dimensional charsets.
2288 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2289 character s with caron.
2291 (charset, arg1, arg2))
2293 struct Lisp_Charset *cs;
2295 int lowlim, highlim;
2297 charset = Fget_charset (charset);
2298 cs = XCHARSET (charset);
2300 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2301 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2303 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2305 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2306 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2309 /* It is useful (and safe, according to Olivier Galibert) to strip
2310 the 8th bit off ARG1 and ARG2 because it allows programmers to
2311 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2312 Latin 2 code of the character. */
2320 if (a1 < lowlim || a1 > highlim)
2321 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2323 if (CHARSET_DIMENSION (cs) == 1)
2327 ("Charset is of dimension one; second octet must be nil", arg2);
2328 return make_char (MAKE_CHAR (charset, a1, 0));
2337 a2 = XINT (arg2) & 0x7f;
2339 if (a2 < lowlim || a2 > highlim)
2340 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2342 return make_char (MAKE_CHAR (charset, a1, a2));
2345 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2346 Return the character set of char CH.
2350 CHECK_CHAR_COERCE_INT (ch);
2352 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2355 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2356 Return list of charset and one or two position-codes of CHAR.
2362 Lisp_Object charset;
2364 CHECK_CHAR_COERCE_INT (character);
2365 ret = SPLIT_CHAR (XCHAR (character));
2366 charset = Fcar (ret);
2367 if (CHARSETP (charset))
2368 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2372 /* This function can GC */
2373 struct gcpro gcpro1, gcpro2;
2374 Lisp_Object charset = Qnil;
2375 Lisp_Object rc = Qnil;
2378 GCPRO2 (charset, rc);
2379 CHECK_CHAR_COERCE_INT (character);
2381 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2383 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2385 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2389 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2397 #ifdef ENABLE_COMPOSITE_CHARS
2398 /************************************************************************/
2399 /* composite character functions */
2400 /************************************************************************/
/* Map the LEN-byte string STR to a composite character, allocating a
   new one when needed.

   STR is turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   signals "No more composite chars available" once the row counter has
   reached 128; otherwise it builds a character in Vcharset_composite
   from the next free row/column, records the pairing in both the
   char-to-string and string-to-char hash tables, and advances the
   column counter, wrapping it back to 32 and moving to the next row
   when it reaches 128.

   NOTE(review): the default argument of the lookup, the test that
   decides whether an existing mapping is reused, and the return
   statement are not visible in this excerpt. */
2403 lookup_composite_char (Bufbyte *str, int len)
2405 Lisp_Object lispstr = make_string (str, len);
2406 Lisp_Object ch = Fgethash (lispstr,
2407 Vcomposite_char_string2char_hash_table,
2413 if (composite_char_row_next >= 128)
2414 signal_simple_error ("No more composite chars available", lispstr);
2415 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2416 composite_char_col_next);
/* Register the new character in both directions. */
2417 Fputhash (make_char (emch), lispstr,
2418 Vcomposite_char_char2string_hash_table);
2419 Fputhash (lispstr, make_char (emch),
2420 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell; columns restart at 32 on a new row. */
2421 composite_char_col_next++;
2422 if (composite_char_col_next >= 128)
2424 composite_char_col_next = 32;
2425 composite_char_row_next++;
/* Look up the string registered for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assertion requires that
   an entry exists, i.e. CH must have been registered beforehand.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably the string found is returned. */
2434 composite_char_string (Emchar ch)
2436 Lisp_Object str = Fgethash (make_char (ch),
2437 Vcomposite_char_char2string_hash_table,
2439 assert (!UNBOUNDP (str));
2443 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2444 Convert a string into a single composite character.
2445 The character is the result of overstriking all the characters in
2450 CHECK_STRING (string);
2451 return make_char (lookup_composite_char (XSTRING_DATA (string),
2452 XSTRING_LENGTH (string)));
2455 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2456 Return a string of the characters comprising a composite character.
2464 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2465 signal_simple_error ("Must be composite char", ch);
2466 return composite_char_string (emch);
2468 #endif /* ENABLE_COMPOSITE_CHARS */
2471 /************************************************************************/
2472 /* initialization */
2473 /************************************************************************/
/* Register the Lisp primitives defined in this file (DEFSUBR) and create
   the symbols it refers to (defsymbol).

   NOTE(review): Qfinal is passed to defsymbol twice below, both times
   with the name "final" -- presumably a harmless duplicate, confirm.
   Any preprocessor conditionals that originally separated groups of
   these registrations, apart from the ENABLE_COMPOSITE_CHARS one, are
   not visible in this excerpt. */
2476 syms_of_mule_charset (void)
/* Lisp-visible primitives. */
2478 DEFSUBR (Fcharsetp);
2479 DEFSUBR (Ffind_charset);
2480 DEFSUBR (Fget_charset);
2481 DEFSUBR (Fcharset_list);
2482 DEFSUBR (Fcharset_name);
2483 DEFSUBR (Fmake_charset);
2484 DEFSUBR (Fmake_reverse_direction_charset);
2485 /* DEFSUBR (Freverse_direction_charset); */
2486 DEFSUBR (Fdefine_charset_alias);
2487 DEFSUBR (Fcharset_from_attributes);
2488 DEFSUBR (Fcharset_short_name);
2489 DEFSUBR (Fcharset_long_name);
2490 DEFSUBR (Fcharset_description);
2491 DEFSUBR (Fcharset_dimension);
2492 DEFSUBR (Fcharset_property);
2493 DEFSUBR (Fcharset_id);
2494 DEFSUBR (Fset_charset_ccl_program);
2495 DEFSUBR (Fset_charset_registry);
2497 DEFSUBR (Fchar_attribute_alist);
2498 DEFSUBR (Fget_char_attribute);
2499 DEFSUBR (Fput_char_attribute);
2500 DEFSUBR (Fdefine_char);
2501 DEFSUBR (Fchar_variants);
2502 DEFSUBR (Fget_composite_char);
2503 DEFSUBR (Fcharset_mapping_table);
2504 DEFSUBR (Fset_charset_mapping_table);
2507 DEFSUBR (Fmake_char);
2508 DEFSUBR (Fchar_charset);
2509 DEFSUBR (Fsplit_char);
2511 #ifdef ENABLE_COMPOSITE_CHARS
2512 DEFSUBR (Fmake_composite_char);
2513 DEFSUBR (Fcomposite_char_string);
/* Symbols for the charset predicate and for charset property names. */
2516 defsymbol (&Qcharsetp, "charsetp");
2517 defsymbol (&Qregistry, "registry");
2518 defsymbol (&Qfinal, "final");
2519 defsymbol (&Qgraphic, "graphic");
2520 defsymbol (&Qdirection, "direction");
2521 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2522 defsymbol (&Qshort_name, "short-name");
2523 defsymbol (&Qlong_name, "long-name");
/* The two values accepted for the 'direction property. */
2525 defsymbol (&Ql2r, "l2r");
2526 defsymbol (&Qr2l, "r2l");
2528 /* Charsets, compatible with FSF 20.3
2529 Naming convention is Script-Charset[-Edition] */
2530 defsymbol (&Qascii, "ascii");
2531 defsymbol (&Qcontrol_1, "control-1");
2532 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2533 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2534 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2535 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2536 defsymbol (&Qthai_tis620, "thai-tis620");
2537 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2538 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2539 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2540 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2541 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2542 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2543 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2544 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2545 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2546 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2547 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2548 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2549 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2550 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2551 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* Further symbols: character-attribute keys, what look like
   decomposition tags (presumably Unicode compatibility tags -- confirm),
   and additional charset names. */
2553 defsymbol (&Q_ucs, "->ucs");
2554 defsymbol (&Q_decomposition, "->decomposition");
2555 defsymbol (&Qcompat, "compat");
2556 defsymbol (&Qisolated, "isolated");
2557 defsymbol (&Qinitial, "initial");
2558 defsymbol (&Qmedial, "medial");
2559 defsymbol (&Qfinal, "final");
2560 defsymbol (&Qvertical, "vertical");
2561 defsymbol (&QnoBreak, "noBreak");
2562 defsymbol (&Qfraction, "fraction");
2563 defsymbol (&Qsuper, "super");
2564 defsymbol (&Qsub, "sub");
2565 defsymbol (&Qcircle, "circle");
2566 defsymbol (&Qsquare, "square");
2567 defsymbol (&Qwide, "wide");
2568 defsymbol (&Qnarrow, "narrow");
2569 defsymbol (&Qsmall, "small");
2570 defsymbol (&Qfont, "font");
2571 defsymbol (&Qucs, "ucs");
2572 defsymbol (&Qucs_bmp, "ucs-bmp");
2573 defsymbol (&Qlatin_viscii, "latin-viscii");
2574 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2575 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2576 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2577 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2578 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2579 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2580 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2581 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2582 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2583 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2584 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2585 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2586 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2587 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2588 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2589 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2590 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2591 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2592 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2593 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2594 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2595 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2596 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2597 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2598 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2599 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2600 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2602 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2603 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2605 defsymbol (&Qcomposite, "composite");
/* Initialise this file's global state: allocate the charset lookup
   structure and fill its tables with Qnil, reset the counters used to
   hand out private leading bytes, define the Lisp variables declared
   with DEFVAR_INT / DEFVAR_LISP below, and staticpro and create the
   character attribute, composition and variant tables.

   NOTE(review): charset_by_attributes is initialised below both as a
   two-level and as a three-level table, and leading_code_private_11 is
   assigned twice; presumably these belong to alternative preprocessor
   branches whose directives are not visible in this excerpt. */
2609 vars_of_mule_charset (void)
/* Allocate the lookup structure and hand its address and description
   to dumpstruct. */
2616 chlook = xnew (struct charset_lookup);
2617 dumpstruct (&chlook, &charset_lookup_description);
2619 /* Table of charsets indexed by leading byte. */
2620 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2621 chlook->charset_by_leading_byte[i] = Qnil;
2624 /* Table of charsets indexed by type/final-byte. */
2625 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2626 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2627 chlook->charset_by_attributes[i][j] = Qnil;
2629 /* Table of charsets indexed by type/final-byte/direction. */
2630 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2631 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2632 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2633 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Private leading bytes are handed out upwards from the bottom of each
   private range. */
2637 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2639 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2640 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2644 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2645 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2646 Leading-code of private TYPE9N charset of column-width 1.
2648 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2652 Vutf_2000_version = build_string("0.13 (Takaida)");
2653 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2654 Version number of UTF-2000.
2657 staticpro (&Vcharacter_attribute_table);
2658 Vcharacter_attribute_table = make_char_code_table (Qnil);
2660 staticpro (&Vcharacter_composition_table);
2661 Vcharacter_composition_table = make_char_code_table (Qnil);
2663 staticpro (&Vcharacter_variant_table);
2664 Vcharacter_variant_table = make_char_code_table (Qnil);
2666 Vdefault_coded_charset_priority_list = Qnil;
2667 DEFVAR_LISP ("default-coded-charset-priority-list",
2668 &Vdefault_coded_charset_priority_list /*
2669 Default order of preferred coded-character-sets.
/* Create the charset hash table and the predefined charset objects.

   The function first staticpro's Vcharset_hash_table and fills it with a
   table from make_lisp_hash_table (50, HASH_TABLE_NON_WEAK,
   HASH_TABLE_EQ).  It then goes through the predefined charsets one by
   one: each Vcharset_* variable is passed to staticpro and a charset is
   built with make_charset.  The UCS BMP entry comes first and is given
   the range 0 .. 0xFFFF.  The MIN_CHAR_* / MAX_CHAR_* macros for Thai,
   Greek, Hebrew, half-width Katakana and Cyrillic are defined as 0 here.

   NOTE(review): short lines (return type, braces, some `Vcharset_x ='
   assignment lines, preprocessor conditionals, the end of the
   "Predefined character sets" comment) are not present in this copy;
   confirm against the upstream file.  */
2675 complex_vars_of_mule_charset (void)
2677 staticpro (&Vcharset_hash_table);
2678 Vcharset_hash_table =
2679 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2681 /* Predefined character sets. We store them into variables for
2685 staticpro (&Vcharset_ucs_bmp);
2687 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2688 CHARSET_TYPE_256X256, 1, 2, 0,
2689 CHARSET_LEFT_TO_RIGHT,
2690 build_string ("BMP"),
2691 build_string ("BMP"),
2692 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2693 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2694 Qnil, 0, 0xFFFF, 0, 0);
2696 # define MIN_CHAR_THAI 0
2697 # define MAX_CHAR_THAI 0
2698 # define MIN_CHAR_GREEK 0
2699 # define MAX_CHAR_GREEK 0
2700 # define MIN_CHAR_HEBREW 0
2701 # define MAX_CHAR_HEBREW 0
2702 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2703 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2704 # define MIN_CHAR_CYRILLIC 0
2705 # define MAX_CHAR_CYRILLIC 0
/* ASCII: a CHARSET_TYPE_94, left-to-right charset built with
   LEADING_BYTE_ASCII / Qascii and the numeric range 0 .. 0x7F.  The
   four build_string arguments are presumably short name, long name,
   doc string and font-registry regexp, in that order (confirm against
   make_charset).  The second string is plain "ASCII"; it previously
   carried a stray closing parenthesis.
   NOTE(review): the `Vcharset_ascii =' line that would receive the
   make_charset result is not present in this copy -- confirm against
   upstream.  */
2707 staticpro (&Vcharset_ascii);
2709 make_charset (LEADING_BYTE_ASCII, Qascii,
2710 CHARSET_TYPE_94, 1, 0, 'B',
2711 CHARSET_LEFT_TO_RIGHT,
2712 build_string ("ASCII"),
2713 build_string ("ASCII"),
2714 build_string ("ASCII (ISO646 IRV)"),
2715 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2716 Qnil, 0, 0x7F, 0, 0);
/* Control-1 (C1): a CHARSET_TYPE_94, left-to-right charset built with
   LEADING_BYTE_CONTROL_1 / Qcontrol_1.  The numeric range passed at the
   end is 0x80 .. 0x9F, i.e. 128 .. 159, and the descriptive string now
   states that same range (it used to read "128-191", which disagreed
   with the range arguments).
   NOTE(review): the line between the description string and the final
   argument line is not present in this copy -- confirm against
   upstream.  */
2717 staticpro (&Vcharset_control_1);
2718 Vcharset_control_1 =
2719 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2720 CHARSET_TYPE_94, 1, 1, 0,
2721 CHARSET_LEFT_TO_RIGHT,
2722 build_string ("C1"),
2723 build_string ("Control characters"),
2724 build_string ("Control characters 128-159"),
2726 Qnil, 0x80, 0x9F, 0, 0);
/* Single-byte national charsets (ISO 8859 parts, TIS620, JIS X0201)
   followed by JIS X0208:1978.  Every entry follows the same shape: the
   Vcharset_* variable is passed to staticpro, then assigned the result
   of make_charset.  The four build_string arguments are presumably short
   name, long name, doc string and font-registry regexp, in that order
   (confirm against make_charset).
   NOTE(review): for most entries the final argument line of the
   make_charset call is not present in this copy.  */

/* ISO 8859-1; the visible range arguments are 0xA0 .. 0xFF.  */
2727 staticpro (&Vcharset_latin_iso8859_1);
2728 Vcharset_latin_iso8859_1 =
2729 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2730 CHARSET_TYPE_96, 1, 1, 'A',
2731 CHARSET_LEFT_TO_RIGHT,
2732 build_string ("Latin-1"),
2733 build_string ("ISO8859-1 (Latin-1)"),
2734 build_string ("ISO8859-1 (Latin-1)"),
2735 build_string ("iso8859-1"),
2736 Qnil, 0xA0, 0xFF, 0, 32);
/* ISO 8859-2.  */
2737 staticpro (&Vcharset_latin_iso8859_2);
2738 Vcharset_latin_iso8859_2 =
2739 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2740 CHARSET_TYPE_96, 1, 1, 'B',
2741 CHARSET_LEFT_TO_RIGHT,
2742 build_string ("Latin-2"),
2743 build_string ("ISO8859-2 (Latin-2)"),
2744 build_string ("ISO8859-2 (Latin-2)"),
2745 build_string ("iso8859-2"),
/* ISO 8859-3.  */
2747 staticpro (&Vcharset_latin_iso8859_3);
2748 Vcharset_latin_iso8859_3 =
2749 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2750 CHARSET_TYPE_96, 1, 1, 'C',
2751 CHARSET_LEFT_TO_RIGHT,
2752 build_string ("Latin-3"),
2753 build_string ("ISO8859-3 (Latin-3)"),
2754 build_string ("ISO8859-3 (Latin-3)"),
2755 build_string ("iso8859-3"),
/* ISO 8859-4.  */
2757 staticpro (&Vcharset_latin_iso8859_4);
2758 Vcharset_latin_iso8859_4 =
2759 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2760 CHARSET_TYPE_96, 1, 1, 'D',
2761 CHARSET_LEFT_TO_RIGHT,
2762 build_string ("Latin-4"),
2763 build_string ("ISO8859-4 (Latin-4)"),
2764 build_string ("ISO8859-4 (Latin-4)"),
2765 build_string ("iso8859-4"),
/* TIS620 (Thai); range taken from MIN_CHAR_THAI / MAX_CHAR_THAI.  */
2767 staticpro (&Vcharset_thai_tis620);
2768 Vcharset_thai_tis620 =
2769 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2770 CHARSET_TYPE_96, 1, 1, 'T',
2771 CHARSET_LEFT_TO_RIGHT,
2772 build_string ("TIS620"),
2773 build_string ("TIS620 (Thai)"),
2774 build_string ("TIS620.2529 (Thai)"),
2775 build_string ("tis620"),
2776 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
/* ISO 8859-7 (Greek); range taken from MIN_CHAR_GREEK / MAX_CHAR_GREEK.  */
2777 staticpro (&Vcharset_greek_iso8859_7);
2778 Vcharset_greek_iso8859_7 =
2779 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2780 CHARSET_TYPE_96, 1, 1, 'F',
2781 CHARSET_LEFT_TO_RIGHT,
2782 build_string ("ISO8859-7"),
2783 build_string ("ISO8859-7 (Greek)"),
2784 build_string ("ISO8859-7 (Greek)"),
2785 build_string ("iso8859-7"),
2786 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
/* ISO 8859-6 (Arabic); registered with CHARSET_RIGHT_TO_LEFT.  */
2787 staticpro (&Vcharset_arabic_iso8859_6);
2788 Vcharset_arabic_iso8859_6 =
2789 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2790 CHARSET_TYPE_96, 1, 1, 'G',
2791 CHARSET_RIGHT_TO_LEFT,
2792 build_string ("ISO8859-6"),
2793 build_string ("ISO8859-6 (Arabic)"),
2794 build_string ("ISO8859-6 (Arabic)"),
2795 build_string ("iso8859-6"),
/* ISO 8859-8 (Hebrew); registered with CHARSET_RIGHT_TO_LEFT.  */
2797 staticpro (&Vcharset_hebrew_iso8859_8);
2798 Vcharset_hebrew_iso8859_8 =
2799 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2800 CHARSET_TYPE_96, 1, 1, 'H',
2801 CHARSET_RIGHT_TO_LEFT,
2802 build_string ("ISO8859-8"),
2803 build_string ("ISO8859-8 (Hebrew)"),
2804 build_string ("ISO8859-8 (Hebrew)"),
2805 build_string ("iso8859-8"),
2806 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
/* JIS X0201 Kana.  Uses the same registry pattern string as the
   JIS X0201 Roman entry that follows.  */
2807 staticpro (&Vcharset_katakana_jisx0201);
2808 Vcharset_katakana_jisx0201 =
2809 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2810 CHARSET_TYPE_94, 1, 1, 'I',
2811 CHARSET_LEFT_TO_RIGHT,
2812 build_string ("JISX0201 Kana"),
2813 build_string ("JISX0201.1976 (Japanese Kana)"),
2814 build_string ("JISX0201.1976 Japanese Kana"),
2815 build_string ("jisx0201\\.1976"),
/* JIS X0201 Roman.  */
2817 staticpro (&Vcharset_latin_jisx0201);
2818 Vcharset_latin_jisx0201 =
2819 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2820 CHARSET_TYPE_94, 1, 0, 'J',
2821 CHARSET_LEFT_TO_RIGHT,
2822 build_string ("JISX0201 Roman"),
2823 build_string ("JISX0201.1976 (Japanese Roman)"),
2824 build_string ("JISX0201.1976 Japanese Roman"),
2825 build_string ("jisx0201\\.1976"),
/* ISO 8859-5 (Cyrillic); range taken from MIN_CHAR_CYRILLIC /
   MAX_CHAR_CYRILLIC.  */
2827 staticpro (&Vcharset_cyrillic_iso8859_5);
2828 Vcharset_cyrillic_iso8859_5 =
2829 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2830 CHARSET_TYPE_96, 1, 1, 'L',
2831 CHARSET_LEFT_TO_RIGHT,
2832 build_string ("ISO8859-5"),
2833 build_string ("ISO8859-5 (Cyrillic)"),
2834 build_string ("ISO8859-5 (Cyrillic)"),
2835 build_string ("iso8859-5"),
2836 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
/* ISO 8859-9 (named "Latin-5" in its strings).  */
2837 staticpro (&Vcharset_latin_iso8859_9);
2838 Vcharset_latin_iso8859_9 =
2839 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2840 CHARSET_TYPE_96, 1, 1, 'M',
2841 CHARSET_LEFT_TO_RIGHT,
2842 build_string ("Latin-5"),
2843 build_string ("ISO8859-9 (Latin-5)"),
2844 build_string ("ISO8859-9 (Latin-5)"),
2845 build_string ("iso8859-9"),
/* JIS X0208:1978, the first CHARSET_TYPE_94X94 entry.  Its registry
   pattern accepts either the jisx0208 or the jisc6226 spelling followed
   by ".1978".  */
2847 staticpro (&Vcharset_japanese_jisx0208_1978);
2848 Vcharset_japanese_jisx0208_1978 =
2849 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2850 CHARSET_TYPE_94X94, 2, 0, '@',
2851 CHARSET_LEFT_TO_RIGHT,
2852 build_string ("JIS X0208:1978"),
2853 build_string ("JIS X0208:1978 (Japanese)"),
2855 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2856 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB2312 (simplified Chinese): a CHARSET_TYPE_94X94, left-to-right
   charset built with LEADING_BYTE_CHINESE_GB2312 / Qchinese_gb2312.
   The four build_string arguments are presumably short name, long name,
   doc string and font-registry regexp (confirm against make_charset).
   The second string is plain "GB2312"; it previously carried a stray
   closing parenthesis.
   NOTE(review): the final argument line of the make_charset call is not
   present in this copy.  */
2858 staticpro (&Vcharset_chinese_gb2312);
2859 Vcharset_chinese_gb2312 =
2860 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2861 CHARSET_TYPE_94X94, 2, 0, 'A',
2862 CHARSET_LEFT_TO_RIGHT,
2863 build_string ("GB2312"),
2864 build_string ("GB2312"),
2865 build_string ("GB2312 Chinese simplified"),
2866 build_string ("gb2312"),
/* JIS X0208:1983 and JIS X0208:1990.  Both are CHARSET_TYPE_94X94,
   left-to-right; each variable is passed to staticpro and then assigned
   the result of make_charset.  The 1983 entry passes 'B' where the 1990
   entry passes 0 (presumably the ISO 2022 final byte -- confirm against
   make_charset).  The 1990 entry takes its range from
   MIN_CHAR_JIS_X0208_1990 / MAX_CHAR_JIS_X0208_1990.
   NOTE(review): some argument lines of both calls are not present in
   this copy.  */
2868 staticpro (&Vcharset_japanese_jisx0208);
2869 Vcharset_japanese_jisx0208 =
2870 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2871 CHARSET_TYPE_94X94, 2, 0, 'B',
2872 CHARSET_LEFT_TO_RIGHT,
2873 build_string ("JISX0208"),
2874 build_string ("JIS X0208:1983 (Japanese)"),
2875 build_string ("JIS X0208:1983 Japanese Kanji"),
2876 build_string ("jisx0208\\.1983"),
2878 staticpro (&Vcharset_japanese_jisx0208_1990);
2879 Vcharset_japanese_jisx0208_1990 =
2880 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2881 Qjapanese_jisx0208_1990,
2882 CHARSET_TYPE_94X94, 2, 0, 0,
2883 CHARSET_LEFT_TO_RIGHT,
2884 build_string ("JISX0208-1990"),
2885 build_string ("JIS X0208:1990 (Japanese)"),
2886 build_string ("JIS X0208:1990 Japanese Kanji"),
2887 build_string ("jisx0208\\.1990"),
2889 MIN_CHAR_JIS_X0208_1990,
2890 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* KSC5601 (Korean): a CHARSET_TYPE_94X94, left-to-right charset built
   with LEADING_BYTE_KOREAN_KSC5601 / Qkorean_ksc5601.  The four
   build_string arguments are presumably short name, long name, doc
   string and font-registry regexp (confirm against make_charset).  The
   second string is "KSC5601 (Korean)", matching the "NAME (Language)"
   form used by the other entries; it previously lacked the closing
   parenthesis.
   NOTE(review): the final argument line of the make_charset call is not
   present in this copy.  */
2891 staticpro (&Vcharset_korean_ksc5601);
2892 Vcharset_korean_ksc5601 =
2893 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2894 CHARSET_TYPE_94X94, 2, 0, 'C',
2895 CHARSET_LEFT_TO_RIGHT,
2896 build_string ("KSC5601"),
2897 build_string ("KSC5601 (Korean)"),
2898 build_string ("KSC5601 Korean Hangul and Hanja"),
2899 build_string ("ksc5601"),
/* JIS X0212, CNS 11643 planes 1 and 2, the three VISCII charsets and
   Daikanwa.  Each variable is passed to staticpro and then assigned the
   result of make_charset.
   NOTE(review): for most entries the final argument line(s) of the
   make_charset call are not present in this copy.  */

/* JIS X0212.  */
2901 staticpro (&Vcharset_japanese_jisx0212);
2902 Vcharset_japanese_jisx0212 =
2903 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2904 CHARSET_TYPE_94X94, 2, 0, 'D',
2905 CHARSET_LEFT_TO_RIGHT,
2906 build_string ("JISX0212"),
2907 build_string ("JISX0212 (Japanese)"),
2908 build_string ("JISX0212 Japanese Supplement"),
2909 build_string ("jisx0212"),
/* Expands to a string literal: the fixed cns11643 prefix pattern, then
   the plane-number string N, then "$".  It relies on adjacent string
   literal concatenation, so N must itself be a string literal.  */
2912 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
/* CNS 11643 plane 1.  */
2913 staticpro (&Vcharset_chinese_cns11643_1);
2914 Vcharset_chinese_cns11643_1 =
2915 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2916 CHARSET_TYPE_94X94, 2, 0, 'G',
2917 CHARSET_LEFT_TO_RIGHT,
2918 build_string ("CNS11643-1"),
2919 build_string ("CNS11643-1 (Chinese traditional)"),
2921 ("CNS 11643 Plane 1 Chinese traditional"),
2922 build_string (CHINESE_CNS_PLANE_RE("1")),
/* CNS 11643 plane 2.  */
2924 staticpro (&Vcharset_chinese_cns11643_2);
2925 Vcharset_chinese_cns11643_2 =
2926 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2927 CHARSET_TYPE_94X94, 2, 0, 'H',
2928 CHARSET_LEFT_TO_RIGHT,
2929 build_string ("CNS11643-2"),
2930 build_string ("CNS11643-2 (Chinese traditional)"),
2932 ("CNS 11643 Plane 2 Chinese traditional"),
2933 build_string (CHINESE_CNS_PLANE_RE("2")),
/* VISCII lower half (CHARSET_TYPE_96).  */
2936 staticpro (&Vcharset_latin_viscii_lower);
2937 Vcharset_latin_viscii_lower =
2938 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2939 CHARSET_TYPE_96, 1, 1, '1',
2940 CHARSET_LEFT_TO_RIGHT,
2941 build_string ("VISCII lower"),
2942 build_string ("VISCII lower (Vietnamese)"),
2943 build_string ("VISCII lower (Vietnamese)"),
2944 build_string ("MULEVISCII-LOWER"),
/* VISCII upper half (CHARSET_TYPE_96).  */
2946 staticpro (&Vcharset_latin_viscii_upper);
2947 Vcharset_latin_viscii_upper =
2948 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2949 CHARSET_TYPE_96, 1, 1, '2',
2950 CHARSET_LEFT_TO_RIGHT,
2951 build_string ("VISCII upper"),
2952 build_string ("VISCII upper (Vietnamese)"),
2953 build_string ("VISCII upper (Vietnamese)"),
2954 build_string ("MULEVISCII-UPPER"),
/* Full VISCII 1.1, registered as a single CHARSET_TYPE_256 charset.  */
2956 staticpro (&Vcharset_latin_viscii);
2957 Vcharset_latin_viscii =
2958 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2959 CHARSET_TYPE_256, 1, 2, 0,
2960 CHARSET_LEFT_TO_RIGHT,
2961 build_string ("VISCII"),
2962 build_string ("VISCII 1.1 (Vietnamese)"),
2963 build_string ("VISCII 1.1 (Vietnamese)"),
2964 build_string ("VISCII1\\.1"),
/* Daikanwa: CHARSET_TYPE_256X256, range taken from MIN_CHAR_DAIKANWA /
   MAX_CHAR_DAIKANWA.  */
2966 staticpro (&Vcharset_ideograph_daikanwa);
2967 Vcharset_ideograph_daikanwa =
2968 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2969 CHARSET_TYPE_256X256, 2, 2, 0,
2970 CHARSET_LEFT_TO_RIGHT,
2971 build_string ("Daikanwa"),
2972 build_string ("Morohashi's Daikanwa"),
2973 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2974 build_string ("Daikanwa"),
2975 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
/* Konjaku-Mojikyo "pseudo JIS encoding" parts 1 through 21.

   The 21 registrations are identical apart from the part number N,
   which appears in the leading-byte constant LEADING_BYTE_MOJIKYO_PJ_N,
   the symbol Qmojikyo_pj_N, the three descriptive strings and the
   registry pattern ending in "Mojikyo-N$".  All of them are
   CHARSET_TYPE_94X94, left-to-right, and pass the same "2, 0, 0"
   arguments after the type.  Each variable is passed to staticpro and
   then assigned the result of make_charset.

   NOTE(review): in every entry the `build_string' line preceding the
   third string and the final argument line of the make_charset call
   are not present in this copy.  */
2976 staticpro (&Vcharset_mojikyo_pj_1);
2977 Vcharset_mojikyo_pj_1 =
2978 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1,
2979 CHARSET_TYPE_94X94, 2, 0, 0,
2980 CHARSET_LEFT_TO_RIGHT,
2981 build_string ("Mojikyo-PJ-1"),
2982 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
2984 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
2985 build_string ("jisx0208\\.Mojikyo-1$"),
2987 staticpro (&Vcharset_mojikyo_pj_2);
2988 Vcharset_mojikyo_pj_2 =
2989 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2,
2990 CHARSET_TYPE_94X94, 2, 0, 0,
2991 CHARSET_LEFT_TO_RIGHT,
2992 build_string ("Mojikyo-PJ-2"),
2993 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
2995 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
2996 build_string ("jisx0208\\.Mojikyo-2$"),
2998 staticpro (&Vcharset_mojikyo_pj_3);
2999 Vcharset_mojikyo_pj_3 =
3000 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3,
3001 CHARSET_TYPE_94X94, 2, 0, 0,
3002 CHARSET_LEFT_TO_RIGHT,
3003 build_string ("Mojikyo-PJ-3"),
3004 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3006 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3007 build_string ("jisx0208\\.Mojikyo-3$"),
3009 staticpro (&Vcharset_mojikyo_pj_4);
3010 Vcharset_mojikyo_pj_4 =
3011 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4,
3012 CHARSET_TYPE_94X94, 2, 0, 0,
3013 CHARSET_LEFT_TO_RIGHT,
3014 build_string ("Mojikyo-PJ-4"),
3015 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3017 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3018 build_string ("jisx0208\\.Mojikyo-4$"),
3020 staticpro (&Vcharset_mojikyo_pj_5);
3021 Vcharset_mojikyo_pj_5 =
3022 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5,
3023 CHARSET_TYPE_94X94, 2, 0, 0,
3024 CHARSET_LEFT_TO_RIGHT,
3025 build_string ("Mojikyo-PJ-5"),
3026 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3028 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3029 build_string ("jisx0208\\.Mojikyo-5$"),
3031 staticpro (&Vcharset_mojikyo_pj_6);
3032 Vcharset_mojikyo_pj_6 =
3033 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6,
3034 CHARSET_TYPE_94X94, 2, 0, 0,
3035 CHARSET_LEFT_TO_RIGHT,
3036 build_string ("Mojikyo-PJ-6"),
3037 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3039 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3040 build_string ("jisx0208\\.Mojikyo-6$"),
3042 staticpro (&Vcharset_mojikyo_pj_7);
3043 Vcharset_mojikyo_pj_7 =
3044 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7,
3045 CHARSET_TYPE_94X94, 2, 0, 0,
3046 CHARSET_LEFT_TO_RIGHT,
3047 build_string ("Mojikyo-PJ-7"),
3048 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3050 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3051 build_string ("jisx0208\\.Mojikyo-7$"),
3053 staticpro (&Vcharset_mojikyo_pj_8);
3054 Vcharset_mojikyo_pj_8 =
3055 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8,
3056 CHARSET_TYPE_94X94, 2, 0, 0,
3057 CHARSET_LEFT_TO_RIGHT,
3058 build_string ("Mojikyo-PJ-8"),
3059 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3061 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3062 build_string ("jisx0208\\.Mojikyo-8$"),
3064 staticpro (&Vcharset_mojikyo_pj_9);
3065 Vcharset_mojikyo_pj_9 =
3066 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9,
3067 CHARSET_TYPE_94X94, 2, 0, 0,
3068 CHARSET_LEFT_TO_RIGHT,
3069 build_string ("Mojikyo-PJ-9"),
3070 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3072 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3073 build_string ("jisx0208\\.Mojikyo-9$"),
3075 staticpro (&Vcharset_mojikyo_pj_10);
3076 Vcharset_mojikyo_pj_10 =
3077 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10,
3078 CHARSET_TYPE_94X94, 2, 0, 0,
3079 CHARSET_LEFT_TO_RIGHT,
3080 build_string ("Mojikyo-PJ-10"),
3081 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3083 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3084 build_string ("jisx0208\\.Mojikyo-10$"),
3086 staticpro (&Vcharset_mojikyo_pj_11);
3087 Vcharset_mojikyo_pj_11 =
3088 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11,
3089 CHARSET_TYPE_94X94, 2, 0, 0,
3090 CHARSET_LEFT_TO_RIGHT,
3091 build_string ("Mojikyo-PJ-11"),
3092 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3094 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3095 build_string ("jisx0208\\.Mojikyo-11$"),
3097 staticpro (&Vcharset_mojikyo_pj_12);
3098 Vcharset_mojikyo_pj_12 =
3099 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12,
3100 CHARSET_TYPE_94X94, 2, 0, 0,
3101 CHARSET_LEFT_TO_RIGHT,
3102 build_string ("Mojikyo-PJ-12"),
3103 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3105 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3106 build_string ("jisx0208\\.Mojikyo-12$"),
3108 staticpro (&Vcharset_mojikyo_pj_13);
3109 Vcharset_mojikyo_pj_13 =
3110 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13,
3111 CHARSET_TYPE_94X94, 2, 0, 0,
3112 CHARSET_LEFT_TO_RIGHT,
3113 build_string ("Mojikyo-PJ-13"),
3114 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3116 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3117 build_string ("jisx0208\\.Mojikyo-13$"),
3119 staticpro (&Vcharset_mojikyo_pj_14);
3120 Vcharset_mojikyo_pj_14 =
3121 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14,
3122 CHARSET_TYPE_94X94, 2, 0, 0,
3123 CHARSET_LEFT_TO_RIGHT,
3124 build_string ("Mojikyo-PJ-14"),
3125 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3127 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3128 build_string ("jisx0208\\.Mojikyo-14$"),
3130 staticpro (&Vcharset_mojikyo_pj_15);
3131 Vcharset_mojikyo_pj_15 =
3132 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15,
3133 CHARSET_TYPE_94X94, 2, 0, 0,
3134 CHARSET_LEFT_TO_RIGHT,
3135 build_string ("Mojikyo-PJ-15"),
3136 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3138 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3139 build_string ("jisx0208\\.Mojikyo-15$"),
3141 staticpro (&Vcharset_mojikyo_pj_16);
3142 Vcharset_mojikyo_pj_16 =
3143 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16,
3144 CHARSET_TYPE_94X94, 2, 0, 0,
3145 CHARSET_LEFT_TO_RIGHT,
3146 build_string ("Mojikyo-PJ-16"),
3147 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3149 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3150 build_string ("jisx0208\\.Mojikyo-16$"),
3152 staticpro (&Vcharset_mojikyo_pj_17);
3153 Vcharset_mojikyo_pj_17 =
3154 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17,
3155 CHARSET_TYPE_94X94, 2, 0, 0,
3156 CHARSET_LEFT_TO_RIGHT,
3157 build_string ("Mojikyo-PJ-17"),
3158 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3160 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3161 build_string ("jisx0208\\.Mojikyo-17$"),
3163 staticpro (&Vcharset_mojikyo_pj_18);
3164 Vcharset_mojikyo_pj_18 =
3165 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18,
3166 CHARSET_TYPE_94X94, 2, 0, 0,
3167 CHARSET_LEFT_TO_RIGHT,
3168 build_string ("Mojikyo-PJ-18"),
3169 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3171 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3172 build_string ("jisx0208\\.Mojikyo-18$"),
3174 staticpro (&Vcharset_mojikyo_pj_19);
3175 Vcharset_mojikyo_pj_19 =
3176 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19,
3177 CHARSET_TYPE_94X94, 2, 0, 0,
3178 CHARSET_LEFT_TO_RIGHT,
3179 build_string ("Mojikyo-PJ-19"),
3180 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3182 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3183 build_string ("jisx0208\\.Mojikyo-19$"),
3185 staticpro (&Vcharset_mojikyo_pj_20);
3186 Vcharset_mojikyo_pj_20 =
3187 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20,
3188 CHARSET_TYPE_94X94, 2, 0, 0,
3189 CHARSET_LEFT_TO_RIGHT,
3190 build_string ("Mojikyo-PJ-20"),
3191 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3193 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3194 build_string ("jisx0208\\.Mojikyo-20$"),
3196 staticpro (&Vcharset_mojikyo_pj_21);
3197 Vcharset_mojikyo_pj_21 =
3198 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21,
3199 CHARSET_TYPE_94X94, 2, 0, 0,
3200 CHARSET_LEFT_TO_RIGHT,
3201 build_string ("Mojikyo-PJ-21"),
3202 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3204 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3205 build_string ("jisx0208\\.Mojikyo-21$"),
/* Ethiopic (UCS) and the two Big5 levels.  Each variable is passed to
   staticpro and then assigned the result of make_charset.  */

/* Ethiopic: CHARSET_TYPE_256X256 with the range 0x1200 .. 0x137F; the
   following argument is also 0x1200 (presumably the code offset --
   confirm against make_charset).  */
3207 staticpro (&Vcharset_ethiopic_ucs);
3208 Vcharset_ethiopic_ucs =
3209 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
3210 CHARSET_TYPE_256X256, 2, 2, 0,
3211 CHARSET_LEFT_TO_RIGHT,
3212 build_string ("Ethiopic (UCS)"),
3213 build_string ("Ethiopic (UCS)"),
3214 build_string ("Ethiopic of UCS"),
3215 build_string ("Ethiopic-Unicode"),
3216 Qnil, 0x1200, 0x137F, 0x1200, 0);
/* Big5 level 1 and level 2.  Both entries use the same first string
   ("Big5") and the same registry string ("big5"); they differ in the
   leading byte, the symbol, the character passed after the type
   ('0' versus '1') and the level named in the longer strings.
   NOTE(review): the `build_string' line preceding the third string and
   the final argument line of each call are not present in this copy.  */
3218 staticpro (&Vcharset_chinese_big5_1);
3219 Vcharset_chinese_big5_1 =
3220 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3221 CHARSET_TYPE_94X94, 2, 0, '0',
3222 CHARSET_LEFT_TO_RIGHT,
3223 build_string ("Big5"),
3224 build_string ("Big5 (Level-1)"),
3226 ("Big5 Level-1 Chinese traditional"),
3227 build_string ("big5"),
3229 staticpro (&Vcharset_chinese_big5_2);
3230 Vcharset_chinese_big5_2 =
3231 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3232 CHARSET_TYPE_94X94, 2, 0, '1',
3233 CHARSET_LEFT_TO_RIGHT,
3234 build_string ("Big5"),
3235 build_string ("Big5 (Level-2)"),
3237 ("Big5 Level-2 Chinese traditional"),
3238 build_string ("big5"),
/* Composite-character support, compiled only when ENABLE_COMPOSITE_CHARS
   is defined.  Registers the composite charset, resets the next free
   row/column counters to 32, and creates the two hash tables that map
   between composite strings and characters.  */
3241 #ifdef ENABLE_COMPOSITE_CHARS
3242 /* #### For simplicity, we put composite chars into a 96x96 charset.
3243 This is going to lead to problems because you can run out of
3244 room, esp. as we don't yet recycle numbers. */
3245 staticpro (&Vcharset_composite);
3246 Vcharset_composite =
3247 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3248 CHARSET_TYPE_96X96, 2, 0, 0,
3249 CHARSET_LEFT_TO_RIGHT,
3250 build_string ("Composite"),
3251 build_string ("Composite characters"),
3252 build_string ("Composite characters"),
/* NOTE(review): the remaining arguments of the make_charset call above
   are not present in this copy -- confirm against upstream.  */
3255 /* #### not dumped properly */
3256 composite_char_row_next = 32;
3257 composite_char_col_next = 32;
/* The string->char table is created with HASH_TABLE_EQUAL and the
   char->string table with HASH_TABLE_EQ.  Unlike the charset variables
   above, these two are passed to staticpro after they are assigned.  */
3259 Vcomposite_char_string2char_hash_table =
3260 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3261 Vcomposite_char_char2string_hash_table =
3262 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3263 staticpro (&Vcomposite_char_string2char_hash_table);
3264 staticpro (&Vcomposite_char_char2string_hash_table);
3265 #endif /* ENABLE_COMPOSITE_CHARS */