1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo_pj_1;
70 Lisp_Object Vcharset_mojikyo_pj_2;
71 Lisp_Object Vcharset_mojikyo_pj_3;
72 Lisp_Object Vcharset_mojikyo_pj_4;
73 Lisp_Object Vcharset_mojikyo_pj_5;
74 Lisp_Object Vcharset_mojikyo_pj_6;
75 Lisp_Object Vcharset_mojikyo_pj_7;
76 Lisp_Object Vcharset_mojikyo_pj_8;
77 Lisp_Object Vcharset_mojikyo_pj_9;
78 Lisp_Object Vcharset_mojikyo_pj_10;
79 Lisp_Object Vcharset_mojikyo_pj_11;
80 Lisp_Object Vcharset_mojikyo_pj_12;
81 Lisp_Object Vcharset_mojikyo_pj_13;
82 Lisp_Object Vcharset_mojikyo_pj_14;
83 Lisp_Object Vcharset_mojikyo_pj_15;
84 Lisp_Object Vcharset_mojikyo_pj_16;
85 Lisp_Object Vcharset_mojikyo_pj_17;
86 Lisp_Object Vcharset_mojikyo_pj_18;
87 Lisp_Object Vcharset_mojikyo_pj_19;
88 Lisp_Object Vcharset_mojikyo_pj_20;
89 Lisp_Object Vcharset_mojikyo_pj_21;
90 Lisp_Object Vcharset_ethiopic_ucs;
92 Lisp_Object Vcharset_chinese_big5_1;
93 Lisp_Object Vcharset_chinese_big5_2;
95 #ifdef ENABLE_COMPOSITE_CHARS
96 Lisp_Object Vcharset_composite;
98 /* Hash tables for composite chars. One maps strings representing
99 composed chars to their equivalent chars; the other goes the opposite way. */
101 Lisp_Object Vcomposite_char_char2string_hash_table;
102 Lisp_Object Vcomposite_char_string2char_hash_table;
104 static int composite_char_row_next;
105 static int composite_char_col_next;
107 #endif /* ENABLE_COMPOSITE_CHARS */
109 struct charset_lookup *chlook;
111 static const struct lrecord_description charset_lookup_description_1[] = {
112 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
121 static const struct struct_description charset_lookup_description = {
122 sizeof(struct charset_lookup),
123 charset_lookup_description_1
127 /* Table of number of bytes in the string representation of a character
128 indexed by the first byte of that representation.
130 rep_bytes_by_first_byte(c) is more efficient than the equivalent
131 canonical computation:
133 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
135 const Bytecount rep_bytes_by_first_byte[0xA0] =
136 { /* 0x00 - 0x7f are for straight ASCII */
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 /* 0x80 - 0x8f are for Dimension-1 official charsets */
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 /* 0x90 - 0x9d are for Dimension-2 official charsets */
152 /* 0x9e is for Dimension-1 private charsets */
153 /* 0x9f is for Dimension-2 private charsets */
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark routine named for "char-byte-table" records in the
   DEFINE_LRECORD_IMPLEMENTATION for that type.  Hands every one of the
   256 property slots of OBJ to mark_object.
   NOTE(review): the return statement is not visible in this listing.  */
161 mark_char_byte_table (Lisp_Object obj)
163 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 for (i = 0; i < 256; i++)
168 mark_object (cte->property[i]);
/* Equality routine for two char-byte-tables.  Walks all 256 slots of
   OBJ1 and OBJ2 in parallel.  Where the slot of OBJ1 holds a nested
   char-byte-table and the slot of OBJ2 does too, the pair is compared
   by recursing with DEPTH + 1; the remaining visible comparison goes
   through internal_equal, also with DEPTH + 1.
   NOTE(review): the else-branches and return statements fall on lines
   that are absent from this listing, so the exact result for a
   table/non-table mismatch cannot be confirmed from here.  */
174 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
176 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
177 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
180 for (i = 0; i < 256; i++)
181 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
183 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
185 if (!char_byte_table_equal (cte1->property[i],
186 cte2->property[i], depth + 1))
193 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash routine for a char-byte-table: returns the value of
   internal_array_hash over the 256-element property array of OBJ,
   passing DEPTH through unchanged.  */
199 char_byte_table_hash (Lisp_Object obj, int depth)
201 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
203 return internal_array_hash (cte->property, 256, depth);
206 static const struct lrecord_description char_byte_table_description[] = {
207 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
211 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
212 mark_char_byte_table,
213 internal_object_printer,
214 0, char_byte_table_equal,
215 char_byte_table_hash,
216 char_byte_table_description,
217 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table record and set each of its 256
   property slots to INITVAL.  The record is then wrapped into the
   Lisp_Object `obj' with XSETCHAR_BYTE_TABLE.
   NOTE(review): the declaration of `obj' and the return statement are
   not visible here; presumably `obj' is what is returned -- confirm.  */
220 make_char_byte_table (Lisp_Object initval)
224 struct Lisp_Char_Byte_Table *cte =
225 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
226 &lrecord_char_byte_table);
228 for (i = 0; i < 256; i++)
229 cte->property[i] = initval;
231 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a newly allocated record.
   Slots that hold a nested char-byte-table are copied recursively, so
   the sub-tables of the copy are distinct records; any other slot value
   is stored in the copy as-is.
   NOTE(review): the `else' between the two assignments and the return
   statement are on lines absent from this listing.  */
236 copy_char_byte_table (Lisp_Object entry)
238 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
241 struct Lisp_Char_Byte_Table *ctenew =
242 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
243 &lrecord_char_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (CHAR_BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_char_byte_table (new);
251 ctenew->property[i] = new;
254 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark routine named for "char-code-table" records in the
   DEFINE_LRECORD_IMPLEMENTATION for that type.
   NOTE(review): only the unwrapping of OBJ is visible in this listing;
   presumably the `table' slot is what gets marked or returned --
   confirm against the full file.  */
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality routine for two char-code-tables: the result is that of
   char_byte_table_equal applied to their `table' slots, one level
   deeper (DEPTH + 1).  */
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash routine for a char-code-table: the hash is that of the
   char-byte-table held in the `table' slot, computed one level deeper
   (DEPTH + 1).  This parallels char_code_table_equal, which delegates
   to char_byte_table_equal in the same way.
   The callee must be char_byte_table_hash, not this function:
   cte->table is a char-byte-table, so re-entering char_code_table_hash
   would unwrap it as the wrong record type and recurse on whatever it
   found there.  */
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_byte_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
/* Allocate a new char-code-table record whose `table' slot is a fresh
   char-byte-table with every slot set to INITVAL (built by
   make_char_byte_table), then wrap it into `obj' with
   XSETCHAR_CODE_TABLE.
   NOTE(review): the declaration of `obj' and the return statement are
   not visible in this listing.  */
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_char_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
/* Copy the char-code-table ENTRY: a new record is allocated and its
   `table' slot receives the result of copy_char_byte_table on the
   original's table, so nested byte tables are duplicated rather than
   shared.
   NOTE(review): the declaration of `obj' and the return statement are
   not visible in this listing.  */
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_char_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Look up the entry for character CH in the char-code-table TABLE.
   The character code is taken as an unsigned int and split into four
   bytes, most significant first.  Each byte indexes one level of
   char-byte-tables: bits 24-31 index the top table, and as long as the
   slot found is itself a char-byte-table the walk descends into it
   with the next byte.  At the last level the slot selected by the low
   byte is returned.
   NOTE(review): what happens when an intermediate slot is not a
   char-byte-table is on lines absent from this listing; presumably
   that slot value is returned as the answer for the whole sub-range --
   confirm.  */
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
330 struct Lisp_Char_Byte_Table* cpt
331 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (CHAR_BYTE_TABLE_P (ret))
335 cpt = XCHAR_BYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (CHAR_BYTE_TABLE_P (ret))
341 cpt = XCHAR_BYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (CHAR_BYTE_TABLE_P (ret))
347 cpt = XCHAR_BYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
/* Store VALUE as the entry for character CH in the char-code-table
   TABLE.  The code of CH is split into four bytes, most significant
   first, and each byte indexes one level of char-byte-tables (cpt1 to
   cpt4 below).
   While the slot at a level is already a char-byte-table the walk
   descends into it; at the fourth level VALUE is written into the slot
   chosen by the low byte.
   When the slot at some level is not a table and differs from VALUE
   (per EQ), the missing lower levels are created with
   make_char_byte_table, each pre-filled with the old slot value, VALUE
   is written into the new leaf, and the new chain is linked into the
   parent slot.  When the slot already equals VALUE nothing is visible
   as being done.  */
355 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
357 unsigned int code = ch;
358 struct Lisp_Char_Byte_Table* cpt1
359 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
360 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
362 if (CHAR_BYTE_TABLE_P (ret))
364 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
366 ret = cpt2->property[(unsigned char)(code >> 16)];
367 if (CHAR_BYTE_TABLE_P (ret))
369 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
371 ret = cpt3->property[(unsigned char)(code >> 8)];
372 if (CHAR_BYTE_TABLE_P (ret))
374 struct Lisp_Char_Byte_Table* cpt4
375 = XCHAR_BYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
/* Third-level slot holds a plain value: create only the leaf table.  */
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
/* Second-level slot holds a plain value: create levels three and four.  */
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_char_byte_table (ret);
390 Lisp_Object cpt4 = make_char_byte_table (ret);
392 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
/* Top-level slot holds a plain value: create levels two, three and four.  */
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_char_byte_table (ret);
401 Lisp_Object cpt3 = make_char_byte_table (ret);
402 Lisp_Object cpt4 = make_char_byte_table (ret);
404 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
436 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
442 else if (EQ (v, Qcompat))
444 else if (EQ (v, Qisolated))
446 else if (EQ (v, Qinitial))
448 else if (EQ (v, Qmedial))
450 else if (EQ (v, Qfinal))
452 else if (EQ (v, Qvertical))
454 else if (EQ (v, QnoBreak))
456 else if (EQ (v, Qfraction))
458 else if (EQ (v, Qsuper))
460 else if (EQ (v, Qsub))
462 else if (EQ (v, Qcircle))
464 else if (EQ (v, Qsquare))
466 else if (EQ (v, Qwide))
468 else if (EQ (v, Qnarrow))
470 else if (EQ (v, Qsmall))
472 else if (EQ (v, Qfont))
475 signal_simple_error (err_msg, err_arg);
478 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
479 Return character corresponding with list.
483 Lisp_Object table = Vcharacter_composition_table;
484 Lisp_Object rest = list;
488 Lisp_Object v = Fcar (rest);
490 Emchar c = to_char_code (v, "Invalid value for composition", list);
492 ret = get_char_code_table (c, table);
497 if (!CHAR_CODE_TABLE_P (ret))
502 else if (!CONSP (rest))
504 else if (CHAR_CODE_TABLE_P (ret))
507 signal_simple_error ("Invalid table is found with", list);
509 signal_simple_error ("Invalid value for composition", list);
512 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
513 Return variants of CHARACTER.
517 CHECK_CHAR (character);
518 return Fcopy_list (get_char_code_table (XCHAR (character),
519 Vcharacter_variant_table));
522 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
523 Return the alist of attributes of CHARACTER.
527 CHECK_CHAR (character);
528 return Fcopy_alist (get_char_code_table (XCHAR (character),
529 Vcharacter_attribute_table));
532 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
533 Return the value of CHARACTER's ATTRIBUTE.
535 (character, attribute))
540 CHECK_CHAR (character);
541 ret = get_char_code_table (XCHAR (character),
542 Vcharacter_attribute_table);
546 if (!NILP (ccs = Ffind_charset (attribute)))
549 return Fcdr (Fassq (attribute, ret));
553 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
556 Emchar char_code = XCHAR (character);
558 = get_char_code_table (char_code, Vcharacter_attribute_table);
561 cell = Fassq (attribute, ret);
565 ret = Fcons (Fcons (attribute, value), ret);
567 else if (!EQ (Fcdr (cell), value))
569 Fsetcdr (cell, value);
571 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
575 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
576 Store CHARACTER's ATTRIBUTE with VALUE.
578 (character, attribute, value))
582 CHECK_CHAR (character);
583 ccs = Ffind_charset (attribute);
586 Lisp_Object cpos, rest;
587 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
594 /* ad-hoc method for `ascii' */
595 if ((XCHARSET_CHARS (ccs) == 94) &&
596 (XCHARSET_BYTE_OFFSET (ccs) != 33))
597 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
599 ccs_len = XCHARSET_CHARS (ccs);
603 Lisp_Object ret = Fcar (value);
606 signal_simple_error ("Invalid value for coded-charset", value);
607 code_point = XINT (ret);
608 if (XCHARSET_GRAPHIC (ccs) == 1)
616 signal_simple_error ("Invalid value for coded-charset", value);
619 signal_simple_error ("Invalid value for coded-charset", value);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
623 code_point = (code_point << 8) | i;
626 value = make_int (code_point);
628 else if (INTP (value))
630 if (XCHARSET_GRAPHIC (ccs) == 1)
631 value = make_int (XINT (value) & 0x7F7F7F7F);
634 signal_simple_error ("Invalid value for coded-charset", value);
637 cpos = Fget_char_attribute (character, attribute);
642 dim = XCHARSET_DIMENSION (ccs);
643 code_point = XINT (cpos);
647 i = ((code_point >> (8 * dim)) & 255)
648 - XCHARSET_BYTE_OFFSET (ccs);
649 nv = XVECTOR_DATA(v)[i];
655 XVECTOR_DATA(v)[i] = Qnil;
656 v = XCHARSET_DECODING_TABLE (ccs);
661 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
664 dim = XCHARSET_DIMENSION (ccs);
665 code_point = XINT (value);
670 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
671 nv = XVECTOR_DATA(v)[i];
675 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
681 XVECTOR_DATA(v)[i] = character;
683 else if (EQ (attribute, Q_decomposition))
685 Lisp_Object rest = value;
686 Lisp_Object table = Vcharacter_composition_table;
689 signal_simple_error ("Invalid value for ->decomposition",
694 Lisp_Object v = Fcar (rest);
697 = to_char_code (v, "Invalid value for ->decomposition", value);
702 put_char_code_table (c, character, table);
707 ntable = get_char_code_table (c, table);
708 if (!CHAR_CODE_TABLE_P (ntable))
710 ntable = make_char_code_table (Qnil);
711 put_char_code_table (c, ntable, table);
717 else if (EQ (attribute, Q_ucs))
723 signal_simple_error ("Invalid value for ->ucs", value);
727 ret = get_char_code_table (c, Vcharacter_variant_table);
728 if (NILP (Fmemq (character, ret)))
730 put_char_code_table (c, Fcons (character, ret),
731 Vcharacter_variant_table);
734 return put_char_attribute (character, attribute, value);
739 EXFUN (Fmake_char, 3);
740 EXFUN (Fdecode_char, 2);
742 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
743 Store character's ATTRIBUTES.
747 Lisp_Object rest = attributes;
748 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
749 Lisp_Object character;
755 Lisp_Object cell = Fcar (rest);
759 signal_simple_error ("Invalid argument", attributes);
760 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
764 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
766 character = Fdecode_char (ccs, cell);
767 goto setup_attributes;
771 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
774 signal_simple_error ("Invalid argument", attributes);
776 character = make_char (XINT (code) + 0x100000);
777 goto setup_attributes;
781 else if (!INTP (code))
782 signal_simple_error ("Invalid argument", attributes);
784 character = make_char (XINT (code));
790 Lisp_Object cell = Fcar (rest);
793 signal_simple_error ("Invalid argument", attributes);
794 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
798 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
801 Lisp_Object Vutf_2000_version;
805 int leading_code_private_11;
808 Lisp_Object Qcharsetp;
810 /* Qdoc_string, Qdimension, Qchars defined in general.c */
811 Lisp_Object Qregistry, Qfinal, Qgraphic;
812 Lisp_Object Qdirection;
813 Lisp_Object Qreverse_direction_charset;
814 Lisp_Object Qleading_byte;
815 Lisp_Object Qshort_name, Qlong_name;
831 Qjapanese_jisx0208_1978,
834 Qjapanese_jisx0208_1990,
844 Qvietnamese_viscii_lower,
845 Qvietnamese_viscii_upper,
874 Lisp_Object Ql2r, Qr2l;
876 Lisp_Object Vcharset_hash_table;
879 static Charset_ID next_allocated_leading_byte;
881 static Charset_ID next_allocated_1_byte_leading_byte;
882 static Charset_ID next_allocated_2_byte_leading_byte;
885 /* Composite characters are characters constructed by overstriking two
886 or more regular characters.
888 1) The old Mule implementation involves storing composite characters
889 in a buffer as a tag followed by all of the actual characters
890 used to make up the composite character. I think this is a bad
891 idea; it greatly complicates code that wants to handle strings
892 one character at a time because it has to deal with the possibility
893 of great big ungainly characters. It's much more reasonable to
894 simply store an index into a table of composite characters.
896 2) The current implementation only allows for 16,384 separate
897 composite characters over the lifetime of the XEmacs process.
898 This could become a potential problem if the user
899 edited lots of different files that use composite characters.
900 Due to FSF bogosity, increasing the number of allowable
901 composite characters under Mule would decrease the number
902 of possible faces that can exist. Mule already has shrunk
903 this to 2048, and further shrinkage would become uncomfortable.
904 No such problems exist in XEmacs.
906 Composite characters could be represented as 0x80 C1 C2 C3,
907 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
908 for slightly under 2^20 (one million) composite characters
909 over the XEmacs process lifetime, and you only need to
910 increase the size of a Mule character from 19 to 21 bits.
911 Or you could use 0x80 C1 C2 C3 C4, allowing for about
912 85 million (slightly over 2^26) composite characters. */
915 /************************************************************************/
916 /* Basic Emchar functions */
917 /************************************************************************/
919 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
920 string in STR. Returns the number of bytes stored.
921 Do not call this directly. Use the macro set_charptr_emchar() instead. */
925 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
940 else if ( c <= 0x7ff )
942 *p++ = (c >> 6) | 0xc0;
943 *p++ = (c & 0x3f) | 0x80;
945 else if ( c <= 0xffff )
947 *p++ = (c >> 12) | 0xe0;
948 *p++ = ((c >> 6) & 0x3f) | 0x80;
949 *p++ = (c & 0x3f) | 0x80;
951 else if ( c <= 0x1fffff )
953 *p++ = (c >> 18) | 0xf0;
954 *p++ = ((c >> 12) & 0x3f) | 0x80;
955 *p++ = ((c >> 6) & 0x3f) | 0x80;
956 *p++ = (c & 0x3f) | 0x80;
958 else if ( c <= 0x3ffffff )
960 *p++ = (c >> 24) | 0xf8;
961 *p++ = ((c >> 18) & 0x3f) | 0x80;
962 *p++ = ((c >> 12) & 0x3f) | 0x80;
963 *p++ = ((c >> 6) & 0x3f) | 0x80;
964 *p++ = (c & 0x3f) | 0x80;
968 *p++ = (c >> 30) | 0xfc;
969 *p++ = ((c >> 24) & 0x3f) | 0x80;
970 *p++ = ((c >> 18) & 0x3f) | 0x80;
971 *p++ = ((c >> 12) & 0x3f) | 0x80;
972 *p++ = ((c >> 6) & 0x3f) | 0x80;
973 *p++ = (c & 0x3f) | 0x80;
976 BREAKUP_CHAR (c, charset, c1, c2);
977 lb = CHAR_LEADING_BYTE (c);
978 if (LEADING_BYTE_PRIVATE_P (lb))
979 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
981 if (EQ (charset, Vcharset_control_1))
990 /* Return the first character from a Mule-encoded string in STR,
991 assuming it's non-ASCII. Do not call this directly.
992 Use the macro charptr_emchar() instead. */
995 non_ascii_charptr_emchar (CONST Bufbyte *str)
1008 else if ( b >= 0xf8 )
1013 else if ( b >= 0xf0 )
1018 else if ( b >= 0xe0 )
1023 else if ( b >= 0xc0 )
1033 for( ; len > 0; len-- )
1036 ch = ( ch << 6 ) | ( b & 0x3f );
1040 Bufbyte i0 = *str, i1, i2 = 0;
1041 Lisp_Object charset;
1043 if (i0 == LEADING_BYTE_CONTROL_1)
1044 return (Emchar) (*++str - 0x20);
1046 if (LEADING_BYTE_PREFIX_P (i0))
1051 charset = CHARSET_BY_LEADING_BYTE (i0);
1052 if (XCHARSET_DIMENSION (charset) == 2)
1055 return MAKE_CHAR (charset, i1, i2);
1059 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1060 Do not call this directly. Use the macro valid_char_p() instead. */
1064 non_ascii_valid_char_p (Emchar ch)
1068 /* Must have only lowest 19 bits set */
1072 f1 = CHAR_FIELD1 (ch);
1073 f2 = CHAR_FIELD2 (ch);
1074 f3 = CHAR_FIELD3 (ch);
1078 Lisp_Object charset;
1080 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1081 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1082 f2 > MAX_CHAR_FIELD2_PRIVATE)
1087 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1088 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1092 NOTE: This takes advantage of the fact that
1093 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1094 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1096 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1097 if (EQ (charset, Qnil))
1099 return (XCHARSET_CHARS (charset) == 96);
1103 Lisp_Object charset;
1105 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1106 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1107 f1 > MAX_CHAR_FIELD1_PRIVATE)
1109 if (f2 < 0x20 || f3 < 0x20)
1112 #ifdef ENABLE_COMPOSITE_CHARS
1113 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1115 if (UNBOUNDP (Fgethash (make_int (ch),
1116 Vcomposite_char_char2string_hash_table,
1121 #endif /* ENABLE_COMPOSITE_CHARS */
1123 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1124 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1127 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1129 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1132 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1134 if (EQ (charset, Qnil))
1136 return (XCHARSET_CHARS (charset) == 96);
1142 /************************************************************************/
1143 /* Basic string functions */
1144 /************************************************************************/
1146 /* Copy the character pointed to by PTR into STR, assuming it's
1147 non-ASCII. Do not call this directly. Use the macro
1148 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR and return the number
   of bytes it occupies (strptr + 1 - str).  The switch on
   REP_BYTES_BY_FIRST_BYTE selects the entry case and the cases then
   fall through, so each case copies one further byte.
   NOTE(review): the switch inspects *strptr, i.e. the destination
   byte.  The line between the declaration and the switch is absent
   from this listing; presumably it copies the first byte from PTR into
   *strptr -- confirm, since otherwise the switch would read
   uninitialised destination data.  */
1151 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1153 Bufbyte *strptr = str;
1155 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1157 /* Notice fallthrough. */
1159 case 6: *++strptr = *ptr++;
1160 case 5: *++strptr = *ptr++;
1162 case 4: *++strptr = *ptr++;
1163 case 3: *++strptr = *ptr++;
1164 case 2: *++strptr = *ptr;
1169 return strptr + 1 - str;
1173 /************************************************************************/
1174 /* streams of Emchars */
1175 /************************************************************************/
1177 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1178 The functions below are not meant to be called directly; use
1179 the macros in insdel.h. */
/* Finish reading one character from STREAM when its first byte CH has
   already been fetched.  CH is stored as str[0]; the switch on
   REP_BYTES_BY_FIRST_BYTE (ch) then pulls further bytes with
   Lstream_getc (up to five reads are visible) and appends them to the
   local buffer.  The assembled bytes are decoded with charptr_emchar
   and the resulting character is returned.
   NOTE(review): the case labels and whatever checks sit between the
   reads are on lines absent from this listing.  */
1182 Lstream_get_emchar_1 (Lstream *stream, int ch)
1184 Bufbyte str[MAX_EMCHAR_LEN];
1185 Bufbyte *strptr = str;
1187 str[0] = (Bufbyte) ch;
1188 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1190 /* Notice fallthrough. */
1193 ch = Lstream_getc (stream);
1195 *++strptr = (Bufbyte) ch;
1197 ch = Lstream_getc (stream);
1199 *++strptr = (Bufbyte) ch;
1202 ch = Lstream_getc (stream);
1204 *++strptr = (Bufbyte) ch;
1206 ch = Lstream_getc (stream);
1208 *++strptr = (Bufbyte) ch;
1210 ch = Lstream_getc (stream);
1212 *++strptr = (Bufbyte) ch;
1217 return charptr_emchar (str);
/* Write the character CH to STREAM: CH is encoded into a local buffer
   with set_charptr_emchar, and the resulting LEN bytes are passed to
   Lstream_write, whose result is returned.  */
1221 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1223 Bufbyte str[MAX_EMCHAR_LEN];
1224 Bytecount len = set_charptr_emchar (str, ch);
1225 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: CH is encoded into a local
   buffer with set_charptr_emchar, and the resulting LEN bytes are
   handed to Lstream_unread.  */
1229 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1231 Bufbyte str[MAX_EMCHAR_LEN];
1232 Bytecount len = set_charptr_emchar (str, ch);
1233 Lstream_unread (stream, str, len);
1237 /************************************************************************/
1238 /* charset object */
1239 /************************************************************************/
/* Mark routine named for "charset" records in the
   DEFINE_LRECORD_IMPLEMENTATION for that type.  Passes the short_name,
   long_name, doc_string, registry, ccl_program and decoding_table
   slots of the charset to mark_object.
   NOTE(review): the return statement is not visible in this listing,
   so any slot handled through the return value cannot be seen here.  */
1242 mark_charset (Lisp_Object obj)
1244 struct Lisp_Charset *cs = XCHARSET (obj);
1246 mark_object (cs->short_name);
1247 mark_object (cs->long_name);
1248 mark_object (cs->doc_string);
1249 mark_object (cs->registry);
1250 mark_object (cs->ccl_program);
1252 mark_object (cs->decoding_table);
1258 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1260 struct Lisp_Charset *cs = XCHARSET (obj);
1264 error ("printing unreadable object #<charset %s 0x%x>",
1265 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1268 write_c_string ("#<charset ", printcharfun);
1269 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1270 write_c_string (" ", printcharfun);
1271 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1272 write_c_string (" ", printcharfun);
1273 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1274 write_c_string (" ", printcharfun);
1275 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1276 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1278 CHARSET_DIMENSION (cs),
1279 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1280 CHARSET_COLUMNS (cs),
1281 CHARSET_GRAPHIC (cs),
1282 CHARSET_FINAL (cs));
1283 write_c_string (buf, printcharfun);
1284 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1285 sprintf (buf, " 0x%x>", cs->header.uid);
1286 write_c_string (buf, printcharfun);
1289 static const struct lrecord_description charset_description[] = {
1290 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1292 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1297 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1298 mark_charset, print_charset, 0, 0, 0,
1299 charset_description,
1300 struct Lisp_Charset);
1301 /* Make a new charset. */
/* Allocate a charset record and fill it in from the arguments.
   The visible steps are:
   - copy ID, NAME, the short/long names, CHARS, DIMENSION, DIRECTION,
     COLUMNS, GRAPHIC, FINAL, DOC and the registry into the record, and
     initialise the CCL program and reverse-direction charset to Qnil;
   - store UCS_MIN, UCS_MAX, CODE_OFFSET and BYTE_OFFSET;
   - derive the CHARSET_TYPE_* value from the chars/dimension pair;
   - set the representation byte count (1 for ASCII, otherwise
     dimension + 1 or dimension + 2);
   - register the record in chlook->charset_by_attributes and
     chlook->charset_by_leading_byte, asserting that each slot was
     previously nil, and enter it in Vcharset_hash_table under NAME.
   NOTE(review): the conditions guarding several of these steps, the
   case labels of the switches and the return statement are on lines
   absent from this listing.  */
1304 make_charset (Charset_ID id, Lisp_Object name,
1305 unsigned short chars, unsigned char dimension,
1306 unsigned char columns, unsigned char graphic,
1307 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1308 Lisp_Object long_name, Lisp_Object doc,
1310 Lisp_Object decoding_table,
1311 Emchar ucs_min, Emchar ucs_max,
1312 Emchar code_offset, unsigned char byte_offset)
1314 unsigned char type = 0;
1316 struct Lisp_Charset *cs =
1317 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1318 XSETCHARSET (obj, cs);
1320 CHARSET_ID (cs) = id;
1321 CHARSET_NAME (cs) = name;
1322 CHARSET_SHORT_NAME (cs) = short_name;
1323 CHARSET_LONG_NAME (cs) = long_name;
1324 CHARSET_CHARS (cs) = chars;
1325 CHARSET_DIMENSION (cs) = dimension;
1326 CHARSET_DIRECTION (cs) = direction;
1327 CHARSET_COLUMNS (cs) = columns;
1328 CHARSET_GRAPHIC (cs) = graphic;
1329 CHARSET_FINAL (cs) = final;
1330 CHARSET_DOC_STRING (cs) = doc;
1331 CHARSET_REGISTRY (cs) = reg;
1332 CHARSET_CCL_PROGRAM (cs) = Qnil;
1333 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): in the visible lines the decoding_table argument is
   never read; the slot is initialised to Qnil regardless of what the
   caller passed -- confirm this is intended.  */
1335 CHARSET_DECODING_TABLE(cs) = Qnil;
1336 CHARSET_UCS_MIN(cs) = ucs_min;
1337 CHARSET_UCS_MAX(cs) = ucs_max;
1338 CHARSET_CODE_OFFSET(cs) = code_offset;
1339 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1342 switch (CHARSET_CHARS (cs))
1345 switch (CHARSET_DIMENSION (cs))
1348 type = CHARSET_TYPE_94;
1351 type = CHARSET_TYPE_94X94;
1356 switch (CHARSET_DIMENSION (cs))
1359 type = CHARSET_TYPE_96;
1362 type = CHARSET_TYPE_96X96;
1368 switch (CHARSET_DIMENSION (cs))
1371 type = CHARSET_TYPE_128;
1374 type = CHARSET_TYPE_128X128;
1379 switch (CHARSET_DIMENSION (cs))
1382 type = CHARSET_TYPE_256;
1385 type = CHARSET_TYPE_256X256;
1392 CHARSET_TYPE (cs) = type;
1396 if (id == LEADING_BYTE_ASCII)
1397 CHARSET_REP_BYTES (cs) = 1;
1399 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1401 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1406 /* some charsets do not have final characters. This includes
1407 ASCII, Control-1, Composite, and the two faux private
1410 if (code_offset == 0)
1412 assert (NILP (chlook->charset_by_attributes[type][final]));
1413 chlook->charset_by_attributes[type][final] = obj;
1416 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1417 chlook->charset_by_attributes[type][final][direction] = obj;
1421 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1422 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1424 /* Some charsets are "faux" and don't have names or really exist at
1425 all except in the leading-byte table. */
1427 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Three counters are visible: a single
   next_allocated_leading_byte, and separate 1-byte and 2-byte
   counters; each is compared against its MAX_LEADING_BYTE_PRIVATE*
   limit before being post-incremented into `lb'.  The message
   "No more character sets free for this dimension" is raised with
   DIMENSION as its argument.
   NOTE(review): which counter applies (the conditions and any #ifdef
   selecting between the single and the per-dimension counters), what
   happens when a limit is exceeded, and the return statement are on
   lines absent from this listing.  */
1432 get_unallocated_leading_byte (int dimension)
1437 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1440 lb = next_allocated_leading_byte++;
1444 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1447 lb = next_allocated_1_byte_leading_byte++;
1451 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1454 lb = next_allocated_2_byte_leading_byte++;
1460 ("No more character sets free for this dimension",
1461 make_int (dimension));
/* Compute the character code for position (C1, C2) of CHARSET without
   consulting a decoding table.
   If the charset has a non-zero UCS_MAX, the position is linearised
   (C1 minus the byte offset for dimension 1; for the other case
   (c1 - byte_offset) * chars + c2 - byte_offset), the code offset is
   subtracted and UCS_MIN added; a result outside
   [UCS_MIN, UCS_MAX] raises "Arguments makes invalid character".
   Otherwise the code is derived from the charset's final byte:
   for dimension 1 as (final - '0') * 94 + (c1 - 33) or
   (final - '0') * 96 + (c1 - 32) on top of a base that is not visible
   here, and for the remaining case relative to MIN_CHAR_94x94 or
   MIN_CHAR_96x96 in the analogous two-byte form.
   NOTE(review): the case labels, the dimension-1 base constants and
   the fall-back returns are on lines absent from this listing.  */
1468 make_builtin_char (Lisp_Object charset, int c1, int c2)
1470 if (XCHARSET_UCS_MAX (charset))
1473 = (XCHARSET_DIMENSION (charset) == 1
1475 c1 - XCHARSET_BYTE_OFFSET (charset)
1477 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1478 + c2 - XCHARSET_BYTE_OFFSET (charset))
1479 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1480 if ((code < XCHARSET_UCS_MIN (charset))
1481 || (XCHARSET_UCS_MAX (charset) < code))
1482 signal_simple_error ("Arguments makes invalid character",
1486 else if (XCHARSET_DIMENSION (charset) == 1)
1488 switch (XCHARSET_CHARS (charset))
1492 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1495 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1502 switch (XCHARSET_CHARS (charset))
1505 return MIN_CHAR_94x94
1506 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1507 + (c1 - 33) * 94 + (c2 - 33);
1509 return MIN_CHAR_96x96
1510 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1511 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of character CH within CHARSET from the
   charset's numeric ranges alone.
   When CH lies in [UCS_MIN, UCS_MAX] of the charset, the linear index
   d = ch - UCS_MIN + CODE_OFFSET is formed and then split according to
   the charset's dimension: each byte is a digit of d in base
   XCHARSET_CHARS plus the byte offset, packed most significant byte
   first (one to four bytes).
   Otherwise, when the code offset is 0, CH is measured against the
   range derived from the final byte (MIN_CHAR_94 / MIN_CHAR_96 for
   dimension 1, MIN_CHAR_94x94 / MIN_CHAR_96x96 for dimension 2) and
   the two-byte results are packed with offsets 33 (94-sets) or 32
   (96-sets).
   NOTE(review): the result for 256-character sets, the upper-bound
   halves of the final-byte range tests, the dimension-1 returns and
   the not-found result are on lines absent from this listing.  */
1519 range_charset_code_point (Lisp_Object charset, Emchar ch)
1523 if ((XCHARSET_UCS_MIN (charset) <= ch)
1524 && (ch <= XCHARSET_UCS_MAX (charset)))
1526 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1528 if (XCHARSET_CHARS (charset) == 256)
1530 else if (XCHARSET_DIMENSION (charset) == 1)
1531 return d + XCHARSET_BYTE_OFFSET (charset);
1532 else if (XCHARSET_DIMENSION (charset) == 2)
1534 ((d / XCHARSET_CHARS (charset)
1535 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1536 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1537 else if (XCHARSET_DIMENSION (charset) == 3)
1539 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1540 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1541 | ((d / XCHARSET_CHARS (charset)
1542 % XCHARSET_CHARS (charset)
1543 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1544 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1545 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1547 ((d / (XCHARSET_CHARS (charset)
1548 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1549 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1550 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1551 % XCHARSET_CHARS (charset)
1552 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1553 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1554 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1555 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1557 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1559 if (XCHARSET_DIMENSION (charset) == 1)
1561 if (XCHARSET_CHARS (charset) == 94)
1563 if (((d = ch - (MIN_CHAR_94
1564 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1568 else if (XCHARSET_CHARS (charset) == 96)
1570 if (((d = ch - (MIN_CHAR_96
1571 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1578 else if (XCHARSET_DIMENSION (charset) == 2)
1580 if (XCHARSET_CHARS (charset) == 94)
1582 if (((d = ch - (MIN_CHAR_94x94
1583 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1586 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1588 else if (XCHARSET_CHARS (charset) == 96)
1590 if (((d = ch - (MIN_CHAR_96x96
1591 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1594 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Pick a charset for the character C, store it through CHARSET, and (in
   the visible return statements) yield C's code point in that charset.

   The tests run in this order: Basic Latin (Vcharset_ascii), two
   branches storing Vcharset_control_1 and Vcharset_latin_iso8859_1
   (their conditions are not visible here), Greek, Cyrillic, Hebrew and
   Thai (each returned as C - MIN_CHAR_xxx + 0x20), half-width katakana,
   the BMP (Vcharset_ucs_bmp), Daikanwa (C - MIN_CHAR_DAIKANWA), and the
   94 / 96 / 94x94 / 96x96 slots.

   For the slot ranges the charset is looked up with
   CHARSET_BY_ATTRIBUTES, using a final byte derived from C's offset in
   the range and CHARSET_LEFT_TO_RIGHT.  When the lookup is non-nil the
   position code(s) are returned -- rebased to 33 (94-char) or 32
   (96-char), with two-dimensional codes packed as (high << 8) | low;
   the assignment visible after each such test stores Vcharset_ucs.
   Every remaining visible branch also stores Vcharset_ucs.

   NOTE(review): the half-width katakana branch returns
   list2 (charset, code) instead of assigning *CHARSET like every other
   branch; it is presumably disabled by comment or preprocessor lines
   that this listing dropped -- confirm against the full source.
   NOTE(review): the return type, braces, several conditions and the
   return statements of the ascii / control-1 / latin-1 / ucs branches
   are not visible in this listing.  */
1604 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1606 if (c <= MAX_CHAR_BASIC_LATIN)
1608 *charset = Vcharset_ascii;
1613 *charset = Vcharset_control_1;
1618 *charset = Vcharset_latin_iso8859_1;
1621 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1623 *charset = Vcharset_greek_iso8859_7;
1624 return c - MIN_CHAR_GREEK + 0x20;
1627 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1629 *charset = Vcharset_cyrillic_iso8859_5;
1630 return c - MIN_CHAR_CYRILLIC + 0x20;
1633 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1635 *charset = Vcharset_hebrew_iso8859_8;
1636 return c - MIN_CHAR_HEBREW + 0x20;
1638 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1640 *charset = Vcharset_thai_tis620;
1641 return c - MIN_CHAR_THAI + 0x20;
1644 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1645 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1647 return list2 (Vcharset_katakana_jisx0201,
1648 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1651 else if (c <= MAX_CHAR_BMP)
1653 *charset = Vcharset_ucs_bmp;
1656 else if (c < MIN_CHAR_DAIKANWA)
1658 *charset = Vcharset_ucs;
1661 else if (c <= MAX_CHAR_DAIKANWA)
1663 *charset = Vcharset_ideograph_daikanwa;
1664 return c - MIN_CHAR_DAIKANWA;
1666 else if (c < MIN_CHAR_94)
1668 *charset = Vcharset_ucs;
1671 else if (c <= MAX_CHAR_94)
1673 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1674 ((c - MIN_CHAR_94) / 94) + '0',
1675 CHARSET_LEFT_TO_RIGHT);
1676 if (!NILP (*charset))
1677 return ((c - MIN_CHAR_94) % 94) + 33;
1680 *charset = Vcharset_ucs;
1684 else if (c <= MAX_CHAR_96)
1686 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1687 ((c - MIN_CHAR_96) / 96) + '0',
1688 CHARSET_LEFT_TO_RIGHT);
1689 if (!NILP (*charset))
1690 return ((c - MIN_CHAR_96) % 96) + 32;
1693 *charset = Vcharset_ucs;
1697 else if (c <= MAX_CHAR_94x94)
1700 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1701 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1702 CHARSET_LEFT_TO_RIGHT);
1703 if (!NILP (*charset))
1704 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1705 | (((c - MIN_CHAR_94x94) % 94) + 33);
1708 *charset = Vcharset_ucs;
1712 else if (c <= MAX_CHAR_96x96)
1715 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1716 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1717 CHARSET_LEFT_TO_RIGHT);
1718 if (!NILP (*charset))
1719 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1720 | (((c - MIN_CHAR_96x96) % 96) + 32);
1723 *charset = Vcharset_ucs;
1729 *charset = Vcharset_ucs;
1734 Lisp_Object Vdefault_coded_charset_priority_list;
1738 /************************************************************************/
1739 /* Basic charset Lisp functions */
1740 /************************************************************************/
1742 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1743 Return non-nil if OBJECT is a charset.
1747 return CHARSETP (object) ? Qt : Qnil;
1750 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1751 Retrieve the charset of the given name.
1752 If CHARSET-OR-NAME is a charset object, it is simply returned.
1753 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1754 nil is returned. Otherwise the associated charset object is returned.
1758 if (CHARSETP (charset_or_name))
1759 return charset_or_name;
1761 CHECK_SYMBOL (charset_or_name);
1762 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1765 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1766 Retrieve the charset of the given name.
1767 Same as `find-charset' except an error is signalled if there is no such
1768 charset instead of returning nil.
1772 Lisp_Object charset = Ffind_charset (name);
1775 signal_simple_error ("No such charset", name);
1779 /* We store the charsets in hash tables with the names as the key and the
1780 actual charset object as the value. Occasionally we need to use them
1781 in a list format. These routines provide us with that. */
1782 struct charset_list_closure
1784 Lisp_Object *charset_list;
/* Hash-table mapping callback, passed to elisp_maphash by Fcharset_list.
   VALUE is a charset object from Vcharset_hash_table; its name is
   consed onto the front of the list addressed by the closure.
   CHARSET_LIST_CLOSURE points to a struct charset_list_closure whose
   charset_list member is the address of the list being built.
   KEY is not used in the visible lines.
   NOTE(review): the return type and return statement are not visible
   in this listing.  */
1788 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1789 void *charset_list_closure)
1791 /* This function can GC */
1792 struct charset_list_closure *chcl =
1793 (struct charset_list_closure*) charset_list_closure;
1794 Lisp_Object *charset_list = chcl->charset_list;
1796 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1800 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1801 Return a list of the names of all defined charsets.
1805 Lisp_Object charset_list = Qnil;
1806 struct gcpro gcpro1;
1807 struct charset_list_closure charset_list_closure;
1809 GCPRO1 (charset_list);
1810 charset_list_closure.charset_list = &charset_list;
1811 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1812 &charset_list_closure);
1815 return charset_list;
1818 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1819 Return the name of the given charset.
1823 return XCHARSET_NAME (Fget_charset (charset));
1826 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1827 Define a new character set.
1828 This function is for use with Mule support.
1829 NAME is a symbol, the name by which the character set is normally referred.
1830 DOC-STRING is a string describing the character set.
1831 PROPS is a property list, describing the specific nature of the
1832 character set. Recognized properties are:
1834 'short-name Short version of the charset name (ex: Latin-1)
1835 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1836 'registry A regular expression matching the font registry field for
1838 'dimension Number of octets used to index a character in this charset.
1839 Either 1 or 2. Defaults to 1.
1840 'columns Number of columns used to display a character in this charset.
1841 Only used in TTY mode. (Under X, the actual width of a
1842 character can be derived from the font used to display the
1843 characters.) If unspecified, defaults to the dimension
1844 (this is almost always the correct value).
1845 'chars Number of characters in each dimension (94 or 96).
1846 Defaults to 94. Note that if the dimension is 2, the
1847 character set thus described is 94x94 or 96x96.
1848 'final Final byte of ISO 2022 escape sequence. Must be
1849 supplied. Each combination of (DIMENSION, CHARS) defines a
1850 separate namespace for final bytes. Note that ISO
1851 2022 restricts the final byte to the range
1852 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1853 dimension == 2. Note also that final bytes in the range
1854 0x30 - 0x3F are reserved for user-defined (not official)
1856 'graphic 0 (use left half of font on output) or 1 (use right half
1857 of font on output). Defaults to 0. For example, for
1858 a font whose registry is ISO8859-1, the left half
1859 (octets 0x20 - 0x7F) is the `ascii' character set, while
1860 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1861 character set. With 'graphic set to 0, the octets
1862 will have their high bit cleared; with it set to 1,
1863 the octets will have their high bit set.
1864 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1866 'ccl-program A compiled CCL program used to convert a character in
1867 this charset into an index into the font. This is in
1868 addition to the 'graphic property. The CCL program
1869 is passed the octets of the character, with the high
1870 bit cleared and set depending upon whether the value
1871 of the 'graphic property is 0 or 1.
1873 (name, doc_string, props))
1875 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1876 int direction = CHARSET_LEFT_TO_RIGHT;
1878 Lisp_Object registry = Qnil;
1879 Lisp_Object charset;
1880 Lisp_Object rest, keyword, value;
1881 Lisp_Object ccl_program = Qnil;
1882 Lisp_Object short_name = Qnil, long_name = Qnil;
1883 int byte_offset = -1;
1885 CHECK_SYMBOL (name);
1886 if (!NILP (doc_string))
1887 CHECK_STRING (doc_string);
1889 charset = Ffind_charset (name);
1890 if (!NILP (charset))
1891 signal_simple_error ("Cannot redefine existing charset", name);
1893 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1895 if (EQ (keyword, Qshort_name))
1897 CHECK_STRING (value);
1901 if (EQ (keyword, Qlong_name))
1903 CHECK_STRING (value);
1907 else if (EQ (keyword, Qdimension))
1910 dimension = XINT (value);
1911 if (dimension < 1 || dimension > 2)
1912 signal_simple_error ("Invalid value for 'dimension", value);
1915 else if (EQ (keyword, Qchars))
1918 chars = XINT (value);
1919 if (chars != 94 && chars != 96)
1920 signal_simple_error ("Invalid value for 'chars", value);
1923 else if (EQ (keyword, Qcolumns))
1926 columns = XINT (value);
1927 if (columns != 1 && columns != 2)
1928 signal_simple_error ("Invalid value for 'columns", value);
1931 else if (EQ (keyword, Qgraphic))
1934 graphic = XINT (value);
1936 if (graphic < 0 || graphic > 2)
1938 if (graphic < 0 || graphic > 1)
1940 signal_simple_error ("Invalid value for 'graphic", value);
1943 else if (EQ (keyword, Qregistry))
1945 CHECK_STRING (value);
1949 else if (EQ (keyword, Qdirection))
1951 if (EQ (value, Ql2r))
1952 direction = CHARSET_LEFT_TO_RIGHT;
1953 else if (EQ (value, Qr2l))
1954 direction = CHARSET_RIGHT_TO_LEFT;
1956 signal_simple_error ("Invalid value for 'direction", value);
1959 else if (EQ (keyword, Qfinal))
1961 CHECK_CHAR_COERCE_INT (value);
1962 final = XCHAR (value);
1963 if (final < '0' || final > '~')
1964 signal_simple_error ("Invalid value for 'final", value);
1967 else if (EQ (keyword, Qccl_program))
1969 CHECK_VECTOR (value);
1970 ccl_program = value;
1974 signal_simple_error ("Unrecognized property", keyword);
1978 error ("'final must be specified");
1979 if (dimension == 2 && final > 0x5F)
1981 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1985 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1987 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1989 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1990 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1992 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1994 id = get_unallocated_leading_byte (dimension);
1996 if (NILP (doc_string))
1997 doc_string = build_string ("");
1999 if (NILP (registry))
2000 registry = build_string ("");
2002 if (NILP (short_name))
2003 XSETSTRING (short_name, XSYMBOL (name)->name);
2005 if (NILP (long_name))
2006 long_name = doc_string;
2009 columns = dimension;
2011 if (byte_offset < 0)
2015 else if (chars == 96)
2021 charset = make_charset (id, name, chars, dimension, columns, graphic,
2022 final, direction, short_name, long_name,
2023 doc_string, registry,
2024 Qnil, 0, 0, 0, byte_offset);
2025 if (!NILP (ccl_program))
2026 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2030 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2032 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2033 NEW-NAME is the name of the new charset. Return the new charset.
2035 (charset, new_name))
2037 Lisp_Object new_charset = Qnil;
2038 int id, chars, dimension, columns, graphic, final;
2040 Lisp_Object registry, doc_string, short_name, long_name;
2041 struct Lisp_Charset *cs;
2043 charset = Fget_charset (charset);
2044 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2045 signal_simple_error ("Charset already has reverse-direction charset",
2048 CHECK_SYMBOL (new_name);
2049 if (!NILP (Ffind_charset (new_name)))
2050 signal_simple_error ("Cannot redefine existing charset", new_name);
2052 cs = XCHARSET (charset);
2054 chars = CHARSET_CHARS (cs);
2055 dimension = CHARSET_DIMENSION (cs);
2056 columns = CHARSET_COLUMNS (cs);
2057 id = get_unallocated_leading_byte (dimension);
2059 graphic = CHARSET_GRAPHIC (cs);
2060 final = CHARSET_FINAL (cs);
2061 direction = CHARSET_RIGHT_TO_LEFT;
2062 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2063 direction = CHARSET_LEFT_TO_RIGHT;
2064 doc_string = CHARSET_DOC_STRING (cs);
2065 short_name = CHARSET_SHORT_NAME (cs);
2066 long_name = CHARSET_LONG_NAME (cs);
2067 registry = CHARSET_REGISTRY (cs);
2069 new_charset = make_charset (id, new_name, chars, dimension, columns,
2070 graphic, final, direction, short_name, long_name,
2071 doc_string, registry,
2073 CHARSET_DECODING_TABLE(cs),
2074 CHARSET_UCS_MIN(cs),
2075 CHARSET_UCS_MAX(cs),
2076 CHARSET_CODE_OFFSET(cs),
2077 CHARSET_BYTE_OFFSET(cs)
2083 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2084 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2089 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2090 Define symbol ALIAS as an alias for CHARSET.
2094 CHECK_SYMBOL (alias);
2095 charset = Fget_charset (charset);
2096 return Fputhash (alias, charset, Vcharset_hash_table);
2099 /* #### Reverse direction charsets not yet implemented. */
2101 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2103 Return the reverse-direction charset parallel to CHARSET, if any.
2104 This is the charset with the same properties (in particular, the same
2105 dimension, number of characters per dimension, and final byte) as
2106 CHARSET but whose characters are displayed in the opposite direction.
2110 charset = Fget_charset (charset);
2111 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2115 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2116 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2117 If DIRECTION is omitted, both directions will be checked (left-to-right
2118 will be returned if character sets exist for both directions).
2120 (dimension, chars, final, direction))
2122 int dm, ch, fi, di = -1;
2124 Lisp_Object obj = Qnil;
2126 CHECK_INT (dimension);
2127 dm = XINT (dimension);
2128 if (dm < 1 || dm > 2)
2129 signal_simple_error ("Invalid value for DIMENSION", dimension);
2133 if (ch != 94 && ch != 96)
2134 signal_simple_error ("Invalid value for CHARS", chars);
2136 CHECK_CHAR_COERCE_INT (final);
2138 if (fi < '0' || fi > '~')
2139 signal_simple_error ("Invalid value for FINAL", final);
2141 if (EQ (direction, Ql2r))
2142 di = CHARSET_LEFT_TO_RIGHT;
2143 else if (EQ (direction, Qr2l))
2144 di = CHARSET_RIGHT_TO_LEFT;
2145 else if (!NILP (direction))
2146 signal_simple_error ("Invalid value for DIRECTION", direction);
2148 if (dm == 2 && fi > 0x5F)
2150 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2153 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2155 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2159 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2161 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2164 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2167 return XCHARSET_NAME (obj);
2171 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2172 Return short name of CHARSET.
2176 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2179 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2180 Return long name of CHARSET.
2184 return XCHARSET_LONG_NAME (Fget_charset (charset));
2187 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2188 Return description of CHARSET.
2192 return XCHARSET_DOC_STRING (Fget_charset (charset));
2195 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2196 Return dimension of CHARSET.
2200 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2203 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2204 Return property PROP of CHARSET.
2205 Recognized properties are those listed in `make-charset', as well as
2206 'name, 'doc-string and 'reverse-direction-charset.
2210 struct Lisp_Charset *cs;
2212 charset = Fget_charset (charset);
2213 cs = XCHARSET (charset);
2215 CHECK_SYMBOL (prop);
2216 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2217 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2218 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2219 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2220 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2221 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2222 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2223 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2224 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2225 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2226 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2227 if (EQ (prop, Qdirection))
2228 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2229 if (EQ (prop, Qreverse_direction_charset))
2231 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2235 return XCHARSET_NAME (obj);
2237 signal_simple_error ("Unrecognized charset property name", prop);
2238 return Qnil; /* not reached */
2241 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2242 Return charset identification number of CHARSET.
2246 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2249 /* #### We need to figure out which properties we really want to
2252 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2253 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2255 (charset, ccl_program))
2257 charset = Fget_charset (charset);
2258 CHECK_VECTOR (ccl_program);
2259 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop cached font lookups for CHARSET.  For every device visited by
   DEVICE_LOOP_NO_BREAK, the entry for CHARSET in that device's
   charset_font_cache is fetched; when an entry exists (the lookup does
   not yield Qunbound) it is emptied with Fclrhash rather than removed.
   Called from Fset_charset_registry after the registry changes.  */
2264 invalidate_charset_font_caches (Lisp_Object charset)
2266 /* Invalidate font cache entries for charset on all devices. */
2267 Lisp_Object devcons, concons, hash_table;
2268 DEVICE_LOOP_NO_BREAK (devcons, concons)
2270 struct device *d = XDEVICE (XCAR (devcons));
2271 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2272 if (!UNBOUNDP (hash_table))
2273 Fclrhash (hash_table);
2277 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2278 Set the 'registry property of CHARSET to REGISTRY.
2280 (charset, registry))
2282 charset = Fget_charset (charset);
2283 CHECK_STRING (registry);
2284 XCHARSET_REGISTRY (charset) = registry;
2285 invalidate_charset_font_caches (charset);
2286 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2291 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2292 Return mapping-table of CHARSET.
2296 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2299 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2300 Set mapping-table of CHARSET to TABLE.
2304 struct Lisp_Charset *cs;
2305 Lisp_Object old_table;
2308 charset = Fget_charset (charset);
2309 cs = XCHARSET (charset);
2311 if (EQ (table, Qnil))
2313 CHARSET_DECODING_TABLE(cs) = table;
2316 else if (VECTORP (table))
2320 /* ad-hoc method for `ascii' */
2321 if ((CHARSET_CHARS (cs) == 94) &&
2322 (CHARSET_BYTE_OFFSET (cs) != 33))
2323 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2325 ccs_len = CHARSET_CHARS (cs);
2327 if (XVECTOR_LENGTH (table) > ccs_len)
2328 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2329 old_table = CHARSET_DECODING_TABLE(cs);
2330 CHARSET_DECODING_TABLE(cs) = table;
2333 signal_error (Qwrong_type_argument,
2334 list2 (build_translated_string ("vector-or-nil-p"),
2336 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2338 switch (CHARSET_DIMENSION (cs))
2341 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2343 Lisp_Object c = XVECTOR_DATA(table)[i];
2348 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2352 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2354 Lisp_Object v = XVECTOR_DATA(table)[i];
2360 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2362 CHARSET_DECODING_TABLE(cs) = old_table;
2363 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2365 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2367 Lisp_Object c = XVECTOR_DATA(v)[j];
2372 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2373 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2377 put_char_attribute (v, charset,
2378 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2387 /************************************************************************/
2388 /* Lisp primitives for working with characters */
2389 /************************************************************************/
2392 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2393 Make a character from CHARSET and code-point CODE.
2399 charset = Fget_charset (charset);
2401 if (XCHARSET_GRAPHIC (charset) == 1)
2403 return make_char (DECODE_CHAR (charset, c));
2407 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2408 Make a character from CHARSET and octets ARG1 and ARG2.
2409 ARG2 is required only for characters from two-dimensional charsets.
2410 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2411 character s with caron.
2413 (charset, arg1, arg2))
2415 struct Lisp_Charset *cs;
2417 int lowlim, highlim;
2419 charset = Fget_charset (charset);
2420 cs = XCHARSET (charset);
2422 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2423 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2425 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2427 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2428 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2431 /* It is useful (and safe, according to Olivier Galibert) to strip
2432 the 8th bit off ARG1 and ARG2 because it allows programmers to
2433 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2434 Latin 2 code of the character. */
2442 if (a1 < lowlim || a1 > highlim)
2443 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2445 if (CHARSET_DIMENSION (cs) == 1)
2449 ("Charset is of dimension one; second octet must be nil", arg2);
2450 return make_char (MAKE_CHAR (charset, a1, 0));
2459 a2 = XINT (arg2) & 0x7f;
2461 if (a2 < lowlim || a2 > highlim)
2462 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2464 return make_char (MAKE_CHAR (charset, a1, a2));
2467 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2468 Return the character set of char CH.
2472 CHECK_CHAR_COERCE_INT (ch);
2474 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2477 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2478 Return the octet numbered N (should be 0 or 1) of char CH.
2479 N defaults to 0 if omitted.
2483 Lisp_Object charset;
2486 CHECK_CHAR_COERCE_INT (ch);
2488 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2490 if (NILP (n) || EQ (n, Qzero))
2491 return make_int (octet0);
2492 else if (EQ (n, make_int (1)))
2493 return make_int (octet1);
2495 signal_simple_error ("Octet number must be 0 or 1", n);
2498 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2499 Return list of charset and one or two position-codes of CHARACTER.
2503 /* This function can GC */
2504 struct gcpro gcpro1, gcpro2;
2505 Lisp_Object charset = Qnil;
2506 Lisp_Object rc = Qnil;
2514 GCPRO2 (charset, rc);
2515 CHECK_CHAR_COERCE_INT (character);
2518 code_point = ENCODE_CHAR (XCHAR (character), charset);
2519 dimension = XCHARSET_DIMENSION (charset);
2520 while (dimension > 0)
2522 rc = Fcons (make_int (code_point & 255), rc);
2526 rc = Fcons (XCHARSET_NAME (charset), rc);
2528 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2530 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2532 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2536 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2545 #ifdef ENABLE_COMPOSITE_CHARS
2546 /************************************************************************/
2547 /* composite character functions */
2548 /************************************************************************/
/* Find or allocate the composite character that stands for the LEN
   bytes at STR.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   builds a new character in Vcharset_composite from
   (composite_char_row_next, composite_char_col_next), records the
   pairing in both the char->string and string->char hash tables, and
   advances the column counter; when the column reaches 128 it restarts
   at 32 and the row counter is incremented.  If the row counter has
   already reached 128, signal_simple_error reports that no composite
   characters are left.

   NOTE(review): the return type, the test that separates "already
   known" from "needs allocation", and the return statements are not
   visible in this listing.  */
2551 lookup_composite_char (Bufbyte *str, int len)
2553 Lisp_Object lispstr = make_string (str, len);
2554 Lisp_Object ch = Fgethash (lispstr,
2555 Vcomposite_char_string2char_hash_table,
2561 if (composite_char_row_next >= 128)
2562 signal_simple_error ("No more composite chars available", lispstr);
2563 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2564 composite_char_col_next);
2565 Fputhash (make_char (emch), lispstr,
2566 Vcomposite_char_char2string_hash_table);
2567 Fputhash (lispstr, make_char (emch),
2568 Vcomposite_char_string2char_hash_table);
2569 composite_char_col_next++;
2570 if (composite_char_col_next >= 128)
2572 composite_char_col_next = 32;
2573 composite_char_row_next++;
2582 composite_char_string (Emchar ch)
2584 Lisp_Object str = Fgethash (make_char (ch),
2585 Vcomposite_char_char2string_hash_table,
2587 assert (!UNBOUNDP (str));
2591 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2592 Convert a string into a single composite character.
2593 The character is the result of overstriking all the characters in
2598 CHECK_STRING (string);
2599 return make_char (lookup_composite_char (XSTRING_DATA (string),
2600 XSTRING_LENGTH (string)));
2603 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2604 Return a string of the characters comprising a composite character.
2612 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2613 signal_simple_error ("Must be composite char", ch);
2614 return composite_char_string (emch);
2616 #endif /* ENABLE_COMPOSITE_CHARS */
2619 /************************************************************************/
2620 /* initialization */
2621 /************************************************************************/
2624 syms_of_mule_charset (void)
2626 DEFSUBR (Fcharsetp);
2627 DEFSUBR (Ffind_charset);
2628 DEFSUBR (Fget_charset);
2629 DEFSUBR (Fcharset_list);
2630 DEFSUBR (Fcharset_name);
2631 DEFSUBR (Fmake_charset);
2632 DEFSUBR (Fmake_reverse_direction_charset);
2633 /* DEFSUBR (Freverse_direction_charset); */
2634 DEFSUBR (Fdefine_charset_alias);
2635 DEFSUBR (Fcharset_from_attributes);
2636 DEFSUBR (Fcharset_short_name);
2637 DEFSUBR (Fcharset_long_name);
2638 DEFSUBR (Fcharset_description);
2639 DEFSUBR (Fcharset_dimension);
2640 DEFSUBR (Fcharset_property);
2641 DEFSUBR (Fcharset_id);
2642 DEFSUBR (Fset_charset_ccl_program);
2643 DEFSUBR (Fset_charset_registry);
2645 DEFSUBR (Fchar_attribute_alist);
2646 DEFSUBR (Fget_char_attribute);
2647 DEFSUBR (Fput_char_attribute);
2648 DEFSUBR (Fdefine_char);
2649 DEFSUBR (Fchar_variants);
2650 DEFSUBR (Fget_composite_char);
2651 DEFSUBR (Fcharset_mapping_table);
2652 DEFSUBR (Fset_charset_mapping_table);
2656 DEFSUBR (Fdecode_char);
2658 DEFSUBR (Fmake_char);
2659 DEFSUBR (Fchar_charset);
2660 DEFSUBR (Fchar_octet);
2661 DEFSUBR (Fsplit_char);
2663 #ifdef ENABLE_COMPOSITE_CHARS
2664 DEFSUBR (Fmake_composite_char);
2665 DEFSUBR (Fcomposite_char_string);
2668 defsymbol (&Qcharsetp, "charsetp");
2669 defsymbol (&Qregistry, "registry");
2670 defsymbol (&Qfinal, "final");
2671 defsymbol (&Qgraphic, "graphic");
2672 defsymbol (&Qdirection, "direction");
2673 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2674 defsymbol (&Qshort_name, "short-name");
2675 defsymbol (&Qlong_name, "long-name");
2677 defsymbol (&Ql2r, "l2r");
2678 defsymbol (&Qr2l, "r2l");
2680 /* Charsets, compatible with FSF 20.3
2681 Naming convention is Script-Charset[-Edition] */
2682 defsymbol (&Qascii, "ascii");
2683 defsymbol (&Qcontrol_1, "control-1");
2684 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2685 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2686 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2687 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2688 defsymbol (&Qthai_tis620, "thai-tis620");
2689 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2690 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2691 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2692 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2693 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2694 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2695 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2696 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2697 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2698 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2699 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2700 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2701 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2702 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2703 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2705 defsymbol (&Q_ucs, "->ucs");
2706 defsymbol (&Q_decomposition, "->decomposition");
2707 defsymbol (&Qcompat, "compat");
2708 defsymbol (&Qisolated, "isolated");
2709 defsymbol (&Qinitial, "initial");
2710 defsymbol (&Qmedial, "medial");
2711 defsymbol (&Qfinal, "final");
2712 defsymbol (&Qvertical, "vertical");
2713 defsymbol (&QnoBreak, "noBreak");
2714 defsymbol (&Qfraction, "fraction");
2715 defsymbol (&Qsuper, "super");
2716 defsymbol (&Qsub, "sub");
2717 defsymbol (&Qcircle, "circle");
2718 defsymbol (&Qsquare, "square");
2719 defsymbol (&Qwide, "wide");
2720 defsymbol (&Qnarrow, "narrow");
2721 defsymbol (&Qsmall, "small");
2722 defsymbol (&Qfont, "font");
2723 defsymbol (&Qucs, "ucs");
2724 defsymbol (&Qucs_bmp, "ucs-bmp");
2725 defsymbol (&Qlatin_viscii, "latin-viscii");
2726 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2727 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2728 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2729 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2730 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2731 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2732 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2733 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2734 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2735 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2736 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2737 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2738 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2739 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2740 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2741 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2742 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2743 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2744 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2745 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2746 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2747 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2748 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2749 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2750 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2751 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2752 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2754 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2755 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2757 defsymbol (&Qcomposite, "composite");
/* vars_of_mule_charset: set up the charset lookup structure and the simple
   variables owned by this file.  Takes no arguments.

   As far as this listing shows, the function
     - allocates `chlook' with xnew and hands it, together with
       charset_lookup_description, to dumpstruct;
     - fills charset_by_leading_byte and charset_by_attributes with Qnil;
     - seeds the three next_allocated leading-byte counters from the
       corresponding MIN_LEADING_BYTE_PRIVATE constants;
     - sets leading_code_private_11 and declares it with DEFVAR_INT as
       "leading-code-private-11";
     - sets Vutf_2000_version and declares it with DEFVAR_LISP as
       "utf-2000-version";
     - calls staticpro on the character attribute, composition and variant
       tables and creates each one with make_char_code_table (Qnil);
     - sets Vdefault_coded_charset_priority_list to Qnil and declares it
       with DEFVAR_LISP as "default-coded-charset-priority-list".

   NOTE(review): the return-type line, the braces, the declarations of
   i, j and k, and any preprocessor conditionals are not visible here.
   The 2-D and the 3-D loop over charset_by_attributes cannot both be
   valid for a single declaration, so they are presumably alternative
   build-time branches -- confirm against the full file.

   NOTE(review): leading_code_private_11 is assigned the same value both
   before and after its DEFVAR_INT; the second assignment looks redundant
   -- confirm before removing.  */
2761 vars_of_mule_charset (void)
2768 chlook = xnew (struct charset_lookup);
2769 dumpstruct (&chlook, &charset_lookup_description);
2771 /* Table of charsets indexed by leading byte. */
2772 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2773 chlook->charset_by_leading_byte[i] = Qnil;
2776 /* Table of charsets indexed by type/final-byte. */
2777 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2778 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2779 chlook->charset_by_attributes[i][j] = Qnil;
2781 /* Table of charsets indexed by type/final-byte/direction. */
2782 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2783 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2784 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2785 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Seed the leading-byte allocation counters from the
   MIN_LEADING_BYTE_PRIVATE constants. */
2789 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2791 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2792 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2796 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2797 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2798 Leading-code of private TYPE9N charset of column-width 1.
2800 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2804 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2805 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2806 Version number of UTF-2000.
2809 staticpro (&Vcharacter_attribute_table);
2810 Vcharacter_attribute_table = make_char_code_table (Qnil);
2812 staticpro (&Vcharacter_composition_table);
2813 Vcharacter_composition_table = make_char_code_table (Qnil);
2815 staticpro (&Vcharacter_variant_table);
2816 Vcharacter_variant_table = make_char_code_table (Qnil);
2818 Vdefault_coded_charset_priority_list = Qnil;
2819 DEFVAR_LISP ("default-coded-charset-priority-list",
2820 &Vdefault_coded_charset_priority_list /*
2821 Default order of preferred coded-character-sets.
2827 complex_vars_of_mule_charset (void)
2829 staticpro (&Vcharset_hash_table);
2830 Vcharset_hash_table =
2831 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2833 /* Predefined character sets. We store them into variables for
2837 staticpro (&Vcharset_ucs);
2839 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2840 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2841 build_string ("UCS"),
2842 build_string ("UCS"),
2843 build_string ("ISO/IEC 10646"),
2845 Qnil, 0, 0xFFFFFFF, 0, 0);
2846 staticpro (&Vcharset_ucs_bmp);
2848 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2849 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2850 build_string ("BMP"),
2851 build_string ("BMP"),
2852 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2853 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2854 Qnil, 0, 0xFFFF, 0, 0);
2856 # define MIN_CHAR_THAI 0
2857 # define MAX_CHAR_THAI 0
2858 # define MIN_CHAR_GREEK 0
2859 # define MAX_CHAR_GREEK 0
2860 # define MIN_CHAR_HEBREW 0
2861 # define MAX_CHAR_HEBREW 0
2862 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2863 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2864 # define MIN_CHAR_CYRILLIC 0
2865 # define MAX_CHAR_CYRILLIC 0
/* Predefined ASCII charset.  The three leading strings are presumably the
   short name, long name and doc string (compare the other registrations
   nearby -- confirm against make_charset); the second one is plain
   "ASCII", without a trailing parenthesis.  */
2867 staticpro (&Vcharset_ascii);
2869 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2870 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2871 build_string ("ASCII"),
2872 build_string ("ASCII"),
2873 build_string ("ASCII (ISO646 IRV)"),
2874 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2875 Qnil, 0, 0x7F, 0, 0);
2876 staticpro (&Vcharset_control_1);
2877 Vcharset_control_1 =
2878 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2879 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2880 build_string ("C1"),
2881 build_string ("Control characters"),
2882 build_string ("Control characters 128-191"),
2884 Qnil, 0x80, 0x9F, 0, 0);
2885 staticpro (&Vcharset_latin_iso8859_1);
2886 Vcharset_latin_iso8859_1 =
2887 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2888 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2889 build_string ("Latin-1"),
2890 build_string ("ISO8859-1 (Latin-1)"),
2891 build_string ("ISO8859-1 (Latin-1)"),
2892 build_string ("iso8859-1"),
2893 Qnil, 0xA0, 0xFF, 0, 32);
2894 staticpro (&Vcharset_latin_iso8859_2);
2895 Vcharset_latin_iso8859_2 =
2896 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2897 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2898 build_string ("Latin-2"),
2899 build_string ("ISO8859-2 (Latin-2)"),
2900 build_string ("ISO8859-2 (Latin-2)"),
2901 build_string ("iso8859-2"),
2903 staticpro (&Vcharset_latin_iso8859_3);
2904 Vcharset_latin_iso8859_3 =
2905 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2906 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2907 build_string ("Latin-3"),
2908 build_string ("ISO8859-3 (Latin-3)"),
2909 build_string ("ISO8859-3 (Latin-3)"),
2910 build_string ("iso8859-3"),
2912 staticpro (&Vcharset_latin_iso8859_4);
2913 Vcharset_latin_iso8859_4 =
2914 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2915 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2916 build_string ("Latin-4"),
2917 build_string ("ISO8859-4 (Latin-4)"),
2918 build_string ("ISO8859-4 (Latin-4)"),
2919 build_string ("iso8859-4"),
2921 staticpro (&Vcharset_thai_tis620);
2922 Vcharset_thai_tis620 =
2923 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2924 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2925 build_string ("TIS620"),
2926 build_string ("TIS620 (Thai)"),
2927 build_string ("TIS620.2529 (Thai)"),
2928 build_string ("tis620"),
2929 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2930 staticpro (&Vcharset_greek_iso8859_7);
2931 Vcharset_greek_iso8859_7 =
2932 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2933 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2934 build_string ("ISO8859-7"),
2935 build_string ("ISO8859-7 (Greek)"),
2936 build_string ("ISO8859-7 (Greek)"),
2937 build_string ("iso8859-7"),
2938 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2939 staticpro (&Vcharset_arabic_iso8859_6);
2940 Vcharset_arabic_iso8859_6 =
2941 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2942 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2943 build_string ("ISO8859-6"),
2944 build_string ("ISO8859-6 (Arabic)"),
2945 build_string ("ISO8859-6 (Arabic)"),
2946 build_string ("iso8859-6"),
2948 staticpro (&Vcharset_hebrew_iso8859_8);
2949 Vcharset_hebrew_iso8859_8 =
2950 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2951 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2952 build_string ("ISO8859-8"),
2953 build_string ("ISO8859-8 (Hebrew)"),
2954 build_string ("ISO8859-8 (Hebrew)"),
2955 build_string ("iso8859-8"),
2956 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2957 staticpro (&Vcharset_katakana_jisx0201);
2958 Vcharset_katakana_jisx0201 =
2959 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2960 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2961 build_string ("JISX0201 Kana"),
2962 build_string ("JISX0201.1976 (Japanese Kana)"),
2963 build_string ("JISX0201.1976 Japanese Kana"),
2964 build_string ("jisx0201\\.1976"),
2966 staticpro (&Vcharset_latin_jisx0201);
2967 Vcharset_latin_jisx0201 =
2968 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2969 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2970 build_string ("JISX0201 Roman"),
2971 build_string ("JISX0201.1976 (Japanese Roman)"),
2972 build_string ("JISX0201.1976 Japanese Roman"),
2973 build_string ("jisx0201\\.1976"),
2975 staticpro (&Vcharset_cyrillic_iso8859_5);
2976 Vcharset_cyrillic_iso8859_5 =
2977 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2978 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2979 build_string ("ISO8859-5"),
2980 build_string ("ISO8859-5 (Cyrillic)"),
2981 build_string ("ISO8859-5 (Cyrillic)"),
2982 build_string ("iso8859-5"),
2984 0 /* MIN_CHAR_CYRILLIC */,
2985 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
2986 staticpro (&Vcharset_latin_iso8859_9);
2987 Vcharset_latin_iso8859_9 =
2988 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2989 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2990 build_string ("Latin-5"),
2991 build_string ("ISO8859-9 (Latin-5)"),
2992 build_string ("ISO8859-9 (Latin-5)"),
2993 build_string ("iso8859-9"),
2995 staticpro (&Vcharset_japanese_jisx0208_1978);
2996 Vcharset_japanese_jisx0208_1978 =
2997 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2998 Qjapanese_jisx0208_1978, 94, 2,
2999 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3000 build_string ("JIS X0208:1978"),
3001 build_string ("JIS X0208:1978 (Japanese)"),
3003 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3004 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* Predefined GB2312 charset.  The three leading strings are presumably the
   short name, long name and doc string (compare the other registrations
   nearby -- confirm against make_charset); the second one is plain
   "GB2312", without a trailing parenthesis.  */
3006 staticpro (&Vcharset_chinese_gb2312);
3007 Vcharset_chinese_gb2312 =
3008 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3009 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3010 build_string ("GB2312"),
3011 build_string ("GB2312"),
3012 build_string ("GB2312 Chinese simplified"),
3013 build_string ("gb2312"),
3015 staticpro (&Vcharset_japanese_jisx0208);
3016 Vcharset_japanese_jisx0208 =
3017 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3018 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3019 build_string ("JISX0208"),
3020 build_string ("JIS X0208:1983 (Japanese)"),
3021 build_string ("JIS X0208:1983 Japanese Kanji"),
3022 build_string ("jisx0208\\.1983"),
3025 staticpro (&Vcharset_japanese_jisx0208_1990);
3026 Vcharset_japanese_jisx0208_1990 =
3027 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3028 Qjapanese_jisx0208_1990, 94, 2,
3029 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3030 build_string ("JISX0208-1990"),
3031 build_string ("JIS X0208:1990 (Japanese)"),
3032 build_string ("JIS X0208:1990 Japanese Kanji"),
3033 build_string ("jisx0208\\.1990"),
3035 MIN_CHAR_JIS_X0208_1990,
3036 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* Predefined KSC5601 charset.  The three leading strings are presumably the
   short name, long name and doc string (compare the other registrations
   nearby -- confirm against make_charset); the second one follows the
   "NAME (Language)" form used by its neighbours, with balanced
   parentheses.  */
3038 staticpro (&Vcharset_korean_ksc5601);
3039 Vcharset_korean_ksc5601 =
3040 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3041 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3042 build_string ("KSC5601"),
3043 build_string ("KSC5601 (Korean)"),
3044 build_string ("KSC5601 Korean Hangul and Hanja"),
3045 build_string ("ksc5601"),
3047 staticpro (&Vcharset_japanese_jisx0212);
3048 Vcharset_japanese_jisx0212 =
3049 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3050 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3051 build_string ("JISX0212"),
3052 build_string ("JISX0212 (Japanese)"),
3053 build_string ("JISX0212 Japanese Supplement"),
3054 build_string ("jisx0212"),
3057 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3058 staticpro (&Vcharset_chinese_cns11643_1);
3059 Vcharset_chinese_cns11643_1 =
3060 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3061 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3062 build_string ("CNS11643-1"),
3063 build_string ("CNS11643-1 (Chinese traditional)"),
3065 ("CNS 11643 Plane 1 Chinese traditional"),
3066 build_string (CHINESE_CNS_PLANE_RE("1")),
3068 staticpro (&Vcharset_chinese_cns11643_2);
3069 Vcharset_chinese_cns11643_2 =
3070 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3071 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3072 build_string ("CNS11643-2"),
3073 build_string ("CNS11643-2 (Chinese traditional)"),
3075 ("CNS 11643 Plane 2 Chinese traditional"),
3076 build_string (CHINESE_CNS_PLANE_RE("2")),
3079 staticpro (&Vcharset_latin_viscii_lower);
3080 Vcharset_latin_viscii_lower =
3081 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3082 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3083 build_string ("VISCII lower"),
3084 build_string ("VISCII lower (Vietnamese)"),
3085 build_string ("VISCII lower (Vietnamese)"),
3086 build_string ("MULEVISCII-LOWER"),
3088 staticpro (&Vcharset_latin_viscii_upper);
3089 Vcharset_latin_viscii_upper =
3090 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3091 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3092 build_string ("VISCII upper"),
3093 build_string ("VISCII upper (Vietnamese)"),
3094 build_string ("VISCII upper (Vietnamese)"),
3095 build_string ("MULEVISCII-UPPER"),
3097 staticpro (&Vcharset_latin_viscii);
3098 Vcharset_latin_viscii =
3099 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3100 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3101 build_string ("VISCII"),
3102 build_string ("VISCII 1.1 (Vietnamese)"),
3103 build_string ("VISCII 1.1 (Vietnamese)"),
3104 build_string ("VISCII1\\.1"),
3106 staticpro (&Vcharset_ideograph_daikanwa);
3107 Vcharset_ideograph_daikanwa =
3108 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3109 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3110 build_string ("Daikanwa"),
3111 build_string ("Morohashi's Daikanwa"),
3112 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3113 build_string ("Daikanwa"),
3114 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3115 staticpro (&Vcharset_mojikyo_pj_1);
3116 Vcharset_mojikyo_pj_1 =
3117 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3118 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Mojikyo-PJ-1"),
3120 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3122 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3123 build_string ("jisx0208\\.Mojikyo-1$"),
3125 staticpro (&Vcharset_mojikyo_pj_2);
3126 Vcharset_mojikyo_pj_2 =
3127 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3128 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3129 build_string ("Mojikyo-PJ-2"),
3130 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3132 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3133 build_string ("jisx0208\\.Mojikyo-2$"),
3135 staticpro (&Vcharset_mojikyo_pj_3);
3136 Vcharset_mojikyo_pj_3 =
3137 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3138 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3139 build_string ("Mojikyo-PJ-3"),
3140 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3142 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3143 build_string ("jisx0208\\.Mojikyo-3$"),
3145 staticpro (&Vcharset_mojikyo_pj_4);
3146 Vcharset_mojikyo_pj_4 =
3147 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3148 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3149 build_string ("Mojikyo-PJ-4"),
3150 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3152 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3153 build_string ("jisx0208\\.Mojikyo-4$"),
3155 staticpro (&Vcharset_mojikyo_pj_5);
3156 Vcharset_mojikyo_pj_5 =
3157 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3158 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3159 build_string ("Mojikyo-PJ-5"),
3160 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3162 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3163 build_string ("jisx0208\\.Mojikyo-5$"),
3165 staticpro (&Vcharset_mojikyo_pj_6);
3166 Vcharset_mojikyo_pj_6 =
3167 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3168 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3169 build_string ("Mojikyo-PJ-6"),
3170 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3172 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3173 build_string ("jisx0208\\.Mojikyo-6$"),
3175 staticpro (&Vcharset_mojikyo_pj_7);
3176 Vcharset_mojikyo_pj_7 =
3177 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3178 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3179 build_string ("Mojikyo-PJ-7"),
3180 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3182 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3183 build_string ("jisx0208\\.Mojikyo-7$"),
3185 staticpro (&Vcharset_mojikyo_pj_8);
3186 Vcharset_mojikyo_pj_8 =
3187 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3188 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3189 build_string ("Mojikyo-PJ-8"),
3190 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3192 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3193 build_string ("jisx0208\\.Mojikyo-8$"),
3195 staticpro (&Vcharset_mojikyo_pj_9);
3196 Vcharset_mojikyo_pj_9 =
3197 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3198 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3199 build_string ("Mojikyo-PJ-9"),
3200 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3202 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3203 build_string ("jisx0208\\.Mojikyo-9$"),
3205 staticpro (&Vcharset_mojikyo_pj_10);
3206 Vcharset_mojikyo_pj_10 =
3207 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3208 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3209 build_string ("Mojikyo-PJ-10"),
3210 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3212 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3213 build_string ("jisx0208\\.Mojikyo-10$"),
3215 staticpro (&Vcharset_mojikyo_pj_11);
3216 Vcharset_mojikyo_pj_11 =
3217 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3218 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3219 build_string ("Mojikyo-PJ-11"),
3220 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3222 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3223 build_string ("jisx0208\\.Mojikyo-11$"),
3225 staticpro (&Vcharset_mojikyo_pj_12);
3226 Vcharset_mojikyo_pj_12 =
3227 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3228 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3229 build_string ("Mojikyo-PJ-12"),
3230 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3232 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3233 build_string ("jisx0208\\.Mojikyo-12$"),
3235 staticpro (&Vcharset_mojikyo_pj_13);
3236 Vcharset_mojikyo_pj_13 =
3237 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3238 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3239 build_string ("Mojikyo-PJ-13"),
3240 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3242 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3243 build_string ("jisx0208\\.Mojikyo-13$"),
3245 staticpro (&Vcharset_mojikyo_pj_14);
3246 Vcharset_mojikyo_pj_14 =
3247 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3248 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3249 build_string ("Mojikyo-PJ-14"),
3250 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3252 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3253 build_string ("jisx0208\\.Mojikyo-14$"),
3255 staticpro (&Vcharset_mojikyo_pj_15);
3256 Vcharset_mojikyo_pj_15 =
3257 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3258 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3259 build_string ("Mojikyo-PJ-15"),
3260 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3262 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3263 build_string ("jisx0208\\.Mojikyo-15$"),
3265 staticpro (&Vcharset_mojikyo_pj_16);
3266 Vcharset_mojikyo_pj_16 =
3267 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3268 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3269 build_string ("Mojikyo-PJ-16"),
3270 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3272 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3273 build_string ("jisx0208\\.Mojikyo-16$"),
3275 staticpro (&Vcharset_mojikyo_pj_17);
3276 Vcharset_mojikyo_pj_17 =
3277 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3278 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Mojikyo-PJ-17"),
3280 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3282 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3283 build_string ("jisx0208\\.Mojikyo-17$"),
3285 staticpro (&Vcharset_mojikyo_pj_18);
3286 Vcharset_mojikyo_pj_18 =
3287 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3288 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("Mojikyo-PJ-18"),
3290 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3292 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3293 build_string ("jisx0208\\.Mojikyo-18$"),
3295 staticpro (&Vcharset_mojikyo_pj_19);
3296 Vcharset_mojikyo_pj_19 =
3297 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3298 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3299 build_string ("Mojikyo-PJ-19"),
3300 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3302 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3303 build_string ("jisx0208\\.Mojikyo-19$"),
3305 staticpro (&Vcharset_mojikyo_pj_20);
3306 Vcharset_mojikyo_pj_20 =
3307 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3308 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Mojikyo-PJ-20"),
3310 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3312 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3313 build_string ("jisx0208\\.Mojikyo-20$"),
3315 staticpro (&Vcharset_mojikyo_pj_21);
3316 Vcharset_mojikyo_pj_21 =
3317 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3318 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3319 build_string ("Mojikyo-PJ-21"),
3320 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3322 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3323 build_string ("jisx0208\\.Mojikyo-21$"),
3325 staticpro (&Vcharset_ethiopic_ucs);
3326 Vcharset_ethiopic_ucs =
3327 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3328 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3329 build_string ("Ethiopic (UCS)"),
3330 build_string ("Ethiopic (UCS)"),
3331 build_string ("Ethiopic of UCS"),
3332 build_string ("Ethiopic-Unicode"),
3333 Qnil, 0x1200, 0x137F, 0x1200, 0);
3335 staticpro (&Vcharset_chinese_big5_1);
3336 Vcharset_chinese_big5_1 =
3337 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3338 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Big5"),
3340 build_string ("Big5 (Level-1)"),
3342 ("Big5 Level-1 Chinese traditional"),
3343 build_string ("big5"),
3345 staticpro (&Vcharset_chinese_big5_2);
3346 Vcharset_chinese_big5_2 =
3347 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3348 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3349 build_string ("Big5"),
3350 build_string ("Big5 (Level-2)"),
3352 ("Big5 Level-2 Chinese traditional"),
3353 build_string ("big5"),
3356 #ifdef ENABLE_COMPOSITE_CHARS
3357 /* #### For simplicity, we put composite chars into a 96x96 charset.
3358 This is going to lead to problems because you can run out of
3359 room, esp. as we don't yet recycle numbers. */
3360 staticpro (&Vcharset_composite);
3361 Vcharset_composite =
3362 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3363 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3364 build_string ("Composite"),
3365 build_string ("Composite characters"),
3366 build_string ("Composite characters"),
3369 /* #### not dumped properly */
3370 composite_char_row_next = 32;
3371 composite_char_col_next = 32;
3373 Vcomposite_char_string2char_hash_table =
3374 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3375 Vcomposite_char_char2string_hash_table =
3376 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3377 staticpro (&Vcomposite_char_string2char_hash_table);
3378 staticpro (&Vcomposite_char_char2string_hash_table);
3379 #endif /* ENABLE_COMPOSITE_CHARS */