1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo_pj_1;
70 Lisp_Object Vcharset_mojikyo_pj_2;
71 Lisp_Object Vcharset_mojikyo_pj_3;
72 Lisp_Object Vcharset_mojikyo_pj_4;
73 Lisp_Object Vcharset_mojikyo_pj_5;
74 Lisp_Object Vcharset_mojikyo_pj_6;
75 Lisp_Object Vcharset_mojikyo_pj_7;
76 Lisp_Object Vcharset_mojikyo_pj_8;
77 Lisp_Object Vcharset_mojikyo_pj_9;
78 Lisp_Object Vcharset_mojikyo_pj_10;
79 Lisp_Object Vcharset_mojikyo_pj_11;
80 Lisp_Object Vcharset_mojikyo_pj_12;
81 Lisp_Object Vcharset_mojikyo_pj_13;
82 Lisp_Object Vcharset_mojikyo_pj_14;
83 Lisp_Object Vcharset_mojikyo_pj_15;
84 Lisp_Object Vcharset_mojikyo_pj_16;
85 Lisp_Object Vcharset_mojikyo_pj_17;
86 Lisp_Object Vcharset_mojikyo_pj_18;
87 Lisp_Object Vcharset_mojikyo_pj_19;
88 Lisp_Object Vcharset_mojikyo_pj_20;
89 Lisp_Object Vcharset_mojikyo_pj_21;
90 Lisp_Object Vcharset_ethiopic_ucs;
92 Lisp_Object Vcharset_chinese_big5_1;
93 Lisp_Object Vcharset_chinese_big5_2;
95 #ifdef ENABLE_COMPOSITE_CHARS
96 Lisp_Object Vcharset_composite;
98 /* Hash tables for composite chars. One maps string representing
99 composed chars to their equivalent chars; one goes the
101 Lisp_Object Vcomposite_char_char2string_hash_table;
102 Lisp_Object Vcomposite_char_string2char_hash_table;
104 static int composite_char_row_next;
105 static int composite_char_col_next;
107 #endif /* ENABLE_COMPOSITE_CHARS */
109 struct charset_lookup *chlook;
111 static const struct lrecord_description charset_lookup_description_1[] = {
112 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
121 static const struct struct_description charset_lookup_description = {
122 sizeof(struct charset_lookup),
123 charset_lookup_description_1
127 /* Table of number of bytes in the string representation of a character
128 indexed by the first byte of that representation.
130 rep_bytes_by_first_byte(c) is more efficient than the equivalent
131 canonical computation:
133 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
135 const Bytecount rep_bytes_by_first_byte[0xA0] =
136 { /* 0x00 - 0x7f are for straight ASCII */
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 /* 0x80 - 0x8f are for Dimension-1 official charsets */
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 /* 0x90 - 0x9d are for Dimension-2 official charsets */
152 /* 0x9e is for Dimension-1 private charsets */
153 /* 0x9f is for Dimension-2 private charsets */
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
161 mark_char_byte_table (Lisp_Object obj)
163 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 for (i = 0; i < 256; i++)
168 mark_object (cte->property[i]);
174 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
176 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
177 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
180 for (i = 0; i < 256; i++)
181 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
183 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
185 if (!char_byte_table_equal (cte1->property[i],
186 cte2->property[i], depth + 1))
193 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
199 char_byte_table_hash (Lisp_Object obj, int depth)
201 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
203 return internal_array_hash (cte->property, 256, depth);
206 static const struct lrecord_description char_byte_table_description[] = {
207 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
211 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
212 mark_char_byte_table,
213 internal_object_printer,
214 0, char_byte_table_equal,
215 char_byte_table_hash,
216 char_byte_table_description,
217 struct Lisp_Char_Byte_Table);
220 make_char_byte_table (Lisp_Object initval)
224 struct Lisp_Char_Byte_Table *cte =
225 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
226 &lrecord_char_byte_table);
228 for (i = 0; i < 256; i++)
229 cte->property[i] = initval;
231 XSETCHAR_BYTE_TABLE (obj, cte);
236 copy_char_byte_table (Lisp_Object entry)
238 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
241 struct Lisp_Char_Byte_Table *ctenew =
242 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
243 &lrecord_char_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (CHAR_BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_char_byte_table (new);
251 ctenew->property[i] = new;
254 XSETCHAR_BYTE_TABLE (obj, ctenew);
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_code_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_char_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_char_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
330 struct Lisp_Char_Byte_Table* cpt
331 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (CHAR_BYTE_TABLE_P (ret))
335 cpt = XCHAR_BYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (CHAR_BYTE_TABLE_P (ret))
341 cpt = XCHAR_BYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (CHAR_BYTE_TABLE_P (ret))
347 cpt = XCHAR_BYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
355 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
357 unsigned int code = ch;
358 struct Lisp_Char_Byte_Table* cpt1
359 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
360 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
362 if (CHAR_BYTE_TABLE_P (ret))
364 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
366 ret = cpt2->property[(unsigned char)(code >> 16)];
367 if (CHAR_BYTE_TABLE_P (ret))
369 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
371 ret = cpt3->property[(unsigned char)(code >> 8)];
372 if (CHAR_BYTE_TABLE_P (ret))
374 struct Lisp_Char_Byte_Table* cpt4
375 = XCHAR_BYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_char_byte_table (ret);
390 Lisp_Object cpt4 = make_char_byte_table (ret);
392 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_char_byte_table (ret);
401 Lisp_Object cpt3 = make_char_byte_table (ret);
402 Lisp_Object cpt4 = make_char_byte_table (ret);
404 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
436 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
442 else if (EQ (v, Qcompat))
444 else if (EQ (v, Qisolated))
446 else if (EQ (v, Qinitial))
448 else if (EQ (v, Qmedial))
450 else if (EQ (v, Qfinal))
452 else if (EQ (v, Qvertical))
454 else if (EQ (v, QnoBreak))
456 else if (EQ (v, Qfraction))
458 else if (EQ (v, Qsuper))
460 else if (EQ (v, Qsub))
462 else if (EQ (v, Qcircle))
464 else if (EQ (v, Qsquare))
466 else if (EQ (v, Qwide))
468 else if (EQ (v, Qnarrow))
470 else if (EQ (v, Qsmall))
472 else if (EQ (v, Qfont))
475 signal_simple_error (err_msg, err_arg);
478 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
479 Return character corresponding with list.
483 Lisp_Object table = Vcharacter_composition_table;
484 Lisp_Object rest = list;
488 Lisp_Object v = Fcar (rest);
490 Emchar c = to_char_code (v, "Invalid value for composition", list);
492 ret = get_char_code_table (c, table);
497 if (!CHAR_CODE_TABLE_P (ret))
502 else if (!CONSP (rest))
504 else if (CHAR_CODE_TABLE_P (ret))
507 signal_simple_error ("Invalid table is found with", list);
509 signal_simple_error ("Invalid value for composition", list);
512 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
513 Return variants of CHARACTER.
517 CHECK_CHAR (character);
518 return Fcopy_list (get_char_code_table (XCHAR (character),
519 Vcharacter_variant_table));
522 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
523 Return the alist of attributes of CHARACTER.
527 CHECK_CHAR (character);
528 return Fcopy_alist (get_char_code_table (XCHAR (character),
529 Vcharacter_attribute_table));
532 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
533 Return the value of CHARACTER's ATTRIBUTE.
535 (character, attribute))
540 CHECK_CHAR (character);
541 ret = get_char_code_table (XCHAR (character),
542 Vcharacter_attribute_table);
546 if (!NILP (ccs = Ffind_charset (attribute)))
549 return Fcdr (Fassq (attribute, ret));
553 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
556 Emchar char_code = XCHAR (character);
558 = get_char_code_table (char_code, Vcharacter_attribute_table);
561 cell = Fassq (attribute, ret);
565 ret = Fcons (Fcons (attribute, value), ret);
567 else if (!EQ (Fcdr (cell), value))
569 Fsetcdr (cell, value);
571 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
575 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
576 Store CHARACTER's ATTRIBUTE with VALUE.
578 (character, attribute, value))
582 CHECK_CHAR (character);
583 ccs = Ffind_charset (attribute);
586 Lisp_Object cpos, rest;
587 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
594 /* ad-hoc method for `ascii' */
595 if ((XCHARSET_CHARS (ccs) == 94) &&
596 (XCHARSET_BYTE_OFFSET (ccs) != 33))
597 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
599 ccs_len = XCHARSET_CHARS (ccs);
603 Lisp_Object ret = Fcar (value);
606 signal_simple_error ("Invalid value for coded-charset", value);
607 code_point = XINT (ret);
608 if (XCHARSET_GRAPHIC (ccs) == 1)
616 signal_simple_error ("Invalid value for coded-charset", value);
619 signal_simple_error ("Invalid value for coded-charset", value);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
623 code_point = (code_point << 8) | i;
626 value = make_int (code_point);
628 else if (INTP (value))
630 if (XCHARSET_GRAPHIC (ccs) == 1)
631 value = make_int (XINT (value) & 0x7F7F7F7F);
634 signal_simple_error ("Invalid value for coded-charset", value);
637 cpos = Fget_char_attribute (character, attribute);
642 dim = XCHARSET_DIMENSION (ccs);
643 code_point = XINT (cpos);
647 i = ((code_point >> (8 * dim)) & 255)
648 - XCHARSET_BYTE_OFFSET (ccs);
649 nv = XVECTOR_DATA(v)[i];
655 XVECTOR_DATA(v)[i] = Qnil;
656 v = XCHARSET_DECODING_TABLE (ccs);
661 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
664 dim = XCHARSET_DIMENSION (ccs);
665 code_point = XINT (value);
670 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
671 nv = XVECTOR_DATA(v)[i];
675 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
681 XVECTOR_DATA(v)[i] = character;
683 else if (EQ (attribute, Q_decomposition))
685 Lisp_Object rest = value;
686 Lisp_Object table = Vcharacter_composition_table;
689 signal_simple_error ("Invalid value for ->decomposition",
694 Lisp_Object v = Fcar (rest);
697 = to_char_code (v, "Invalid value for ->decomposition", value);
702 put_char_code_table (c, character, table);
707 ntable = get_char_code_table (c, table);
708 if (!CHAR_CODE_TABLE_P (ntable))
710 ntable = make_char_code_table (Qnil);
711 put_char_code_table (c, ntable, table);
717 else if (EQ (attribute, Q_ucs))
723 signal_simple_error ("Invalid value for ->ucs", value);
727 ret = get_char_code_table (c, Vcharacter_variant_table);
728 if (NILP (Fmemq (character, ret)))
730 put_char_code_table (c, Fcons (character, ret),
731 Vcharacter_variant_table);
734 return put_char_attribute (character, attribute, value);
739 EXFUN (Fmake_char, 3);
741 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
742 Store character's ATTRIBUTES.
746 Lisp_Object rest = attributes;
747 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
748 Lisp_Object character;
754 Lisp_Object cell = Fcar (rest);
758 signal_simple_error ("Invalid argument", attributes);
759 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
762 character = Fmake_char (ccs, Fcar (cell),
764 goto setup_attributes;
768 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
771 signal_simple_error ("Invalid argument", attributes);
773 character = make_char (XINT (code) + 0x100000);
774 goto setup_attributes;
778 else if (!INTP (code))
779 signal_simple_error ("Invalid argument", attributes);
781 character = make_char (XINT (code));
787 Lisp_Object cell = Fcar (rest);
790 signal_simple_error ("Invalid argument", attributes);
791 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
795 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
798 Lisp_Object Vutf_2000_version;
802 int leading_code_private_11;
805 Lisp_Object Qcharsetp;
807 /* Qdoc_string, Qdimension, Qchars defined in general.c */
808 Lisp_Object Qregistry, Qfinal, Qgraphic;
809 Lisp_Object Qdirection;
810 Lisp_Object Qreverse_direction_charset;
811 Lisp_Object Qleading_byte;
812 Lisp_Object Qshort_name, Qlong_name;
828 Qjapanese_jisx0208_1978,
831 Qjapanese_jisx0208_1990,
841 Qvietnamese_viscii_lower,
842 Qvietnamese_viscii_upper,
871 Lisp_Object Ql2r, Qr2l;
873 Lisp_Object Vcharset_hash_table;
876 static Charset_ID next_allocated_leading_byte;
878 static Charset_ID next_allocated_1_byte_leading_byte;
879 static Charset_ID next_allocated_2_byte_leading_byte;
882 /* Composite characters are characters constructed by overstriking two
883 or more regular characters.
885 1) The old Mule implementation involves storing composite characters
886 in a buffer as a tag followed by all of the actual characters
887 used to make up the composite character. I think this is a bad
888 idea; it greatly complicates code that wants to handle strings
889 one character at a time because it has to deal with the possibility
890 of great big ungainly characters. It's much more reasonable to
891 simply store an index into a table of composite characters.
893 2) The current implementation only allows for 16,384 separate
894 composite characters over the lifetime of the XEmacs process.
895 This could become a potential problem if the user
896 edited lots of different files that use composite characters.
897 Due to FSF bogosity, increasing the number of allowable
898 composite characters under Mule would decrease the number
899 of possible faces that can exist. Mule already has shrunk
900 this to 2048, and further shrinkage would become uncomfortable.
901 No such problems exist in XEmacs.
903 Composite characters could be represented as 0x80 C1 C2 C3,
904 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
905 for slightly under 2^20 (one million) composite characters
906 over the XEmacs process lifetime, and you only need to
907 increase the size of a Mule character from 19 to 21 bits.
908 Or you could use 0x80 C1 C2 C3 C4, allowing for about
909 85 million (slightly over 2^26) composite characters. */
912 /************************************************************************/
913 /* Basic Emchar functions */
914 /************************************************************************/
916 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
917 string in STR. Returns the number of bytes stored.
918 Do not call this directly. Use the macro set_charptr_emchar() instead.
922 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
937 else if ( c <= 0x7ff )
939 *p++ = (c >> 6) | 0xc0;
940 *p++ = (c & 0x3f) | 0x80;
942 else if ( c <= 0xffff )
944 *p++ = (c >> 12) | 0xe0;
945 *p++ = ((c >> 6) & 0x3f) | 0x80;
946 *p++ = (c & 0x3f) | 0x80;
948 else if ( c <= 0x1fffff )
950 *p++ = (c >> 18) | 0xf0;
951 *p++ = ((c >> 12) & 0x3f) | 0x80;
952 *p++ = ((c >> 6) & 0x3f) | 0x80;
953 *p++ = (c & 0x3f) | 0x80;
955 else if ( c <= 0x3ffffff )
957 *p++ = (c >> 24) | 0xf8;
958 *p++ = ((c >> 18) & 0x3f) | 0x80;
959 *p++ = ((c >> 12) & 0x3f) | 0x80;
960 *p++ = ((c >> 6) & 0x3f) | 0x80;
961 *p++ = (c & 0x3f) | 0x80;
965 *p++ = (c >> 30) | 0xfc;
966 *p++ = ((c >> 24) & 0x3f) | 0x80;
967 *p++ = ((c >> 18) & 0x3f) | 0x80;
968 *p++ = ((c >> 12) & 0x3f) | 0x80;
969 *p++ = ((c >> 6) & 0x3f) | 0x80;
970 *p++ = (c & 0x3f) | 0x80;
973 BREAKUP_CHAR (c, charset, c1, c2);
974 lb = CHAR_LEADING_BYTE (c);
975 if (LEADING_BYTE_PRIVATE_P (lb))
976 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
978 if (EQ (charset, Vcharset_control_1))
987 /* Return the first character from a Mule-encoded string in STR,
988 assuming it's non-ASCII. Do not call this directly.
989 Use the macro charptr_emchar() instead. */
992 non_ascii_charptr_emchar (CONST Bufbyte *str)
1005 else if ( b >= 0xf8 )
1010 else if ( b >= 0xf0 )
1015 else if ( b >= 0xe0 )
1020 else if ( b >= 0xc0 )
1030 for( ; len > 0; len-- )
1033 ch = ( ch << 6 ) | ( b & 0x3f );
1037 Bufbyte i0 = *str, i1, i2 = 0;
1038 Lisp_Object charset;
1040 if (i0 == LEADING_BYTE_CONTROL_1)
1041 return (Emchar) (*++str - 0x20);
1043 if (LEADING_BYTE_PREFIX_P (i0))
1048 charset = CHARSET_BY_LEADING_BYTE (i0);
1049 if (XCHARSET_DIMENSION (charset) == 2)
1052 return MAKE_CHAR (charset, i1, i2);
1056 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1057 Do not call this directly. Use the macro valid_char_p() instead. */
1061 non_ascii_valid_char_p (Emchar ch)
1065 /* Must have only lowest 19 bits set */
1069 f1 = CHAR_FIELD1 (ch);
1070 f2 = CHAR_FIELD2 (ch);
1071 f3 = CHAR_FIELD3 (ch);
1075 Lisp_Object charset;
1077 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1078 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1079 f2 > MAX_CHAR_FIELD2_PRIVATE)
1084 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1085 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1089 NOTE: This takes advantage of the fact that
1090 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1091 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1093 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1094 if (EQ (charset, Qnil))
1096 return (XCHARSET_CHARS (charset) == 96);
1100 Lisp_Object charset;
1102 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1103 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1104 f1 > MAX_CHAR_FIELD1_PRIVATE)
1106 if (f2 < 0x20 || f3 < 0x20)
1109 #ifdef ENABLE_COMPOSITE_CHARS
1110 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1112 if (UNBOUNDP (Fgethash (make_int (ch),
1113 Vcomposite_char_char2string_hash_table,
1118 #endif /* ENABLE_COMPOSITE_CHARS */
1120 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1121 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1124 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1126 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1129 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1131 if (EQ (charset, Qnil))
1133 return (XCHARSET_CHARS (charset) == 96);
1139 /************************************************************************/
1140 /* Basic string functions */
1141 /************************************************************************/
1143 /* Copy the character pointed to by PTR into STR, assuming it's
1144 non-ASCII. Do not call this directly. Use the macro
1145 charptr_copy_char() instead. */
1148 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1150 Bufbyte *strptr = str;
1152 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1154 /* Notice fallthrough. */
1156 case 6: *++strptr = *ptr++;
1157 case 5: *++strptr = *ptr++;
1159 case 4: *++strptr = *ptr++;
1160 case 3: *++strptr = *ptr++;
1161 case 2: *++strptr = *ptr;
1166 return strptr + 1 - str;
1170 /************************************************************************/
1171 /* streams of Emchars */
1172 /************************************************************************/
1174 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1175 The functions below are not meant to be called directly; use
1176 the macros in insdel.h. */
1179 Lstream_get_emchar_1 (Lstream *stream, int ch)
1181 Bufbyte str[MAX_EMCHAR_LEN];
1182 Bufbyte *strptr = str;
1184 str[0] = (Bufbyte) ch;
1185 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1187 /* Notice fallthrough. */
1190 ch = Lstream_getc (stream);
1192 *++strptr = (Bufbyte) ch;
1194 ch = Lstream_getc (stream);
1196 *++strptr = (Bufbyte) ch;
1199 ch = Lstream_getc (stream);
1201 *++strptr = (Bufbyte) ch;
1203 ch = Lstream_getc (stream);
1205 *++strptr = (Bufbyte) ch;
1207 ch = Lstream_getc (stream);
1209 *++strptr = (Bufbyte) ch;
1214 return charptr_emchar (str);
1218 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1220 Bufbyte str[MAX_EMCHAR_LEN];
1221 Bytecount len = set_charptr_emchar (str, ch);
1222 return Lstream_write (stream, str, len);
1226 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1228 Bufbyte str[MAX_EMCHAR_LEN];
1229 Bytecount len = set_charptr_emchar (str, ch);
1230 Lstream_unread (stream, str, len);
1234 /************************************************************************/
1235 /* charset object */
1236 /************************************************************************/
1239 mark_charset (Lisp_Object obj)
1241 struct Lisp_Charset *cs = XCHARSET (obj);
1243 mark_object (cs->short_name);
1244 mark_object (cs->long_name);
1245 mark_object (cs->doc_string);
1246 mark_object (cs->registry);
1247 mark_object (cs->ccl_program);
1249 mark_object (cs->decoding_table);
1255 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1257 struct Lisp_Charset *cs = XCHARSET (obj);
1261 error ("printing unreadable object #<charset %s 0x%x>",
1262 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1265 write_c_string ("#<charset ", printcharfun);
1266 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1267 write_c_string (" ", printcharfun);
1268 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1269 write_c_string (" ", printcharfun);
1270 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1271 write_c_string (" ", printcharfun);
1272 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1273 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1275 CHARSET_DIMENSION (cs),
1276 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1277 CHARSET_COLUMNS (cs),
1278 CHARSET_GRAPHIC (cs),
1279 CHARSET_FINAL (cs));
1280 write_c_string (buf, printcharfun);
1281 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1282 sprintf (buf, " 0x%x>", cs->header.uid);
1283 write_c_string (buf, printcharfun);
1286 static const struct lrecord_description charset_description[] = {
1287 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1289 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1294 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1295 mark_charset, print_charset, 0, 0, 0,
1296 charset_description,
1297 struct Lisp_Charset);
1298 /* Make a new charset. */
1301 make_charset (Charset_ID id, Lisp_Object name,
1302 unsigned short chars, unsigned char dimension,
1303 unsigned char columns, unsigned char graphic,
1304 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1305 Lisp_Object long_name, Lisp_Object doc,
1307 Lisp_Object decoding_table,
1308 Emchar ucs_min, Emchar ucs_max,
1309 Emchar code_offset, unsigned char byte_offset)
1311 unsigned char type = 0;
1313 struct Lisp_Charset *cs =
1314 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1315 XSETCHARSET (obj, cs);
1317 CHARSET_ID (cs) = id;
1318 CHARSET_NAME (cs) = name;
1319 CHARSET_SHORT_NAME (cs) = short_name;
1320 CHARSET_LONG_NAME (cs) = long_name;
1321 CHARSET_CHARS (cs) = chars;
1322 CHARSET_DIMENSION (cs) = dimension;
1323 CHARSET_DIRECTION (cs) = direction;
1324 CHARSET_COLUMNS (cs) = columns;
1325 CHARSET_GRAPHIC (cs) = graphic;
1326 CHARSET_FINAL (cs) = final;
1327 CHARSET_DOC_STRING (cs) = doc;
1328 CHARSET_REGISTRY (cs) = reg;
1329 CHARSET_CCL_PROGRAM (cs) = Qnil;
1330 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1332 CHARSET_DECODING_TABLE(cs) = Qnil;
1333 CHARSET_UCS_MIN(cs) = ucs_min;
1334 CHARSET_UCS_MAX(cs) = ucs_max;
1335 CHARSET_CODE_OFFSET(cs) = code_offset;
1336 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1339 switch (CHARSET_CHARS (cs))
1342 switch (CHARSET_DIMENSION (cs))
1345 type = CHARSET_TYPE_94;
1348 type = CHARSET_TYPE_94X94;
1353 switch (CHARSET_DIMENSION (cs))
1356 type = CHARSET_TYPE_96;
1359 type = CHARSET_TYPE_96X96;
1365 switch (CHARSET_DIMENSION (cs))
1368 type = CHARSET_TYPE_128;
1371 type = CHARSET_TYPE_128X128;
1376 switch (CHARSET_DIMENSION (cs))
1379 type = CHARSET_TYPE_256;
1382 type = CHARSET_TYPE_256X256;
1389 CHARSET_TYPE (cs) = type;
1393 if (id == LEADING_BYTE_ASCII)
1394 CHARSET_REP_BYTES (cs) = 1;
1396 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1398 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1403 /* some charsets do not have final characters. This includes
1404 ASCII, Control-1, Composite, and the two faux private
1407 if (code_offset == 0)
1409 assert (NILP (chlook->charset_by_attributes[type][final]));
1410 chlook->charset_by_attributes[type][final] = obj;
1413 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1414 chlook->charset_by_attributes[type][final][direction] = obj;
1418 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1419 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1421 /* Some charsets are "faux" and don't have names or really exist at
1422 all except in the leading-byte table. */
1424 Fputhash (name, obj, Vcharset_hash_table);
1429 get_unallocated_leading_byte (int dimension)
1434 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1437 lb = next_allocated_leading_byte++;
1441 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1444 lb = next_allocated_1_byte_leading_byte++;
1448 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1451 lb = next_allocated_2_byte_leading_byte++;
1457 ("No more character sets free for this dimension",
1458 make_int (dimension));
1465 range_charset_code_point (Lisp_Object charset, Emchar ch)
1469 if ((XCHARSET_UCS_MIN (charset) <= ch)
1470 && (ch <= XCHARSET_UCS_MAX (charset)))
1472 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1474 if (XCHARSET_CHARS (charset) == 256)
1476 else if (XCHARSET_DIMENSION (charset) == 1)
1477 return d + XCHARSET_BYTE_OFFSET (charset);
1478 else if (XCHARSET_DIMENSION (charset) == 2)
1480 ((d / XCHARSET_CHARS (charset)
1481 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1482 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1483 else if (XCHARSET_DIMENSION (charset) == 3)
1485 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1486 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1487 | ((d / XCHARSET_CHARS (charset)
1488 % XCHARSET_CHARS (charset)
1489 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1490 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1491 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1493 ((d / (XCHARSET_CHARS (charset)
1494 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1495 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1496 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1497 % XCHARSET_CHARS (charset)
1498 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1499 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1500 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1501 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1503 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1505 if (XCHARSET_DIMENSION (charset) == 1)
1507 if (XCHARSET_CHARS (charset) == 94)
1509 if (((d = ch - (MIN_CHAR_94
1510 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1514 else if (XCHARSET_CHARS (charset) == 96)
1516 if (((d = ch - (MIN_CHAR_96
1517 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1524 else if (XCHARSET_DIMENSION (charset) == 2)
1526 if (XCHARSET_CHARS (charset) == 94)
1528 if (((d = ch - (MIN_CHAR_94x94
1529 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1532 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1534 else if (XCHARSET_CHARS (charset) == 96)
1536 if (((d = ch - (MIN_CHAR_96x96
1537 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1540 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1550 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1552 if (c <= MAX_CHAR_BASIC_LATIN)
1554 *charset = Vcharset_ascii;
1559 *charset = Vcharset_control_1;
1564 *charset = Vcharset_latin_iso8859_1;
1567 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1569 *charset = Vcharset_greek_iso8859_7;
1570 return c - MIN_CHAR_GREEK + 0x20;
1573 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1575 *charset = Vcharset_cyrillic_iso8859_5;
1576 return c - MIN_CHAR_CYRILLIC + 0x20;
1579 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1581 *charset = Vcharset_hebrew_iso8859_8;
1582 return c - MIN_CHAR_HEBREW + 0x20;
1584 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1586 *charset = Vcharset_thai_tis620;
1587 return c - MIN_CHAR_THAI + 0x20;
1590 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1591 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1593 return list2 (Vcharset_katakana_jisx0201,
1594 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1597 else if (c <= MAX_CHAR_BMP)
1599 *charset = Vcharset_ucs_bmp;
1602 else if (c < MIN_CHAR_DAIKANWA)
1604 *charset = Vcharset_ucs;
1607 else if (c <= MAX_CHAR_DAIKANWA)
1609 *charset = Vcharset_ideograph_daikanwa;
1610 return c - MIN_CHAR_DAIKANWA;
1612 else if (c < MIN_CHAR_94)
1614 *charset = Vcharset_ucs;
1617 else if (c <= MAX_CHAR_94)
1619 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1620 ((c - MIN_CHAR_94) / 94) + '0',
1621 CHARSET_LEFT_TO_RIGHT);
1622 return ((c - MIN_CHAR_94) % 94) + 33;
1624 else if (c <= MAX_CHAR_96)
1626 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1627 ((c - MIN_CHAR_96) / 96) + '0',
1628 CHARSET_LEFT_TO_RIGHT);
1629 return ((c - MIN_CHAR_96) % 96) + 32;
1631 else if (c <= MAX_CHAR_94x94)
1634 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1635 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1636 CHARSET_LEFT_TO_RIGHT);
1637 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1638 | (((c - MIN_CHAR_94x94) % 94) + 33);
1640 else if (c <= MAX_CHAR_96x96)
1643 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1644 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1645 CHARSET_LEFT_TO_RIGHT);
1646 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1647 | (((c - MIN_CHAR_96x96) % 96) + 32);
1651 *charset = Vcharset_ucs;
1656 Lisp_Object Vdefault_coded_charset_priority_list;
1660 /************************************************************************/
1661 /* Basic charset Lisp functions */
1662 /************************************************************************/
1664 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1665 Return non-nil if OBJECT is a charset.
1669 return CHARSETP (object) ? Qt : Qnil;
1672 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1673 Retrieve the charset of the given name.
1674 If CHARSET-OR-NAME is a charset object, it is simply returned.
1675 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1676 nil is returned. Otherwise the associated charset object is returned.
1680 if (CHARSETP (charset_or_name))
1681 return charset_or_name;
1683 CHECK_SYMBOL (charset_or_name);
1684 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1687 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1688 Retrieve the charset of the given name.
1689 Same as `find-charset' except an error is signalled if there is no such
1690 charset instead of returning nil.
1694 Lisp_Object charset = Ffind_charset (name);
1697 signal_simple_error ("No such charset", name);
1701 /* We store the charsets in hash tables with the names as the key and the
1702 actual charset object as the value. Occasionally we need to use them
1703 in a list format. These routines provide us with that. */
1704 struct charset_list_closure
1706 Lisp_Object *charset_list;
1710 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1711 void *charset_list_closure)
1713 /* This function can GC */
1714 struct charset_list_closure *chcl =
1715 (struct charset_list_closure*) charset_list_closure;
1716 Lisp_Object *charset_list = chcl->charset_list;
1718 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1722 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1723 Return a list of the names of all defined charsets.
1727 Lisp_Object charset_list = Qnil;
1728 struct gcpro gcpro1;
1729 struct charset_list_closure charset_list_closure;
1731 GCPRO1 (charset_list);
1732 charset_list_closure.charset_list = &charset_list;
1733 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1734 &charset_list_closure);
1737 return charset_list;
1740 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1741 Return the name of the given charset.
1745 return XCHARSET_NAME (Fget_charset (charset));
1748 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1749 Define a new character set.
1750 This function is for use with Mule support.
1751 NAME is a symbol, the name by which the character set is normally referred.
1752 DOC-STRING is a string describing the character set.
1753 PROPS is a property list, describing the specific nature of the
1754 character set. Recognized properties are:
1756 'short-name Short version of the charset name (ex: Latin-1)
1757 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1758 'registry A regular expression matching the font registry field for
1760 'dimension Number of octets used to index a character in this charset.
1761 Either 1 or 2. Defaults to 1.
1762 'columns Number of columns used to display a character in this charset.
1763 Only used in TTY mode. (Under X, the actual width of a
1764 character can be derived from the font used to display the
1765 characters.) If unspecified, defaults to the dimension
1766 (this is almost always the correct value).
1767 'chars Number of characters in each dimension (94 or 96).
1768 Defaults to 94. Note that if the dimension is 2, the
1769 character set thus described is 94x94 or 96x96.
1770 'final Final byte of ISO 2022 escape sequence. Must be
1771 supplied. Each combination of (DIMENSION, CHARS) defines a
1772 separate namespace for final bytes. Note that ISO
1773 2022 restricts the final byte to the range
1774 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1775 dimension == 2. Note also that final bytes in the range
1776 0x30 - 0x3F are reserved for user-defined (not official)
1778 'graphic 0 (use left half of font on output) or 1 (use right half
1779 of font on output). Defaults to 0. For example, for
1780 a font whose registry is ISO8859-1, the left half
1781 (octets 0x20 - 0x7F) is the `ascii' character set, while
1782 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1783 character set. With 'graphic set to 0, the octets
1784 will have their high bit cleared; with it set to 1,
1785 the octets will have their high bit set.
1786 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1788 'ccl-program A compiled CCL program used to convert a character in
1789 this charset into an index into the font. This is in
1790 addition to the 'graphic property. The CCL program
1791 is passed the octets of the character, with the high
1792 bit cleared and set depending upon whether the value
1793 of the 'graphic property is 0 or 1.
1795 (name, doc_string, props))
1797 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1798 int direction = CHARSET_LEFT_TO_RIGHT;
1800 Lisp_Object registry = Qnil;
1801 Lisp_Object charset;
1802 Lisp_Object rest, keyword, value;
1803 Lisp_Object ccl_program = Qnil;
1804 Lisp_Object short_name = Qnil, long_name = Qnil;
1805 int byte_offset = -1;
1807 CHECK_SYMBOL (name);
1808 if (!NILP (doc_string))
1809 CHECK_STRING (doc_string);
1811 charset = Ffind_charset (name);
1812 if (!NILP (charset))
1813 signal_simple_error ("Cannot redefine existing charset", name);
1815 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1817 if (EQ (keyword, Qshort_name))
1819 CHECK_STRING (value);
1823 if (EQ (keyword, Qlong_name))
1825 CHECK_STRING (value);
1829 else if (EQ (keyword, Qdimension))
1832 dimension = XINT (value);
1833 if (dimension < 1 || dimension > 2)
1834 signal_simple_error ("Invalid value for 'dimension", value);
1837 else if (EQ (keyword, Qchars))
1840 chars = XINT (value);
1841 if (chars != 94 && chars != 96)
1842 signal_simple_error ("Invalid value for 'chars", value);
1845 else if (EQ (keyword, Qcolumns))
1848 columns = XINT (value);
1849 if (columns != 1 && columns != 2)
1850 signal_simple_error ("Invalid value for 'columns", value);
1853 else if (EQ (keyword, Qgraphic))
1856 graphic = XINT (value);
1858 if (graphic < 0 || graphic > 2)
1860 if (graphic < 0 || graphic > 1)
1862 signal_simple_error ("Invalid value for 'graphic", value);
1865 else if (EQ (keyword, Qregistry))
1867 CHECK_STRING (value);
1871 else if (EQ (keyword, Qdirection))
1873 if (EQ (value, Ql2r))
1874 direction = CHARSET_LEFT_TO_RIGHT;
1875 else if (EQ (value, Qr2l))
1876 direction = CHARSET_RIGHT_TO_LEFT;
1878 signal_simple_error ("Invalid value for 'direction", value);
1881 else if (EQ (keyword, Qfinal))
1883 CHECK_CHAR_COERCE_INT (value);
1884 final = XCHAR (value);
1885 if (final < '0' || final > '~')
1886 signal_simple_error ("Invalid value for 'final", value);
1889 else if (EQ (keyword, Qccl_program))
1891 CHECK_VECTOR (value);
1892 ccl_program = value;
1896 signal_simple_error ("Unrecognized property", keyword);
1900 error ("'final must be specified");
1901 if (dimension == 2 && final > 0x5F)
1903 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1907 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1909 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1911 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1912 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1914 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1916 id = get_unallocated_leading_byte (dimension);
1918 if (NILP (doc_string))
1919 doc_string = build_string ("");
1921 if (NILP (registry))
1922 registry = build_string ("");
1924 if (NILP (short_name))
1925 XSETSTRING (short_name, XSYMBOL (name)->name);
1927 if (NILP (long_name))
1928 long_name = doc_string;
1931 columns = dimension;
1933 if (byte_offset < 0)
1937 else if (chars == 96)
1943 charset = make_charset (id, name, chars, dimension, columns, graphic,
1944 final, direction, short_name, long_name,
1945 doc_string, registry,
1946 Qnil, 0, 0, 0, byte_offset);
1947 if (!NILP (ccl_program))
1948 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1952 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1954 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1955 NEW-NAME is the name of the new charset. Return the new charset.
1957 (charset, new_name))
1959 Lisp_Object new_charset = Qnil;
1960 int id, chars, dimension, columns, graphic, final;
1962 Lisp_Object registry, doc_string, short_name, long_name;
1963 struct Lisp_Charset *cs;
1965 charset = Fget_charset (charset);
1966 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1967 signal_simple_error ("Charset already has reverse-direction charset",
1970 CHECK_SYMBOL (new_name);
1971 if (!NILP (Ffind_charset (new_name)))
1972 signal_simple_error ("Cannot redefine existing charset", new_name);
1974 cs = XCHARSET (charset);
1976 chars = CHARSET_CHARS (cs);
1977 dimension = CHARSET_DIMENSION (cs);
1978 columns = CHARSET_COLUMNS (cs);
1979 id = get_unallocated_leading_byte (dimension);
1981 graphic = CHARSET_GRAPHIC (cs);
1982 final = CHARSET_FINAL (cs);
1983 direction = CHARSET_RIGHT_TO_LEFT;
1984 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1985 direction = CHARSET_LEFT_TO_RIGHT;
1986 doc_string = CHARSET_DOC_STRING (cs);
1987 short_name = CHARSET_SHORT_NAME (cs);
1988 long_name = CHARSET_LONG_NAME (cs);
1989 registry = CHARSET_REGISTRY (cs);
1991 new_charset = make_charset (id, new_name, chars, dimension, columns,
1992 graphic, final, direction, short_name, long_name,
1993 doc_string, registry,
1995 CHARSET_DECODING_TABLE(cs),
1996 CHARSET_UCS_MIN(cs),
1997 CHARSET_UCS_MAX(cs),
1998 CHARSET_CODE_OFFSET(cs),
1999 CHARSET_BYTE_OFFSET(cs)
2005 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2006 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2011 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2012 Define symbol ALIAS as an alias for CHARSET.
2016 CHECK_SYMBOL (alias);
2017 charset = Fget_charset (charset);
2018 return Fputhash (alias, charset, Vcharset_hash_table);
2021 /* #### Reverse direction charsets not yet implemented. */
2023 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2025 Return the reverse-direction charset parallel to CHARSET, if any.
2026 This is the charset with the same properties (in particular, the same
2027 dimension, number of characters per dimension, and final byte) as
2028 CHARSET but whose characters are displayed in the opposite direction.
2032 charset = Fget_charset (charset);
2033 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2037 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2038 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2039 If DIRECTION is omitted, both directions will be checked (left-to-right
2040 will be returned if character sets exist for both directions).
2042 (dimension, chars, final, direction))
2044 int dm, ch, fi, di = -1;
2046 Lisp_Object obj = Qnil;
2048 CHECK_INT (dimension);
2049 dm = XINT (dimension);
2050 if (dm < 1 || dm > 2)
2051 signal_simple_error ("Invalid value for DIMENSION", dimension);
2055 if (ch != 94 && ch != 96)
2056 signal_simple_error ("Invalid value for CHARS", chars);
2058 CHECK_CHAR_COERCE_INT (final);
2060 if (fi < '0' || fi > '~')
2061 signal_simple_error ("Invalid value for FINAL", final);
2063 if (EQ (direction, Ql2r))
2064 di = CHARSET_LEFT_TO_RIGHT;
2065 else if (EQ (direction, Qr2l))
2066 di = CHARSET_RIGHT_TO_LEFT;
2067 else if (!NILP (direction))
2068 signal_simple_error ("Invalid value for DIRECTION", direction);
2070 if (dm == 2 && fi > 0x5F)
2072 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2075 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2077 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2081 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2083 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2086 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2089 return XCHARSET_NAME (obj);
2093 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2094 Return short name of CHARSET.
2098 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2101 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2102 Return long name of CHARSET.
2106 return XCHARSET_LONG_NAME (Fget_charset (charset));
2109 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2110 Return description of CHARSET.
2114 return XCHARSET_DOC_STRING (Fget_charset (charset));
2117 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2118 Return dimension of CHARSET.
2122 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2125 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2126 Return property PROP of CHARSET.
2127 Recognized properties are those listed in `make-charset', as well as
2128 'name and 'doc-string.
2132 struct Lisp_Charset *cs;
2134 charset = Fget_charset (charset);
2135 cs = XCHARSET (charset);
2137 CHECK_SYMBOL (prop);
2138 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2139 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2140 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2141 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2142 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2143 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2144 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2145 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2146 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2147 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2148 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2149 if (EQ (prop, Qdirection))
2150 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2151 if (EQ (prop, Qreverse_direction_charset))
2153 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2157 return XCHARSET_NAME (obj);
2159 signal_simple_error ("Unrecognized charset property name", prop);
2160 return Qnil; /* not reached */
2163 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2164 Return charset identification number of CHARSET.
2168 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2171 /* #### We need to figure out which properties we really want to
2174 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2175 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2177 (charset, ccl_program))
2179 charset = Fget_charset (charset);
2180 CHECK_VECTOR (ccl_program);
2181 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2186 invalidate_charset_font_caches (Lisp_Object charset)
2188 /* Invalidate font cache entries for charset on all devices. */
2189 Lisp_Object devcons, concons, hash_table;
2190 DEVICE_LOOP_NO_BREAK (devcons, concons)
2192 struct device *d = XDEVICE (XCAR (devcons));
2193 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2194 if (!UNBOUNDP (hash_table))
2195 Fclrhash (hash_table);
2199 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2200 Set the 'registry property of CHARSET to REGISTRY.
2202 (charset, registry))
2204 charset = Fget_charset (charset);
2205 CHECK_STRING (registry);
2206 XCHARSET_REGISTRY (charset) = registry;
2207 invalidate_charset_font_caches (charset);
2208 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2213 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2214 Return mapping-table of CHARSET.
2218 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2221 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2222 Set mapping-table of CHARSET to TABLE.
2226 struct Lisp_Charset *cs;
2227 Lisp_Object old_table;
2230 charset = Fget_charset (charset);
2231 cs = XCHARSET (charset);
2233 if (EQ (table, Qnil))
2235 CHARSET_DECODING_TABLE(cs) = table;
2238 else if (VECTORP (table))
2242 /* ad-hoc method for `ascii' */
2243 if ((CHARSET_CHARS (cs) == 94) &&
2244 (CHARSET_BYTE_OFFSET (cs) != 33))
2245 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2247 ccs_len = CHARSET_CHARS (cs);
2249 if (XVECTOR_LENGTH (table) > ccs_len)
2250 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2251 old_table = CHARSET_DECODING_TABLE(cs);
2252 CHARSET_DECODING_TABLE(cs) = table;
2255 signal_error (Qwrong_type_argument,
2256 list2 (build_translated_string ("vector-or-nil-p"),
2258 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2260 switch (CHARSET_DIMENSION (cs))
2263 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2265 Lisp_Object c = XVECTOR_DATA(table)[i];
2270 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2274 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2276 Lisp_Object v = XVECTOR_DATA(table)[i];
2282 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2284 CHARSET_DECODING_TABLE(cs) = old_table;
2285 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2287 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2289 Lisp_Object c = XVECTOR_DATA(v)[j];
2292 put_char_attribute (c, charset,
2295 (i + CHARSET_BYTE_OFFSET (cs)),
2297 (j + CHARSET_BYTE_OFFSET (cs))));
2301 put_char_attribute (v, charset,
2303 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2312 /************************************************************************/
2313 /* Lisp primitives for working with characters */
2314 /************************************************************************/
2316 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2317 Make a character from CHARSET and octets ARG1 and ARG2.
2318 ARG2 is required only for characters from two-dimensional charsets.
2319 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2320 character s with caron.
2322 (charset, arg1, arg2))
2324 struct Lisp_Charset *cs;
2326 int lowlim, highlim;
2328 charset = Fget_charset (charset);
2329 cs = XCHARSET (charset);
2331 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2332 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2334 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2336 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2337 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2340 /* It is useful (and safe, according to Olivier Galibert) to strip
2341 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2342 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2343 Latin 2 code of the character. */
2351 if (a1 < lowlim || a1 > highlim)
2352 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2354 if (CHARSET_DIMENSION (cs) == 1)
2358 ("Charset is of dimension one; second octet must be nil", arg2);
2359 return make_char (MAKE_CHAR (charset, a1, 0));
2368 a2 = XINT (arg2) & 0x7f;
2370 if (a2 < lowlim || a2 > highlim)
2371 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2373 return make_char (MAKE_CHAR (charset, a1, a2));
2376 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2377 Return the character set of char CH.
2381 CHECK_CHAR_COERCE_INT (ch);
2383 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2386 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2387 Return the octet numbered N (should be 0 or 1) of char CH.
2388 N defaults to 0 if omitted.
2392 Lisp_Object charset;
2395 CHECK_CHAR_COERCE_INT (ch);
2397 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2399 if (NILP (n) || EQ (n, Qzero))
2400 return make_int (octet0);
2401 else if (EQ (n, make_int (1)))
2402 return make_int (octet1);
2404 signal_simple_error ("Octet number must be 0 or 1", n);
2407 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2408 Return list of charset and one or two position-codes of CHAR.
2412 /* This function can GC */
2413 struct gcpro gcpro1, gcpro2;
2414 Lisp_Object charset = Qnil;
2415 Lisp_Object rc = Qnil;
2423 GCPRO2 (charset, rc);
2424 CHECK_CHAR_COERCE_INT (character);
2427 code_point = ENCODE_CHAR (XCHAR (character), charset);
2428 dimension = XCHARSET_DIMENSION (charset);
2429 while (dimension > 0)
2431 rc = Fcons (make_int (code_point & 255), rc);
2435 rc = Fcons (XCHARSET_NAME (charset), rc);
2437 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2439 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2441 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2445 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2454 #ifdef ENABLE_COMPOSITE_CHARS
2455 /************************************************************************/
2456 /* composite character functions */
2457 /************************************************************************/
2460 lookup_composite_char (Bufbyte *str, int len)
2462 Lisp_Object lispstr = make_string (str, len);
2463 Lisp_Object ch = Fgethash (lispstr,
2464 Vcomposite_char_string2char_hash_table,
2470 if (composite_char_row_next >= 128)
2471 signal_simple_error ("No more composite chars available", lispstr);
2472 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2473 composite_char_col_next);
2474 Fputhash (make_char (emch), lispstr,
2475 Vcomposite_char_char2string_hash_table);
2476 Fputhash (lispstr, make_char (emch),
2477 Vcomposite_char_string2char_hash_table);
2478 composite_char_col_next++;
2479 if (composite_char_col_next >= 128)
2481 composite_char_col_next = 32;
2482 composite_char_row_next++;
2491 composite_char_string (Emchar ch)
2493 Lisp_Object str = Fgethash (make_char (ch),
2494 Vcomposite_char_char2string_hash_table,
2496 assert (!UNBOUNDP (str));
2500 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2501 Convert a string into a single composite character.
2502 The character is the result of overstriking all the characters in
2507 CHECK_STRING (string);
2508 return make_char (lookup_composite_char (XSTRING_DATA (string),
2509 XSTRING_LENGTH (string)));
2512 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2513 Return a string of the characters comprising a composite character.
2521 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2522 signal_simple_error ("Must be composite char", ch);
2523 return composite_char_string (emch);
2525 #endif /* ENABLE_COMPOSITE_CHARS */
2528 /************************************************************************/
2529 /* initialization */
2530 /************************************************************************/
2533 syms_of_mule_charset (void)
2535 DEFSUBR (Fcharsetp);
2536 DEFSUBR (Ffind_charset);
2537 DEFSUBR (Fget_charset);
2538 DEFSUBR (Fcharset_list);
2539 DEFSUBR (Fcharset_name);
2540 DEFSUBR (Fmake_charset);
2541 DEFSUBR (Fmake_reverse_direction_charset);
2542 /* DEFSUBR (Freverse_direction_charset); */
2543 DEFSUBR (Fdefine_charset_alias);
2544 DEFSUBR (Fcharset_from_attributes);
2545 DEFSUBR (Fcharset_short_name);
2546 DEFSUBR (Fcharset_long_name);
2547 DEFSUBR (Fcharset_description);
2548 DEFSUBR (Fcharset_dimension);
2549 DEFSUBR (Fcharset_property);
2550 DEFSUBR (Fcharset_id);
2551 DEFSUBR (Fset_charset_ccl_program);
2552 DEFSUBR (Fset_charset_registry);
2554 DEFSUBR (Fchar_attribute_alist);
2555 DEFSUBR (Fget_char_attribute);
2556 DEFSUBR (Fput_char_attribute);
2557 DEFSUBR (Fdefine_char);
2558 DEFSUBR (Fchar_variants);
2559 DEFSUBR (Fget_composite_char);
2560 DEFSUBR (Fcharset_mapping_table);
2561 DEFSUBR (Fset_charset_mapping_table);
2564 DEFSUBR (Fmake_char);
2565 DEFSUBR (Fchar_charset);
2566 DEFSUBR (Fchar_octet);
2567 DEFSUBR (Fsplit_char);
2569 #ifdef ENABLE_COMPOSITE_CHARS
2570 DEFSUBR (Fmake_composite_char);
2571 DEFSUBR (Fcomposite_char_string);
2574 defsymbol (&Qcharsetp, "charsetp");
2575 defsymbol (&Qregistry, "registry");
2576 defsymbol (&Qfinal, "final");
2577 defsymbol (&Qgraphic, "graphic");
2578 defsymbol (&Qdirection, "direction");
2579 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2580 defsymbol (&Qshort_name, "short-name");
2581 defsymbol (&Qlong_name, "long-name");
2583 defsymbol (&Ql2r, "l2r");
2584 defsymbol (&Qr2l, "r2l");
2586 /* Charsets, compatible with FSF 20.3
2587 Naming convention is Script-Charset[-Edition] */
2588 defsymbol (&Qascii, "ascii");
2589 defsymbol (&Qcontrol_1, "control-1");
2590 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2591 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2592 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2593 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2594 defsymbol (&Qthai_tis620, "thai-tis620");
2595 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2596 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2597 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2598 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2599 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2600 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2601 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2602 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2603 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2604 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2605 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2606 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2607 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2608 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2609 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2611 defsymbol (&Q_ucs, "->ucs");
2612 defsymbol (&Q_decomposition, "->decomposition");
2613 defsymbol (&Qcompat, "compat");
2614 defsymbol (&Qisolated, "isolated");
2615 defsymbol (&Qinitial, "initial");
2616 defsymbol (&Qmedial, "medial");
2617 defsymbol (&Qfinal, "final");
2618 defsymbol (&Qvertical, "vertical");
2619 defsymbol (&QnoBreak, "noBreak");
2620 defsymbol (&Qfraction, "fraction");
2621 defsymbol (&Qsuper, "super");
2622 defsymbol (&Qsub, "sub");
2623 defsymbol (&Qcircle, "circle");
2624 defsymbol (&Qsquare, "square");
2625 defsymbol (&Qwide, "wide");
2626 defsymbol (&Qnarrow, "narrow");
2627 defsymbol (&Qsmall, "small");
2628 defsymbol (&Qfont, "font");
2629 defsymbol (&Qucs, "ucs");
2630 defsymbol (&Qucs_bmp, "ucs-bmp");
2631 defsymbol (&Qlatin_viscii, "latin-viscii");
2632 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2633 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2634 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2635 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2636 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2637 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2638 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2639 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2640 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2641 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2642 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2643 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2644 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2645 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2646 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2647 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2648 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2649 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2650 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2651 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2652 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2653 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2654 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2655 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2656 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2657 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2658 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2660 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2661 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2663 defsymbol (&Qcomposite, "composite");
2667 vars_of_mule_charset (void)
2674 chlook = xnew (struct charset_lookup);
2675 dumpstruct (&chlook, &charset_lookup_description);
2677 /* Table of charsets indexed by leading byte. */
2678 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2679 chlook->charset_by_leading_byte[i] = Qnil;
2682 /* Table of charsets indexed by type/final-byte. */
2683 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2684 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2685 chlook->charset_by_attributes[i][j] = Qnil;
2687 /* Table of charsets indexed by type/final-byte/direction. */
2688 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2689 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2690 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2691 chlook->charset_by_attributes[i][j][k] = Qnil;
2695 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2697 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2698 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2702 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2703 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2704 Leading-code of private TYPE9N charset of column-width 1.
2706 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2710 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2711 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2712 Version number of UTF-2000.
2715 staticpro (&Vcharacter_attribute_table);
2716 Vcharacter_attribute_table = make_char_code_table (Qnil);
2718 staticpro (&Vcharacter_composition_table);
2719 Vcharacter_composition_table = make_char_code_table (Qnil);
2721 staticpro (&Vcharacter_variant_table);
2722 Vcharacter_variant_table = make_char_code_table (Qnil);
2724 Vdefault_coded_charset_priority_list = Qnil;
2725 DEFVAR_LISP ("default-coded-charset-priority-list",
2726 &Vdefault_coded_charset_priority_list /*
2727 Default order of preferred coded-character-sets.
2733 complex_vars_of_mule_charset (void)
2735 staticpro (&Vcharset_hash_table);
2736 Vcharset_hash_table =
2737 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2739 /* Predefined character sets. We store them into variables for
2743 staticpro (&Vcharset_ucs);
2745 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2746 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2747 build_string ("UCS"),
2748 build_string ("UCS"),
2749 build_string ("ISO/IEC 10646"),
2751 Qnil, 0, 0xFFFFFFF, 0, 0);
2752 staticpro (&Vcharset_ucs_bmp);
2754 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2755 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2756 build_string ("BMP"),
2757 build_string ("BMP"),
2758 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2759 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2760 Qnil, 0, 0xFFFF, 0, 0);
2762 # define MIN_CHAR_THAI 0
2763 # define MAX_CHAR_THAI 0
2764 # define MIN_CHAR_GREEK 0
2765 # define MAX_CHAR_GREEK 0
2766 # define MIN_CHAR_HEBREW 0
2767 # define MAX_CHAR_HEBREW 0
2768 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2769 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2770 # define MIN_CHAR_CYRILLIC 0
2771 # define MAX_CHAR_CYRILLIC 0
2773 staticpro (&Vcharset_ascii);
2775 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2776 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2777 build_string ("ASCII"),
2778 build_string ("ASCII)"),
2779 build_string ("ASCII (ISO646 IRV)"),
2780 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2781 Qnil, 0, 0x7F, 0, 0);
2782 staticpro (&Vcharset_control_1);
2783 Vcharset_control_1 =
2784 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2785 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2786 build_string ("C1"),
2787 build_string ("Control characters"),
2788 build_string ("Control characters 128-191"),
2790 Qnil, 0x80, 0x9F, 0, 0);
2791 staticpro (&Vcharset_latin_iso8859_1);
2792 Vcharset_latin_iso8859_1 =
2793 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2794 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2795 build_string ("Latin-1"),
2796 build_string ("ISO8859-1 (Latin-1)"),
2797 build_string ("ISO8859-1 (Latin-1)"),
2798 build_string ("iso8859-1"),
2799 Qnil, 0xA0, 0xFF, 0, 32);
2800 staticpro (&Vcharset_latin_iso8859_2);
2801 Vcharset_latin_iso8859_2 =
2802 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2803 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2804 build_string ("Latin-2"),
2805 build_string ("ISO8859-2 (Latin-2)"),
2806 build_string ("ISO8859-2 (Latin-2)"),
2807 build_string ("iso8859-2"),
2809 staticpro (&Vcharset_latin_iso8859_3);
2810 Vcharset_latin_iso8859_3 =
2811 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2812 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2813 build_string ("Latin-3"),
2814 build_string ("ISO8859-3 (Latin-3)"),
2815 build_string ("ISO8859-3 (Latin-3)"),
2816 build_string ("iso8859-3"),
2818 staticpro (&Vcharset_latin_iso8859_4);
2819 Vcharset_latin_iso8859_4 =
2820 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2821 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("Latin-4"),
2823 build_string ("ISO8859-4 (Latin-4)"),
2824 build_string ("ISO8859-4 (Latin-4)"),
2825 build_string ("iso8859-4"),
2827 staticpro (&Vcharset_thai_tis620);
2828 Vcharset_thai_tis620 =
2829 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2830 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2831 build_string ("TIS620"),
2832 build_string ("TIS620 (Thai)"),
2833 build_string ("TIS620.2529 (Thai)"),
2834 build_string ("tis620"),
2835 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2836 staticpro (&Vcharset_greek_iso8859_7);
2837 Vcharset_greek_iso8859_7 =
2838 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2839 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2840 build_string ("ISO8859-7"),
2841 build_string ("ISO8859-7 (Greek)"),
2842 build_string ("ISO8859-7 (Greek)"),
2843 build_string ("iso8859-7"),
2844 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2845 staticpro (&Vcharset_arabic_iso8859_6);
2846 Vcharset_arabic_iso8859_6 =
2847 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2848 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2849 build_string ("ISO8859-6"),
2850 build_string ("ISO8859-6 (Arabic)"),
2851 build_string ("ISO8859-6 (Arabic)"),
2852 build_string ("iso8859-6"),
2854 staticpro (&Vcharset_hebrew_iso8859_8);
2855 Vcharset_hebrew_iso8859_8 =
2856 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2857 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2858 build_string ("ISO8859-8"),
2859 build_string ("ISO8859-8 (Hebrew)"),
2860 build_string ("ISO8859-8 (Hebrew)"),
2861 build_string ("iso8859-8"),
2862 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2863 staticpro (&Vcharset_katakana_jisx0201);
2864 Vcharset_katakana_jisx0201 =
2865 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2866 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2867 build_string ("JISX0201 Kana"),
2868 build_string ("JISX0201.1976 (Japanese Kana)"),
2869 build_string ("JISX0201.1976 Japanese Kana"),
2870 build_string ("jisx0201\\.1976"),
2872 staticpro (&Vcharset_latin_jisx0201);
2873 Vcharset_latin_jisx0201 =
2874 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2875 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2876 build_string ("JISX0201 Roman"),
2877 build_string ("JISX0201.1976 (Japanese Roman)"),
2878 build_string ("JISX0201.1976 Japanese Roman"),
2879 build_string ("jisx0201\\.1976"),
2881 staticpro (&Vcharset_cyrillic_iso8859_5);
2882 Vcharset_cyrillic_iso8859_5 =
2883 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2884 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("ISO8859-5"),
2886 build_string ("ISO8859-5 (Cyrillic)"),
2887 build_string ("ISO8859-5 (Cyrillic)"),
2888 build_string ("iso8859-5"),
2889 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2890 staticpro (&Vcharset_latin_iso8859_9);
2891 Vcharset_latin_iso8859_9 =
2892 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2893 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2894 build_string ("Latin-5"),
2895 build_string ("ISO8859-9 (Latin-5)"),
2896 build_string ("ISO8859-9 (Latin-5)"),
2897 build_string ("iso8859-9"),
2899 staticpro (&Vcharset_japanese_jisx0208_1978);
2900 Vcharset_japanese_jisx0208_1978 =
2901 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2902 Qjapanese_jisx0208_1978, 94, 2,
2903 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2904 build_string ("JIS X0208:1978"),
2905 build_string ("JIS X0208:1978 (Japanese)"),
2907 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2908 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2910 staticpro (&Vcharset_chinese_gb2312);
2911 Vcharset_chinese_gb2312 =
2912 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2913 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2914 build_string ("GB2312"),
2915 build_string ("GB2312)"),
2916 build_string ("GB2312 Chinese simplified"),
2917 build_string ("gb2312"),
2919 staticpro (&Vcharset_japanese_jisx0208);
2920 Vcharset_japanese_jisx0208 =
2921 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2922 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2923 build_string ("JISX0208"),
2924 build_string ("JIS X0208:1983 (Japanese)"),
2925 build_string ("JIS X0208:1983 Japanese Kanji"),
2926 build_string ("jisx0208\\.1983"),
2929 staticpro (&Vcharset_japanese_jisx0208_1990);
2930 Vcharset_japanese_jisx0208_1990 =
2931 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2932 Qjapanese_jisx0208_1990, 94, 2,
2933 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2934 build_string ("JISX0208-1990"),
2935 build_string ("JIS X0208:1990 (Japanese)"),
2936 build_string ("JIS X0208:1990 Japanese Kanji"),
2937 build_string ("jisx0208\\.1990"),
2939 MIN_CHAR_JIS_X0208_1990,
2940 MAX_CHAR_JIS_X0208_1990, 0, 33);
2942 staticpro (&Vcharset_korean_ksc5601);
2943 Vcharset_korean_ksc5601 =
2944 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2945 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2946 build_string ("KSC5601"),
2947 build_string ("KSC5601 (Korean"),
2948 build_string ("KSC5601 Korean Hangul and Hanja"),
2949 build_string ("ksc5601"),
2951 staticpro (&Vcharset_japanese_jisx0212);
2952 Vcharset_japanese_jisx0212 =
2953 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2954 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2955 build_string ("JISX0212"),
2956 build_string ("JISX0212 (Japanese)"),
2957 build_string ("JISX0212 Japanese Supplement"),
2958 build_string ("jisx0212"),
2961 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2962 staticpro (&Vcharset_chinese_cns11643_1);
2963 Vcharset_chinese_cns11643_1 =
2964 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2965 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2966 build_string ("CNS11643-1"),
2967 build_string ("CNS11643-1 (Chinese traditional)"),
2969 ("CNS 11643 Plane 1 Chinese traditional"),
2970 build_string (CHINESE_CNS_PLANE_RE("1")),
2972 staticpro (&Vcharset_chinese_cns11643_2);
2973 Vcharset_chinese_cns11643_2 =
2974 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2975 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2976 build_string ("CNS11643-2"),
2977 build_string ("CNS11643-2 (Chinese traditional)"),
2979 ("CNS 11643 Plane 2 Chinese traditional"),
2980 build_string (CHINESE_CNS_PLANE_RE("2")),
2983 staticpro (&Vcharset_latin_viscii_lower);
2984 Vcharset_latin_viscii_lower =
2985 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2986 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2987 build_string ("VISCII lower"),
2988 build_string ("VISCII lower (Vietnamese)"),
2989 build_string ("VISCII lower (Vietnamese)"),
2990 build_string ("MULEVISCII-LOWER"),
2992 staticpro (&Vcharset_latin_viscii_upper);
2993 Vcharset_latin_viscii_upper =
2994 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2995 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2996 build_string ("VISCII upper"),
2997 build_string ("VISCII upper (Vietnamese)"),
2998 build_string ("VISCII upper (Vietnamese)"),
2999 build_string ("MULEVISCII-UPPER"),
3001 staticpro (&Vcharset_latin_viscii);
3002 Vcharset_latin_viscii =
3003 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3004 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3005 build_string ("VISCII"),
3006 build_string ("VISCII 1.1 (Vietnamese)"),
3007 build_string ("VISCII 1.1 (Vietnamese)"),
3008 build_string ("VISCII1\\.1"),
3010 staticpro (&Vcharset_ideograph_daikanwa);
3011 Vcharset_ideograph_daikanwa =
3012 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3013 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3014 build_string ("Daikanwa"),
3015 build_string ("Morohashi's Daikanwa"),
3016 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3017 build_string ("Daikanwa"),
3018 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3019 staticpro (&Vcharset_mojikyo_pj_1);
3020 Vcharset_mojikyo_pj_1 =
3021 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3022 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3023 build_string ("Mojikyo-PJ-1"),
3024 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3026 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3027 build_string ("jisx0208\\.Mojikyo-1$"),
3029 staticpro (&Vcharset_mojikyo_pj_2);
3030 Vcharset_mojikyo_pj_2 =
3031 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3032 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3033 build_string ("Mojikyo-PJ-2"),
3034 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3036 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3037 build_string ("jisx0208\\.Mojikyo-2$"),
3039 staticpro (&Vcharset_mojikyo_pj_3);
3040 Vcharset_mojikyo_pj_3 =
3041 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3042 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3043 build_string ("Mojikyo-PJ-3"),
3044 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3046 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3047 build_string ("jisx0208\\.Mojikyo-3$"),
3049 staticpro (&Vcharset_mojikyo_pj_4);
3050 Vcharset_mojikyo_pj_4 =
3051 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3052 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Mojikyo-PJ-4"),
3054 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3056 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3057 build_string ("jisx0208\\.Mojikyo-4$"),
3059 staticpro (&Vcharset_mojikyo_pj_5);
3060 Vcharset_mojikyo_pj_5 =
3061 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3062 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Mojikyo-PJ-5"),
3064 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3066 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3067 build_string ("jisx0208\\.Mojikyo-5$"),
3069 staticpro (&Vcharset_mojikyo_pj_6);
3070 Vcharset_mojikyo_pj_6 =
3071 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3072 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3073 build_string ("Mojikyo-PJ-6"),
3074 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3076 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3077 build_string ("jisx0208\\.Mojikyo-6$"),
3079 staticpro (&Vcharset_mojikyo_pj_7);
3080 Vcharset_mojikyo_pj_7 =
3081 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3082 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3083 build_string ("Mojikyo-PJ-7"),
3084 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3086 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3087 build_string ("jisx0208\\.Mojikyo-7$"),
3089 staticpro (&Vcharset_mojikyo_pj_8);
3090 Vcharset_mojikyo_pj_8 =
3091 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3092 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3093 build_string ("Mojikyo-PJ-8"),
3094 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3096 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3097 build_string ("jisx0208\\.Mojikyo-8$"),
3099 staticpro (&Vcharset_mojikyo_pj_9);
3100 Vcharset_mojikyo_pj_9 =
3101 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3102 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3103 build_string ("Mojikyo-PJ-9"),
3104 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3106 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3107 build_string ("jisx0208\\.Mojikyo-9$"),
3109 staticpro (&Vcharset_mojikyo_pj_10);
3110 Vcharset_mojikyo_pj_10 =
3111 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3112 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3113 build_string ("Mojikyo-PJ-10"),
3114 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3116 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3117 build_string ("jisx0208\\.Mojikyo-10$"),
3119 staticpro (&Vcharset_mojikyo_pj_11);
3120 Vcharset_mojikyo_pj_11 =
3121 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3122 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3123 build_string ("Mojikyo-PJ-11"),
3124 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3126 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3127 build_string ("jisx0208\\.Mojikyo-11$"),
3129 staticpro (&Vcharset_mojikyo_pj_12);
3130 Vcharset_mojikyo_pj_12 =
3131 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3132 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3133 build_string ("Mojikyo-PJ-12"),
3134 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3136 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3137 build_string ("jisx0208\\.Mojikyo-12$"),
3139 staticpro (&Vcharset_mojikyo_pj_13);
3140 Vcharset_mojikyo_pj_13 =
3141 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3142 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3143 build_string ("Mojikyo-PJ-13"),
3144 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3146 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3147 build_string ("jisx0208\\.Mojikyo-13$"),
3149 staticpro (&Vcharset_mojikyo_pj_14);
3150 Vcharset_mojikyo_pj_14 =
3151 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3152 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3153 build_string ("Mojikyo-PJ-14"),
3154 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3156 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3157 build_string ("jisx0208\\.Mojikyo-14$"),
3159 staticpro (&Vcharset_mojikyo_pj_15);
3160 Vcharset_mojikyo_pj_15 =
3161 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3162 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3163 build_string ("Mojikyo-PJ-15"),
3164 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3166 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3167 build_string ("jisx0208\\.Mojikyo-15$"),
3169 staticpro (&Vcharset_mojikyo_pj_16);
3170 Vcharset_mojikyo_pj_16 =
3171 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3172 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3173 build_string ("Mojikyo-PJ-16"),
3174 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3176 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3177 build_string ("jisx0208\\.Mojikyo-16$"),
3179 staticpro (&Vcharset_mojikyo_pj_17);
3180 Vcharset_mojikyo_pj_17 =
3181 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3182 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3183 build_string ("Mojikyo-PJ-17"),
3184 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3186 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3187 build_string ("jisx0208\\.Mojikyo-17$"),
3189 staticpro (&Vcharset_mojikyo_pj_18);
3190 Vcharset_mojikyo_pj_18 =
3191 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3192 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3193 build_string ("Mojikyo-PJ-18"),
3194 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3196 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3197 build_string ("jisx0208\\.Mojikyo-18$"),
3199 staticpro (&Vcharset_mojikyo_pj_19);
3200 Vcharset_mojikyo_pj_19 =
3201 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3202 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3203 build_string ("Mojikyo-PJ-19"),
3204 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3206 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3207 build_string ("jisx0208\\.Mojikyo-19$"),
3209 staticpro (&Vcharset_mojikyo_pj_20);
3210 Vcharset_mojikyo_pj_20 =
3211 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3212 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3213 build_string ("Mojikyo-PJ-20"),
3214 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3216 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3217 build_string ("jisx0208\\.Mojikyo-20$"),
3219 staticpro (&Vcharset_mojikyo_pj_21);
3220 Vcharset_mojikyo_pj_21 =
3221 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3222 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3223 build_string ("Mojikyo-PJ-21"),
3224 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3226 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3227 build_string ("jisx0208\\.Mojikyo-21$"),
3229 staticpro (&Vcharset_ethiopic_ucs);
3230 Vcharset_ethiopic_ucs =
3231 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3232 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3233 build_string ("Ethiopic (UCS)"),
3234 build_string ("Ethiopic (UCS)"),
3235 build_string ("Ethiopic of UCS"),
3236 build_string ("Ethiopic-Unicode"),
3237 Qnil, 0x1200, 0x137F, 0x1200, 0);
3239 staticpro (&Vcharset_chinese_big5_1);
3240 Vcharset_chinese_big5_1 =
3241 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3242 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3243 build_string ("Big5"),
3244 build_string ("Big5 (Level-1)"),
3246 ("Big5 Level-1 Chinese traditional"),
3247 build_string ("big5"),
3249 staticpro (&Vcharset_chinese_big5_2);
3250 Vcharset_chinese_big5_2 =
3251 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3252 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3253 build_string ("Big5"),
3254 build_string ("Big5 (Level-2)"),
3256 ("Big5 Level-2 Chinese traditional"),
3257 build_string ("big5"),
3260 #ifdef ENABLE_COMPOSITE_CHARS
3261 /* #### For simplicity, we put composite chars into a 96x96 charset.
3262 This is going to lead to problems because you can run out of
3263 room, esp. as we don't yet recycle numbers. */
3264 staticpro (&Vcharset_composite);
3265 Vcharset_composite =
3266 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3267 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3268 build_string ("Composite"),
3269 build_string ("Composite characters"),
3270 build_string ("Composite characters"),
3273 /* #### not dumped properly */
3274 composite_char_row_next = 32;
3275 composite_char_col_next = 32;
3277 Vcomposite_char_string2char_hash_table =
3278 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3279 Vcomposite_char_char2string_hash_table =
3280 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3281 staticpro (&Vcomposite_char_string2char_hash_table);
3282 staticpro (&Vcomposite_char_char2string_hash_table);
3283 #endif /* ENABLE_COMPOSITE_CHARS */