1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo_pj_1;
70 Lisp_Object Vcharset_mojikyo_pj_2;
71 Lisp_Object Vcharset_mojikyo_pj_3;
72 Lisp_Object Vcharset_mojikyo_pj_4;
73 Lisp_Object Vcharset_mojikyo_pj_5;
74 Lisp_Object Vcharset_mojikyo_pj_6;
75 Lisp_Object Vcharset_mojikyo_pj_7;
76 Lisp_Object Vcharset_mojikyo_pj_8;
77 Lisp_Object Vcharset_mojikyo_pj_9;
78 Lisp_Object Vcharset_mojikyo_pj_10;
79 Lisp_Object Vcharset_mojikyo_pj_11;
80 Lisp_Object Vcharset_mojikyo_pj_12;
81 Lisp_Object Vcharset_mojikyo_pj_13;
82 Lisp_Object Vcharset_mojikyo_pj_14;
83 Lisp_Object Vcharset_mojikyo_pj_15;
84 Lisp_Object Vcharset_mojikyo_pj_16;
85 Lisp_Object Vcharset_mojikyo_pj_17;
86 Lisp_Object Vcharset_mojikyo_pj_18;
87 Lisp_Object Vcharset_mojikyo_pj_19;
88 Lisp_Object Vcharset_mojikyo_pj_20;
89 Lisp_Object Vcharset_mojikyo_pj_21;
90 Lisp_Object Vcharset_ethiopic_ucs;
92 Lisp_Object Vcharset_chinese_big5_1;
93 Lisp_Object Vcharset_chinese_big5_2;
95 #ifdef ENABLE_COMPOSITE_CHARS
96 Lisp_Object Vcharset_composite;
98 /* Hash tables for composite chars. One maps string representing
99 composed chars to their equivalent chars; one goes the
101 Lisp_Object Vcomposite_char_char2string_hash_table;
102 Lisp_Object Vcomposite_char_string2char_hash_table;
104 static int composite_char_row_next;
105 static int composite_char_col_next;
107 #endif /* ENABLE_COMPOSITE_CHARS */
109 struct charset_lookup *chlook;
111 static const struct lrecord_description charset_lookup_description_1[] = {
112 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
121 static const struct struct_description charset_lookup_description = {
122 sizeof(struct charset_lookup),
123 charset_lookup_description_1
127 /* Table of number of bytes in the string representation of a character
128 indexed by the first byte of that representation.
130 rep_bytes_by_first_byte(c) is more efficient than the equivalent
131 canonical computation:
133 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
135 const Bytecount rep_bytes_by_first_byte[0xA0] =
136 { /* 0x00 - 0x7f are for straight ASCII */
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 /* 0x80 - 0x8f are for Dimension-1 official charsets */
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 /* 0x90 - 0x9d are for Dimension-2 official charsets */
152 /* 0x9e is for Dimension-1 private charsets */
153 /* 0x9f is for Dimension-2 private charsets */
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
161 mark_char_byte_table (Lisp_Object obj)
163 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 for (i = 0; i < 256; i++)
168 mark_object (cte->property[i]);
174 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
176 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
177 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
180 for (i = 0; i < 256; i++)
181 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
183 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
185 if (!char_byte_table_equal (cte1->property[i],
186 cte2->property[i], depth + 1))
193 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
199 char_byte_table_hash (Lisp_Object obj, int depth)
201 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
203 return internal_array_hash (cte->property, 256, depth);
206 static const struct lrecord_description char_byte_table_description[] = {
207 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
211 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
212 mark_char_byte_table,
213 internal_object_printer,
214 0, char_byte_table_equal,
215 char_byte_table_hash,
216 char_byte_table_description,
217 struct Lisp_Char_Byte_Table);
220 make_char_byte_table (Lisp_Object initval)
224 struct Lisp_Char_Byte_Table *cte =
225 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
226 &lrecord_char_byte_table);
228 for (i = 0; i < 256; i++)
229 cte->property[i] = initval;
231 XSETCHAR_BYTE_TABLE (obj, cte);
236 copy_char_byte_table (Lisp_Object entry)
238 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
241 struct Lisp_Char_Byte_Table *ctenew =
242 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
243 &lrecord_char_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (CHAR_BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_char_byte_table (new);
251 ctenew->property[i] = new;
254 XSETCHAR_BYTE_TABLE (obj, ctenew);
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_code_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_char_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_char_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
330 struct Lisp_Char_Byte_Table* cpt
331 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (CHAR_BYTE_TABLE_P (ret))
335 cpt = XCHAR_BYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (CHAR_BYTE_TABLE_P (ret))
341 cpt = XCHAR_BYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (CHAR_BYTE_TABLE_P (ret))
347 cpt = XCHAR_BYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
355 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
357 unsigned int code = ch;
358 struct Lisp_Char_Byte_Table* cpt1
359 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
360 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
362 if (CHAR_BYTE_TABLE_P (ret))
364 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
366 ret = cpt2->property[(unsigned char)(code >> 16)];
367 if (CHAR_BYTE_TABLE_P (ret))
369 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
371 ret = cpt3->property[(unsigned char)(code >> 8)];
372 if (CHAR_BYTE_TABLE_P (ret))
374 struct Lisp_Char_Byte_Table* cpt4
375 = XCHAR_BYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_char_byte_table (ret);
390 Lisp_Object cpt4 = make_char_byte_table (ret);
392 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_char_byte_table (ret);
401 Lisp_Object cpt3 = make_char_byte_table (ret);
402 Lisp_Object cpt4 = make_char_byte_table (ret);
404 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
436 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
442 else if (EQ (v, Qcompat))
444 else if (EQ (v, Qisolated))
446 else if (EQ (v, Qinitial))
448 else if (EQ (v, Qmedial))
450 else if (EQ (v, Qfinal))
452 else if (EQ (v, Qvertical))
454 else if (EQ (v, QnoBreak))
456 else if (EQ (v, Qfraction))
458 else if (EQ (v, Qsuper))
460 else if (EQ (v, Qsub))
462 else if (EQ (v, Qcircle))
464 else if (EQ (v, Qsquare))
466 else if (EQ (v, Qwide))
468 else if (EQ (v, Qnarrow))
470 else if (EQ (v, Qsmall))
472 else if (EQ (v, Qfont))
475 signal_simple_error (err_msg, err_arg);
478 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
479 Return character corresponding with list.
483 Lisp_Object table = Vcharacter_composition_table;
484 Lisp_Object rest = list;
488 Lisp_Object v = Fcar (rest);
490 Emchar c = to_char_code (v, "Invalid value for composition", list);
492 ret = get_char_code_table (c, table);
497 if (!CHAR_CODE_TABLE_P (ret))
502 else if (!CONSP (rest))
504 else if (CHAR_CODE_TABLE_P (ret))
507 signal_simple_error ("Invalid table is found with", list);
509 signal_simple_error ("Invalid value for composition", list);
512 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
513 Return variants of CHARACTER.
517 CHECK_CHAR (character);
518 return Fcopy_list (get_char_code_table (XCHAR (character),
519 Vcharacter_variant_table));
522 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
523 Return the alist of attributes of CHARACTER.
527 CHECK_CHAR (character);
528 return Fcopy_alist (get_char_code_table (XCHAR (character),
529 Vcharacter_attribute_table));
532 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
533 Return the value of CHARACTER's ATTRIBUTE.
535 (character, attribute))
540 CHECK_CHAR (character);
541 ret = get_char_code_table (XCHAR (character),
542 Vcharacter_attribute_table);
546 if (!NILP (ccs = Ffind_charset (attribute)))
549 return Fcdr (Fassq (attribute, ret));
553 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
556 Emchar char_code = XCHAR (character);
558 = get_char_code_table (char_code, Vcharacter_attribute_table);
561 cell = Fassq (attribute, ret);
565 ret = Fcons (Fcons (attribute, value), ret);
567 else if (!EQ (Fcdr (cell), value))
569 Fsetcdr (cell, value);
571 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
575 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
576 Store CHARACTER's ATTRIBUTE with VALUE.
578 (character, attribute, value))
582 CHECK_CHAR (character);
583 ccs = Ffind_charset (attribute);
586 Lisp_Object cpos, rest;
587 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
594 /* ad-hoc method for `ascii' */
595 if ((XCHARSET_CHARS (ccs) == 94) &&
596 (XCHARSET_BYTE_OFFSET (ccs) != 33))
597 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
599 ccs_len = XCHARSET_CHARS (ccs);
603 Lisp_Object ret = Fcar (value);
606 signal_simple_error ("Invalid value for coded-charset", value);
607 code_point = XINT (ret);
608 if (XCHARSET_GRAPHIC (ccs) == 1)
616 signal_simple_error ("Invalid value for coded-charset", value);
619 signal_simple_error ("Invalid value for coded-charset", value);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
623 code_point = (code_point << 8) | i;
626 value = make_int (code_point);
628 else if (INTP (value))
630 if (XCHARSET_GRAPHIC (ccs) == 1)
631 value = make_int (XINT (value) & 0x7F7F7F7F);
634 signal_simple_error ("Invalid value for coded-charset", value);
637 cpos = Fget_char_attribute (character, attribute);
642 dim = XCHARSET_DIMENSION (ccs);
643 code_point = XINT (cpos);
647 i = ((code_point >> (8 * dim)) & 255)
648 - XCHARSET_BYTE_OFFSET (ccs);
649 nv = XVECTOR_DATA(v)[i];
655 XVECTOR_DATA(v)[i] = Qnil;
656 v = XCHARSET_DECODING_TABLE (ccs);
661 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
664 dim = XCHARSET_DIMENSION (ccs);
665 code_point = XINT (value);
670 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
671 nv = XVECTOR_DATA(v)[i];
675 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
681 XVECTOR_DATA(v)[i] = character;
683 else if (EQ (attribute, Q_decomposition))
685 Lisp_Object rest = value;
686 Lisp_Object table = Vcharacter_composition_table;
689 signal_simple_error ("Invalid value for ->decomposition",
694 Lisp_Object v = Fcar (rest);
697 = to_char_code (v, "Invalid value for ->decomposition", value);
702 put_char_code_table (c, character, table);
707 ntable = get_char_code_table (c, table);
708 if (!CHAR_CODE_TABLE_P (ntable))
710 ntable = make_char_code_table (Qnil);
711 put_char_code_table (c, ntable, table);
717 else if (EQ (attribute, Q_ucs))
723 signal_simple_error ("Invalid value for ->ucs", value);
727 ret = get_char_code_table (c, Vcharacter_variant_table);
728 if (NILP (Fmemq (character, ret)))
730 put_char_code_table (c, Fcons (character, ret),
731 Vcharacter_variant_table);
734 return put_char_attribute (character, attribute, value);
739 EXFUN (Fmake_char, 3);
741 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
742 Store character's ATTRIBUTES.
746 Lisp_Object rest = attributes;
747 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
748 Lisp_Object character;
754 Lisp_Object cell = Fcar (rest);
758 signal_simple_error ("Invalid argument", attributes);
759 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
762 character = Fmake_char (ccs, Fcar (cell),
764 goto setup_attributes;
768 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
771 signal_simple_error ("Invalid argument", attributes);
773 character = make_char (XINT (code) + 0x100000);
774 goto setup_attributes;
778 else if (!INTP (code))
779 signal_simple_error ("Invalid argument", attributes);
781 character = make_char (XINT (code));
787 Lisp_Object cell = Fcar (rest);
790 signal_simple_error ("Invalid argument", attributes);
791 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
795 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
798 Lisp_Object Vutf_2000_version;
802 int leading_code_private_11;
805 Lisp_Object Qcharsetp;
807 /* Qdoc_string, Qdimension, Qchars defined in general.c */
808 Lisp_Object Qregistry, Qfinal, Qgraphic;
809 Lisp_Object Qdirection;
810 Lisp_Object Qreverse_direction_charset;
811 Lisp_Object Qleading_byte;
812 Lisp_Object Qshort_name, Qlong_name;
828 Qjapanese_jisx0208_1978,
831 Qjapanese_jisx0208_1990,
842 Qvietnamese_viscii_lower,
843 Qvietnamese_viscii_upper,
872 Lisp_Object Ql2r, Qr2l;
874 Lisp_Object Vcharset_hash_table;
877 static Charset_ID next_allocated_leading_byte;
879 static Charset_ID next_allocated_1_byte_leading_byte;
880 static Charset_ID next_allocated_2_byte_leading_byte;
883 /* Composite characters are characters constructed by overstriking two
884 or more regular characters.
886 1) The old Mule implementation involves storing composite characters
887 in a buffer as a tag followed by all of the actual characters
888 used to make up the composite character. I think this is a bad
889 idea; it greatly complicates code that wants to handle strings
890 one character at a time because it has to deal with the possibility
891 of great big ungainly characters. It's much more reasonable to
892 simply store an index into a table of composite characters.
894 2) The current implementation only allows for 16,384 separate
895 composite characters over the lifetime of the XEmacs process.
896 This could become a potential problem if the user
897 edited lots of different files that use composite characters.
898 Due to FSF bogosity, increasing the number of allowable
899 composite characters under Mule would decrease the number
900 of possible faces that can exist. Mule already has shrunk
901 this to 2048, and further shrinkage would become uncomfortable.
902 No such problems exist in XEmacs.
904 Composite characters could be represented as 0x80 C1 C2 C3,
905 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
906 for slightly under 2^20 (one million) composite characters
907 over the XEmacs process lifetime, and you only need to
908 increase the size of a Mule character from 19 to 21 bits.
909 Or you could use 0x80 C1 C2 C3 C4, allowing for about
910 85 million (slightly over 2^26) composite characters. */
913 /************************************************************************/
914 /* Basic Emchar functions */
915 /************************************************************************/
917 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
918 string in STR. Returns the number of bytes stored.
919 Do not call this directly. Use the macro set_charptr_emchar() instead.
923 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
938 else if ( c <= 0x7ff )
940 *p++ = (c >> 6) | 0xc0;
941 *p++ = (c & 0x3f) | 0x80;
943 else if ( c <= 0xffff )
945 *p++ = (c >> 12) | 0xe0;
946 *p++ = ((c >> 6) & 0x3f) | 0x80;
947 *p++ = (c & 0x3f) | 0x80;
949 else if ( c <= 0x1fffff )
951 *p++ = (c >> 18) | 0xf0;
952 *p++ = ((c >> 12) & 0x3f) | 0x80;
953 *p++ = ((c >> 6) & 0x3f) | 0x80;
954 *p++ = (c & 0x3f) | 0x80;
956 else if ( c <= 0x3ffffff )
958 *p++ = (c >> 24) | 0xf8;
959 *p++ = ((c >> 18) & 0x3f) | 0x80;
960 *p++ = ((c >> 12) & 0x3f) | 0x80;
961 *p++ = ((c >> 6) & 0x3f) | 0x80;
962 *p++ = (c & 0x3f) | 0x80;
966 *p++ = (c >> 30) | 0xfc;
967 *p++ = ((c >> 24) & 0x3f) | 0x80;
968 *p++ = ((c >> 18) & 0x3f) | 0x80;
969 *p++ = ((c >> 12) & 0x3f) | 0x80;
970 *p++ = ((c >> 6) & 0x3f) | 0x80;
971 *p++ = (c & 0x3f) | 0x80;
974 BREAKUP_CHAR (c, charset, c1, c2);
975 lb = CHAR_LEADING_BYTE (c);
976 if (LEADING_BYTE_PRIVATE_P (lb))
977 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
979 if (EQ (charset, Vcharset_control_1))
988 /* Return the first character from a Mule-encoded string in STR,
989 assuming it's non-ASCII. Do not call this directly.
990 Use the macro charptr_emchar() instead. */
993 non_ascii_charptr_emchar (CONST Bufbyte *str)
1006 else if ( b >= 0xf8 )
1011 else if ( b >= 0xf0 )
1016 else if ( b >= 0xe0 )
1021 else if ( b >= 0xc0 )
1031 for( ; len > 0; len-- )
1034 ch = ( ch << 6 ) | ( b & 0x3f );
1038 Bufbyte i0 = *str, i1, i2 = 0;
1039 Lisp_Object charset;
1041 if (i0 == LEADING_BYTE_CONTROL_1)
1042 return (Emchar) (*++str - 0x20);
1044 if (LEADING_BYTE_PREFIX_P (i0))
1049 charset = CHARSET_BY_LEADING_BYTE (i0);
1050 if (XCHARSET_DIMENSION (charset) == 2)
1053 return MAKE_CHAR (charset, i1, i2);
1057 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1058 Do not call this directly. Use the macro valid_char_p() instead. */
1062 non_ascii_valid_char_p (Emchar ch)
1066 /* Must have only lowest 19 bits set */
1070 f1 = CHAR_FIELD1 (ch);
1071 f2 = CHAR_FIELD2 (ch);
1072 f3 = CHAR_FIELD3 (ch);
1076 Lisp_Object charset;
1078 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1079 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1080 f2 > MAX_CHAR_FIELD2_PRIVATE)
1085 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1086 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1090 NOTE: This takes advantage of the fact that
1091 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1092 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1094 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1095 if (EQ (charset, Qnil))
1097 return (XCHARSET_CHARS (charset) == 96);
1101 Lisp_Object charset;
1103 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1104 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1105 f1 > MAX_CHAR_FIELD1_PRIVATE)
1107 if (f2 < 0x20 || f3 < 0x20)
1110 #ifdef ENABLE_COMPOSITE_CHARS
1111 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1113 if (UNBOUNDP (Fgethash (make_int (ch),
1114 Vcomposite_char_char2string_hash_table,
1119 #endif /* ENABLE_COMPOSITE_CHARS */
1121 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1122 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1125 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1127 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1130 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1132 if (EQ (charset, Qnil))
1134 return (XCHARSET_CHARS (charset) == 96);
1140 /************************************************************************/
1141 /* Basic string functions */
1142 /************************************************************************/
1144 /* Copy the character pointed to by PTR into STR, assuming it's
1145 non-ASCII. Do not call this directly. Use the macro
1146 charptr_copy_char() instead. */
1149 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1151 Bufbyte *strptr = str;
1153 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1155 /* Notice fallthrough. */
1157 case 6: *++strptr = *ptr++;
1158 case 5: *++strptr = *ptr++;
1160 case 4: *++strptr = *ptr++;
1161 case 3: *++strptr = *ptr++;
1162 case 2: *++strptr = *ptr;
1167 return strptr + 1 - str;
1171 /************************************************************************/
1172 /* streams of Emchars */
1173 /************************************************************************/
1175 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1176 The functions below are not meant to be called directly; use
1177 the macros in insdel.h. */
1180 Lstream_get_emchar_1 (Lstream *stream, int ch)
1182 Bufbyte str[MAX_EMCHAR_LEN];
1183 Bufbyte *strptr = str;
1185 str[0] = (Bufbyte) ch;
1186 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1188 /* Notice fallthrough. */
1191 ch = Lstream_getc (stream);
1193 *++strptr = (Bufbyte) ch;
1195 ch = Lstream_getc (stream);
1197 *++strptr = (Bufbyte) ch;
1200 ch = Lstream_getc (stream);
1202 *++strptr = (Bufbyte) ch;
1204 ch = Lstream_getc (stream);
1206 *++strptr = (Bufbyte) ch;
1208 ch = Lstream_getc (stream);
1210 *++strptr = (Bufbyte) ch;
1215 return charptr_emchar (str);
1219 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1221 Bufbyte str[MAX_EMCHAR_LEN];
1222 Bytecount len = set_charptr_emchar (str, ch);
1223 return Lstream_write (stream, str, len);
1227 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1229 Bufbyte str[MAX_EMCHAR_LEN];
1230 Bytecount len = set_charptr_emchar (str, ch);
1231 Lstream_unread (stream, str, len);
1235 /************************************************************************/
1236 /* charset object */
1237 /************************************************************************/
1240 mark_charset (Lisp_Object obj)
1242 struct Lisp_Charset *cs = XCHARSET (obj);
1244 mark_object (cs->short_name);
1245 mark_object (cs->long_name);
1246 mark_object (cs->doc_string);
1247 mark_object (cs->registry);
1248 mark_object (cs->ccl_program);
1250 mark_object (cs->decoding_table);
1256 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1258 struct Lisp_Charset *cs = XCHARSET (obj);
1262 error ("printing unreadable object #<charset %s 0x%x>",
1263 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1266 write_c_string ("#<charset ", printcharfun);
1267 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1268 write_c_string (" ", printcharfun);
1269 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1270 write_c_string (" ", printcharfun);
1271 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1272 write_c_string (" ", printcharfun);
1273 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1274 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1276 CHARSET_DIMENSION (cs),
1277 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1278 CHARSET_COLUMNS (cs),
1279 CHARSET_GRAPHIC (cs),
1280 CHARSET_FINAL (cs));
1281 write_c_string (buf, printcharfun);
1282 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1283 sprintf (buf, " 0x%x>", cs->header.uid);
1284 write_c_string (buf, printcharfun);
1287 static const struct lrecord_description charset_description[] = {
1288 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1290 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1295 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1296 mark_charset, print_charset, 0, 0, 0,
1297 charset_description,
1298 struct Lisp_Charset);
1299 /* Make a new charset. */
1302 make_charset (Charset_ID id, Lisp_Object name,
1303 unsigned short chars, unsigned char dimension,
1304 unsigned char columns, unsigned char graphic,
1305 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1306 Lisp_Object long_name, Lisp_Object doc,
1308 Lisp_Object decoding_table,
1309 Emchar ucs_min, Emchar ucs_max,
1310 Emchar code_offset, unsigned char byte_offset)
1312 unsigned char type = 0;
1314 struct Lisp_Charset *cs =
1315 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1316 XSETCHARSET (obj, cs);
1318 CHARSET_ID (cs) = id;
1319 CHARSET_NAME (cs) = name;
1320 CHARSET_SHORT_NAME (cs) = short_name;
1321 CHARSET_LONG_NAME (cs) = long_name;
1322 CHARSET_CHARS (cs) = chars;
1323 CHARSET_DIMENSION (cs) = dimension;
1324 CHARSET_DIRECTION (cs) = direction;
1325 CHARSET_COLUMNS (cs) = columns;
1326 CHARSET_GRAPHIC (cs) = graphic;
1327 CHARSET_FINAL (cs) = final;
1328 CHARSET_DOC_STRING (cs) = doc;
1329 CHARSET_REGISTRY (cs) = reg;
1330 CHARSET_CCL_PROGRAM (cs) = Qnil;
1331 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1333 CHARSET_DECODING_TABLE(cs) = Qnil;
1334 CHARSET_UCS_MIN(cs) = ucs_min;
1335 CHARSET_UCS_MAX(cs) = ucs_max;
1336 CHARSET_CODE_OFFSET(cs) = code_offset;
1337 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1340 switch (CHARSET_CHARS (cs))
1343 switch (CHARSET_DIMENSION (cs))
1346 type = CHARSET_TYPE_94;
1349 type = CHARSET_TYPE_94X94;
1354 switch (CHARSET_DIMENSION (cs))
1357 type = CHARSET_TYPE_96;
1360 type = CHARSET_TYPE_96X96;
1366 switch (CHARSET_DIMENSION (cs))
1369 type = CHARSET_TYPE_128;
1372 type = CHARSET_TYPE_128X128;
1377 switch (CHARSET_DIMENSION (cs))
1380 type = CHARSET_TYPE_256;
1383 type = CHARSET_TYPE_256X256;
1390 CHARSET_TYPE (cs) = type;
1394 if (id == LEADING_BYTE_ASCII)
1395 CHARSET_REP_BYTES (cs) = 1;
1397 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1399 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1404 /* some charsets do not have final characters. This includes
1405 ASCII, Control-1, Composite, and the two faux private
1408 if (code_offset == 0)
1410 assert (NILP (chlook->charset_by_attributes[type][final]));
1411 chlook->charset_by_attributes[type][final] = obj;
1414 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1415 chlook->charset_by_attributes[type][final][direction] = obj;
1419 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1420 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1422 /* Some charsets are "faux" and don't have names or really exist at
1423 all except in the leading-byte table. */
1425 Fputhash (name, obj, Vcharset_hash_table);
1430 get_unallocated_leading_byte (int dimension)
1435 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1438 lb = next_allocated_leading_byte++;
1442 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1445 lb = next_allocated_1_byte_leading_byte++;
1449 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1452 lb = next_allocated_2_byte_leading_byte++;
1458 ("No more character sets free for this dimension",
1459 make_int (dimension));
1466 range_charset_code_point (Lisp_Object charset, Emchar ch)
1470 if ((XCHARSET_UCS_MIN (charset) <= ch)
1471 && (ch <= XCHARSET_UCS_MAX (charset)))
1473 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1475 if (XCHARSET_CHARS (charset) == 256)
1477 else if (XCHARSET_DIMENSION (charset) == 1)
1478 return d + XCHARSET_BYTE_OFFSET (charset);
1479 else if (XCHARSET_DIMENSION (charset) == 2)
1481 ((d / XCHARSET_CHARS (charset)
1482 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1483 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1484 else if (XCHARSET_DIMENSION (charset) == 3)
1486 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1487 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1488 | ((d / XCHARSET_CHARS (charset)
1489 % XCHARSET_CHARS (charset)
1490 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1491 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1492 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1494 ((d / (XCHARSET_CHARS (charset)
1495 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1496 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1497 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1498 % XCHARSET_CHARS (charset)
1499 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1500 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1501 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1502 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1504 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1506 if (XCHARSET_DIMENSION (charset) == 1)
1508 if (XCHARSET_CHARS (charset) == 94)
1510 if (((d = ch - (MIN_CHAR_94
1511 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1515 else if (XCHARSET_CHARS (charset) == 96)
1517 if (((d = ch - (MIN_CHAR_96
1518 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1525 else if (XCHARSET_DIMENSION (charset) == 2)
1527 if (XCHARSET_CHARS (charset) == 94)
1529 if (((d = ch - (MIN_CHAR_94x94
1530 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1533 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1535 else if (XCHARSET_CHARS (charset) == 96)
1537 if (((d = ch - (MIN_CHAR_96x96
1538 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1541 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1551 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1553 if (c <= MAX_CHAR_BASIC_LATIN)
1555 *charset = Vcharset_ascii;
1560 *charset = Vcharset_control_1;
1565 *charset = Vcharset_latin_iso8859_1;
1568 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1570 *charset = Vcharset_greek_iso8859_7;
1571 return c - MIN_CHAR_GREEK + 0x20;
1574 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1576 *charset = Vcharset_cyrillic_iso8859_5;
1577 return c - MIN_CHAR_CYRILLIC + 0x20;
1580 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1582 *charset = Vcharset_hebrew_iso8859_8;
1583 return c - MIN_CHAR_HEBREW + 0x20;
1585 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1587 *charset = Vcharset_thai_tis620;
1588 return c - MIN_CHAR_THAI + 0x20;
1591 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1592 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1594 return list2 (Vcharset_katakana_jisx0201,
1595 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1598 else if (c <= MAX_CHAR_BMP)
1600 *charset = Vcharset_ucs_bmp;
1603 else if (c < MIN_CHAR_DAIKANWA)
1605 *charset = Vcharset_ucs;
1608 else if (c <= MAX_CHAR_DAIKANWA)
1610 *charset = Vcharset_ideograph_daikanwa;
1611 return c - MIN_CHAR_DAIKANWA;
1613 else if (c < MIN_CHAR_94)
1615 *charset = Vcharset_ucs;
1618 else if (c <= MAX_CHAR_94)
1620 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1621 ((c - MIN_CHAR_94) / 94) + '0',
1622 CHARSET_LEFT_TO_RIGHT);
1623 return ((c - MIN_CHAR_94) % 94) + 33;
1625 else if (c <= MAX_CHAR_96)
1627 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1628 ((c - MIN_CHAR_96) / 96) + '0',
1629 CHARSET_LEFT_TO_RIGHT);
1630 return ((c - MIN_CHAR_96) % 96) + 32;
1632 else if (c <= MAX_CHAR_94x94)
1635 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1636 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1637 CHARSET_LEFT_TO_RIGHT);
1638 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1639 | (((c - MIN_CHAR_94x94) % 94) + 33);
1641 else if (c <= MAX_CHAR_96x96)
1644 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1645 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1646 CHARSET_LEFT_TO_RIGHT);
1647 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1648 | (((c - MIN_CHAR_96x96) % 96) + 32);
1652 *charset = Vcharset_ucs;
1657 Lisp_Object Vdefault_coded_charset_priority_list;
1661 /************************************************************************/
1662 /* Basic charset Lisp functions */
1663 /************************************************************************/
1665 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1666 Return non-nil if OBJECT is a charset.
1670 return CHARSETP (object) ? Qt : Qnil;
1673 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1674 Retrieve the charset of the given name.
1675 If CHARSET-OR-NAME is a charset object, it is simply returned.
1676 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1677 nil is returned. Otherwise the associated charset object is returned.
1681 if (CHARSETP (charset_or_name))
1682 return charset_or_name;
1684 CHECK_SYMBOL (charset_or_name);
1685 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1688 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1689 Retrieve the charset of the given name.
1690 Same as `find-charset' except an error is signalled if there is no such
1691 charset instead of returning nil.
1695 Lisp_Object charset = Ffind_charset (name);
1698 signal_simple_error ("No such charset", name);
1702 /* We store the charsets in hash tables with the names as the key and the
1703 actual charset object as the value. Occasionally we need to use them
1704 in a list format. These routines provide us with that. */
1705 struct charset_list_closure
1707 Lisp_Object *charset_list;
1711 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1712 void *charset_list_closure)
1714 /* This function can GC */
1715 struct charset_list_closure *chcl =
1716 (struct charset_list_closure*) charset_list_closure;
1717 Lisp_Object *charset_list = chcl->charset_list;
1719 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1723 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1724 Return a list of the names of all defined charsets.
1728 Lisp_Object charset_list = Qnil;
1729 struct gcpro gcpro1;
1730 struct charset_list_closure charset_list_closure;
1732 GCPRO1 (charset_list);
1733 charset_list_closure.charset_list = &charset_list;
1734 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1735 &charset_list_closure);
1738 return charset_list;
1741 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1742 Return the name of the given charset.
1746 return XCHARSET_NAME (Fget_charset (charset));
1749 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1750 Define a new character set.
1751 This function is for use with Mule support.
1752 NAME is a symbol, the name by which the character set is normally referred.
1753 DOC-STRING is a string describing the character set.
1754 PROPS is a property list, describing the specific nature of the
1755 character set. Recognized properties are:
1757 'short-name Short version of the charset name (ex: Latin-1)
1758 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1759 'registry A regular expression matching the font registry field for
1761 'dimension Number of octets used to index a character in this charset.
1762 Either 1 or 2. Defaults to 1.
1763 'columns Number of columns used to display a character in this charset.
1764 Only used in TTY mode. (Under X, the actual width of a
1765 character can be derived from the font used to display the
1766 characters.) If unspecified, defaults to the dimension
1767 (this is almost always the correct value).
1768 'chars Number of characters in each dimension (94 or 96).
1769 Defaults to 94. Note that if the dimension is 2, the
1770 character set thus described is 94x94 or 96x96.
1771 'final Final byte of ISO 2022 escape sequence. Must be
1772 supplied. Each combination of (DIMENSION, CHARS) defines a
1773 separate namespace for final bytes. Note that ISO
1774 2022 restricts the final byte to the range
1775 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1776 dimension == 2. Note also that final bytes in the range
1777 0x30 - 0x3F are reserved for user-defined (not official)
1779 'graphic 0 (use left half of font on output) or 1 (use right half
1780 of font on output). Defaults to 0. For example, for
1781 a font whose registry is ISO8859-1, the left half
1782 (octets 0x20 - 0x7F) is the `ascii' character set, while
1783 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1784 character set. With 'graphic set to 0, the octets
1785 will have their high bit cleared; with it set to 1,
1786 the octets will have their high bit set.
1787 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1789 'ccl-program A compiled CCL program used to convert a character in
1790 this charset into an index into the font. This is in
1791 addition to the 'graphic property. The CCL program
1792 is passed the octets of the character, with the high
1793 bit cleared and set depending upon whether the value
1794 of the 'graphic property is 0 or 1.
1796 (name, doc_string, props))
1798 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1799 int direction = CHARSET_LEFT_TO_RIGHT;
1801 Lisp_Object registry = Qnil;
1802 Lisp_Object charset;
1803 Lisp_Object rest, keyword, value;
1804 Lisp_Object ccl_program = Qnil;
1805 Lisp_Object short_name = Qnil, long_name = Qnil;
1806 int byte_offset = -1;
1808 CHECK_SYMBOL (name);
1809 if (!NILP (doc_string))
1810 CHECK_STRING (doc_string);
1812 charset = Ffind_charset (name);
1813 if (!NILP (charset))
1814 signal_simple_error ("Cannot redefine existing charset", name);
1816 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1818 if (EQ (keyword, Qshort_name))
1820 CHECK_STRING (value);
1824 if (EQ (keyword, Qlong_name))
1826 CHECK_STRING (value);
1830 else if (EQ (keyword, Qdimension))
1833 dimension = XINT (value);
1834 if (dimension < 1 || dimension > 2)
1835 signal_simple_error ("Invalid value for 'dimension", value);
1838 else if (EQ (keyword, Qchars))
1841 chars = XINT (value);
1842 if (chars != 94 && chars != 96)
1843 signal_simple_error ("Invalid value for 'chars", value);
1846 else if (EQ (keyword, Qcolumns))
1849 columns = XINT (value);
1850 if (columns != 1 && columns != 2)
1851 signal_simple_error ("Invalid value for 'columns", value);
1854 else if (EQ (keyword, Qgraphic))
1857 graphic = XINT (value);
1859 if (graphic < 0 || graphic > 2)
1861 if (graphic < 0 || graphic > 1)
1863 signal_simple_error ("Invalid value for 'graphic", value);
1866 else if (EQ (keyword, Qregistry))
1868 CHECK_STRING (value);
1872 else if (EQ (keyword, Qdirection))
1874 if (EQ (value, Ql2r))
1875 direction = CHARSET_LEFT_TO_RIGHT;
1876 else if (EQ (value, Qr2l))
1877 direction = CHARSET_RIGHT_TO_LEFT;
1879 signal_simple_error ("Invalid value for 'direction", value);
1882 else if (EQ (keyword, Qfinal))
1884 CHECK_CHAR_COERCE_INT (value);
1885 final = XCHAR (value);
1886 if (final < '0' || final > '~')
1887 signal_simple_error ("Invalid value for 'final", value);
1890 else if (EQ (keyword, Qccl_program))
1892 CHECK_VECTOR (value);
1893 ccl_program = value;
1897 signal_simple_error ("Unrecognized property", keyword);
1901 error ("'final must be specified");
1902 if (dimension == 2 && final > 0x5F)
1904 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1908 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1910 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1912 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1913 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1915 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1917 id = get_unallocated_leading_byte (dimension);
1919 if (NILP (doc_string))
1920 doc_string = build_string ("");
1922 if (NILP (registry))
1923 registry = build_string ("");
1925 if (NILP (short_name))
1926 XSETSTRING (short_name, XSYMBOL (name)->name);
1928 if (NILP (long_name))
1929 long_name = doc_string;
1932 columns = dimension;
1934 if (byte_offset < 0)
1938 else if (chars == 96)
1944 charset = make_charset (id, name, chars, dimension, columns, graphic,
1945 final, direction, short_name, long_name,
1946 doc_string, registry,
1947 Qnil, 0, 0, 0, byte_offset);
1948 if (!NILP (ccl_program))
1949 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1953 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1955 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1956 NEW-NAME is the name of the new charset. Return the new charset.
1958 (charset, new_name))
1960 Lisp_Object new_charset = Qnil;
1961 int id, chars, dimension, columns, graphic, final;
1963 Lisp_Object registry, doc_string, short_name, long_name;
1964 struct Lisp_Charset *cs;
1966 charset = Fget_charset (charset);
1967 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1968 signal_simple_error ("Charset already has reverse-direction charset",
1971 CHECK_SYMBOL (new_name);
1972 if (!NILP (Ffind_charset (new_name)))
1973 signal_simple_error ("Cannot redefine existing charset", new_name);
1975 cs = XCHARSET (charset);
1977 chars = CHARSET_CHARS (cs);
1978 dimension = CHARSET_DIMENSION (cs);
1979 columns = CHARSET_COLUMNS (cs);
1980 id = get_unallocated_leading_byte (dimension);
1982 graphic = CHARSET_GRAPHIC (cs);
1983 final = CHARSET_FINAL (cs);
1984 direction = CHARSET_RIGHT_TO_LEFT;
1985 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1986 direction = CHARSET_LEFT_TO_RIGHT;
1987 doc_string = CHARSET_DOC_STRING (cs);
1988 short_name = CHARSET_SHORT_NAME (cs);
1989 long_name = CHARSET_LONG_NAME (cs);
1990 registry = CHARSET_REGISTRY (cs);
1992 new_charset = make_charset (id, new_name, chars, dimension, columns,
1993 graphic, final, direction, short_name, long_name,
1994 doc_string, registry,
1996 CHARSET_DECODING_TABLE(cs),
1997 CHARSET_UCS_MIN(cs),
1998 CHARSET_UCS_MAX(cs),
1999 CHARSET_CODE_OFFSET(cs),
2000 CHARSET_BYTE_OFFSET(cs)
2006 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2007 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2012 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2013 Define symbol ALIAS as an alias for CHARSET.
2017 CHECK_SYMBOL (alias);
2018 charset = Fget_charset (charset);
2019 return Fputhash (alias, charset, Vcharset_hash_table);
2022 /* #### Reverse direction charsets not yet implemented. */
2024 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2026 Return the reverse-direction charset parallel to CHARSET, if any.
2027 This is the charset with the same properties (in particular, the same
2028 dimension, number of characters per dimension, and final byte) as
2029 CHARSET but whose characters are displayed in the opposite direction.
2033 charset = Fget_charset (charset);
2034 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2038 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2039 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2040 If DIRECTION is omitted, both directions will be checked (left-to-right
2041 will be returned if character sets exist for both directions).
2043 (dimension, chars, final, direction))
2045 int dm, ch, fi, di = -1;
2047 Lisp_Object obj = Qnil;
2049 CHECK_INT (dimension);
2050 dm = XINT (dimension);
2051 if (dm < 1 || dm > 2)
2052 signal_simple_error ("Invalid value for DIMENSION", dimension);
2056 if (ch != 94 && ch != 96)
2057 signal_simple_error ("Invalid value for CHARS", chars);
2059 CHECK_CHAR_COERCE_INT (final);
2061 if (fi < '0' || fi > '~')
2062 signal_simple_error ("Invalid value for FINAL", final);
2064 if (EQ (direction, Ql2r))
2065 di = CHARSET_LEFT_TO_RIGHT;
2066 else if (EQ (direction, Qr2l))
2067 di = CHARSET_RIGHT_TO_LEFT;
2068 else if (!NILP (direction))
2069 signal_simple_error ("Invalid value for DIRECTION", direction);
2071 if (dm == 2 && fi > 0x5F)
2073 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2076 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2078 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2082 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2084 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2087 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2090 return XCHARSET_NAME (obj);
2094 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2095 Return short name of CHARSET.
2099 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2102 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2103 Return long name of CHARSET.
2107 return XCHARSET_LONG_NAME (Fget_charset (charset));
2110 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2111 Return description of CHARSET.
2115 return XCHARSET_DOC_STRING (Fget_charset (charset));
2118 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2119 Return dimension of CHARSET.
2123 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2126 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2127 Return property PROP of CHARSET.
2128 Recognized properties are those listed in `make-charset', as well as
2129 'name and 'doc-string.
2133 struct Lisp_Charset *cs;
2135 charset = Fget_charset (charset);
2136 cs = XCHARSET (charset);
2138 CHECK_SYMBOL (prop);
2139 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2140 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2141 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2142 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2143 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2144 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2145 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2146 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2147 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2148 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2149 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2150 if (EQ (prop, Qdirection))
2151 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2152 if (EQ (prop, Qreverse_direction_charset))
2154 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2158 return XCHARSET_NAME (obj);
2160 signal_simple_error ("Unrecognized charset property name", prop);
2161 return Qnil; /* not reached */
2164 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2165 Return charset identification number of CHARSET.
2169 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2172 /* #### We need to figure out which properties we really want to
2175 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2176 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2178 (charset, ccl_program))
2180 charset = Fget_charset (charset);
2181 CHECK_VECTOR (ccl_program);
2182 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2187 invalidate_charset_font_caches (Lisp_Object charset)
2189 /* Invalidate font cache entries for charset on all devices. */
2190 Lisp_Object devcons, concons, hash_table;
2191 DEVICE_LOOP_NO_BREAK (devcons, concons)
2193 struct device *d = XDEVICE (XCAR (devcons));
2194 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2195 if (!UNBOUNDP (hash_table))
2196 Fclrhash (hash_table);
2200 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2201 Set the 'registry property of CHARSET to REGISTRY.
2203 (charset, registry))
2205 charset = Fget_charset (charset);
2206 CHECK_STRING (registry);
2207 XCHARSET_REGISTRY (charset) = registry;
2208 invalidate_charset_font_caches (charset);
2209 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2214 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2215 Return mapping-table of CHARSET.
2219 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2222 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2223 Set mapping-table of CHARSET to TABLE.
2227 struct Lisp_Charset *cs;
2228 Lisp_Object old_table;
2231 charset = Fget_charset (charset);
2232 cs = XCHARSET (charset);
2234 if (EQ (table, Qnil))
2236 CHARSET_DECODING_TABLE(cs) = table;
2239 else if (VECTORP (table))
2243 /* ad-hoc method for `ascii' */
2244 if ((CHARSET_CHARS (cs) == 94) &&
2245 (CHARSET_BYTE_OFFSET (cs) != 33))
2246 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2248 ccs_len = CHARSET_CHARS (cs);
2250 if (XVECTOR_LENGTH (table) > ccs_len)
2251 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2252 old_table = CHARSET_DECODING_TABLE(cs);
2253 CHARSET_DECODING_TABLE(cs) = table;
2256 signal_error (Qwrong_type_argument,
2257 list2 (build_translated_string ("vector-or-nil-p"),
2259 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2261 switch (CHARSET_DIMENSION (cs))
2264 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2266 Lisp_Object c = XVECTOR_DATA(table)[i];
2271 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2275 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2277 Lisp_Object v = XVECTOR_DATA(table)[i];
2283 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2285 CHARSET_DECODING_TABLE(cs) = old_table;
2286 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2288 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2290 Lisp_Object c = XVECTOR_DATA(v)[j];
2293 put_char_attribute (c, charset,
2296 (i + CHARSET_BYTE_OFFSET (cs)),
2298 (j + CHARSET_BYTE_OFFSET (cs))));
2302 put_char_attribute (v, charset,
2304 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2313 /************************************************************************/
2314 /* Lisp primitives for working with characters */
2315 /************************************************************************/
2317 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2318 Make a character from CHARSET and octets ARG1 and ARG2.
2319 ARG2 is required only for characters from two-dimensional charsets.
2320 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2321 character s with caron.
2323 (charset, arg1, arg2))
2325 struct Lisp_Charset *cs;
2327 int lowlim, highlim;
2329 charset = Fget_charset (charset);
2330 cs = XCHARSET (charset);
2332 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2333 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2335 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2337 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2338 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2341 /* It is useful (and safe, according to Olivier Galibert) to strip
2342 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2343 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2344 Latin 2 code of the character. */
2352 if (a1 < lowlim || a1 > highlim)
2353 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2355 if (CHARSET_DIMENSION (cs) == 1)
2359 ("Charset is of dimension one; second octet must be nil", arg2);
2360 return make_char (MAKE_CHAR (charset, a1, 0));
2369 a2 = XINT (arg2) & 0x7f;
2371 if (a2 < lowlim || a2 > highlim)
2372 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2374 return make_char (MAKE_CHAR (charset, a1, a2));
2377 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2378 Return the character set of char CH.
2382 CHECK_CHAR_COERCE_INT (ch);
2384 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2387 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2388 Return the octet numbered N (should be 0 or 1) of char CH.
2389 N defaults to 0 if omitted.
2393 Lisp_Object charset;
2396 CHECK_CHAR_COERCE_INT (ch);
2398 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2400 if (NILP (n) || EQ (n, Qzero))
2401 return make_int (octet0);
2402 else if (EQ (n, make_int (1)))
2403 return make_int (octet1);
2405 signal_simple_error ("Octet number must be 0 or 1", n);
2408 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2409 Return list of charset and one or two position-codes of CHAR.
2413 /* This function can GC */
2414 struct gcpro gcpro1, gcpro2;
2415 Lisp_Object charset = Qnil;
2416 Lisp_Object rc = Qnil;
2424 GCPRO2 (charset, rc);
2425 CHECK_CHAR_COERCE_INT (character);
2428 code_point = ENCODE_CHAR (XCHAR (character), charset);
2429 dimension = XCHARSET_DIMENSION (charset);
2430 while (dimension > 0)
2432 rc = Fcons (make_int (code_point & 255), rc);
2436 rc = Fcons (XCHARSET_NAME (charset), rc);
2438 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2440 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2442 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2446 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2455 #ifdef ENABLE_COMPOSITE_CHARS
2456 /************************************************************************/
2457 /* composite character functions */
2458 /************************************************************************/
2461 lookup_composite_char (Bufbyte *str, int len)
2463 Lisp_Object lispstr = make_string (str, len);
2464 Lisp_Object ch = Fgethash (lispstr,
2465 Vcomposite_char_string2char_hash_table,
2471 if (composite_char_row_next >= 128)
2472 signal_simple_error ("No more composite chars available", lispstr);
2473 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2474 composite_char_col_next);
2475 Fputhash (make_char (emch), lispstr,
2476 Vcomposite_char_char2string_hash_table);
2477 Fputhash (lispstr, make_char (emch),
2478 Vcomposite_char_string2char_hash_table);
2479 composite_char_col_next++;
2480 if (composite_char_col_next >= 128)
2482 composite_char_col_next = 32;
2483 composite_char_row_next++;
2492 composite_char_string (Emchar ch)
2494 Lisp_Object str = Fgethash (make_char (ch),
2495 Vcomposite_char_char2string_hash_table,
2497 assert (!UNBOUNDP (str));
2501 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2502 Convert a string into a single composite character.
2503 The character is the result of overstriking all the characters in
2508 CHECK_STRING (string);
2509 return make_char (lookup_composite_char (XSTRING_DATA (string),
2510 XSTRING_LENGTH (string)));
2513 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2514 Return a string of the characters comprising a composite character.
2522 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2523 signal_simple_error ("Must be composite char", ch);
2524 return composite_char_string (emch);
2526 #endif /* ENABLE_COMPOSITE_CHARS */
2529 /************************************************************************/
2530 /* initialization */
2531 /************************************************************************/
2534 syms_of_mule_charset (void)
2536 DEFSUBR (Fcharsetp);
2537 DEFSUBR (Ffind_charset);
2538 DEFSUBR (Fget_charset);
2539 DEFSUBR (Fcharset_list);
2540 DEFSUBR (Fcharset_name);
2541 DEFSUBR (Fmake_charset);
2542 DEFSUBR (Fmake_reverse_direction_charset);
2543 /* DEFSUBR (Freverse_direction_charset); */
2544 DEFSUBR (Fdefine_charset_alias);
2545 DEFSUBR (Fcharset_from_attributes);
2546 DEFSUBR (Fcharset_short_name);
2547 DEFSUBR (Fcharset_long_name);
2548 DEFSUBR (Fcharset_description);
2549 DEFSUBR (Fcharset_dimension);
2550 DEFSUBR (Fcharset_property);
2551 DEFSUBR (Fcharset_id);
2552 DEFSUBR (Fset_charset_ccl_program);
2553 DEFSUBR (Fset_charset_registry);
2555 DEFSUBR (Fchar_attribute_alist);
2556 DEFSUBR (Fget_char_attribute);
2557 DEFSUBR (Fput_char_attribute);
2558 DEFSUBR (Fdefine_char);
2559 DEFSUBR (Fchar_variants);
2560 DEFSUBR (Fget_composite_char);
2561 DEFSUBR (Fcharset_mapping_table);
2562 DEFSUBR (Fset_charset_mapping_table);
2565 DEFSUBR (Fmake_char);
2566 DEFSUBR (Fchar_charset);
2567 DEFSUBR (Fchar_octet);
2568 DEFSUBR (Fsplit_char);
2570 #ifdef ENABLE_COMPOSITE_CHARS
2571 DEFSUBR (Fmake_composite_char);
2572 DEFSUBR (Fcomposite_char_string);
2575 defsymbol (&Qcharsetp, "charsetp");
2576 defsymbol (&Qregistry, "registry");
2577 defsymbol (&Qfinal, "final");
2578 defsymbol (&Qgraphic, "graphic");
2579 defsymbol (&Qdirection, "direction");
2580 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2581 defsymbol (&Qshort_name, "short-name");
2582 defsymbol (&Qlong_name, "long-name");
2584 defsymbol (&Ql2r, "l2r");
2585 defsymbol (&Qr2l, "r2l");
2587 /* Charsets, compatible with FSF 20.3
2588 Naming convention is Script-Charset[-Edition] */
2589 defsymbol (&Qascii, "ascii");
2590 defsymbol (&Qcontrol_1, "control-1");
2591 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2592 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2593 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2594 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2595 defsymbol (&Qthai_tis620, "thai-tis620");
2596 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2597 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2598 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2599 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2600 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2601 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2602 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2603 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2604 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2605 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2606 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2607 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2608 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2609 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2610 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2612 defsymbol (&Q_ucs, "->ucs");
2613 defsymbol (&Q_decomposition, "->decomposition");
2614 defsymbol (&Qcompat, "compat");
2615 defsymbol (&Qisolated, "isolated");
2616 defsymbol (&Qinitial, "initial");
2617 defsymbol (&Qmedial, "medial");
2618 defsymbol (&Qfinal, "final");
2619 defsymbol (&Qvertical, "vertical");
2620 defsymbol (&QnoBreak, "noBreak");
2621 defsymbol (&Qfraction, "fraction");
2622 defsymbol (&Qsuper, "super");
2623 defsymbol (&Qsub, "sub");
2624 defsymbol (&Qcircle, "circle");
2625 defsymbol (&Qsquare, "square");
2626 defsymbol (&Qwide, "wide");
2627 defsymbol (&Qnarrow, "narrow");
2628 defsymbol (&Qsmall, "small");
2629 defsymbol (&Qfont, "font");
2630 defsymbol (&Qucs, "ucs");
2631 defsymbol (&Qucs_bmp, "ucs-bmp");
2632 defsymbol (&Qlatin_viscii, "latin-viscii");
2633 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2634 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2635 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2636 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2637 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2638 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2639 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2640 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2641 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2642 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2643 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2644 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2645 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2646 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2647 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2648 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2649 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2650 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2651 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2652 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2653 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2654 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2655 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2656 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2657 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2658 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2659 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2661 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2662 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2664 defsymbol (&Qcomposite, "composite");
2668 vars_of_mule_charset (void)
2675 chlook = xnew (struct charset_lookup);
2676 dumpstruct (&chlook, &charset_lookup_description);
2678 /* Table of charsets indexed by leading byte. */
2679 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2680 chlook->charset_by_leading_byte[i] = Qnil;
2683 /* Table of charsets indexed by type/final-byte. */
2684 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2685 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2686 chlook->charset_by_attributes[i][j] = Qnil;
2688 /* Table of charsets indexed by type/final-byte/direction. */
2689 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2690 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2691 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2692 chlook->charset_by_attributes[i][j][k] = Qnil;
2696 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2698 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2699 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2703 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2704 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2705 Leading-code of private TYPE9N charset of column-width 1.
2707 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2711 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2712 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2713 Version number of UTF-2000.
2716 staticpro (&Vcharacter_attribute_table);
2717 Vcharacter_attribute_table = make_char_code_table (Qnil);
2719 staticpro (&Vcharacter_composition_table);
2720 Vcharacter_composition_table = make_char_code_table (Qnil);
2722 staticpro (&Vcharacter_variant_table);
2723 Vcharacter_variant_table = make_char_code_table (Qnil);
2725 Vdefault_coded_charset_priority_list = Qnil;
2726 DEFVAR_LISP ("default-coded-charset-priority-list",
2727 &Vdefault_coded_charset_priority_list /*
2728 Default order of preferred coded-character-sets.
2734 complex_vars_of_mule_charset (void)
2736 staticpro (&Vcharset_hash_table);
2737 Vcharset_hash_table =
2738 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2740 /* Predefined character sets. We store them into variables for
2744 staticpro (&Vcharset_ucs);
2746 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2747 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2748 build_string ("UCS"),
2749 build_string ("UCS"),
2750 build_string ("ISO/IEC 10646"),
2752 Qnil, 0, 0xFFFFFFF, 0, 0);
2753 staticpro (&Vcharset_ucs_bmp);
2755 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2756 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2757 build_string ("BMP"),
2758 build_string ("BMP"),
2759 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2760 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2761 Qnil, 0, 0xFFFF, 0, 0);
2763 # define MIN_CHAR_THAI 0
2764 # define MAX_CHAR_THAI 0
2765 # define MIN_CHAR_GREEK 0
2766 # define MAX_CHAR_GREEK 0
2767 # define MIN_CHAR_HEBREW 0
2768 # define MAX_CHAR_HEBREW 0
2769 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2770 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2771 # define MIN_CHAR_CYRILLIC 0
2772 # define MAX_CHAR_CYRILLIC 0
2774 staticpro (&Vcharset_ascii);
2776 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2777 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2778 build_string ("ASCII"),
2779 build_string ("ASCII)"),
2780 build_string ("ASCII (ISO646 IRV)"),
2781 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2782 Qnil, 0, 0x7F, 0, 0);
2783 staticpro (&Vcharset_control_1);
2784 Vcharset_control_1 =
2785 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2786 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2787 build_string ("C1"),
2788 build_string ("Control characters"),
2789 build_string ("Control characters 128-191"),
2791 Qnil, 0x80, 0x9F, 0, 0);
2792 staticpro (&Vcharset_latin_iso8859_1);
2793 Vcharset_latin_iso8859_1 =
2794 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2795 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2796 build_string ("Latin-1"),
2797 build_string ("ISO8859-1 (Latin-1)"),
2798 build_string ("ISO8859-1 (Latin-1)"),
2799 build_string ("iso8859-1"),
2800 Qnil, 0xA0, 0xFF, 0, 32);
2801 staticpro (&Vcharset_latin_iso8859_2);
2802 Vcharset_latin_iso8859_2 =
2803 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2804 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("Latin-2"),
2806 build_string ("ISO8859-2 (Latin-2)"),
2807 build_string ("ISO8859-2 (Latin-2)"),
2808 build_string ("iso8859-2"),
2810 staticpro (&Vcharset_latin_iso8859_3);
2811 Vcharset_latin_iso8859_3 =
2812 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2813 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2814 build_string ("Latin-3"),
2815 build_string ("ISO8859-3 (Latin-3)"),
2816 build_string ("ISO8859-3 (Latin-3)"),
2817 build_string ("iso8859-3"),
2819 staticpro (&Vcharset_latin_iso8859_4);
2820 Vcharset_latin_iso8859_4 =
2821 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2822 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2823 build_string ("Latin-4"),
2824 build_string ("ISO8859-4 (Latin-4)"),
2825 build_string ("ISO8859-4 (Latin-4)"),
2826 build_string ("iso8859-4"),
2828 staticpro (&Vcharset_thai_tis620);
2829 Vcharset_thai_tis620 =
2830 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2831 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2832 build_string ("TIS620"),
2833 build_string ("TIS620 (Thai)"),
2834 build_string ("TIS620.2529 (Thai)"),
2835 build_string ("tis620"),
2836 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2837 staticpro (&Vcharset_greek_iso8859_7);
2838 Vcharset_greek_iso8859_7 =
2839 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2840 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2841 build_string ("ISO8859-7"),
2842 build_string ("ISO8859-7 (Greek)"),
2843 build_string ("ISO8859-7 (Greek)"),
2844 build_string ("iso8859-7"),
2845 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2846 staticpro (&Vcharset_arabic_iso8859_6);
2847 Vcharset_arabic_iso8859_6 =
2848 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2849 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2850 build_string ("ISO8859-6"),
2851 build_string ("ISO8859-6 (Arabic)"),
2852 build_string ("ISO8859-6 (Arabic)"),
2853 build_string ("iso8859-6"),
2855 staticpro (&Vcharset_hebrew_iso8859_8);
2856 Vcharset_hebrew_iso8859_8 =
2857 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2858 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2859 build_string ("ISO8859-8"),
2860 build_string ("ISO8859-8 (Hebrew)"),
2861 build_string ("ISO8859-8 (Hebrew)"),
2862 build_string ("iso8859-8"),
2863 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2864 staticpro (&Vcharset_katakana_jisx0201);
2865 Vcharset_katakana_jisx0201 =
2866 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2867 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2868 build_string ("JISX0201 Kana"),
2869 build_string ("JISX0201.1976 (Japanese Kana)"),
2870 build_string ("JISX0201.1976 Japanese Kana"),
2871 build_string ("jisx0201\\.1976"),
2873 staticpro (&Vcharset_latin_jisx0201);
2874 Vcharset_latin_jisx0201 =
2875 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2876 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2877 build_string ("JISX0201 Roman"),
2878 build_string ("JISX0201.1976 (Japanese Roman)"),
2879 build_string ("JISX0201.1976 Japanese Roman"),
2880 build_string ("jisx0201\\.1976"),
2882 staticpro (&Vcharset_cyrillic_iso8859_5);
2883 Vcharset_cyrillic_iso8859_5 =
2884 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2885 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2886 build_string ("ISO8859-5"),
2887 build_string ("ISO8859-5 (Cyrillic)"),
2888 build_string ("ISO8859-5 (Cyrillic)"),
2889 build_string ("iso8859-5"),
2890 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2891 staticpro (&Vcharset_latin_iso8859_9);
2892 Vcharset_latin_iso8859_9 =
2893 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2894 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("Latin-5"),
2896 build_string ("ISO8859-9 (Latin-5)"),
2897 build_string ("ISO8859-9 (Latin-5)"),
2898 build_string ("iso8859-9"),
2900 staticpro (&Vcharset_japanese_jisx0208_1978);
2901 Vcharset_japanese_jisx0208_1978 =
2902 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2903 Qjapanese_jisx0208_1978, 94, 2,
2904 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2905 build_string ("JIS X0208:1978"),
2906 build_string ("JIS X0208:1978 (Japanese)"),
2908 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2909 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2911 staticpro (&Vcharset_chinese_gb2312);
2912 Vcharset_chinese_gb2312 =
2913 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2914 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2915 build_string ("GB2312"),
2916 build_string ("GB2312)"),
2917 build_string ("GB2312 Chinese simplified"),
2918 build_string ("gb2312"),
2920 staticpro (&Vcharset_japanese_jisx0208);
2921 Vcharset_japanese_jisx0208 =
2922 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2923 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2924 build_string ("JISX0208"),
2925 build_string ("JIS X0208:1983 (Japanese)"),
2926 build_string ("JIS X0208:1983 Japanese Kanji"),
2927 build_string ("jisx0208\\.1983"),
2930 staticpro (&Vcharset_japanese_jisx0208_1990);
2931 Vcharset_japanese_jisx0208_1990 =
2932 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2933 Qjapanese_jisx0208_1990, 94, 2,
2934 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2935 build_string ("JISX0208-1990"),
2936 build_string ("JIS X0208:1990 (Japanese)"),
2937 build_string ("JIS X0208:1990 Japanese Kanji"),
2938 build_string ("jisx0208\\.1990"),
2940 MIN_CHAR_JIS_X0208_1990,
2941 MAX_CHAR_JIS_X0208_1990, 0, 33);
2943 staticpro (&Vcharset_korean_ksc5601);
2944 Vcharset_korean_ksc5601 =
2945 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2946 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2947 build_string ("KSC5601"),
2948 build_string ("KSC5601 (Korean"),
2949 build_string ("KSC5601 Korean Hangul and Hanja"),
2950 build_string ("ksc5601"),
2952 staticpro (&Vcharset_japanese_jisx0212);
2953 Vcharset_japanese_jisx0212 =
2954 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2955 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2956 build_string ("JISX0212"),
2957 build_string ("JISX0212 (Japanese)"),
2958 build_string ("JISX0212 Japanese Supplement"),
2959 build_string ("jisx0212"),
2962 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2963 staticpro (&Vcharset_chinese_cns11643_1);
2964 Vcharset_chinese_cns11643_1 =
2965 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2966 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2967 build_string ("CNS11643-1"),
2968 build_string ("CNS11643-1 (Chinese traditional)"),
2970 ("CNS 11643 Plane 1 Chinese traditional"),
2971 build_string (CHINESE_CNS_PLANE_RE("1")),
2973 staticpro (&Vcharset_chinese_cns11643_2);
2974 Vcharset_chinese_cns11643_2 =
2975 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2976 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2977 build_string ("CNS11643-2"),
2978 build_string ("CNS11643-2 (Chinese traditional)"),
2980 ("CNS 11643 Plane 2 Chinese traditional"),
2981 build_string (CHINESE_CNS_PLANE_RE("2")),
2984 staticpro (&Vcharset_latin_viscii_lower);
2985 Vcharset_latin_viscii_lower =
2986 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2987 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2988 build_string ("VISCII lower"),
2989 build_string ("VISCII lower (Vietnamese)"),
2990 build_string ("VISCII lower (Vietnamese)"),
2991 build_string ("MULEVISCII-LOWER"),
2993 staticpro (&Vcharset_latin_viscii_upper);
2994 Vcharset_latin_viscii_upper =
2995 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2996 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2997 build_string ("VISCII upper"),
2998 build_string ("VISCII upper (Vietnamese)"),
2999 build_string ("VISCII upper (Vietnamese)"),
3000 build_string ("MULEVISCII-UPPER"),
3002 staticpro (&Vcharset_latin_viscii);
3003 Vcharset_latin_viscii =
3004 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3005 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3006 build_string ("VISCII"),
3007 build_string ("VISCII 1.1 (Vietnamese)"),
3008 build_string ("VISCII 1.1 (Vietnamese)"),
3009 build_string ("VISCII1\\.1"),
3011 staticpro (&Vcharset_ideograph_daikanwa);
3012 Vcharset_ideograph_daikanwa =
3013 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3014 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3015 build_string ("Daikanwa"),
3016 build_string ("Morohashi's Daikanwa"),
3017 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3018 build_string ("Daikanwa"),
3019 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3020 staticpro (&Vcharset_mojikyo_pj_1);
3021 Vcharset_mojikyo_pj_1 =
3022 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3023 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3024 build_string ("Mojikyo-PJ-1"),
3025 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3027 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3028 build_string ("jisx0208\\.Mojikyo-1$"),
3030 staticpro (&Vcharset_mojikyo_pj_2);
3031 Vcharset_mojikyo_pj_2 =
3032 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3033 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3034 build_string ("Mojikyo-PJ-2"),
3035 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3037 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3038 build_string ("jisx0208\\.Mojikyo-2$"),
3040 staticpro (&Vcharset_mojikyo_pj_3);
3041 Vcharset_mojikyo_pj_3 =
3042 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3043 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3044 build_string ("Mojikyo-PJ-3"),
3045 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3047 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3048 build_string ("jisx0208\\.Mojikyo-3$"),
3050 staticpro (&Vcharset_mojikyo_pj_4);
3051 Vcharset_mojikyo_pj_4 =
3052 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3053 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3054 build_string ("Mojikyo-PJ-4"),
3055 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3057 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3058 build_string ("jisx0208\\.Mojikyo-4$"),
3060 staticpro (&Vcharset_mojikyo_pj_5);
3061 Vcharset_mojikyo_pj_5 =
3062 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3063 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3064 build_string ("Mojikyo-PJ-5"),
3065 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3067 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3068 build_string ("jisx0208\\.Mojikyo-5$"),
3070 staticpro (&Vcharset_mojikyo_pj_6);
3071 Vcharset_mojikyo_pj_6 =
3072 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3073 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3074 build_string ("Mojikyo-PJ-6"),
3075 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3077 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3078 build_string ("jisx0208\\.Mojikyo-6$"),
3080 staticpro (&Vcharset_mojikyo_pj_7);
3081 Vcharset_mojikyo_pj_7 =
3082 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3083 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3084 build_string ("Mojikyo-PJ-7"),
3085 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3087 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3088 build_string ("jisx0208\\.Mojikyo-7$"),
3090 staticpro (&Vcharset_mojikyo_pj_8);
3091 Vcharset_mojikyo_pj_8 =
3092 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3093 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3094 build_string ("Mojikyo-PJ-8"),
3095 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3097 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3098 build_string ("jisx0208\\.Mojikyo-8$"),
3100 staticpro (&Vcharset_mojikyo_pj_9);
3101 Vcharset_mojikyo_pj_9 =
3102 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3103 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3104 build_string ("Mojikyo-PJ-9"),
3105 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3107 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3108 build_string ("jisx0208\\.Mojikyo-9$"),
3110 staticpro (&Vcharset_mojikyo_pj_10);
3111 Vcharset_mojikyo_pj_10 =
3112 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3113 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3114 build_string ("Mojikyo-PJ-10"),
3115 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3117 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3118 build_string ("jisx0208\\.Mojikyo-10$"),
3120 staticpro (&Vcharset_mojikyo_pj_11);
3121 Vcharset_mojikyo_pj_11 =
3122 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3123 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3124 build_string ("Mojikyo-PJ-11"),
3125 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3127 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3128 build_string ("jisx0208\\.Mojikyo-11$"),
3130 staticpro (&Vcharset_mojikyo_pj_12);
3131 Vcharset_mojikyo_pj_12 =
3132 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3133 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3134 build_string ("Mojikyo-PJ-12"),
3135 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3137 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3138 build_string ("jisx0208\\.Mojikyo-12$"),
3140 staticpro (&Vcharset_mojikyo_pj_13);
3141 Vcharset_mojikyo_pj_13 =
3142 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3143 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3144 build_string ("Mojikyo-PJ-13"),
3145 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3147 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3148 build_string ("jisx0208\\.Mojikyo-13$"),
3150 staticpro (&Vcharset_mojikyo_pj_14);
3151 Vcharset_mojikyo_pj_14 =
3152 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3153 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3154 build_string ("Mojikyo-PJ-14"),
3155 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3157 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3158 build_string ("jisx0208\\.Mojikyo-14$"),
3160 staticpro (&Vcharset_mojikyo_pj_15);
3161 Vcharset_mojikyo_pj_15 =
3162 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3163 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3164 build_string ("Mojikyo-PJ-15"),
3165 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3167 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3168 build_string ("jisx0208\\.Mojikyo-15$"),
3170 staticpro (&Vcharset_mojikyo_pj_16);
3171 Vcharset_mojikyo_pj_16 =
3172 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3173 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3174 build_string ("Mojikyo-PJ-16"),
3175 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3177 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3178 build_string ("jisx0208\\.Mojikyo-16$"),
3180 staticpro (&Vcharset_mojikyo_pj_17);
3181 Vcharset_mojikyo_pj_17 =
3182 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3183 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3184 build_string ("Mojikyo-PJ-17"),
3185 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3187 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3188 build_string ("jisx0208\\.Mojikyo-17$"),
3190 staticpro (&Vcharset_mojikyo_pj_18);
3191 Vcharset_mojikyo_pj_18 =
3192 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3193 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3194 build_string ("Mojikyo-PJ-18"),
3195 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3197 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3198 build_string ("jisx0208\\.Mojikyo-18$"),
3200 staticpro (&Vcharset_mojikyo_pj_19);
3201 Vcharset_mojikyo_pj_19 =
3202 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3203 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3204 build_string ("Mojikyo-PJ-19"),
3205 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3207 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3208 build_string ("jisx0208\\.Mojikyo-19$"),
3210 staticpro (&Vcharset_mojikyo_pj_20);
3211 Vcharset_mojikyo_pj_20 =
3212 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3213 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3214 build_string ("Mojikyo-PJ-20"),
3215 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3217 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3218 build_string ("jisx0208\\.Mojikyo-20$"),
3220 staticpro (&Vcharset_mojikyo_pj_21);
3221 Vcharset_mojikyo_pj_21 =
3222 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3223 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3224 build_string ("Mojikyo-PJ-21"),
3225 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3227 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3228 build_string ("jisx0208\\.Mojikyo-21$"),
3230 staticpro (&Vcharset_ethiopic_ucs);
3231 Vcharset_ethiopic_ucs =
3232 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3233 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3234 build_string ("Ethiopic (UCS)"),
3235 build_string ("Ethiopic (UCS)"),
3236 build_string ("Ethiopic of UCS"),
3237 build_string ("Ethiopic-Unicode"),
3238 Qnil, 0x1200, 0x137F, 0x1200, 0);
3240 staticpro (&Vcharset_chinese_big5_1);
3241 Vcharset_chinese_big5_1 =
3242 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3243 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3244 build_string ("Big5"),
3245 build_string ("Big5 (Level-1)"),
3247 ("Big5 Level-1 Chinese traditional"),
3248 build_string ("big5"),
3250 staticpro (&Vcharset_chinese_big5_2);
3251 Vcharset_chinese_big5_2 =
3252 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3253 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3254 build_string ("Big5"),
3255 build_string ("Big5 (Level-2)"),
3257 ("Big5 Level-2 Chinese traditional"),
3258 build_string ("big5"),
3261 #ifdef ENABLE_COMPOSITE_CHARS
3262 /* #### For simplicity, we put composite chars into a 96x96 charset.
3263 This is going to lead to problems because you can run out of
3264 room, esp. as we don't yet recycle numbers. */
3265 staticpro (&Vcharset_composite);
3266 Vcharset_composite =
3267 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3268 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3269 build_string ("Composite"),
3270 build_string ("Composite characters"),
3271 build_string ("Composite characters"),
3274 /* #### not dumped properly */
3275 composite_char_row_next = 32;
3276 composite_char_col_next = 32;
3278 Vcomposite_char_string2char_hash_table =
3279 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3280 Vcomposite_char_char2string_hash_table =
3281 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3282 staticpro (&Vcomposite_char_string2char_hash_table);
3283 staticpro (&Vcomposite_char_char2string_hash_table);
3284 #endif /* ENABLE_COMPOSITE_CHARS */