1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
/* One global Lisp_Object variable per pre-defined charset.  Several of
   them (for example Vcharset_chinese_big5, Vcharset_ucs and
   Vcharset_mojikyo) are referenced by the code-point conversion
   functions later in this file.
   NOTE(review): the code that creates the charset objects stored in
   these variables is not in this part of the file. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_china3_jef;
79 Lisp_Object Vcharset_ideograph_cbeta;
80 Lisp_Object Vcharset_ideograph_gt;
81 Lisp_Object Vcharset_ideograph_gt_pj_1;
82 Lisp_Object Vcharset_ideograph_gt_pj_2;
83 Lisp_Object Vcharset_ideograph_gt_pj_3;
84 Lisp_Object Vcharset_ideograph_gt_pj_4;
85 Lisp_Object Vcharset_ideograph_gt_pj_5;
86 Lisp_Object Vcharset_ideograph_gt_pj_6;
87 Lisp_Object Vcharset_ideograph_gt_pj_7;
88 Lisp_Object Vcharset_ideograph_gt_pj_8;
89 Lisp_Object Vcharset_ideograph_gt_pj_9;
90 Lisp_Object Vcharset_ideograph_gt_pj_10;
91 Lisp_Object Vcharset_ideograph_gt_pj_11;
92 Lisp_Object Vcharset_ideograph_daikanwa;
93 Lisp_Object Vcharset_mojikyo;
94 Lisp_Object Vcharset_mojikyo_2022_1;
95 Lisp_Object Vcharset_mojikyo_pj_1;
96 Lisp_Object Vcharset_mojikyo_pj_2;
97 Lisp_Object Vcharset_mojikyo_pj_3;
98 Lisp_Object Vcharset_mojikyo_pj_4;
99 Lisp_Object Vcharset_mojikyo_pj_5;
100 Lisp_Object Vcharset_mojikyo_pj_6;
101 Lisp_Object Vcharset_mojikyo_pj_7;
102 Lisp_Object Vcharset_mojikyo_pj_8;
103 Lisp_Object Vcharset_mojikyo_pj_9;
104 Lisp_Object Vcharset_mojikyo_pj_10;
105 Lisp_Object Vcharset_mojikyo_pj_11;
106 Lisp_Object Vcharset_mojikyo_pj_12;
107 Lisp_Object Vcharset_mojikyo_pj_13;
108 Lisp_Object Vcharset_mojikyo_pj_14;
109 Lisp_Object Vcharset_mojikyo_pj_15;
110 Lisp_Object Vcharset_mojikyo_pj_16;
111 Lisp_Object Vcharset_mojikyo_pj_17;
112 Lisp_Object Vcharset_mojikyo_pj_18;
113 Lisp_Object Vcharset_mojikyo_pj_19;
114 Lisp_Object Vcharset_mojikyo_pj_20;
115 Lisp_Object Vcharset_mojikyo_pj_21;
116 Lisp_Object Vcharset_ethiopic_ucs;
118 Lisp_Object Vcharset_chinese_big5_1;
119 Lisp_Object Vcharset_chinese_big5_2;
121 #ifdef ENABLE_COMPOSITE_CHARS
122 Lisp_Object Vcharset_composite;
124 /* Hash tables for composite chars. One maps strings representing
125 composed chars to their equivalent chars; one goes the other way. */
127 Lisp_Object Vcomposite_char_char2string_hash_table;
128 Lisp_Object Vcomposite_char_string2char_hash_table;
130 static int composite_char_row_next;
131 static int composite_char_col_next;
133 #endif /* ENABLE_COMPOSITE_CHARS */
/* Pointer to the charset lookup structure used in this file; its
   charset_by_leading_byte and charset_by_attributes tables and its
   next_allocated_* leading-byte counters are accessed through it. */
135 struct charset_lookup *chlook;
/* Description of struct charset_lookup.  The visible entry records that
   the charset_by_leading_byte member is an array of Lisp_Objects.
   NOTE(review): the rest of both initializers is on lines not visible
   here -- confirm before editing. */
137 static const struct lrecord_description charset_lookup_description_1[] = {
138 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
147 static const struct struct_description charset_lookup_description = {
148 sizeof (struct charset_lookup),
149 charset_lookup_description_1
153 /* Table of number of bytes in the string representation of a character
154 indexed by the first byte of that representation.
156 rep_bytes_by_first_byte(c) is more efficient than the equivalent
157 canonical computation:
159 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The table is declared with 0xA0 entries, so only first bytes in the
   range 0x00 - 0x9f are valid indices. */
161 const Bytecount rep_bytes_by_first_byte[0xA0] =
162 { /* 0x00 - 0x7f are for straight ASCII */
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the next two rows both cover 0x80 - 0x8f and differ only
   in the last entry (3 vs. 2); presumably a preprocessor conditional that
   is not visible here selects one of them -- confirm. */
173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
175 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
177 /* 0x90 - 0x9d are for Dimension-2 official charsets */
178 /* 0x9e is for Dimension-1 private charsets */
179 /* 0x9f is for Dimension-2 private charsets */
180 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the number of slots needed per dimension for charset CS; the
   result is used in this file as the length of decoding-table vectors.
   This is CHARSET_CHARS (cs), except for a 94-character charset whose
   byte offset is not 33, where it is 128 - CHARSET_BYTE_OFFSET (cs). */
186 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
188 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
190 /* ad-hoc method for `ascii' */
191 if ((CHARSET_CHARS (cs) == 94) &&
192 (CHARSET_BYTE_OFFSET (cs) != 33))
193 return 128 - CHARSET_BYTE_OFFSET (cs);
195 return CHARSET_CHARS (cs);
/* Same as CHARSET_BYTE_SIZE, but takes the charset as a Lisp_Object. */
198 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check vector V as a (possibly nested) decoding table.  From the visible
   code: the length of V is compared against CCS_LEN, every element of V
   is examined, elements that are neither nil nor a character get special
   treatment, and the check recurses on an element with DIM - 1.
   NOTE(review): the return values and the exact branch structure are on
   lines not visible here -- confirm before relying on this summary. */
200 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
202 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
206 if (XVECTOR_LENGTH (v) > ccs_len)
209 for (i = 0; i < XVECTOR_LENGTH (v); i++)
211 Lisp_Object c = XVECTOR_DATA(v)[i];
213 if (!NILP (c) && !CHARP (c))
217 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for a code point from decoding table V.  The index
   used at a level is ((code_point >> (8 * dim)) & 255) - byte_offset,
   i.e. one byte of the code point rebased by BYTE_OFFSET; the slot that
   is finally selected is set to Qnil.
   NOTE(review): the loop that steps DIM and descends into nested vectors
   is on lines not visible here -- confirm. */
229 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
232 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
242 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
243 nv = XVECTOR_DATA(v)[i];
249 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER under CODE_POINT in decoding table V.  Slots are
   indexed by ((code_point >> (8 * dim)) & 255) - byte_offset.  A nested
   vector of the same length as V is created with make_older_vector
   (presumably when the slot does not hold a vector yet), and the final
   slot receives CHARACTER.
   NOTE(review): the loop over DIM and the conditions around the two
   stores are on lines not visible here -- confirm. */
253 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
254 int code_point, Lisp_Object character);
256 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
257 int code_point, Lisp_Object character)
261 int ccs_len = XVECTOR_LENGTH (v);
266 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
267 nv = XVECTOR_DATA(v)[i];
271 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
277 XVECTOR_DATA(v)[i] = character;
/* Associate CHARACTER with code point VALUE in coded charset CCS.
   VALUE is either an integer or, in the obsolete representation, a list
   of bytes that is folded into an integer ((code_point << 8) | byte);
   invalid input signals "Invalid value for coded-charset".  In the integer
   form, charsets whose XCHARSET_GRAPHIC is 1 have the code point masked
   with 0x7F7F7F7F.  A code point previously recorded for CHARACTER in CCS
   (found with Fget_char_attribute) is removed from the decoding table,
   the decoding table is created on demand with XCHARSET_BYTE_SIZE (ccs)
   slots, and the new mapping is entered.  Finally the encoding table is
   created on demand with make_char_id_table and CHARACTER -> VALUE is
   stored in it.
   NOTE(review): the first test (CCS named Qucs with CHARACTER equal to
   VALUE) appears to bypass the decoding-table update; its extent, the
   per-byte range checks of the list form and the return value are on
   lines not visible here -- confirm. */
281 put_char_ccs_code_point (Lisp_Object character,
282 Lisp_Object ccs, Lisp_Object value)
284 Lisp_Object encoding_table;
286 if (!EQ (XCHARSET_NAME (ccs), Qucs)
287 || (XCHAR (character) != XINT (value)))
289 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
290 int dim = XCHARSET_DIMENSION (ccs);
291 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
292 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
296 { /* obsolete representation: value must be a list of bytes */
297 Lisp_Object ret = Fcar (value);
301 signal_simple_error ("Invalid value for coded-charset", value);
302 code_point = XINT (ret);
303 if (XCHARSET_GRAPHIC (ccs) == 1)
311 signal_simple_error ("Invalid value for coded-charset",
315 signal_simple_error ("Invalid value for coded-charset",
318 if (XCHARSET_GRAPHIC (ccs) == 1)
320 code_point = (code_point << 8) | j;
323 value = make_int (code_point);
325 else if (INTP (value))
327 code_point = XINT (value);
328 if (XCHARSET_GRAPHIC (ccs) == 1)
330 code_point &= 0x7F7F7F7F;
331 value = make_int (code_point);
335 signal_simple_error ("Invalid value for coded-charset", value);
339 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
342 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
347 XCHARSET_DECODING_TABLE (ccs)
348 = v = make_older_vector (ccs_len, Qnil);
351 decoding_table_put_char (v, dim, byte_offset, code_point, character);
353 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
355 XCHARSET_ENCODING_TABLE (ccs)
356 = encoding_table = make_char_id_table (Qnil);
358 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
/* Remove CHARACTER's mapping in coded charset CCS.  If the charset's
   decoding table is a vector, the character's current code point is
   looked up with Fget_char_attribute and removed with
   decoding_table_remove_char.  If the charset's encoding table is a char
   table, Qnil is stored for CHARACTER in it.
   NOTE(review): the guard on the looked-up code point and the return
   value are on lines not visible here -- confirm. */
363 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
365 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
366 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
368 if (VECTORP (decoding_table))
370 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
374 decoding_table_remove_char (decoding_table,
375 XCHARSET_DIMENSION (ccs),
376 XCHARSET_BYTE_OFFSET (ccs),
380 if (CHAR_TABLEP (encoding_table))
382 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
390 int leading_code_private_11;
393 Lisp_Object Qcharsetp;
/* The symbols below presumably name the charset properties accepted by
   `make-charset' ('registry, 'final, 'graphic, 'direction, 'short-name,
   'long-name) -- confirm against the code that initializes them. */
395 /* Qdoc_string, Qdimension, Qchars defined in general.c */
396 Lisp_Object Qregistry, Qfinal, Qgraphic;
397 Lisp_Object Qdirection;
398 Lisp_Object Qreverse_direction_charset;
399 Lisp_Object Qleading_byte;
400 Lisp_Object Qshort_name, Qlong_name;
/* NOTE(review): the next four names are the visible part of a longer
   declaration list whose beginning and end are not visible here. */
416 Qjapanese_jisx0208_1978,
420 Qjapanese_jisx0208_1990,
435 Qvietnamese_viscii_lower,
436 Qvietnamese_viscii_upper,
/* Presumably the `l2r' and `r2l' values of the 'direction property --
   confirm. */
483 Lisp_Object Ql2r, Qr2l;
/* Hash table from charset name to charset object: make_charset stores
   into it with Fputhash and `find-charset' reads it with Fgethash. */
485 Lisp_Object Vcharset_hash_table;
487 /* Composite characters are characters constructed by overstriking two
488 or more regular characters.
490 1) The old Mule implementation involves storing composite characters
491 in a buffer as a tag followed by all of the actual characters
492 used to make up the composite character. I think this is a bad
493 idea; it greatly complicates code that wants to handle strings
494 one character at a time because it has to deal with the possibility
495 of great big ungainly characters. It's much more reasonable to
496 simply store an index into a table of composite characters.
498 2) The current implementation only allows for 16,384 separate
499 composite characters over the lifetime of the XEmacs process.
500 This could become a potential problem if the user
501 edited lots of different files that use composite characters.
502 Due to FSF bogosity, increasing the number of allowable
503 composite characters under Mule would decrease the number
504 of possible faces that can exist. Mule already has shrunk
505 this to 2048, and further shrinkage would become uncomfortable.
506 No such problems exist in XEmacs.
508 Composite characters could be represented as 0x80 C1 C2 C3,
509 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
510 for slightly under 2^20 (one million) composite characters
511 over the XEmacs process lifetime, and you only need to
512 increase the size of a Mule character from 19 to 21 bits.
513 Or you could use 0x80 C1 C2 C3 C4, allowing for about
514 85 million (slightly over 2^26) composite characters. */
517 /************************************************************************/
518 /* Basic Emchar functions */
519 /************************************************************************/
521 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
522 string in STR. Returns the number of bytes stored.
523 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Two encoders are visible in this function body.  The first writes C in
   a UTF-8 style multibyte form: 2 bytes up to 0x7ff, 3 up to 0xffff, 4 up
   to 0x1fffff, 5 up to 0x3ffffff and 6 beyond that, using lead bytes
   0xc0, 0xe0, 0xf0, 0xf8 and 0xfc and continuation bytes of the form
   0x80 | (six bits of C).  The second splits C with BREAKUP_CHAR and
   emits PRIVATE_LEADING_BYTE_PREFIX (lb) first when the leading byte is a
   private one.
   NOTE(review): presumably a preprocessor conditional that is not visible
   here selects one of the two; the first branch of the UTF-8 style chain,
   the remaining stores of the second encoder and the return statement are
   likewise not visible -- confirm. */
527 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
542 else if ( c <= 0x7ff )
544 *p++ = (c >> 6) | 0xc0;
545 *p++ = (c & 0x3f) | 0x80;
547 else if ( c <= 0xffff )
549 *p++ = (c >> 12) | 0xe0;
550 *p++ = ((c >> 6) & 0x3f) | 0x80;
551 *p++ = (c & 0x3f) | 0x80;
553 else if ( c <= 0x1fffff )
555 *p++ = (c >> 18) | 0xf0;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
560 else if ( c <= 0x3ffffff )
562 *p++ = (c >> 24) | 0xf8;
563 *p++ = ((c >> 18) & 0x3f) | 0x80;
564 *p++ = ((c >> 12) & 0x3f) | 0x80;
565 *p++ = ((c >> 6) & 0x3f) | 0x80;
566 *p++ = (c & 0x3f) | 0x80;
570 *p++ = (c >> 30) | 0xfc;
571 *p++ = ((c >> 24) & 0x3f) | 0x80;
572 *p++ = ((c >> 18) & 0x3f) | 0x80;
573 *p++ = ((c >> 12) & 0x3f) | 0x80;
574 *p++ = ((c >> 6) & 0x3f) | 0x80;
575 *p++ = (c & 0x3f) | 0x80;
578 BREAKUP_CHAR (c, charset, c1, c2);
579 lb = CHAR_LEADING_BYTE (c);
580 if (LEADING_BYTE_PRIVATE_P (lb))
581 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
583 if (EQ (charset, Vcharset_control_1))
592 /* Return the first character from a Mule-encoded string in STR,
593 assuming it's non-ASCII. Do not call this directly.
594 Use the macro charptr_emchar() instead. */
/* Two decoders are visible in this function body.  The first classifies
   the first byte B against the thresholds 0xf8, 0xf0, 0xe0 and 0xc0 and
   then folds LEN further bytes into CH six bits at a time
   (ch = (ch << 6) | (b & 0x3f)).  The second returns the following byte
   minus 0x20 for LEADING_BYTE_CONTROL_1, otherwise maps the leading byte
   to a charset with CHARSET_BY_LEADING_BYTE and builds the result with
   MAKE_CHAR, with extra handling when the charset's dimension is 2.
   NOTE(review): presumably a preprocessor conditional that is not visible
   here selects one of the two decoders -- confirm. */
597 non_ascii_charptr_emchar (const Bufbyte *str)
610 else if ( b >= 0xf8 )
615 else if ( b >= 0xf0 )
620 else if ( b >= 0xe0 )
625 else if ( b >= 0xc0 )
635 for( ; len > 0; len-- )
638 ch = ( ch << 6 ) | ( b & 0x3f );
642 Bufbyte i0 = *str, i1, i2 = 0;
645 if (i0 == LEADING_BYTE_CONTROL_1)
646 return (Emchar) (*++str - 0x20);
648 if (LEADING_BYTE_PREFIX_P (i0))
653 charset = CHARSET_BY_LEADING_BYTE (i0);
654 if (XCHARSET_DIMENSION (charset) == 2)
657 return MAKE_CHAR (charset, i1, i2);
661 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
662 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1, CHAR_FIELD2 and
   CHAR_FIELD3.  The visible checks compare FIELD2 (first group of tests)
   or FIELD1 (second group) against the official and private field ranges,
   test the position fields against 0x20 and 0x7F, and look the charset up
   with CHARSET_BY_LEADING_BYTE.  The paths that reach the charset lookup
   return whether the charset has 96 characters
   (XCHARSET_CHARS (charset) == 96).
   NOTE(review): the condition that separates the two groups and the
   return statements of the other branches are on lines not visible here
   -- confirm. */
666 non_ascii_valid_char_p (Emchar ch)
670 /* Must have only lowest 19 bits set */
674 f1 = CHAR_FIELD1 (ch);
675 f2 = CHAR_FIELD2 (ch);
676 f3 = CHAR_FIELD3 (ch);
682 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
683 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
684 f2 > MAX_CHAR_FIELD2_PRIVATE)
689 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
690 f2 <= MAX_CHAR_FIELD2_PRIVATE))
694 NOTE: This takes advantage of the fact that
695 FIELD2_TO_OFFICIAL_LEADING_BYTE and
696 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
698 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
699 if (EQ (charset, Qnil))
701 return (XCHARSET_CHARS (charset) == 96);
707 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
708 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
709 f1 > MAX_CHAR_FIELD1_PRIVATE)
711 if (f2 < 0x20 || f3 < 0x20)
714 #ifdef ENABLE_COMPOSITE_CHARS
715 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
717 if (UNBOUNDP (Fgethash (make_int (ch),
718 Vcomposite_char_char2string_hash_table,
723 #endif /* ENABLE_COMPOSITE_CHARS */
725 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
726 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
729 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
731 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
734 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
736 if (EQ (charset, Qnil))
738 return (XCHARSET_CHARS (charset) == 96);
744 /************************************************************************/
745 /* Basic string functions */
746 /************************************************************************/
748 /* Copy the character pointed to by SRC into DST. Do not call this
749 directly. Use the macro charptr_copy_char() instead.
750 Return the number of bytes copied. */
/* The byte count is taken from REP_BYTES_BY_FIRST_BYTE (*src); SRC and
   DST are advanced together, once per byte.
   NOTE(review): the copy statement inside the loop and the return
   statement are on lines not visible here. */
753 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
755 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
757 for (i = bytes; i; i--, dst++, src++)
763 /************************************************************************/
764 /* streams of Emchars */
765 /************************************************************************/
767 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
768 The functions below are not meant to be called directly; use
769 the macros in insdel.h. */
/* Read one character from STREAM.  CH supplies the first byte of the
   character; the remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are
   fetched with Lstream_getc (each asserted to be non-negative), collected
   in a local buffer of MAX_EMCHAR_LEN bytes, and the assembled sequence
   is decoded with charptr_emchar. */
772 Lstream_get_emchar_1 (Lstream *stream, int ch)
774 Bufbyte str[MAX_EMCHAR_LEN];
775 Bufbyte *strptr = str;
778 str[0] = (Bufbyte) ch;
780 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
782 int c = Lstream_getc (stream);
783 bufpos_checking_assert (c >= 0);
784 *++strptr = (Bufbyte) c;
786 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer with
   set_charptr_emchar and the resulting LEN bytes are passed to
   Lstream_write, whose result is returned. */
790 Lstream_fput_emchar (Lstream *stream, Emchar ch)
792 Bufbyte str[MAX_EMCHAR_LEN];
793 Bytecount len = set_charptr_emchar (str, ch);
794 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local buffer
   with set_charptr_emchar and the resulting LEN bytes are handed to
   Lstream_unread. */
798 Lstream_funget_emchar (Lstream *stream, Emchar ch)
800 Bufbyte str[MAX_EMCHAR_LEN];
801 Bytecount len = set_charptr_emchar (str, ch);
802 Lstream_unread (stream, str, len);
806 /************************************************************************/
808 /************************************************************************/
/* Mark method registered for the charset lrecord.  Marks the short name,
   long name, doc string, registry, CCL program and encoding table of OBJ.
   The call that would mark the decoding table is commented out.
   NOTE(review): the return value is on a line not visible here. */
811 mark_charset (Lisp_Object obj)
813 Lisp_Charset *cs = XCHARSET (obj);
815 mark_object (cs->short_name);
816 mark_object (cs->long_name);
817 mark_object (cs->doc_string);
818 mark_object (cs->registry);
819 mark_object (cs->ccl_program);
821 mark_object (cs->encoding_table);
822 /* mark_object (cs->decoding_table); */
/* Print method registered for the charset lrecord.  The visible output
   sequence is "#<charset ", then the charset name, short name, long name
   and doc string separated by spaces, a formatted summary (dimension,
   direction as l2r or r2l, columns, graphic, final byte), the registry,
   and the uid from the object header in hexadecimal.
   NOTE(review): the error call at the top is presumably guarded by a
   readable-printing check on a line not visible here, and the declaration
   of BUF is not visible either -- confirm its size against the sprintf
   calls. */
828 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
830 Lisp_Charset *cs = XCHARSET (obj);
834 error ("printing unreadable object #<charset %s 0x%x>",
835 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
838 write_c_string ("#<charset ", printcharfun);
839 print_internal (CHARSET_NAME (cs), printcharfun, 0);
840 write_c_string (" ", printcharfun);
841 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
842 write_c_string (" ", printcharfun);
843 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
844 write_c_string (" ", printcharfun);
845 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
846 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
848 CHARSET_DIMENSION (cs),
849 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
850 CHARSET_COLUMNS (cs),
851 CHARSET_GRAPHIC (cs),
853 write_c_string (buf, printcharfun);
854 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
855 sprintf (buf, " 0x%x>", cs->header.uid);
856 write_c_string (buf, printcharfun);
/* Description of the Lisp_Object members of Lisp_Charset: name, doc
   string, registry, short name, long name, reverse-direction charset, CCL
   program, decoding table and encoding table.
   NOTE(review): the terminating entry of the array is on a line not
   visible here. */
859 static const struct lrecord_description charset_description[] = {
860 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
861 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
862 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
863 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
864 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
865 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
866 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
868 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
869 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Registers the "charset" lrecord type with mark_charset and
   print_charset as its mark and print methods.
   NOTE(review): the remaining macro arguments are on lines not visible
   here. */
874 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
875 mark_charset, print_charset, 0, 0, 0,
879 /* Make a new charset. */
880 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset, fills in its fields from the arguments and
   registers the new object: in chlook->charset_by_attributes (indexed by
   an ISO 2022 type derived from DIMENSION and CHARS, and by FINAL), in
   chlook->charset_by_leading_byte at ID - MIN_LEADING_BYTE, and in
   Vcharset_hash_table under NAME.  The CCL program, reverse-direction
   charset, decoding table and encoding table all start out as Qnil; in
   the visible code the DECODING_TABLE argument is not stored.
   NOTE(review): several alternative statements below (three assignments
   to CHARSET_REP_BYTES, two forms of the charset_by_attributes update)
   are presumably selected by conditionals on lines not visible here, as
   are the REG parameter and the return statement -- confirm. */
882 make_charset (Charset_ID id, Lisp_Object name,
883 unsigned short chars, unsigned char dimension,
884 unsigned char columns, unsigned char graphic,
885 Bufbyte final, unsigned char direction, Lisp_Object short_name,
886 Lisp_Object long_name, Lisp_Object doc,
888 Lisp_Object decoding_table,
889 Emchar ucs_min, Emchar ucs_max,
890 Emchar code_offset, unsigned char byte_offset)
893 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
897 XSETCHARSET (obj, cs);
899 CHARSET_ID (cs) = id;
900 CHARSET_NAME (cs) = name;
901 CHARSET_SHORT_NAME (cs) = short_name;
902 CHARSET_LONG_NAME (cs) = long_name;
903 CHARSET_CHARS (cs) = chars;
904 CHARSET_DIMENSION (cs) = dimension;
905 CHARSET_DIRECTION (cs) = direction;
906 CHARSET_COLUMNS (cs) = columns;
907 CHARSET_GRAPHIC (cs) = graphic;
908 CHARSET_FINAL (cs) = final;
909 CHARSET_DOC_STRING (cs) = doc;
910 CHARSET_REGISTRY (cs) = reg;
911 CHARSET_CCL_PROGRAM (cs) = Qnil;
912 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
914 CHARSET_DECODING_TABLE(cs) = Qnil;
915 CHARSET_ENCODING_TABLE(cs) = Qnil;
916 CHARSET_UCS_MIN(cs) = ucs_min;
917 CHARSET_UCS_MAX(cs) = ucs_max;
918 CHARSET_CODE_OFFSET(cs) = code_offset;
919 CHARSET_BYTE_OFFSET(cs) = byte_offset;
923 if (id == LEADING_BYTE_ASCII)
924 CHARSET_REP_BYTES (cs) = 1;
926 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
928 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
933 /* some charsets do not have final characters. This includes
934 ASCII, Control-1, Composite, and the two faux private
936 unsigned char iso2022_type
937 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
939 if (code_offset == 0)
941 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
942 chlook->charset_by_attributes[iso2022_type][final] = obj;
946 (chlook->charset_by_attributes[iso2022_type][final][direction]));
947 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
951 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
952 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
954 /* Some charsets are "faux" and don't have names or really exist at
955 all except in the leading-byte table. */
957 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of DIMENSION.
   The visible code draws from three counters kept in chlook
   (next_allocated_leading_byte, next_allocated_1_byte_leading_byte and
   next_allocated_2_byte_leading_byte); each is compared against its
   maximum before being post-incremented into LB.  The message
   "No more character sets free for this dimension" is reported together
   with DIMENSION.
   NOTE(review): the conditions that choose between the three counters,
   the condition guarding the error and the return statement are on lines
   not visible here -- confirm. */
962 get_unallocated_leading_byte (int dimension)
967 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
970 lb = chlook->next_allocated_leading_byte++;
974 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
977 lb = chlook->next_allocated_1_byte_leading_byte++;
981 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
984 lb = chlook->next_allocated_2_byte_leading_byte++;
990 ("No more character sets free for this dimension",
991 make_int (dimension));
997 /* Number of Big5 characters which have the same code in 1st byte. */
999 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Map CODE_POINT of CHARSET to a character using built-in rules.
   Visible rules:
   (1) For Vcharset_chinese_big5, a code point whose first byte is in
       0xA1 - 0xFE and whose second byte is in 0x40 - 0x7E or 0xA1 - 0xFE
       is renumbered linearly (BIG5_SAME_ROW codes per first byte) and
       re-expressed as a 94x94 code point of Vcharset_chinese_big5_1 or
       Vcharset_chinese_big5_2; for the latter the index is first reduced
       by BIG5_SAME_ROW * (0xC9 - 0xA1).
   (2) For charsets whose final byte is >= '0', the result is computed
       from the final byte and the position bytes (masked with 0x7F and
       rebased by 33 for 94-character sets, 32 for 96-character sets);
       for dimension 2 the base is MIN_CHAR_94x94 or MIN_CHAR_96x96.
   (3) Otherwise, when XCHARSET_UCS_MAX is non-zero, the code point is
       turned into a linear index using XCHARSET_BYTE_OFFSET and
       XCHARSET_CHARS, XCHARSET_CODE_OFFSET is subtracted and
       XCHARSET_UCS_MIN added, and the result is checked against the
       range UCS_MIN .. UCS_MAX.
   NOTE(review): the test that chooses between the two Big5 planes, the
   base of the dimension-1 results and all failure return values are on
   lines not visible here -- confirm. */
1002 decode_builtin_char (Lisp_Object charset, int code_point)
1006 if (EQ (charset, Vcharset_chinese_big5))
1008 int c1 = code_point >> 8;
1009 int c2 = code_point & 0xFF;
1012 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1014 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1015 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1017 I = (c1 - 0xA1) * BIG5_SAME_ROW
1018 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1022 charset = Vcharset_chinese_big5_1;
1026 charset = Vcharset_chinese_big5_2;
1027 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1029 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1032 if ((final = XCHARSET_FINAL (charset)) >= '0')
1034 if (XCHARSET_DIMENSION (charset) == 1)
1036 switch (XCHARSET_CHARS (charset))
1040 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1043 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1051 switch (XCHARSET_CHARS (charset))
1054 return MIN_CHAR_94x94
1055 + (final - '0') * 94 * 94
1056 + (((code_point >> 8) & 0x7F) - 33) * 94
1057 + ((code_point & 0x7F) - 33);
1059 return MIN_CHAR_96x96
1060 + (final - '0') * 96 * 96
1061 + (((code_point >> 8) & 0x7F) - 32) * 96
1062 + ((code_point & 0x7F) - 32);
1069 else if (XCHARSET_UCS_MAX (charset))
1072 = (XCHARSET_DIMENSION (charset) == 1
1074 code_point - XCHARSET_BYTE_OFFSET (charset)
1076 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1077 * XCHARSET_CHARS (charset)
1078 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1079 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1080 if ((cid < XCHARSET_UCS_MIN (charset))
1081 || (XCHARSET_UCS_MAX (charset) < cid))
/* Return the code point of character CH in CHARSET, using the charset's
   range information rather than a table.  Visible rules:
   (1) If CH lies in XCHARSET_UCS_MIN .. XCHARSET_UCS_MAX, the index
       D = CH - UCS_MIN + CODE_OFFSET is split into 1 to 4 bytes in base
       XCHARSET_CHARS, each byte rebased by XCHARSET_BYTE_OFFSET and
       packed most significant byte first.  Charsets with 256 characters
       are tested for first.
   (2) Otherwise, if XCHARSET_CODE_OFFSET is 0, CH is matched against the
       94, 96, 94x94 or 96x96 range selected by the charset's final byte;
       the dimension-2 results are (row + base) << 8 | (column + base)
       with base 33 for 94 and 32 for 96.
   (3) For Vcharset_mojikyo_2022_1, a CH strictly between MIN_CHAR_MOJIKYO
       and MIN_CHAR_MOJIKYO + 94 * 60 * 94 is split into three bytes.
   NOTE(review): the result for 256-character charsets, the upper-bound
   halves of the range tests in (2), an adjustment of byte2 in (3) and the
   value returned when nothing matches are on lines not visible here --
   confirm. */
1090 range_charset_code_point (Lisp_Object charset, Emchar ch)
1094 if ((XCHARSET_UCS_MIN (charset) <= ch)
1095 && (ch <= XCHARSET_UCS_MAX (charset)))
1097 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1099 if (XCHARSET_CHARS (charset) == 256)
1101 else if (XCHARSET_DIMENSION (charset) == 1)
1102 return d + XCHARSET_BYTE_OFFSET (charset);
1103 else if (XCHARSET_DIMENSION (charset) == 2)
1105 ((d / XCHARSET_CHARS (charset)
1106 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1107 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1108 else if (XCHARSET_DIMENSION (charset) == 3)
1110 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1111 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1112 | ((d / XCHARSET_CHARS (charset)
1113 % XCHARSET_CHARS (charset)
1114 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1115 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1116 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1118 ((d / (XCHARSET_CHARS (charset)
1119 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1120 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1121 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1122 % XCHARSET_CHARS (charset)
1123 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1124 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1125 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1126 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1128 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1130 if (XCHARSET_DIMENSION (charset) == 1)
1132 if (XCHARSET_CHARS (charset) == 94)
1134 if (((d = ch - (MIN_CHAR_94
1135 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1139 else if (XCHARSET_CHARS (charset) == 96)
1141 if (((d = ch - (MIN_CHAR_96
1142 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1149 else if (XCHARSET_DIMENSION (charset) == 2)
1151 if (XCHARSET_CHARS (charset) == 94)
1153 if (((d = ch - (MIN_CHAR_94x94
1154 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1157 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1159 else if (XCHARSET_CHARS (charset) == 96)
1161 if (((d = ch - (MIN_CHAR_96x96
1162 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1165 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1171 if (EQ (charset, Vcharset_mojikyo_2022_1)
1172 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1174 int m = ch - MIN_CHAR_MOJIKYO - 1;
1175 int byte1 = m / (94 * 60) + 33;
1176 int byte2 = (m % (94 * 60)) / 94;
1177 int byte3 = m % 94 + 33;
1183 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Choose a charset for character C from its numeric range, store it in
   *CHARSET and return C's code point within that charset.  Visible
   ranges, in order: up to MAX_CHAR_BASIC_LATIN -> Vcharset_ascii; then
   Vcharset_control_1 and Vcharset_latin_iso8859_1 (their conditions are
   not visible); the Hebrew and Thai ranges (offset from the range start
   plus 0x20); the rest up to MAX_CHAR_BMP -> Vcharset_ucs_bmp; the
   Daikanwa and Mojikyo ranges (offset from the range start); the 94, 96,
   94x94 and 96x96 ranges, where the charset is found with
   CHARSET_BY_ATTRIBUTES from the final byte and Vcharset_ucs is used when
   no such charset exists; the china3-jef and CBETA ranges; anything else
   -> Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns list2 (...) rather
   than an integer like every other branch; presumably it is disabled by a
   conditional or comment on lines not visible here -- confirm.  The
   values returned by the branches that select Vcharset_ucs are also not
   visible. */
1189 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1191 if (c <= MAX_CHAR_BASIC_LATIN)
1193 *charset = Vcharset_ascii;
1198 *charset = Vcharset_control_1;
1203 *charset = Vcharset_latin_iso8859_1;
1207 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1209 *charset = Vcharset_hebrew_iso8859_8;
1210 return c - MIN_CHAR_HEBREW + 0x20;
1213 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1215 *charset = Vcharset_thai_tis620;
1216 return c - MIN_CHAR_THAI + 0x20;
1219 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1220 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1222 return list2 (Vcharset_katakana_jisx0201,
1223 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1226 else if (c <= MAX_CHAR_BMP)
1228 *charset = Vcharset_ucs_bmp;
1231 else if (c < MIN_CHAR_DAIKANWA)
1233 *charset = Vcharset_ucs;
1236 else if (c <= MAX_CHAR_DAIKANWA)
1238 *charset = Vcharset_ideograph_daikanwa;
1239 return c - MIN_CHAR_DAIKANWA;
1242 else if (c <= MAX_CHAR_MOJIKYO_0)
1244 *charset = Vcharset_mojikyo;
1245 return c - MIN_CHAR_MOJIKYO_0;
1248 else if (c < MIN_CHAR_94)
1250 *charset = Vcharset_ucs;
1253 else if (c <= MAX_CHAR_94)
1255 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1256 ((c - MIN_CHAR_94) / 94) + '0',
1257 CHARSET_LEFT_TO_RIGHT);
1258 if (!NILP (*charset))
1259 return ((c - MIN_CHAR_94) % 94) + 33;
1262 *charset = Vcharset_ucs;
1266 else if (c <= MAX_CHAR_96)
1268 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1269 ((c - MIN_CHAR_96) / 96) + '0',
1270 CHARSET_LEFT_TO_RIGHT);
1271 if (!NILP (*charset))
1272 return ((c - MIN_CHAR_96) % 96) + 32;
1275 *charset = Vcharset_ucs;
1279 else if (c <= MAX_CHAR_94x94)
1282 = CHARSET_BY_ATTRIBUTES (94, 2,
1283 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1284 CHARSET_LEFT_TO_RIGHT);
1285 if (!NILP (*charset))
1286 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1287 | (((c - MIN_CHAR_94x94) % 94) + 33);
1290 *charset = Vcharset_ucs;
1294 else if (c <= MAX_CHAR_96x96)
1297 = CHARSET_BY_ATTRIBUTES (96, 2,
1298 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1299 CHARSET_LEFT_TO_RIGHT);
1300 if (!NILP (*charset))
1301 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1302 | (((c - MIN_CHAR_96x96) % 96) + 32);
1305 *charset = Vcharset_ucs;
1310 else if (c < MIN_CHAR_MOJIKYO)
1312 *charset = Vcharset_ucs;
1315 else if (c <= MAX_CHAR_MOJIKYO)
1317 *charset = Vcharset_mojikyo;
1318 return c - MIN_CHAR_MOJIKYO;
1320 else if (c < MIN_CHAR_CHINA3_JEF)
1322 *charset = Vcharset_ucs;
1325 else if (c <= MAX_CHAR_CHINA3_JEF)
1327 *charset = Vcharset_china3_jef;
1328 return c - MIN_CHAR_CHINA3_JEF;
1330 else if (c <= MAX_CHAR_CBETA)
1332 *charset = Vcharset_ideograph_cbeta;
1333 return c - MIN_CHAR_CBETA;
1338 *charset = Vcharset_ucs;
1343 Lisp_Object Vdefault_coded_charset_priority_list;
1347 /************************************************************************/
1348 /* Basic charset Lisp functions */
1349 /************************************************************************/
1351 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1352 Return non-nil if OBJECT is a charset.
1356 return CHARSETP (object) ? Qt : Qnil;
1359 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1360 Retrieve the charset of the given name.
1361 If CHARSET-OR-NAME is a charset object, it is simply returned.
1362 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1363 nil is returned. Otherwise the associated charset object is returned.
1367 if (CHARSETP (charset_or_name))
1368 return charset_or_name;
1370 CHECK_SYMBOL (charset_or_name);
1371 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1374 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1375 Retrieve the charset of the given name.
1376 Same as `find-charset' except an error is signalled if there is no such
1377 charset instead of returning nil.
1381 Lisp_Object charset = Ffind_charset (name);
1384 signal_simple_error ("No such charset", name);
1388 /* We store the charsets in hash tables with the names as the key and the
1389 actual charset object as the value. Occasionally we need to use them
1390 in a list format. These routines provide us with that. */
/* Closure handed to elisp_maphash by `charset-list'; it carries a pointer
   to the list that is being built. */
1391 struct charset_list_closure
1393 Lisp_Object *charset_list;
/* Hash-table mapper used by `charset-list': conses KEY (the entry's key
   in Vcharset_hash_table) onto the front of the list that the closure
   points to.  VALUE is not used by the visible code.
   NOTE(review): the return value is on a line not visible here. */
1397 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1398 void *charset_list_closure)
1400 /* This function can GC */
1401 struct charset_list_closure *chcl =
1402 (struct charset_list_closure*) charset_list_closure;
1403 Lisp_Object *charset_list = chcl->charset_list;
1405 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1409 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1410 Return a list of the names of all defined charsets.
1414 Lisp_Object charset_list = Qnil;
1415 struct gcpro gcpro1;
1416 struct charset_list_closure charset_list_closure;
1418 GCPRO1 (charset_list);
1419 charset_list_closure.charset_list = &charset_list;
1420 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1421 &charset_list_closure);
1424 return charset_list;
1427 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1428 Return the name of charset CHARSET.
1432 return XCHARSET_NAME (Fget_charset (charset));
1435 /* #### SJT Should generic properties be allowed? */
1436 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1437 Define a new character set.
1438 This function is for use with Mule support.
1439 NAME is a symbol, the name by which the character set is normally referred.
1440 DOC-STRING is a string describing the character set.
1441 PROPS is a property list, describing the specific nature of the
1442 character set. Recognized properties are:
1444 'short-name Short version of the charset name (ex: Latin-1)
1445 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1446 'registry A regular expression matching the font registry field for
1448 'dimension Number of octets used to index a character in this charset.
1449 Either 1 or 2. Defaults to 1.
1450 'columns Number of columns used to display a character in this charset.
1451 Only used in TTY mode. (Under X, the actual width of a
1452 character can be derived from the font used to display the
1453 characters.) If unspecified, defaults to the dimension
1454 (this is almost always the correct value).
1455 'chars Number of characters in each dimension (94 or 96).
1456 Defaults to 94. Note that if the dimension is 2, the
1457 character set thus described is 94x94 or 96x96.
1458 'final Final byte of ISO 2022 escape sequence. Must be
1459 supplied. Each combination of (DIMENSION, CHARS) defines a
1460 separate namespace for final bytes. Note that ISO
1461 2022 restricts the final byte to the range
1462 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1463 dimension == 2. Note also that final bytes in the range
1464 0x30 - 0x3F are reserved for user-defined (not official)
1466 'graphic 0 (use left half of font on output) or 1 (use right half
1467 of font on output). Defaults to 0. For example, for
1468 a font whose registry is ISO8859-1, the left half
1469 (octets 0x20 - 0x7F) is the `ascii' character set, while
1470 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1471 character set. With 'graphic set to 0, the octets
1472 will have their high bit cleared; with it set to 1,
1473 the octets will have their high bit set.
1474 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1476 'ccl-program A compiled CCL program used to convert a character in
1477 this charset into an index into the font. This is in
1478 addition to the 'graphic property. The CCL program
1479 is passed the octets of the character, with the high
1480 bit cleared and set depending upon whether the value
1481 of the 'graphic property is 0 or 1.
1483 (name, doc_string, props))
1485 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1486 int direction = CHARSET_LEFT_TO_RIGHT;
1487 Lisp_Object registry = Qnil;
1488 Lisp_Object charset;
1489 Lisp_Object ccl_program = Qnil;
1490 Lisp_Object short_name = Qnil, long_name = Qnil;
1491 int byte_offset = -1;
1493 CHECK_SYMBOL (name);
1494 if (!NILP (doc_string))
1495 CHECK_STRING (doc_string);
1497 charset = Ffind_charset (name);
1498 if (!NILP (charset))
1499 signal_simple_error ("Cannot redefine existing charset", name);
1502 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1504 if (EQ (keyword, Qshort_name))
1506 CHECK_STRING (value);
1510 if (EQ (keyword, Qlong_name))
1512 CHECK_STRING (value);
1516 else if (EQ (keyword, Qdimension))
1519 dimension = XINT (value);
1520 if (dimension < 1 || dimension > 2)
1521 signal_simple_error ("Invalid value for 'dimension", value);
1524 else if (EQ (keyword, Qchars))
1527 chars = XINT (value);
1528 if (chars != 94 && chars != 96)
1529 signal_simple_error ("Invalid value for 'chars", value);
1532 else if (EQ (keyword, Qcolumns))
1535 columns = XINT (value);
1536 if (columns != 1 && columns != 2)
1537 signal_simple_error ("Invalid value for 'columns", value);
1540 else if (EQ (keyword, Qgraphic))
1543 graphic = XINT (value);
1545 if (graphic < 0 || graphic > 2)
1547 if (graphic < 0 || graphic > 1)
1549 signal_simple_error ("Invalid value for 'graphic", value);
1552 else if (EQ (keyword, Qregistry))
1554 CHECK_STRING (value);
1558 else if (EQ (keyword, Qdirection))
1560 if (EQ (value, Ql2r))
1561 direction = CHARSET_LEFT_TO_RIGHT;
1562 else if (EQ (value, Qr2l))
1563 direction = CHARSET_RIGHT_TO_LEFT;
1565 signal_simple_error ("Invalid value for 'direction", value);
1568 else if (EQ (keyword, Qfinal))
1570 CHECK_CHAR_COERCE_INT (value);
1571 final = XCHAR (value);
1572 if (final < '0' || final > '~')
1573 signal_simple_error ("Invalid value for 'final", value);
1576 else if (EQ (keyword, Qccl_program))
1578 struct ccl_program test_ccl;
1580 if (setup_ccl_program (&test_ccl, value) < 0)
1581 signal_simple_error ("Invalid value for 'ccl-program", value);
1582 ccl_program = value;
1586 signal_simple_error ("Unrecognized property", keyword);
1591 error ("'final must be specified");
1592 if (dimension == 2 && final > 0x5F)
1594 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1597 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1598 CHARSET_LEFT_TO_RIGHT)) ||
1599 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1600 CHARSET_RIGHT_TO_LEFT)))
1602 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1604 id = get_unallocated_leading_byte (dimension);
1606 if (NILP (doc_string))
1607 doc_string = build_string ("");
1609 if (NILP (registry))
1610 registry = build_string ("");
1612 if (NILP (short_name))
1613 XSETSTRING (short_name, XSYMBOL (name)->name);
1615 if (NILP (long_name))
1616 long_name = doc_string;
1619 columns = dimension;
1621 if (byte_offset < 0)
1625 else if (chars == 96)
1631 charset = make_charset (id, name, chars, dimension, columns, graphic,
1632 final, direction, short_name, long_name,
1633 doc_string, registry,
1634 Qnil, 0, 0, 0, byte_offset);
1635 if (!NILP (ccl_program))
1636 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Resolves CHARSET
   and signals an error if it already has a reverse-direction charset
   or if NEW-NAME already names a charset.  It then copies chars,
   dimension, columns, graphic, final byte, doc string, short and long
   names and registry from the source charset, picks the opposite
   direction, obtains a fresh id from get_unallocated_leading_byte, and
   creates the new charset with make_charset (passing along the source
   charset's decoding table, UCS range, code offset and byte offset).
   Finally the two charsets are linked to each other through their
   reverse-direction-charset slots. */
1640 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1642 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1643 NEW-NAME is the name of the new charset. Return the new charset.
1645 (charset, new_name))
1647 Lisp_Object new_charset = Qnil;
1648 int id, chars, dimension, columns, graphic, final;
1650 Lisp_Object registry, doc_string, short_name, long_name;
1653 charset = Fget_charset (charset);
1654 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1655 signal_simple_error ("Charset already has reverse-direction charset",
1658 CHECK_SYMBOL (new_name);
1659 if (!NILP (Ffind_charset (new_name)))
1660 signal_simple_error ("Cannot redefine existing charset", new_name);
1662 cs = XCHARSET (charset);
1664 chars = CHARSET_CHARS (cs);
1665 dimension = CHARSET_DIMENSION (cs);
1666 columns = CHARSET_COLUMNS (cs);
1667 id = get_unallocated_leading_byte (dimension);
1669 graphic = CHARSET_GRAPHIC (cs);
1670 final = CHARSET_FINAL (cs);
1671 direction = CHARSET_RIGHT_TO_LEFT;
1672 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1673 direction = CHARSET_LEFT_TO_RIGHT;
1674 doc_string = CHARSET_DOC_STRING (cs);
1675 short_name = CHARSET_SHORT_NAME (cs);
1676 long_name = CHARSET_LONG_NAME (cs);
1677 registry = CHARSET_REGISTRY (cs);
1679 new_charset = make_charset (id, new_name, chars, dimension, columns,
1680 graphic, final, direction, short_name, long_name,
1681 doc_string, registry,
1683 CHARSET_DECODING_TABLE(cs),
1684 CHARSET_UCS_MIN(cs),
1685 CHARSET_UCS_MAX(cs),
1686 CHARSET_CODE_OFFSET(cs),
1687 CHARSET_BYTE_OFFSET(cs)
1693 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1694 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  Checks that ALIAS is a
   symbol, resolves CHARSET with Fget_charset, and stores the charset
   object in Vcharset_hash_table under the key ALIAS.  The value
   returned is whatever Fputhash returns. */
1699 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1700 Define symbol ALIAS as an alias for CHARSET.
1704 CHECK_SYMBOL (alias);
1705 charset = Fget_charset (charset);
1706 return Fputhash (alias, charset, Vcharset_hash_table);
1709 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  Resolves CHARSET
   with Fget_charset and returns the contents of its
   reverse-direction-charset slot.
   NOTE(review): the only related DEFSUBR in syms_of_mule_charset is
   commented out and spelled Freverse_direction_charset, so this
   primitive does not appear to be registered -- confirm. */
1711 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1713 Return the reverse-direction charset parallel to CHARSET, if any.
1714 This is the charset with the same properties (in particular, the same
1715 dimension, number of characters per dimension, and final byte) as
1716 CHARSET but whose characters are displayed in the opposite direction.
1720 charset = Fget_charset (charset);
1721 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates its arguments
   before any lookup: DIMENSION must be 1 or 2, CHARS must be 94 or 96,
   FINAL must lie in '0' .. '~' (and not exceed 0x5F when DIMENSION is
   2), and DIRECTION must be 'l2r, 'r2l or nil.  The charset is then
   fetched with CHARSET_BY_ATTRIBUTES; the visible lookups cover
   left-to-right, right-to-left, and the explicitly requested direction.
   The visible return statement yields the XCHARSET_NAME of the charset
   that was found. */
1725 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1726 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1727 If DIRECTION is omitted, both directions will be checked (left-to-right
1728 will be returned if character sets exist for both directions).
1730 (dimension, chars, final, direction))
1732 int dm, ch, fi, di = -1;
1733 Lisp_Object obj = Qnil;
1735 CHECK_INT (dimension);
1736 dm = XINT (dimension);
1737 if (dm < 1 || dm > 2)
1738 signal_simple_error ("Invalid value for DIMENSION", dimension);
1742 if (ch != 94 && ch != 96)
1743 signal_simple_error ("Invalid value for CHARS", chars);
1745 CHECK_CHAR_COERCE_INT (final);
1747 if (fi < '0' || fi > '~')
1748 signal_simple_error ("Invalid value for FINAL", final);
1750 if (EQ (direction, Ql2r))
1751 di = CHARSET_LEFT_TO_RIGHT;
1752 else if (EQ (direction, Qr2l))
1753 di = CHARSET_RIGHT_TO_LEFT;
1754 else if (!NILP (direction))
1755 signal_simple_error ("Invalid value for DIRECTION", direction);
1757 if (dm == 2 && fi > 0x5F)
1759 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1763 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1765 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1768 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1771 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_SHORT_NAME. */
1775 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1776 Return short name of CHARSET.
1780 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_LONG_NAME. */
1783 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1784 Return long name of CHARSET.
1788 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.  Resolves CHARSET with
   Fget_charset and returns its doc string (XCHARSET_DOC_STRING). */
1791 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1792 Return description of CHARSET.
1796 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_DIMENSION as a Lisp integer. */
1799 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1800 Return dimension of CHARSET.
1804 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  Resolves CHARSET, checks that
   PROP is a symbol, then compares PROP against each known property
   symbol in turn and returns the matching field of the charset:
   names, doc string, registry and CCL program are returned as stored;
   dimension, columns, graphic and chars are wrapped with make_int; the
   final byte is returned as a character; the direction is reported as
   'l2r or 'r2l; and the reverse-direction charset is reported by name
   when the slot holds a charset object, otherwise the slot value itself
   is returned.  Any other PROP signals
   "Unrecognized charset property name". */
1807 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1808 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1809 Recognized properties are those listed in `make-charset', as well as
1810 'name and 'doc-string.
1816 charset = Fget_charset (charset);
1817 cs = XCHARSET (charset);
1819 CHECK_SYMBOL (prop);
1820 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1821 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1822 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1823 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1824 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1825 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1826 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1827 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1828 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1829 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1830 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1831 if (EQ (prop, Qdirection))
1832 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1833 if (EQ (prop, Qreverse_direction_charset))
1835 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1836 /* #### Is this translation OK? If so, error checking sufficient? */
1837 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1839 signal_simple_error ("Unrecognized charset property name", prop);
1840 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.  Resolves CHARSET with Fget_charset and
   returns its XCHARSET_LEADING_BYTE as a Lisp integer. */
1843 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1844 Return charset identification number of CHARSET.
1848 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1851 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  Resolves CHARSET, then
   validates CCL-PROGRAM by handing it to setup_ccl_program together
   with a scratch struct ccl_program; a negative result signals
   "Invalid ccl-program".  On success CCL-PROGRAM is stored in the
   charset's CCL-program slot. */
1854 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1855 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1857 (charset, ccl_program))
1859 struct ccl_program test_ccl;
1861 charset = Fget_charset (charset);
1862 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1863 signal_simple_error ("Invalid ccl-program", ccl_program);
1864 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Helper used by Fset_charset_registry.  Walks every device with
   DEVICE_LOOP_NO_BREAK, looks CHARSET up in that device's
   charset_font_cache, and empties the hash table found there. */
1869 invalidate_charset_font_caches (Lisp_Object charset)
1871 /* Invalidate font cache entries for charset on all devices. */
1872 Lisp_Object devcons, concons, hash_table;
1873 DEVICE_LOOP_NO_BREAK (devcons, concons)
1875 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so a device without an entry for this
   charset is recognised by the UNBOUNDP test and left alone. */
1876 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1877 if (!UNBOUNDP (hash_table))
1878 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  Resolves CHARSET, requires
   REGISTRY to be a string, stores it in the charset's registry slot,
   clears the per-device font caches for the charset with
   invalidate_charset_font_caches, and finally calls
   face_property_was_changed for the `font' property of the default
   face (presumably so that fonts get resolved again -- confirm). */
1882 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1883 Set the 'registry property of CHARSET to REGISTRY.
1885 (charset, registry))
1887 charset = Fget_charset (charset);
1888 CHECK_STRING (registry);
1889 XCHARSET_REGISTRY (charset) = registry;
1890 invalidate_charset_font_caches (charset);
1891 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_DECODING_TABLE. */
1896 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1897 Return mapping-table of CHARSET.
1901 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  Resolves CHARSET and
   then treats TABLE in one of three ways, as far as the visible code
   shows:

   - one branch discards the current decoding table, calling
     make_vector_newer on it first when it is a vector, and sets the
     slot to Qnil;
   - a vector TABLE is validated with decoding_table_check_elements,
     whose failure results are reported as "Too big table",
     "Invalid element is found" or "Something wrong";
   - anything else signals wrong-type-argument (vector-or-nil-p).

   Afterwards the table is walked according to CHARSET_DIMENSION.  Each
   element, or each element of a nested vector, is registered with
   put_char_ccs_code_point under a code point built from its index plus
   CHARSET_BYTE_OFFSET; in the nested case the outer index plus offset
   is shifted left by 8 bits.

   NOTE(review): the conditions that guard several of these statements
   are not visible here; confirm which tests select each branch before
   relying on this summary. */
1904 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1905 Set mapping-table of CHARSET to TABLE.
1909 struct Lisp_Charset *cs;
1913 charset = Fget_charset (charset);
1914 cs = XCHARSET (charset);
1918 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1919 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1920 CHARSET_DECODING_TABLE(cs) = Qnil;
1923 else if (VECTORP (table))
1925 int ccs_len = CHARSET_BYTE_SIZE (cs);
1926 int ret = decoding_table_check_elements (table,
1927 CHARSET_DIMENSION (cs),
1932 signal_simple_error ("Too big table", table);
1934 signal_simple_error ("Invalid element is found", table);
1936 signal_simple_error ("Something wrong", table);
1938 CHARSET_DECODING_TABLE(cs) = Qnil;
1941 signal_error (Qwrong_type_argument,
1942 list2 (build_translated_string ("vector-or-nil-p"),
1945 byte_offset = CHARSET_BYTE_OFFSET (cs);
1946 switch (CHARSET_DIMENSION (cs))
1949 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1951 Lisp_Object c = XVECTOR_DATA(table)[i];
1954 put_char_ccs_code_point (c, charset,
1955 make_int (i + byte_offset));
1959 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1961 Lisp_Object v = XVECTOR_DATA(table)[i];
1967 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1969 Lisp_Object c = XVECTOR_DATA(v)[j];
1972 put_char_ccs_code_point
1974 make_int ( ( (i + byte_offset) << 8 )
1980 put_char_ccs_code_point (v, charset,
1981 make_int (i + byte_offset));
1990 /************************************************************************/
1991 /* Lisp primitives for working with characters */
1992 /************************************************************************/
/* Lisp primitive `decode-char'.  Resolves CHARSET with Fget_charset,
   has a special case for charsets whose XCHARSET_GRAPHIC is 1, and
   then maps the code point to a character with DECODE_CHAR.  A
   non-negative result is returned as a Lisp character; a negative one
   yields nil.
   NOTE(review): the statement executed for the graphic == 1 case is
   not visible here -- confirm what adjustment it makes to the code. */
1995 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
1996 Make a character from CHARSET and code-point CODE.
2002 charset = Fget_charset (charset);
2005 if (XCHARSET_GRAPHIC (charset) == 1)
2007 c = DECODE_CHAR (charset, c);
2008 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.  Resolves CHARSET with
   Fget_charset.  When the charset is Vcharset_latin_viscii, the code is
   first decoded with Fdecode_char and the resulting character's
   attributes under Vcharset_latin_viscii_lower and
   Vcharset_latin_viscii_upper are consulted; CHARSET is switched to
   whichever of the two is selected.  The code point is then handed to
   decode_builtin_char (with a special case for charsets whose
   XCHARSET_GRAPHIC is 1).  A non-negative result is returned as a Lisp
   character; otherwise the function falls back to Fdecode_char.
   NOTE(review): the tests applied to the Fget_char_attribute results
   are not visible here -- confirm how lower/upper is chosen. */
2011 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2012 Make a builtin character from CHARSET and code-point CODE.
2018 charset = Fget_charset (charset);
2020 if (EQ (charset, Vcharset_latin_viscii))
2022 Lisp_Object chr = Fdecode_char (charset, code);
2028 (ret = Fget_char_attribute (chr,
2029 Vcharset_latin_viscii_lower,
2032 charset = Vcharset_latin_viscii_lower;
2036 (ret = Fget_char_attribute (chr,
2037 Vcharset_latin_viscii_upper,
2040 charset = Vcharset_latin_viscii_upper;
2047 if (XCHARSET_GRAPHIC (charset) == 1)
2050 c = decode_builtin_char (charset, c);
2051 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char'.  Resolves CHARSET and derives the
   permitted octet range from it: 0-127 for ascii, 0-31 for control-1,
   0-255 for 256-character charsets, 33-126 for 94-character charsets,
   and 32-127 otherwise.  ARG1 must fall inside that range, else
   args_out_of_range_3 is signalled.  For a charset of dimension one the
   character is built by MAKE_CHAR with a zero second octet, and the
   visible error message shows that a non-nil ARG2 is rejected there.
   Otherwise ARG2 is masked with 0x7f, range-checked the same way, and
   both octets are passed to MAKE_CHAR. */
2055 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2056 Make a character from CHARSET and octets ARG1 and ARG2.
2057 ARG2 is required only for characters from two-dimensional charsets.
2058 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2059 character s with caron.
2061 (charset, arg1, arg2))
2065 int lowlim, highlim;
2067 charset = Fget_charset (charset);
2068 cs = XCHARSET (charset);
2070 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2071 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2073 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2075 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2076 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2079 /* It is useful (and safe, according to Olivier Galibert) to strip
2080 the 8th bit off ARG1 and ARG2 because it allows programmers to
2081 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2082 Latin 2 code of the character. */
2090 if (a1 < lowlim || a1 > highlim)
2091 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2093 if (CHARSET_DIMENSION (cs) == 1)
2097 ("Charset is of dimension one; second octet must be nil", arg2);
2098 return make_char (MAKE_CHAR (charset, a1, 0));
2107 a2 = XINT (arg2) & 0x7f;
2109 if (a2 < lowlim || a2 > highlim)
2110 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2112 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  Coerces CHARACTER with
   CHECK_CHAR_COERCE_INT and returns the XCHARSET_NAME of the charset
   that CHAR_CHARSET reports for it. */
2115 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2116 Return the character set of CHARACTER.
2120 CHECK_CHAR_COERCE_INT (character);
2122 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.  Coerces CHARACTER, splits it with
   BREAKUP_CHAR into its charset and two octets, and returns the first
   octet when N is nil or 0 and the second when N is 1.  Any other N
   signals "Octet number must be 0 or 1". */
2125 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2126 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2127 N defaults to 0 if omitted.
2131 Lisp_Object charset;
2134 CHECK_CHAR_COERCE_INT (character);
2136 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2138 if (NILP (n) || EQ (n, Qzero))
2139 return make_int (octet0);
2140 else if (EQ (n, make_int (1)))
2141 return make_int (octet1);
2143 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  GC-protects its two working objects
   and coerces CHARACTER.  Two ways of building the result are visible:

   - one obtains a code point with ENCODE_CHAR, conses its low byte
     (code_point & 255) onto the result inside a loop that runs while
     the dimension counter is positive, and finally prepends the
     charset name;
   - the other splits the character with BREAKUP_CHAR and builds a
     three-element list for charsets of dimension 2 and a two-element
     list otherwise.

   NOTE(review): presumably these are alternative preprocessor branches
   rather than code that runs back to back -- confirm. */
2146 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2147 Return list of charset and one or two position-codes of CHARACTER.
2151 /* This function can GC */
2152 struct gcpro gcpro1, gcpro2;
2153 Lisp_Object charset = Qnil;
2154 Lisp_Object rc = Qnil;
2162 GCPRO2 (charset, rc);
2163 CHECK_CHAR_COERCE_INT (character);
2166 code_point = ENCODE_CHAR (XCHAR (character), charset);
2167 dimension = XCHARSET_DIMENSION (charset);
2168 while (dimension > 0)
2170 rc = Fcons (make_int (code_point & 255), rc);
2174 rc = Fcons (XCHARSET_NAME (charset), rc);
2176 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2178 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2180 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2184 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2193 #ifdef ENABLE_COMPOSITE_CHARS
2194 /************************************************************************/
2195 /* composite character functions */
2196 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   signals "No more composite chars available" once
   composite_char_row_next has reached 128; otherwise it builds a new
   character in Vcharset_composite from the next free row and column,
   records the pairing in both hash tables (character to string and
   string to character), and advances the column counter. */
2199 lookup_composite_char (Bufbyte *str, int len)
2201 Lisp_Object lispstr = make_string (str, len);
2202 Lisp_Object ch = Fgethash (lispstr,
2203 Vcomposite_char_string2char_hash_table,
2209 if (composite_char_row_next >= 128)
2210 signal_simple_error ("No more composite chars available", lispstr);
2211 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2212 composite_char_col_next);
2213 Fputhash (make_char (emch), lispstr,
2214 Vcomposite_char_char2string_hash_table);
2215 Fputhash (lispstr, make_char (emch),
2216 Vcomposite_char_string2char_hash_table);
2217 composite_char_col_next++;
/* When the column counter reaches 128 it restarts at 32 and the row
   counter moves on to the next row. */
2218 if (composite_char_col_next >= 128)
2220 composite_char_col_next = 32;
2221 composite_char_row_next++;
/* Inverse of lookup_composite_char: looks CH up in
   Vcomposite_char_char2string_hash_table and asserts that an entry was
   found, i.e. callers are expected to pass only characters that were
   registered there. */
2230 composite_char_string (Emchar ch)
2232 Lisp_Object str = Fgethash (make_char (ch),
2233 Vcomposite_char_char2string_hash_table,
2235 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char, declared with
   xxDEFUN.  Requires STRING to be a string and returns, as a Lisp
   character, the composite character that lookup_composite_char yields
   for the string's data and length. */
2239 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2240 Convert a string into a single composite character.
2241 The character is the result of overstriking all the characters in
2246 CHECK_STRING (string);
2247 return make_char (lookup_composite_char (XSTRING_DATA (string),
2248 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string, declared with
   xxDEFUN.  Signals "Must be composite char" unless the leading byte of
   the character is LEADING_BYTE_COMPOSITE, and otherwise returns the
   string recorded for it. */
2251 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2252 Return a string of the characters comprising a composite character.
2260 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2261 signal_simple_error ("Must be composite char", ch);
2262 return composite_char_string (emch);
2264 #endif /* ENABLE_COMPOSITE_CHARS */
2267 /************************************************************************/
2268 /* initialization */
2269 /************************************************************************/
/* Symbol-level initialisation for this file.  Registers the charset
   lrecord implementation, declares the Lisp primitives defined above
   with DEFSUBR, and interns the symbols this file refers to: the
   predicate name, the property keywords, the two direction values, and
   the names of the predefined charsets. */
2272 syms_of_mule_charset (void)
2274 INIT_LRECORD_IMPLEMENTATION (charset);
2276 DEFSUBR (Fcharsetp);
2277 DEFSUBR (Ffind_charset);
2278 DEFSUBR (Fget_charset);
2279 DEFSUBR (Fcharset_list);
2280 DEFSUBR (Fcharset_name);
2281 DEFSUBR (Fmake_charset);
2282 DEFSUBR (Fmake_reverse_direction_charset);
2283 /* DEFSUBR (Freverse_direction_charset); */
2284 DEFSUBR (Fdefine_charset_alias);
2285 DEFSUBR (Fcharset_from_attributes);
2286 DEFSUBR (Fcharset_short_name);
2287 DEFSUBR (Fcharset_long_name);
2288 DEFSUBR (Fcharset_description);
2289 DEFSUBR (Fcharset_dimension);
2290 DEFSUBR (Fcharset_property);
2291 DEFSUBR (Fcharset_id);
2292 DEFSUBR (Fset_charset_ccl_program);
2293 DEFSUBR (Fset_charset_registry);
2295 DEFSUBR (Fcharset_mapping_table);
2296 DEFSUBR (Fset_charset_mapping_table);
2300 DEFSUBR (Fdecode_char);
2301 DEFSUBR (Fdecode_builtin_char);
2303 DEFSUBR (Fmake_char);
2304 DEFSUBR (Fchar_charset);
2305 DEFSUBR (Fchar_octet);
2306 DEFSUBR (Fsplit_char);
2308 #ifdef ENABLE_COMPOSITE_CHARS
2309 DEFSUBR (Fmake_composite_char);
2310 DEFSUBR (Fcomposite_char_string);
/* Property keywords compared against in Fmake_charset and
   Fcharset_property. */
2313 defsymbol (&Qcharsetp, "charsetp");
2314 defsymbol (&Qregistry, "registry");
2315 defsymbol (&Qfinal, "final");
2316 defsymbol (&Qgraphic, "graphic");
2317 defsymbol (&Qdirection, "direction");
2318 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2319 defsymbol (&Qshort_name, "short-name");
2320 defsymbol (&Qlong_name, "long-name");
/* The two values accepted for the 'direction property. */
2322 defsymbol (&Ql2r, "l2r");
2323 defsymbol (&Qr2l, "r2l");
2325 /* Charsets, compatible with FSF 20.3
2326 Naming convention is Script-Charset[-Edition] */
2327 defsymbol (&Qascii, "ascii");
2328 defsymbol (&Qcontrol_1, "control-1");
2329 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2330 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2331 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2332 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2333 defsymbol (&Qthai_tis620, "thai-tis620");
2334 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2335 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2336 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2337 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2338 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2339 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2340 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2341 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2342 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2343 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2344 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2345 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2346 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2347 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2348 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2349 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2351 defsymbol (&Qucs, "ucs");
2352 defsymbol (&Qucs_bmp, "ucs-bmp");
2353 defsymbol (&Qucs_cns, "ucs-cns");
2354 defsymbol (&Qucs_jis, "ucs-jis");
2355 defsymbol (&Qucs_big5, "ucs-big5");
2356 defsymbol (&Qlatin_viscii, "latin-viscii");
2357 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2358 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2359 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2360 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2361 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2362 defsymbol (&Qideograph_gt, "ideograph-gt");
2363 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2364 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2365 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2366 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2367 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2368 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2369 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2370 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2371 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2372 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2373 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2374 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2375 defsymbol (&Qchinese_big5, "chinese-big5");
2376 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2377 defsymbol (&Qchina3_jef, "china3-jef");
2378 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2379 defsymbol (&Qmojikyo, "mojikyo");
2380 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2381 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2382 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2383 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2384 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2385 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2386 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2387 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2388 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2389 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2390 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2391 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2392 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2393 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2394 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2395 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2396 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2397 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2398 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2399 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2400 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2401 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2402 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2404 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2405 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2407 defsymbol (&Qcomposite, "composite");
/* Variable-level initialisation for this file.  Allocates the zeroed
   chlook lookup structure and registers it with the dumper through
   dump_add_root_struct_ptr, fills the by-leading-byte and
   by-attributes lookup tables with Qnil, seeds the counters used to
   hand out private leading bytes, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.
   NOTE(review): the by-attributes table is cleared by two loop nests of
   different depth, and two sets of leading-byte counters are seeded;
   presumably these are alternative preprocessor branches -- confirm. */
2411 vars_of_mule_charset (void)
2418 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2419 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2421 /* Table of charsets indexed by leading byte. */
2422 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2423 chlook->charset_by_leading_byte[i] = Qnil;
2426 /* Table of charsets indexed by type/final-byte. */
2427 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2428 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2429 chlook->charset_by_attributes[i][j] = Qnil;
2431 /* Table of charsets indexed by type/final-byte/direction. */
2432 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2433 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2434 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2435 chlook->charset_by_attributes[i][j][k] = Qnil;
2439 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2441 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2442 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2446 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2447 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2448 Leading-code of private TYPE9N charset of column-width 1.
2450 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2454 Vdefault_coded_charset_priority_list = Qnil;
2455 DEFVAR_LISP ("default-coded-charset-priority-list",
2456 &Vdefault_coded_charset_priority_list /*
2457 Default order of preferred coded-character-sets.
2463 complex_vars_of_mule_charset (void)
2465 staticpro (&Vcharset_hash_table);
2466 Vcharset_hash_table =
2467 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2469 /* Predefined character sets. We store them into variables for
2473 staticpro (&Vcharset_ucs);
2475 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2476 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2477 build_string ("UCS"),
2478 build_string ("UCS"),
2479 build_string ("ISO/IEC 10646"),
2481 Qnil, 0, 0xFFFFFFF, 0, 0);
2482 staticpro (&Vcharset_ucs_bmp);
2484 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2485 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2486 build_string ("BMP"),
2487 build_string ("BMP"),
2488 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2489 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2490 Qnil, 0, 0xFFFF, 0, 0);
2491 staticpro (&Vcharset_ucs_cns);
2493 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2494 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2495 build_string ("UCS for CNS"),
2496 build_string ("UCS for CNS 11643"),
2497 build_string ("ISO/IEC 10646 for CNS 11643"),
2500 staticpro (&Vcharset_ucs_jis);
2502 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2503 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2504 build_string ("UCS for JIS"),
2505 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2506 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2509 staticpro (&Vcharset_ucs_big5);
2511 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2512 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2513 build_string ("UCS for Big5"),
2514 build_string ("UCS for Big5"),
2515 build_string ("ISO/IEC 10646 for Big5"),
2519 # define MIN_CHAR_THAI 0
2520 # define MAX_CHAR_THAI 0
2521 /* # define MIN_CHAR_HEBREW 0 */
2522 /* # define MAX_CHAR_HEBREW 0 */
2523 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2524 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2526 staticpro (&Vcharset_ascii);
2528 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2529 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2530 build_string ("ASCII"),
2531 build_string ("ASCII)"),
2532 build_string ("ASCII (ISO646 IRV)"),
2533 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2534 Qnil, 0, 0x7F, 0, 0);
2535 staticpro (&Vcharset_control_1);
2536 Vcharset_control_1 =
2537 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2538 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2539 build_string ("C1"),
2540 build_string ("Control characters"),
2541 build_string ("Control characters 128-191"),
2543 Qnil, 0x80, 0x9F, 0, 0);
2544 staticpro (&Vcharset_latin_iso8859_1);
2545 Vcharset_latin_iso8859_1 =
2546 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2547 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2548 build_string ("Latin-1"),
2549 build_string ("ISO8859-1 (Latin-1)"),
2550 build_string ("ISO8859-1 (Latin-1)"),
2551 build_string ("iso8859-1"),
2552 Qnil, 0xA0, 0xFF, 0, 32);
2553 staticpro (&Vcharset_latin_iso8859_2);
2554 Vcharset_latin_iso8859_2 =
2555 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2556 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2557 build_string ("Latin-2"),
2558 build_string ("ISO8859-2 (Latin-2)"),
2559 build_string ("ISO8859-2 (Latin-2)"),
2560 build_string ("iso8859-2"),
2562 staticpro (&Vcharset_latin_iso8859_3);
2563 Vcharset_latin_iso8859_3 =
2564 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2565 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2566 build_string ("Latin-3"),
2567 build_string ("ISO8859-3 (Latin-3)"),
2568 build_string ("ISO8859-3 (Latin-3)"),
2569 build_string ("iso8859-3"),
2571 staticpro (&Vcharset_latin_iso8859_4);
2572 Vcharset_latin_iso8859_4 =
2573 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2574 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2575 build_string ("Latin-4"),
2576 build_string ("ISO8859-4 (Latin-4)"),
2577 build_string ("ISO8859-4 (Latin-4)"),
2578 build_string ("iso8859-4"),
2580 staticpro (&Vcharset_thai_tis620);
2581 Vcharset_thai_tis620 =
2582 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2583 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2584 build_string ("TIS620"),
2585 build_string ("TIS620 (Thai)"),
2586 build_string ("TIS620.2529 (Thai)"),
2587 build_string ("tis620"),
2588 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2589 staticpro (&Vcharset_greek_iso8859_7);
2590 Vcharset_greek_iso8859_7 =
2591 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2592 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2593 build_string ("ISO8859-7"),
2594 build_string ("ISO8859-7 (Greek)"),
2595 build_string ("ISO8859-7 (Greek)"),
2596 build_string ("iso8859-7"),
2598 staticpro (&Vcharset_arabic_iso8859_6);
2599 Vcharset_arabic_iso8859_6 =
2600 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2601 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2602 build_string ("ISO8859-6"),
2603 build_string ("ISO8859-6 (Arabic)"),
2604 build_string ("ISO8859-6 (Arabic)"),
2605 build_string ("iso8859-6"),
2607 staticpro (&Vcharset_hebrew_iso8859_8);
2608 Vcharset_hebrew_iso8859_8 =
2609 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2610 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2611 build_string ("ISO8859-8"),
2612 build_string ("ISO8859-8 (Hebrew)"),
2613 build_string ("ISO8859-8 (Hebrew)"),
2614 build_string ("iso8859-8"),
2616 0 /* MIN_CHAR_HEBREW */,
2617 0 /* MAX_CHAR_HEBREW */, 0, 32);
2618 staticpro (&Vcharset_katakana_jisx0201);
2619 Vcharset_katakana_jisx0201 =
2620 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2621 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2622 build_string ("JISX0201 Kana"),
2623 build_string ("JISX0201.1976 (Japanese Kana)"),
2624 build_string ("JISX0201.1976 Japanese Kana"),
2625 build_string ("jisx0201\\.1976"),
2627 staticpro (&Vcharset_latin_jisx0201);
2628 Vcharset_latin_jisx0201 =
2629 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2630 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2631 build_string ("JISX0201 Roman"),
2632 build_string ("JISX0201.1976 (Japanese Roman)"),
2633 build_string ("JISX0201.1976 Japanese Roman"),
2634 build_string ("jisx0201\\.1976"),
2636 staticpro (&Vcharset_cyrillic_iso8859_5);
2637 Vcharset_cyrillic_iso8859_5 =
2638 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2639 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2640 build_string ("ISO8859-5"),
2641 build_string ("ISO8859-5 (Cyrillic)"),
2642 build_string ("ISO8859-5 (Cyrillic)"),
2643 build_string ("iso8859-5"),
2645 staticpro (&Vcharset_latin_iso8859_9);
2646 Vcharset_latin_iso8859_9 =
2647 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2648 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2649 build_string ("Latin-5"),
2650 build_string ("ISO8859-9 (Latin-5)"),
2651 build_string ("ISO8859-9 (Latin-5)"),
2652 build_string ("iso8859-9"),
2654 staticpro (&Vcharset_japanese_jisx0208_1978);
2655 Vcharset_japanese_jisx0208_1978 =
2656 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2657 Qjapanese_jisx0208_1978, 94, 2,
2658 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2659 build_string ("JIS X0208:1978"),
2660 build_string ("JIS X0208:1978 (Japanese)"),
2662 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2663 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2665 staticpro (&Vcharset_chinese_gb2312);
2666 Vcharset_chinese_gb2312 =
2667 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2668 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2669 build_string ("GB2312"),
2670 build_string ("GB2312)"),
2671 build_string ("GB2312 Chinese simplified"),
2672 build_string ("gb2312"),
2674 staticpro (&Vcharset_chinese_gb12345);
2675 Vcharset_chinese_gb12345 =
2676 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2677 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2678 build_string ("G1"),
2679 build_string ("GB 12345)"),
2680 build_string ("GB 12345-1990"),
2681 build_string ("GB12345\\(\\.1990\\)?-0"),
2683 staticpro (&Vcharset_japanese_jisx0208);
2684 Vcharset_japanese_jisx0208 =
2685 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2686 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2687 build_string ("JISX0208"),
2688 build_string ("JIS X0208:1983 (Japanese)"),
2689 build_string ("JIS X0208:1983 Japanese Kanji"),
2690 build_string ("jisx0208\\.1983"),
2693 staticpro (&Vcharset_japanese_jisx0208_1990);
2694 Vcharset_japanese_jisx0208_1990 =
2695 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2696 Qjapanese_jisx0208_1990, 94, 2,
2697 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2698 build_string ("JISX0208-1990"),
2699 build_string ("JIS X0208:1990 (Japanese)"),
2700 build_string ("JIS X0208:1990 Japanese Kanji"),
2701 build_string ("jisx0208\\.1990"),
2703 MIN_CHAR_JIS_X0208_1990,
2704 MAX_CHAR_JIS_X0208_1990, 0, 33);
2706 staticpro (&Vcharset_korean_ksc5601);
2707 Vcharset_korean_ksc5601 =
2708 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2709 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2710 build_string ("KSC5601"),
2711 build_string ("KSC5601 (Korean"),
2712 build_string ("KSC5601 Korean Hangul and Hanja"),
2713 build_string ("ksc5601"),
2715 staticpro (&Vcharset_japanese_jisx0212);
2716 Vcharset_japanese_jisx0212 =
2717 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2718 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2719 build_string ("JISX0212"),
2720 build_string ("JISX0212 (Japanese)"),
2721 build_string ("JISX0212 Japanese Supplement"),
2722 build_string ("jisx0212"),
/* Build the regexp string literal for one CNS 11643 plane.
   The expansion relies on adjacent string-literal concatenation, so N
   must itself be a string literal (e.g. "1"), not a number.
   The resulting pattern is: "cns11643", then '.' or '-', then an optional
   group of arbitrary text ending in '.' or '-', then N at end of string.
   NOTE(review): presumably matched against font registry names, like the
   other patterns passed to build_string in this argument position --
   confirm against make_charset.  */
2725 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2726 staticpro (&Vcharset_chinese_cns11643_1);
2727 Vcharset_chinese_cns11643_1 =
2728 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2729 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2730 build_string ("CNS11643-1"),
2731 build_string ("CNS11643-1 (Chinese traditional)"),
2733 ("CNS 11643 Plane 1 Chinese traditional"),
2734 build_string (CHINESE_CNS_PLANE_RE("1")),
2736 staticpro (&Vcharset_chinese_cns11643_2);
2737 Vcharset_chinese_cns11643_2 =
2738 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2739 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("CNS11643-2"),
2741 build_string ("CNS11643-2 (Chinese traditional)"),
2743 ("CNS 11643 Plane 2 Chinese traditional"),
2744 build_string (CHINESE_CNS_PLANE_RE("2")),
2747 staticpro (&Vcharset_latin_tcvn5712);
2748 Vcharset_latin_tcvn5712 =
2749 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2750 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("TCVN 5712"),
2752 build_string ("TCVN 5712 (VSCII-2)"),
2753 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2754 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2756 staticpro (&Vcharset_latin_viscii_lower);
2757 Vcharset_latin_viscii_lower =
2758 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2759 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2760 build_string ("VISCII lower"),
2761 build_string ("VISCII lower (Vietnamese)"),
2762 build_string ("VISCII lower (Vietnamese)"),
2763 build_string ("MULEVISCII-LOWER"),
2765 staticpro (&Vcharset_latin_viscii_upper);
2766 Vcharset_latin_viscii_upper =
2767 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2768 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2769 build_string ("VISCII upper"),
2770 build_string ("VISCII upper (Vietnamese)"),
2771 build_string ("VISCII upper (Vietnamese)"),
2772 build_string ("MULEVISCII-UPPER"),
2774 staticpro (&Vcharset_latin_viscii);
2775 Vcharset_latin_viscii =
2776 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2777 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2778 build_string ("VISCII"),
2779 build_string ("VISCII 1.1 (Vietnamese)"),
2780 build_string ("VISCII 1.1 (Vietnamese)"),
2781 build_string ("VISCII1\\.1"),
2783 staticpro (&Vcharset_chinese_big5);
2784 Vcharset_chinese_big5 =
2785 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2786 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2787 build_string ("Big5"),
2788 build_string ("Big5"),
2789 build_string ("Big5 Chinese traditional"),
2790 build_string ("big5"),
2792 staticpro (&Vcharset_chinese_big5_cdp);
2793 Vcharset_chinese_big5_cdp =
2794 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2795 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2796 build_string ("Big5-CDP"),
2797 build_string ("Big5 + CDP extension"),
2798 build_string ("Big5 with CDP extension"),
2799 build_string ("big5\\.cdp-0"),
2801 staticpro (&Vcharset_china3_jef);
2802 Vcharset_china3_jef =
2803 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2804 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2805 build_string ("JC3"),
2806 build_string ("JEF + CHINA3"),
2807 build_string ("JEF + CHINA3 private characters"),
2808 build_string ("china3jef-0"),
2809 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2810 staticpro (&Vcharset_ideograph_cbeta);
2811 Vcharset_ideograph_cbeta =
2812 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2813 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2814 build_string ("CB"),
2815 build_string ("CBETA"),
2816 build_string ("CBETA private characters"),
2817 build_string ("cbeta-0"),
2818 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2819 staticpro (&Vcharset_ideograph_gt);
2820 Vcharset_ideograph_gt =
2821 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2822 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2823 build_string ("GT"),
2824 build_string ("GT"),
2825 build_string ("GT"),
2827 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2828 #define DEF_GT_PJ(n) \
2829 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2830 Vcharset_ideograph_gt_pj_##n = \
2831 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2832 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2833 build_string ("GT-PJ-"#n), \
2834 build_string ("GT (pseudo JIS encoding) part "#n), \
2835 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2837 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2851 staticpro (&Vcharset_ideograph_daikanwa);
2852 Vcharset_ideograph_daikanwa =
2853 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2854 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2855 build_string ("Daikanwa"),
2856 build_string ("Morohashi's Daikanwa"),
2857 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2858 build_string ("Daikanwa"),
2859 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2860 staticpro (&Vcharset_mojikyo);
2862 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2863 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2864 build_string ("Mojikyo"),
2865 build_string ("Mojikyo"),
2866 build_string ("Konjaku-Mojikyo"),
2868 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2869 staticpro (&Vcharset_mojikyo_2022_1);
2870 Vcharset_mojikyo_2022_1 =
2871 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2872 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2873 build_string ("Mojikyo-2022-1"),
2874 build_string ("Mojikyo ISO-2022 Part 1"),
2875 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2879 #define DEF_MOJIKYO_PJ(n) \
2880 staticpro (&Vcharset_mojikyo_pj_##n); \
2881 Vcharset_mojikyo_pj_##n = \
2882 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2883 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2884 build_string ("Mojikyo-PJ-"#n), \
2885 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2887 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2889 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2901 DEF_MOJIKYO_PJ (10);
2902 DEF_MOJIKYO_PJ (11);
2903 DEF_MOJIKYO_PJ (12);
2904 DEF_MOJIKYO_PJ (13);
2905 DEF_MOJIKYO_PJ (14);
2906 DEF_MOJIKYO_PJ (15);
2907 DEF_MOJIKYO_PJ (16);
2908 DEF_MOJIKYO_PJ (17);
2909 DEF_MOJIKYO_PJ (18);
2910 DEF_MOJIKYO_PJ (19);
2911 DEF_MOJIKYO_PJ (20);
2912 DEF_MOJIKYO_PJ (21);
2914 staticpro (&Vcharset_ethiopic_ucs);
2915 Vcharset_ethiopic_ucs =
2916 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2917 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2918 build_string ("Ethiopic (UCS)"),
2919 build_string ("Ethiopic (UCS)"),
2920 build_string ("Ethiopic of UCS"),
2921 build_string ("Ethiopic-Unicode"),
2922 Qnil, 0x1200, 0x137F, 0x1200, 0);
2924 staticpro (&Vcharset_chinese_big5_1);
2925 Vcharset_chinese_big5_1 =
2926 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2927 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2928 build_string ("Big5"),
2929 build_string ("Big5 (Level-1)"),
2931 ("Big5 Level-1 Chinese traditional"),
2932 build_string ("big5"),
2934 staticpro (&Vcharset_chinese_big5_2);
2935 Vcharset_chinese_big5_2 =
2936 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2937 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2938 build_string ("Big5"),
2939 build_string ("Big5 (Level-2)"),
2941 ("Big5 Level-2 Chinese traditional"),
2942 build_string ("big5"),
2945 #ifdef ENABLE_COMPOSITE_CHARS
2946 /* #### For simplicity, we put composite chars into a 96x96 charset.
2947 This is going to lead to problems because you can run out of
2948 room, esp. as we don't yet recycle numbers. */
2949 staticpro (&Vcharset_composite);
2950 Vcharset_composite =
2951 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2952 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2953 build_string ("Composite"),
2954 build_string ("Composite characters"),
2955 build_string ("Composite characters"),
2958 /* #### not dumped properly */
2959 composite_char_row_next = 32;
2960 composite_char_col_next = 32;
2962 Vcomposite_char_string2char_hash_table =
2963 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2964 Vcomposite_char_char2string_hash_table =
2965 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2966 staticpro (&Vcomposite_char_string2char_hash_table);
2967 staticpro (&Vcomposite_char_char2string_hash_table);
2968 #endif /* ENABLE_COMPOSITE_CHARS */