1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_ks;
72 Lisp_Object Vcharset_ucs_big5;
73 Lisp_Object Vcharset_latin_viscii;
74 Lisp_Object Vcharset_latin_tcvn5712;
75 Lisp_Object Vcharset_latin_viscii_lower;
76 Lisp_Object Vcharset_latin_viscii_upper;
77 Lisp_Object Vcharset_chinese_big5;
78 Lisp_Object Vcharset_chinese_big5_cdp;
79 Lisp_Object Vcharset_ideograph_hanziku_1;
80 Lisp_Object Vcharset_ideograph_hanziku_2;
81 Lisp_Object Vcharset_ideograph_hanziku_3;
82 Lisp_Object Vcharset_ideograph_hanziku_4;
83 Lisp_Object Vcharset_ideograph_hanziku_5;
84 Lisp_Object Vcharset_ideograph_hanziku_6;
85 Lisp_Object Vcharset_ideograph_hanziku_7;
86 Lisp_Object Vcharset_ideograph_hanziku_8;
87 Lisp_Object Vcharset_ideograph_hanziku_9;
88 Lisp_Object Vcharset_ideograph_hanziku_10;
89 Lisp_Object Vcharset_ideograph_hanziku_11;
90 Lisp_Object Vcharset_ideograph_hanziku_12;
91 Lisp_Object Vcharset_china3_jef;
92 Lisp_Object Vcharset_ideograph_cbeta;
93 Lisp_Object Vcharset_ideograph_gt;
94 Lisp_Object Vcharset_ideograph_gt_pj_1;
95 Lisp_Object Vcharset_ideograph_gt_pj_2;
96 Lisp_Object Vcharset_ideograph_gt_pj_3;
97 Lisp_Object Vcharset_ideograph_gt_pj_4;
98 Lisp_Object Vcharset_ideograph_gt_pj_5;
99 Lisp_Object Vcharset_ideograph_gt_pj_6;
100 Lisp_Object Vcharset_ideograph_gt_pj_7;
101 Lisp_Object Vcharset_ideograph_gt_pj_8;
102 Lisp_Object Vcharset_ideograph_gt_pj_9;
103 Lisp_Object Vcharset_ideograph_gt_pj_10;
104 Lisp_Object Vcharset_ideograph_gt_pj_11;
105 Lisp_Object Vcharset_ideograph_daikanwa;
106 Lisp_Object Vcharset_mojikyo;
107 Lisp_Object Vcharset_mojikyo_2022_1;
108 Lisp_Object Vcharset_mojikyo_pj_1;
109 Lisp_Object Vcharset_mojikyo_pj_2;
110 Lisp_Object Vcharset_mojikyo_pj_3;
111 Lisp_Object Vcharset_mojikyo_pj_4;
112 Lisp_Object Vcharset_mojikyo_pj_5;
113 Lisp_Object Vcharset_mojikyo_pj_6;
114 Lisp_Object Vcharset_mojikyo_pj_7;
115 Lisp_Object Vcharset_mojikyo_pj_8;
116 Lisp_Object Vcharset_mojikyo_pj_9;
117 Lisp_Object Vcharset_mojikyo_pj_10;
118 Lisp_Object Vcharset_mojikyo_pj_11;
119 Lisp_Object Vcharset_mojikyo_pj_12;
120 Lisp_Object Vcharset_mojikyo_pj_13;
121 Lisp_Object Vcharset_mojikyo_pj_14;
122 Lisp_Object Vcharset_mojikyo_pj_15;
123 Lisp_Object Vcharset_mojikyo_pj_16;
124 Lisp_Object Vcharset_mojikyo_pj_17;
125 Lisp_Object Vcharset_mojikyo_pj_18;
126 Lisp_Object Vcharset_mojikyo_pj_19;
127 Lisp_Object Vcharset_mojikyo_pj_20;
128 Lisp_Object Vcharset_mojikyo_pj_21;
129 Lisp_Object Vcharset_ethiopic_ucs;
131 Lisp_Object Vcharset_chinese_big5_1;
132 Lisp_Object Vcharset_chinese_big5_2;
134 #ifdef ENABLE_COMPOSITE_CHARS
135 Lisp_Object Vcharset_composite;
137 /* Hash tables for composite chars. One maps string representing
138 composed chars to their equivalent chars; one goes the
140 Lisp_Object Vcomposite_char_char2string_hash_table;
141 Lisp_Object Vcomposite_char_string2char_hash_table;
143 static int composite_char_row_next;
144 static int composite_char_col_next;
146 #endif /* ENABLE_COMPOSITE_CHARS */
148 struct charset_lookup *chlook;
150 static const struct lrecord_description charset_lookup_description_1[] = {
151 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
160 static const struct struct_description charset_lookup_description = {
161 sizeof (struct charset_lookup),
162 charset_lookup_description_1
166 /* Table of number of bytes in the string representation of a character
167 indexed by the first byte of that representation.
169 rep_bytes_by_first_byte(c) is more efficient than the equivalent
170 canonical computation:
172 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
174 const Bytecount rep_bytes_by_first_byte[0xA0] =
175 { /* 0x00 - 0x7f are for straight ASCII */
176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 /* 0x80 - 0x8f are for Dimension-1 official charsets */
186 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
188 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 /* 0x90 - 0x9d are for Dimension-2 official charsets */
191 /* 0x9e is for Dimension-1 private charsets */
192 /* 0x9f is for Dimension-2 private charsets */
193 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
199 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
201 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
203 /* ad-hoc method for `ascii' */
204 if ((CHARSET_CHARS (cs) == 94) &&
205 (CHARSET_BYTE_OFFSET (cs) != 33))
206 return 128 - CHARSET_BYTE_OFFSET (cs);
208 return CHARSET_CHARS (cs);
211 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
213 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
215 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
219 if (XVECTOR_LENGTH (v) > ccs_len)
222 for (i = 0; i < XVECTOR_LENGTH (v); i++)
224 Lisp_Object c = XVECTOR_DATA(v)[i];
226 if (!NILP (c) && !CHARP (c))
230 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
242 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
245 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
255 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
256 nv = XVECTOR_DATA(v)[i];
262 XVECTOR_DATA(v)[i] = Qnil;
266 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
267 int code_point, Lisp_Object character);
269 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
270 int code_point, Lisp_Object character)
274 int ccs_len = XVECTOR_LENGTH (v);
279 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
280 nv = XVECTOR_DATA(v)[i];
284 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
290 XVECTOR_DATA(v)[i] = character;
294 put_char_ccs_code_point (Lisp_Object character,
295 Lisp_Object ccs, Lisp_Object value)
297 if (!EQ (XCHARSET_NAME (ccs), Qucs)
299 || (XCHAR (character) != XINT (value)))
301 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
302 int dim = XCHARSET_DIMENSION (ccs);
303 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
304 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
308 { /* obsolete representation: value must be a list of bytes */
309 Lisp_Object ret = Fcar (value);
313 signal_simple_error ("Invalid value for coded-charset", value);
314 code_point = XINT (ret);
315 if (XCHARSET_GRAPHIC (ccs) == 1)
323 signal_simple_error ("Invalid value for coded-charset",
327 signal_simple_error ("Invalid value for coded-charset",
330 if (XCHARSET_GRAPHIC (ccs) == 1)
332 code_point = (code_point << 8) | j;
335 value = make_int (code_point);
337 else if (INTP (value))
339 code_point = XINT (value);
340 if (XCHARSET_GRAPHIC (ccs) == 1)
342 code_point &= 0x7F7F7F7F;
343 value = make_int (code_point);
347 signal_simple_error ("Invalid value for coded-charset", value);
351 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
354 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
359 XCHARSET_DECODING_TABLE (ccs)
360 = v = make_older_vector (ccs_len, Qnil);
363 decoding_table_put_char (v, dim, byte_offset, code_point, character);
369 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
371 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
372 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
374 if (VECTORP (decoding_table))
376 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
380 decoding_table_remove_char (decoding_table,
381 XCHARSET_DIMENSION (ccs),
382 XCHARSET_BYTE_OFFSET (ccs),
386 if (CHAR_TABLEP (encoding_table))
388 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
396 int leading_code_private_11;
399 Lisp_Object Qcharsetp;
401 /* Qdoc_string, Qdimension, Qchars defined in general.c */
402 Lisp_Object Qregistry, Qfinal, Qgraphic;
403 Lisp_Object Qdirection;
404 Lisp_Object Qreverse_direction_charset;
405 Lisp_Object Qleading_byte;
406 Lisp_Object Qshort_name, Qlong_name;
422 Qjapanese_jisx0208_1978,
426 Qjapanese_jisx0208_1990,
442 Qvietnamese_viscii_lower,
443 Qvietnamese_viscii_upper,
446 Qideograph_hanziku_1,
447 Qideograph_hanziku_2,
448 Qideograph_hanziku_3,
449 Qideograph_hanziku_4,
450 Qideograph_hanziku_5,
451 Qideograph_hanziku_6,
452 Qideograph_hanziku_7,
453 Qideograph_hanziku_8,
454 Qideograph_hanziku_9,
455 Qideograph_hanziku_10,
456 Qideograph_hanziku_11,
457 Qideograph_hanziku_12,
502 Lisp_Object Ql2r, Qr2l;
504 Lisp_Object Vcharset_hash_table;
506 /* Composite characters are characters constructed by overstriking two
507 or more regular characters.
509 1) The old Mule implementation involves storing composite characters
510 in a buffer as a tag followed by all of the actual characters
511 used to make up the composite character. I think this is a bad
512 idea; it greatly complicates code that wants to handle strings
513 one character at a time because it has to deal with the possibility
514 of great big ungainly characters. It's much more reasonable to
515 simply store an index into a table of composite characters.
517 2) The current implementation only allows for 16,384 separate
518 composite characters over the lifetime of the XEmacs process.
519 This could become a potential problem if the user
520 edited lots of different files that use composite characters.
521 Due to FSF bogosity, increasing the number of allowable
522 composite characters under Mule would decrease the number
523 of possible faces that can exist. Mule already has shrunk
524 this to 2048, and further shrinkage would become uncomfortable.
525 No such problems exist in XEmacs.
527 Composite characters could be represented as 0x80 C1 C2 C3,
528 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
529 for slightly under 2^20 (one million) composite characters
530 over the XEmacs process lifetime, and you only need to
531 increase the size of a Mule character from 19 to 21 bits.
532 Or you could use 0x80 C1 C2 C3 C4, allowing for about
533 85 million (slightly over 2^26) composite characters. */
536 /************************************************************************/
537 /* Basic Emchar functions */
538 /************************************************************************/
540 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
541 string in STR. Returns the number of bytes stored.
542 Do not call this directly. Use the macro set_charptr_emchar() instead.
546 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
561 else if ( c <= 0x7ff )
563 *p++ = (c >> 6) | 0xc0;
564 *p++ = (c & 0x3f) | 0x80;
566 else if ( c <= 0xffff )
568 *p++ = (c >> 12) | 0xe0;
569 *p++ = ((c >> 6) & 0x3f) | 0x80;
570 *p++ = (c & 0x3f) | 0x80;
572 else if ( c <= 0x1fffff )
574 *p++ = (c >> 18) | 0xf0;
575 *p++ = ((c >> 12) & 0x3f) | 0x80;
576 *p++ = ((c >> 6) & 0x3f) | 0x80;
577 *p++ = (c & 0x3f) | 0x80;
579 else if ( c <= 0x3ffffff )
581 *p++ = (c >> 24) | 0xf8;
582 *p++ = ((c >> 18) & 0x3f) | 0x80;
583 *p++ = ((c >> 12) & 0x3f) | 0x80;
584 *p++ = ((c >> 6) & 0x3f) | 0x80;
585 *p++ = (c & 0x3f) | 0x80;
589 *p++ = (c >> 30) | 0xfc;
590 *p++ = ((c >> 24) & 0x3f) | 0x80;
591 *p++ = ((c >> 18) & 0x3f) | 0x80;
592 *p++ = ((c >> 12) & 0x3f) | 0x80;
593 *p++ = ((c >> 6) & 0x3f) | 0x80;
594 *p++ = (c & 0x3f) | 0x80;
597 BREAKUP_CHAR (c, charset, c1, c2);
598 lb = CHAR_LEADING_BYTE (c);
599 if (LEADING_BYTE_PRIVATE_P (lb))
600 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
602 if (EQ (charset, Vcharset_control_1))
611 /* Return the first character from a Mule-encoded string in STR,
612 assuming it's non-ASCII. Do not call this directly.
613 Use the macro charptr_emchar() instead. */
616 non_ascii_charptr_emchar (const Bufbyte *str)
629 else if ( b >= 0xf8 )
634 else if ( b >= 0xf0 )
639 else if ( b >= 0xe0 )
644 else if ( b >= 0xc0 )
654 for( ; len > 0; len-- )
657 ch = ( ch << 6 ) | ( b & 0x3f );
661 Bufbyte i0 = *str, i1, i2 = 0;
664 if (i0 == LEADING_BYTE_CONTROL_1)
665 return (Emchar) (*++str - 0x20);
667 if (LEADING_BYTE_PREFIX_P (i0))
672 charset = CHARSET_BY_LEADING_BYTE (i0);
673 if (XCHARSET_DIMENSION (charset) == 2)
676 return MAKE_CHAR (charset, i1, i2);
680 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
681 Do not call this directly. Use the macro valid_char_p() instead. */
685 non_ascii_valid_char_p (Emchar ch)
689 /* Must have only lowest 19 bits set */
693 f1 = CHAR_FIELD1 (ch);
694 f2 = CHAR_FIELD2 (ch);
695 f3 = CHAR_FIELD3 (ch);
701 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
702 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
703 f2 > MAX_CHAR_FIELD2_PRIVATE)
708 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
709 f2 <= MAX_CHAR_FIELD2_PRIVATE))
713 NOTE: This takes advantage of the fact that
714 FIELD2_TO_OFFICIAL_LEADING_BYTE and
715 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
717 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
718 if (EQ (charset, Qnil))
720 return (XCHARSET_CHARS (charset) == 96);
726 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
727 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
728 f1 > MAX_CHAR_FIELD1_PRIVATE)
730 if (f2 < 0x20 || f3 < 0x20)
733 #ifdef ENABLE_COMPOSITE_CHARS
734 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
736 if (UNBOUNDP (Fgethash (make_int (ch),
737 Vcomposite_char_char2string_hash_table,
742 #endif /* ENABLE_COMPOSITE_CHARS */
744 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
745 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
748 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
750 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
753 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
755 if (EQ (charset, Qnil))
757 return (XCHARSET_CHARS (charset) == 96);
763 /************************************************************************/
764 /* Basic string functions */
765 /************************************************************************/
767 /* Copy the character pointed to by SRC into DST. Do not call this
768 directly. Use the macro charptr_copy_char() instead.
769 Return the number of bytes copied. */
772 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
774 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
776 for (i = bytes; i; i--, dst++, src++)
782 /************************************************************************/
783 /* streams of Emchars */
784 /************************************************************************/
786 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
787 The functions below are not meant to be called directly; use
788 the macros in insdel.h. */
791 Lstream_get_emchar_1 (Lstream *stream, int ch)
793 Bufbyte str[MAX_EMCHAR_LEN];
794 Bufbyte *strptr = str;
797 str[0] = (Bufbyte) ch;
799 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
801 int c = Lstream_getc (stream);
802 bufpos_checking_assert (c >= 0);
803 *++strptr = (Bufbyte) c;
805 return charptr_emchar (str);
809 Lstream_fput_emchar (Lstream *stream, Emchar ch)
811 Bufbyte str[MAX_EMCHAR_LEN];
812 Bytecount len = set_charptr_emchar (str, ch);
813 return Lstream_write (stream, str, len);
817 Lstream_funget_emchar (Lstream *stream, Emchar ch)
819 Bufbyte str[MAX_EMCHAR_LEN];
820 Bytecount len = set_charptr_emchar (str, ch);
821 Lstream_unread (stream, str, len);
825 /************************************************************************/
827 /************************************************************************/
830 mark_charset (Lisp_Object obj)
832 Lisp_Charset *cs = XCHARSET (obj);
834 mark_object (cs->short_name);
835 mark_object (cs->long_name);
836 mark_object (cs->doc_string);
837 mark_object (cs->registry);
838 mark_object (cs->ccl_program);
840 /* mark_object (cs->decoding_table); */
846 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
848 Lisp_Charset *cs = XCHARSET (obj);
852 error ("printing unreadable object #<charset %s 0x%x>",
853 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
856 write_c_string ("#<charset ", printcharfun);
857 print_internal (CHARSET_NAME (cs), printcharfun, 0);
858 write_c_string (" ", printcharfun);
859 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
860 write_c_string (" ", printcharfun);
861 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
862 write_c_string (" ", printcharfun);
863 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
864 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
866 CHARSET_DIMENSION (cs),
867 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
868 CHARSET_COLUMNS (cs),
869 CHARSET_GRAPHIC (cs),
871 write_c_string (buf, printcharfun);
872 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
873 sprintf (buf, " 0x%x>", cs->header.uid);
874 write_c_string (buf, printcharfun);
877 static const struct lrecord_description charset_description[] = {
878 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
879 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
880 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
881 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
882 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
883 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
884 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
891 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
892 mark_charset, print_charset, 0, 0, 0,
896 /* Make a new charset. */
897 /* #### SJT Should generic properties be allowed? */
899 make_charset (Charset_ID id, Lisp_Object name,
900 unsigned short chars, unsigned char dimension,
901 unsigned char columns, unsigned char graphic,
902 Bufbyte final, unsigned char direction, Lisp_Object short_name,
903 Lisp_Object long_name, Lisp_Object doc,
905 Lisp_Object decoding_table,
906 Emchar ucs_min, Emchar ucs_max,
907 Emchar code_offset, unsigned char byte_offset)
910 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
914 XSETCHARSET (obj, cs);
916 CHARSET_ID (cs) = id;
917 CHARSET_NAME (cs) = name;
918 CHARSET_SHORT_NAME (cs) = short_name;
919 CHARSET_LONG_NAME (cs) = long_name;
920 CHARSET_CHARS (cs) = chars;
921 CHARSET_DIMENSION (cs) = dimension;
922 CHARSET_DIRECTION (cs) = direction;
923 CHARSET_COLUMNS (cs) = columns;
924 CHARSET_GRAPHIC (cs) = graphic;
925 CHARSET_FINAL (cs) = final;
926 CHARSET_DOC_STRING (cs) = doc;
927 CHARSET_REGISTRY (cs) = reg;
928 CHARSET_CCL_PROGRAM (cs) = Qnil;
929 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
931 CHARSET_DECODING_TABLE(cs) = Qnil;
932 CHARSET_UCS_MIN(cs) = ucs_min;
933 CHARSET_UCS_MAX(cs) = ucs_max;
934 CHARSET_CODE_OFFSET(cs) = code_offset;
935 CHARSET_BYTE_OFFSET(cs) = byte_offset;
939 if (id == LEADING_BYTE_ASCII)
940 CHARSET_REP_BYTES (cs) = 1;
942 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
944 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
949 /* some charsets do not have final characters. This includes
950 ASCII, Control-1, Composite, and the two faux private
952 unsigned char iso2022_type
953 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
955 if (code_offset == 0)
957 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
958 chlook->charset_by_attributes[iso2022_type][final] = obj;
962 (chlook->charset_by_attributes[iso2022_type][final][direction]));
963 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
967 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
968 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
970 /* Some charsets are "faux" and don't have names or really exist at
971 all except in the leading-byte table. */
973 Fputhash (name, obj, Vcharset_hash_table);
978 get_unallocated_leading_byte (int dimension)
983 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
986 lb = chlook->next_allocated_leading_byte++;
990 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
993 lb = chlook->next_allocated_1_byte_leading_byte++;
997 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1000 lb = chlook->next_allocated_2_byte_leading_byte++;
1006 ("No more character sets free for this dimension",
1007 make_int (dimension));
1013 /* Number of Big5 characters which have the same code in 1st byte. */
1015 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1018 decode_builtin_char (Lisp_Object charset, int code_point)
1022 if (EQ (charset, Vcharset_chinese_big5))
1024 int c1 = code_point >> 8;
1025 int c2 = code_point & 0xFF;
1028 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1030 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1031 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1033 I = (c1 - 0xA1) * BIG5_SAME_ROW
1034 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1038 charset = Vcharset_chinese_big5_1;
1042 charset = Vcharset_chinese_big5_2;
1043 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1045 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1048 if ((final = XCHARSET_FINAL (charset)) >= '0')
1050 if (XCHARSET_DIMENSION (charset) == 1)
1052 switch (XCHARSET_CHARS (charset))
1056 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1059 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1067 switch (XCHARSET_CHARS (charset))
1070 return MIN_CHAR_94x94
1071 + (final - '0') * 94 * 94
1072 + (((code_point >> 8) & 0x7F) - 33) * 94
1073 + ((code_point & 0x7F) - 33);
1075 return MIN_CHAR_96x96
1076 + (final - '0') * 96 * 96
1077 + (((code_point >> 8) & 0x7F) - 32) * 96
1078 + ((code_point & 0x7F) - 32);
1085 else if (XCHARSET_UCS_MAX (charset))
1088 = (XCHARSET_DIMENSION (charset) == 1
1090 code_point - XCHARSET_BYTE_OFFSET (charset)
1092 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1093 * XCHARSET_CHARS (charset)
1094 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1095 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1096 if ((cid < XCHARSET_UCS_MIN (charset))
1097 || (XCHARSET_UCS_MAX (charset) < cid))
1106 range_charset_code_point (Lisp_Object charset, Emchar ch)
1110 if ((XCHARSET_UCS_MIN (charset) <= ch)
1111 && (ch <= XCHARSET_UCS_MAX (charset)))
1113 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1115 if (XCHARSET_CHARS (charset) == 256)
1117 else if (XCHARSET_DIMENSION (charset) == 1)
1118 return d + XCHARSET_BYTE_OFFSET (charset);
1119 else if (XCHARSET_DIMENSION (charset) == 2)
1121 ((d / XCHARSET_CHARS (charset)
1122 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1123 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1124 else if (XCHARSET_DIMENSION (charset) == 3)
1126 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1127 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1128 | ((d / XCHARSET_CHARS (charset)
1129 % XCHARSET_CHARS (charset)
1130 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1131 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1132 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1134 ((d / (XCHARSET_CHARS (charset)
1135 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1136 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1137 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1138 % XCHARSET_CHARS (charset)
1139 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1140 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1141 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1142 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1144 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1146 if (XCHARSET_DIMENSION (charset) == 1)
1148 if (XCHARSET_CHARS (charset) == 94)
1150 if (((d = ch - (MIN_CHAR_94
1151 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1155 else if (XCHARSET_CHARS (charset) == 96)
1157 if (((d = ch - (MIN_CHAR_96
1158 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1165 else if (XCHARSET_DIMENSION (charset) == 2)
1167 if (XCHARSET_CHARS (charset) == 94)
1169 if (((d = ch - (MIN_CHAR_94x94
1170 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1173 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1175 else if (XCHARSET_CHARS (charset) == 96)
1177 if (((d = ch - (MIN_CHAR_96x96
1178 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1181 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1187 if (EQ (charset, Vcharset_mojikyo_2022_1)
1188 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1190 int m = ch - MIN_CHAR_MOJIKYO - 1;
1191 int byte1 = m / (94 * 60) + 33;
1192 int byte2 = (m % (94 * 60)) / 94;
1193 int byte3 = m % 94 + 33;
1199 return (byte1 << 16) | (byte2 << 8) | byte3;
1205 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1207 if (c <= MAX_CHAR_BASIC_LATIN)
1209 *charset = Vcharset_ascii;
1214 *charset = Vcharset_control_1;
1219 *charset = Vcharset_latin_iso8859_1;
1223 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1225 *charset = Vcharset_hebrew_iso8859_8;
1226 return c - MIN_CHAR_HEBREW + 0x20;
1229 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1231 *charset = Vcharset_thai_tis620;
1232 return c - MIN_CHAR_THAI + 0x20;
1235 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1236 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1238 return list2 (Vcharset_katakana_jisx0201,
1239 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1242 else if (c <= MAX_CHAR_BMP)
1244 *charset = Vcharset_ucs_bmp;
1247 else if (c < MIN_CHAR_DAIKANWA)
1249 *charset = Vcharset_ucs;
1252 else if (c <= MAX_CHAR_DAIKANWA)
1254 *charset = Vcharset_ideograph_daikanwa;
1255 return c - MIN_CHAR_DAIKANWA;
1258 else if (c <= MAX_CHAR_MOJIKYO_0)
1260 *charset = Vcharset_mojikyo;
1261 return c - MIN_CHAR_MOJIKYO_0;
1264 else if (c < MIN_CHAR_94)
1266 *charset = Vcharset_ucs;
1269 else if (c <= MAX_CHAR_94)
1271 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1272 ((c - MIN_CHAR_94) / 94) + '0',
1273 CHARSET_LEFT_TO_RIGHT);
1274 if (!NILP (*charset))
1275 return ((c - MIN_CHAR_94) % 94) + 33;
1278 *charset = Vcharset_ucs;
1282 else if (c <= MAX_CHAR_96)
1284 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1285 ((c - MIN_CHAR_96) / 96) + '0',
1286 CHARSET_LEFT_TO_RIGHT);
1287 if (!NILP (*charset))
1288 return ((c - MIN_CHAR_96) % 96) + 32;
1291 *charset = Vcharset_ucs;
1295 else if (c <= MAX_CHAR_94x94)
1298 = CHARSET_BY_ATTRIBUTES (94, 2,
1299 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1303 | (((c - MIN_CHAR_94x94) % 94) + 33);
1306 *charset = Vcharset_ucs;
1310 else if (c <= MAX_CHAR_96x96)
1313 = CHARSET_BY_ATTRIBUTES (96, 2,
1314 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1315 CHARSET_LEFT_TO_RIGHT);
1316 if (!NILP (*charset))
1317 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1318 | (((c - MIN_CHAR_96x96) % 96) + 32);
1321 *charset = Vcharset_ucs;
1326 else if (c < MIN_CHAR_MOJIKYO)
1328 *charset = Vcharset_ucs;
1331 else if (c <= MAX_CHAR_MOJIKYO)
1333 *charset = Vcharset_mojikyo;
1334 return c - MIN_CHAR_MOJIKYO;
1336 else if (c < MIN_CHAR_CHINA3_JEF)
1338 *charset = Vcharset_ucs;
1341 else if (c <= MAX_CHAR_CHINA3_JEF)
1343 *charset = Vcharset_china3_jef;
1344 return c - MIN_CHAR_CHINA3_JEF;
1346 else if (c <= MAX_CHAR_CBETA)
1348 *charset = Vcharset_ideograph_cbeta;
1349 return c - MIN_CHAR_CBETA;
1354 *charset = Vcharset_ucs;
1359 Lisp_Object Vdefault_coded_charset_priority_list;
1363 /************************************************************************/
1364 /* Basic charset Lisp functions */
1365 /************************************************************************/
1367 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1368 Return non-nil if OBJECT is a charset.
1372 return CHARSETP (object) ? Qt : Qnil;
1375 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1376 Retrieve the charset of the given name.
1377 If CHARSET-OR-NAME is a charset object, it is simply returned.
1378 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1379 nil is returned. Otherwise the associated charset object is returned.
1383 if (CHARSETP (charset_or_name))
1384 return charset_or_name;
1386 CHECK_SYMBOL (charset_or_name);
1387 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1390 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1391 Retrieve the charset of the given name.
1392 Same as `find-charset' except an error is signalled if there is no such
1393 charset instead of returning nil.
1397 Lisp_Object charset = Ffind_charset (name);
1400 signal_simple_error ("No such charset", name);
1404 /* We store the charsets in hash tables with the names as the key and the
1405 actual charset object as the value. Occasionally we need to use them
1406 in a list format. These routines provide us with that. */
1407 struct charset_list_closure
1409 Lisp_Object *charset_list;
1413 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1414 void *charset_list_closure)
1416 /* This function can GC */
1417 struct charset_list_closure *chcl =
1418 (struct charset_list_closure*) charset_list_closure;
1419 Lisp_Object *charset_list = chcl->charset_list;
1421 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1425 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1426 Return a list of the names of all defined charsets.
1430 Lisp_Object charset_list = Qnil;
1431 struct gcpro gcpro1;
1432 struct charset_list_closure charset_list_closure;
1434 GCPRO1 (charset_list);
1435 charset_list_closure.charset_list = &charset_list;
1436 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1437 &charset_list_closure);
1440 return charset_list;
1443 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1444 Return the name of charset CHARSET.
1448 return XCHARSET_NAME (Fget_charset (charset));
1451 /* #### SJT Should generic properties be allowed? */
1452 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1453 Define a new character set.
1454 This function is for use with Mule support.
1455 NAME is a symbol, the name by which the character set is normally referred.
1456 DOC-STRING is a string describing the character set.
1457 PROPS is a property list, describing the specific nature of the
1458 character set. Recognized properties are:
1460 'short-name Short version of the charset name (ex: Latin-1)
1461 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1462 'registry A regular expression matching the font registry field for
1464 'dimension Number of octets used to index a character in this charset.
1465 Either 1 or 2. Defaults to 1.
1466 'columns Number of columns used to display a character in this charset.
1467 Only used in TTY mode. (Under X, the actual width of a
1468 character can be derived from the font used to display the
1469 characters.) If unspecified, defaults to the dimension
1470 (this is almost always the correct value).
1471 'chars Number of characters in each dimension (94 or 96).
1472 Defaults to 94. Note that if the dimension is 2, the
1473 character set thus described is 94x94 or 96x96.
1474 'final Final byte of ISO 2022 escape sequence. Must be
1475 supplied. Each combination of (DIMENSION, CHARS) defines a
1476 separate namespace for final bytes. Note that ISO
1477 2022 restricts the final byte to the range
1478 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1479 dimension == 2. Note also that final bytes in the range
1480 0x30 - 0x3F are reserved for user-defined (not official)
1482 'graphic 0 (use left half of font on output) or 1 (use right half
1483 of font on output). Defaults to 0. For example, for
1484 a font whose registry is ISO8859-1, the left half
1485 (octets 0x20 - 0x7F) is the `ascii' character set, while
1486 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1487 character set. With 'graphic set to 0, the octets
1488 will have their high bit cleared; with it set to 1,
1489 the octets will have their high bit set.
1490 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1492 'ccl-program A compiled CCL program used to convert a character in
1493 this charset into an index into the font. This is in
1494 addition to the 'graphic property. The CCL program
1495 is passed the octets of the character, with the high
1496 bit cleared and set depending upon whether the value
1497 of the 'graphic property is 0 or 1.
1499 (name, doc_string, props))
1501 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1502 int direction = CHARSET_LEFT_TO_RIGHT;
1503 Lisp_Object registry = Qnil;
1504 Lisp_Object charset;
1505 Lisp_Object ccl_program = Qnil;
1506 Lisp_Object short_name = Qnil, long_name = Qnil;
1507 int byte_offset = -1;
1509 CHECK_SYMBOL (name);
1510 if (!NILP (doc_string))
1511 CHECK_STRING (doc_string);
1513 charset = Ffind_charset (name);
1514 if (!NILP (charset))
1515 signal_simple_error ("Cannot redefine existing charset", name);
1518 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1520 if (EQ (keyword, Qshort_name))
1522 CHECK_STRING (value);
1526 if (EQ (keyword, Qlong_name))
1528 CHECK_STRING (value);
1532 else if (EQ (keyword, Qdimension))
1535 dimension = XINT (value);
1536 if (dimension < 1 || dimension > 2)
1537 signal_simple_error ("Invalid value for 'dimension", value);
1540 else if (EQ (keyword, Qchars))
1543 chars = XINT (value);
1544 if (chars != 94 && chars != 96)
1545 signal_simple_error ("Invalid value for 'chars", value);
1548 else if (EQ (keyword, Qcolumns))
1551 columns = XINT (value);
1552 if (columns != 1 && columns != 2)
1553 signal_simple_error ("Invalid value for 'columns", value);
1556 else if (EQ (keyword, Qgraphic))
1559 graphic = XINT (value);
1561 if (graphic < 0 || graphic > 2)
1563 if (graphic < 0 || graphic > 1)
1565 signal_simple_error ("Invalid value for 'graphic", value);
1568 else if (EQ (keyword, Qregistry))
1570 CHECK_STRING (value);
1574 else if (EQ (keyword, Qdirection))
1576 if (EQ (value, Ql2r))
1577 direction = CHARSET_LEFT_TO_RIGHT;
1578 else if (EQ (value, Qr2l))
1579 direction = CHARSET_RIGHT_TO_LEFT;
1581 signal_simple_error ("Invalid value for 'direction", value);
1584 else if (EQ (keyword, Qfinal))
1586 CHECK_CHAR_COERCE_INT (value);
1587 final = XCHAR (value);
1588 if (final < '0' || final > '~')
1589 signal_simple_error ("Invalid value for 'final", value);
1592 else if (EQ (keyword, Qccl_program))
1594 struct ccl_program test_ccl;
1596 if (setup_ccl_program (&test_ccl, value) < 0)
1597 signal_simple_error ("Invalid value for 'ccl-program", value);
1598 ccl_program = value;
1602 signal_simple_error ("Unrecognized property", keyword);
1607 error ("'final must be specified");
1608 if (dimension == 2 && final > 0x5F)
1610 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1613 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1614 CHARSET_LEFT_TO_RIGHT)) ||
1615 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1616 CHARSET_RIGHT_TO_LEFT)))
1618 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1620 id = get_unallocated_leading_byte (dimension);
1622 if (NILP (doc_string))
1623 doc_string = build_string ("");
1625 if (NILP (registry))
1626 registry = build_string ("");
1628 if (NILP (short_name))
1629 XSETSTRING (short_name, XSYMBOL (name)->name);
1631 if (NILP (long_name))
1632 long_name = doc_string;
1635 columns = dimension;
1637 if (byte_offset < 0)
1641 else if (chars == 96)
1647 charset = make_charset (id, name, chars, dimension, columns, graphic,
1648 final, direction, short_name, long_name,
1649 doc_string, registry,
1650 Qnil, 0, 0, 0, byte_offset);
1651 if (!NILP (ccl_program))
1652 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1656 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1658 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1659 NEW-NAME is the name of the new charset. Return the new charset.
1661 (charset, new_name))
1663 Lisp_Object new_charset = Qnil;
1664 int id, chars, dimension, columns, graphic, final;
1666 Lisp_Object registry, doc_string, short_name, long_name;
1669 charset = Fget_charset (charset);
1670 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1671 signal_simple_error ("Charset already has reverse-direction charset",
1674 CHECK_SYMBOL (new_name);
1675 if (!NILP (Ffind_charset (new_name)))
1676 signal_simple_error ("Cannot redefine existing charset", new_name);
1678 cs = XCHARSET (charset);
1680 chars = CHARSET_CHARS (cs);
1681 dimension = CHARSET_DIMENSION (cs);
1682 columns = CHARSET_COLUMNS (cs);
1683 id = get_unallocated_leading_byte (dimension);
1685 graphic = CHARSET_GRAPHIC (cs);
1686 final = CHARSET_FINAL (cs);
1687 direction = CHARSET_RIGHT_TO_LEFT;
1688 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1689 direction = CHARSET_LEFT_TO_RIGHT;
1690 doc_string = CHARSET_DOC_STRING (cs);
1691 short_name = CHARSET_SHORT_NAME (cs);
1692 long_name = CHARSET_LONG_NAME (cs);
1693 registry = CHARSET_REGISTRY (cs);
1695 new_charset = make_charset (id, new_name, chars, dimension, columns,
1696 graphic, final, direction, short_name, long_name,
1697 doc_string, registry,
1699 CHARSET_DECODING_TABLE(cs),
1700 CHARSET_UCS_MIN(cs),
1701 CHARSET_UCS_MAX(cs),
1702 CHARSET_CODE_OFFSET(cs),
1703 CHARSET_BYTE_OFFSET(cs)
1709 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1710 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1715 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1716 Define symbol ALIAS as an alias for CHARSET.
1720 CHECK_SYMBOL (alias);
1721 charset = Fget_charset (charset);
1722 return Fputhash (alias, charset, Vcharset_hash_table);
1725 /* #### Reverse direction charsets not yet implemented. */
1727 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1729 Return the reverse-direction charset parallel to CHARSET, if any.
1730 This is the charset with the same properties (in particular, the same
1731 dimension, number of characters per dimension, and final byte) as
1732 CHARSET but whose characters are displayed in the opposite direction.
1736 charset = Fget_charset (charset);
1737 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1741 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1742 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1743 If DIRECTION is omitted, both directions will be checked (left-to-right
1744 will be returned if character sets exist for both directions).
1746 (dimension, chars, final, direction))
1748 int dm, ch, fi, di = -1;
1749 Lisp_Object obj = Qnil;
1751 CHECK_INT (dimension);
1752 dm = XINT (dimension);
1753 if (dm < 1 || dm > 2)
1754 signal_simple_error ("Invalid value for DIMENSION", dimension);
1758 if (ch != 94 && ch != 96)
1759 signal_simple_error ("Invalid value for CHARS", chars);
1761 CHECK_CHAR_COERCE_INT (final);
1763 if (fi < '0' || fi > '~')
1764 signal_simple_error ("Invalid value for FINAL", final);
1766 if (EQ (direction, Ql2r))
1767 di = CHARSET_LEFT_TO_RIGHT;
1768 else if (EQ (direction, Qr2l))
1769 di = CHARSET_RIGHT_TO_LEFT;
1770 else if (!NILP (direction))
1771 signal_simple_error ("Invalid value for DIRECTION", direction);
1773 if (dm == 2 && fi > 0x5F)
1775 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1779 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1781 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1784 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1787 return XCHARSET_NAME (obj);
1791 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1792 Return short name of CHARSET.
1796 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1799 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1800 Return long name of CHARSET.
1804 return XCHARSET_LONG_NAME (Fget_charset (charset));
1807 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1808 Return description of CHARSET.
1812 return XCHARSET_DOC_STRING (Fget_charset (charset));
1815 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1816 Return dimension of CHARSET.
1820 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1823 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1824 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1825 Recognized properties are those listed in `make-charset', as well as
1826 'name and 'doc-string.
1832 charset = Fget_charset (charset);
1833 cs = XCHARSET (charset);
1835 CHECK_SYMBOL (prop);
1836 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1837 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1838 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1839 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1840 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1841 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1842 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1843 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1844 Qnil : make_char (CHARSET_FINAL (cs));
1845 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1846 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1847 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1848 if (EQ (prop, Qdirection))
1849 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1850 if (EQ (prop, Qreverse_direction_charset))
1852 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1853 /* #### Is this translation OK? If so, error checking sufficient? */
1854 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1856 signal_simple_error ("Unrecognized charset property name", prop);
1857 return Qnil; /* not reached */
1860 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1861 Return charset identification number of CHARSET.
1865 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1868 /* #### We need to figure out which properties we really want to
1871 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1872 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1874 (charset, ccl_program))
1876 struct ccl_program test_ccl;
1878 charset = Fget_charset (charset);
1879 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1880 signal_simple_error ("Invalid ccl-program", ccl_program);
1881 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1886 invalidate_charset_font_caches (Lisp_Object charset)
1888 /* Invalidate font cache entries for charset on all devices. */
1889 Lisp_Object devcons, concons, hash_table;
1890 DEVICE_LOOP_NO_BREAK (devcons, concons)
1892 struct device *d = XDEVICE (XCAR (devcons));
1893 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1894 if (!UNBOUNDP (hash_table))
1895 Fclrhash (hash_table);
1899 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1900 Set the 'registry property of CHARSET to REGISTRY.
1902 (charset, registry))
1904 charset = Fget_charset (charset);
1905 CHECK_STRING (registry);
1906 XCHARSET_REGISTRY (charset) = registry;
1907 invalidate_charset_font_caches (charset);
1908 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1913 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1914 Return mapping-table of CHARSET.
1918 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1921 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1922 Set mapping-table of CHARSET to TABLE.
1926 struct Lisp_Charset *cs;
1930 charset = Fget_charset (charset);
1931 cs = XCHARSET (charset);
1935 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1936 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1937 CHARSET_DECODING_TABLE(cs) = Qnil;
1940 else if (VECTORP (table))
1942 int ccs_len = CHARSET_BYTE_SIZE (cs);
1943 int ret = decoding_table_check_elements (table,
1944 CHARSET_DIMENSION (cs),
1949 signal_simple_error ("Too big table", table);
1951 signal_simple_error ("Invalid element is found", table);
1953 signal_simple_error ("Something wrong", table);
1955 CHARSET_DECODING_TABLE(cs) = Qnil;
1958 signal_error (Qwrong_type_argument,
1959 list2 (build_translated_string ("vector-or-nil-p"),
1962 byte_offset = CHARSET_BYTE_OFFSET (cs);
1963 switch (CHARSET_DIMENSION (cs))
1966 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1968 Lisp_Object c = XVECTOR_DATA(table)[i];
1971 Fput_char_attribute (c, XCHARSET_NAME (charset),
1972 make_int (i + byte_offset));
1976 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1978 Lisp_Object v = XVECTOR_DATA(table)[i];
1984 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1986 Lisp_Object c = XVECTOR_DATA(v)[j];
1990 (c, XCHARSET_NAME (charset),
1991 make_int ( ( (i + byte_offset) << 8 )
1997 Fput_char_attribute (v, XCHARSET_NAME (charset),
1998 make_int (i + byte_offset));
2007 /************************************************************************/
2008 /* Lisp primitives for working with characters */
2009 /************************************************************************/
2012 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2013 Make a character from CHARSET and code-point CODE.
2014 If DEFINED_ONLY is non-nil, builtin character is not returned.
2015 If corresponding character is not found, nil is returned.
2017 (charset, code, defined_only))
2021 charset = Fget_charset (charset);
2024 if (XCHARSET_GRAPHIC (charset) == 1)
2026 if (NILP (defined_only))
2027 c = DECODE_CHAR (charset, c);
2029 c = DECODE_DEFINED_CHAR (charset, c);
2030 return c >= 0 ? make_char (c) : Qnil;
2033 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2034 Make a builtin character from CHARSET and code-point CODE.
2040 charset = Fget_charset (charset);
2042 if (EQ (charset, Vcharset_latin_viscii))
2044 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2050 (ret = Fget_char_attribute (chr,
2051 Vcharset_latin_viscii_lower,
2054 charset = Vcharset_latin_viscii_lower;
2058 (ret = Fget_char_attribute (chr,
2059 Vcharset_latin_viscii_upper,
2062 charset = Vcharset_latin_viscii_upper;
2069 if (XCHARSET_GRAPHIC (charset) == 1)
2072 c = decode_builtin_char (charset, c);
2073 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2077 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2078 Make a character from CHARSET and octets ARG1 and ARG2.
2079 ARG2 is required only for characters from two-dimensional charsets.
2080 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2081 character s with caron.
2083 (charset, arg1, arg2))
2087 int lowlim, highlim;
2089 charset = Fget_charset (charset);
2090 cs = XCHARSET (charset);
2092 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2093 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2095 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2097 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2098 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2101 /* It is useful (and safe, according to Olivier Galibert) to strip
2102 the 8th bit off ARG1 and ARG2 because it allows programmers to
2103 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2104 Latin 2 code of the character. */
2112 if (a1 < lowlim || a1 > highlim)
2113 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2115 if (CHARSET_DIMENSION (cs) == 1)
2119 ("Charset is of dimension one; second octet must be nil", arg2);
2120 return make_char (MAKE_CHAR (charset, a1, 0));
2129 a2 = XINT (arg2) & 0x7f;
2131 if (a2 < lowlim || a2 > highlim)
2132 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2134 return make_char (MAKE_CHAR (charset, a1, a2));
2137 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2138 Return the character set of CHARACTER.
2142 CHECK_CHAR_COERCE_INT (character);
2144 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2147 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2148 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2149 N defaults to 0 if omitted.
2153 Lisp_Object charset;
2156 CHECK_CHAR_COERCE_INT (character);
2158 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2160 if (NILP (n) || EQ (n, Qzero))
2161 return make_int (octet0);
2162 else if (EQ (n, make_int (1)))
2163 return make_int (octet1);
2165 signal_simple_error ("Octet number must be 0 or 1", n);
2168 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2169 Return list of charset and one or two position-codes of CHARACTER.
2173 /* This function can GC */
2174 struct gcpro gcpro1, gcpro2;
2175 Lisp_Object charset = Qnil;
2176 Lisp_Object rc = Qnil;
2184 GCPRO2 (charset, rc);
2185 CHECK_CHAR_COERCE_INT (character);
2188 code_point = ENCODE_CHAR (XCHAR (character), charset);
2189 dimension = XCHARSET_DIMENSION (charset);
2190 while (dimension > 0)
2192 rc = Fcons (make_int (code_point & 255), rc);
2196 rc = Fcons (XCHARSET_NAME (charset), rc);
2198 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2200 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2202 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2206 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2215 #ifdef ENABLE_COMPOSITE_CHARS
2216 /************************************************************************/
2217 /* composite character functions */
2218 /************************************************************************/
2221 lookup_composite_char (Bufbyte *str, int len)
2223 Lisp_Object lispstr = make_string (str, len);
2224 Lisp_Object ch = Fgethash (lispstr,
2225 Vcomposite_char_string2char_hash_table,
2231 if (composite_char_row_next >= 128)
2232 signal_simple_error ("No more composite chars available", lispstr);
2233 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2234 composite_char_col_next);
2235 Fputhash (make_char (emch), lispstr,
2236 Vcomposite_char_char2string_hash_table);
2237 Fputhash (lispstr, make_char (emch),
2238 Vcomposite_char_string2char_hash_table);
2239 composite_char_col_next++;
2240 if (composite_char_col_next >= 128)
2242 composite_char_col_next = 32;
2243 composite_char_row_next++;
2252 composite_char_string (Emchar ch)
2254 Lisp_Object str = Fgethash (make_char (ch),
2255 Vcomposite_char_char2string_hash_table,
2257 assert (!UNBOUNDP (str));
2261 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2262 Convert a string into a single composite character.
2263 The character is the result of overstriking all the characters in
2268 CHECK_STRING (string);
2269 return make_char (lookup_composite_char (XSTRING_DATA (string),
2270 XSTRING_LENGTH (string)));
2273 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2274 Return a string of the characters comprising a composite character.
2282 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2283 signal_simple_error ("Must be composite char", ch);
2284 return composite_char_string (emch);
2286 #endif /* ENABLE_COMPOSITE_CHARS */
2289 /************************************************************************/
2290 /* initialization */
2291 /************************************************************************/
2294 syms_of_mule_charset (void)
2296 INIT_LRECORD_IMPLEMENTATION (charset);
2298 DEFSUBR (Fcharsetp);
2299 DEFSUBR (Ffind_charset);
2300 DEFSUBR (Fget_charset);
2301 DEFSUBR (Fcharset_list);
2302 DEFSUBR (Fcharset_name);
2303 DEFSUBR (Fmake_charset);
2304 DEFSUBR (Fmake_reverse_direction_charset);
2305 /* DEFSUBR (Freverse_direction_charset); */
2306 DEFSUBR (Fdefine_charset_alias);
2307 DEFSUBR (Fcharset_from_attributes);
2308 DEFSUBR (Fcharset_short_name);
2309 DEFSUBR (Fcharset_long_name);
2310 DEFSUBR (Fcharset_description);
2311 DEFSUBR (Fcharset_dimension);
2312 DEFSUBR (Fcharset_property);
2313 DEFSUBR (Fcharset_id);
2314 DEFSUBR (Fset_charset_ccl_program);
2315 DEFSUBR (Fset_charset_registry);
2317 DEFSUBR (Fcharset_mapping_table);
2318 DEFSUBR (Fset_charset_mapping_table);
2322 DEFSUBR (Fdecode_char);
2323 DEFSUBR (Fdecode_builtin_char);
2325 DEFSUBR (Fmake_char);
2326 DEFSUBR (Fchar_charset);
2327 DEFSUBR (Fchar_octet);
2328 DEFSUBR (Fsplit_char);
2330 #ifdef ENABLE_COMPOSITE_CHARS
2331 DEFSUBR (Fmake_composite_char);
2332 DEFSUBR (Fcomposite_char_string);
2335 defsymbol (&Qcharsetp, "charsetp");
2336 defsymbol (&Qregistry, "registry");
2337 defsymbol (&Qfinal, "final");
2338 defsymbol (&Qgraphic, "graphic");
2339 defsymbol (&Qdirection, "direction");
2340 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2341 defsymbol (&Qshort_name, "short-name");
2342 defsymbol (&Qlong_name, "long-name");
2344 defsymbol (&Ql2r, "l2r");
2345 defsymbol (&Qr2l, "r2l");
2347 /* Charsets, compatible with FSF 20.3
2348 Naming convention is Script-Charset[-Edition] */
2349 defsymbol (&Qascii, "ascii");
2350 defsymbol (&Qcontrol_1, "control-1");
2351 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2352 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2353 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2354 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2355 defsymbol (&Qthai_tis620, "thai-tis620");
2356 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2357 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2358 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2359 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2360 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2361 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2362 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2363 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2364 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2365 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2366 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2367 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2368 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2369 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2370 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2371 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2373 defsymbol (&Qucs, "ucs");
2374 defsymbol (&Qucs_bmp, "ucs-bmp");
2375 defsymbol (&Qucs_cns, "ucs-cns");
2376 defsymbol (&Qucs_jis, "ucs-jis");
2377 defsymbol (&Qucs_ks, "ucs-ks");
2378 defsymbol (&Qucs_big5, "ucs-big5");
2379 defsymbol (&Qlatin_viscii, "latin-viscii");
2380 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2381 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2382 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2383 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2384 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2385 defsymbol (&Qideograph_gt, "ideograph-gt");
2386 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2387 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2388 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2389 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2390 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2391 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2392 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2393 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2394 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2395 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2396 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2397 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2398 defsymbol (&Qchinese_big5, "chinese-big5");
2399 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2400 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2401 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2402 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2403 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2404 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2405 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2406 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2407 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2408 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2409 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2410 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2411 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2412 defsymbol (&Qchina3_jef, "china3-jef");
2413 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2414 defsymbol (&Qmojikyo, "mojikyo");
2415 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2416 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2417 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2418 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2419 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2420 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2421 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2422 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2423 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2424 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2425 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2426 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2427 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2428 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2429 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2430 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2431 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2432 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2433 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2434 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2435 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2436 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2437 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2439 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2440 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2442 defsymbol (&Qcomposite, "composite");
2446 vars_of_mule_charset (void)
2453 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2454 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2456 /* Table of charsets indexed by leading byte. */
2457 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2458 chlook->charset_by_leading_byte[i] = Qnil;
2461 /* Table of charsets indexed by type/final-byte. */
2462 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2463 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2464 chlook->charset_by_attributes[i][j] = Qnil;
2466 /* Table of charsets indexed by type/final-byte/direction. */
2467 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2468 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2469 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2470 chlook->charset_by_attributes[i][j][k] = Qnil;
2474 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2476 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2477 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2481 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2482 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2483 Leading-code of private TYPE9N charset of column-width 1.
2485 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2489 Vdefault_coded_charset_priority_list = Qnil;
2490 DEFVAR_LISP ("default-coded-charset-priority-list",
2491 &Vdefault_coded_charset_priority_list /*
2492 Default order of preferred coded-character-sets.
2498 complex_vars_of_mule_charset (void)
2500 staticpro (&Vcharset_hash_table);
2501 Vcharset_hash_table =
2502 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2504 /* Predefined character sets. We store them into variables for
2508 staticpro (&Vcharset_ucs);
2510 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2511 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2512 build_string ("UCS"),
2513 build_string ("UCS"),
2514 build_string ("ISO/IEC 10646"),
2516 Qnil, 0, 0xFFFFFFF, 0, 0);
2517 staticpro (&Vcharset_ucs_bmp);
2519 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2520 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2521 build_string ("BMP"),
2522 build_string ("BMP"),
2523 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2524 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2525 Qnil, 0, 0xFFFF, 0, 0);
2526 staticpro (&Vcharset_ucs_cns);
2528 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2529 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2530 build_string ("UCS for CNS"),
2531 build_string ("UCS for CNS 11643"),
2532 build_string ("ISO/IEC 10646 for CNS 11643"),
2535 staticpro (&Vcharset_ucs_jis);
2537 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2538 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2539 build_string ("UCS for JIS"),
2540 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2541 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2544 staticpro (&Vcharset_ucs_ks);
2546 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2547 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2548 build_string ("UCS for KS"),
2549 build_string ("UCS for CCS defined by KS"),
2550 build_string ("ISO/IEC 10646 for Korean Standards"),
2553 staticpro (&Vcharset_ucs_big5);
2555 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2556 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2557 build_string ("UCS for Big5"),
2558 build_string ("UCS for Big5"),
2559 build_string ("ISO/IEC 10646 for Big5"),
2563 # define MIN_CHAR_THAI 0
2564 # define MAX_CHAR_THAI 0
2565 /* # define MIN_CHAR_HEBREW 0 */
2566 /* # define MAX_CHAR_HEBREW 0 */
2567 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2568 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2570 staticpro (&Vcharset_ascii);
2572 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2573 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2574 build_string ("ASCII"),
2575 build_string ("ASCII)"),
2576 build_string ("ASCII (ISO646 IRV)"),
2577 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2578 Qnil, 0, 0x7F, 0, 0);
2579 staticpro (&Vcharset_control_1);
2580 Vcharset_control_1 =
2581 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2582 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2583 build_string ("C1"),
2584 build_string ("Control characters"),
2585 build_string ("Control characters 128-191"),
2587 Qnil, 0x80, 0x9F, 0, 0);
2588 staticpro (&Vcharset_latin_iso8859_1);
2589 Vcharset_latin_iso8859_1 =
2590 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2591 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2592 build_string ("Latin-1"),
2593 build_string ("ISO8859-1 (Latin-1)"),
2594 build_string ("ISO8859-1 (Latin-1)"),
2595 build_string ("iso8859-1"),
2596 Qnil, 0xA0, 0xFF, 0, 32);
2597 staticpro (&Vcharset_latin_iso8859_2);
2598 Vcharset_latin_iso8859_2 =
2599 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2600 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2601 build_string ("Latin-2"),
2602 build_string ("ISO8859-2 (Latin-2)"),
2603 build_string ("ISO8859-2 (Latin-2)"),
2604 build_string ("iso8859-2"),
2606 staticpro (&Vcharset_latin_iso8859_3);
2607 Vcharset_latin_iso8859_3 =
2608 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2609 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2610 build_string ("Latin-3"),
2611 build_string ("ISO8859-3 (Latin-3)"),
2612 build_string ("ISO8859-3 (Latin-3)"),
2613 build_string ("iso8859-3"),
2615 staticpro (&Vcharset_latin_iso8859_4);
2616 Vcharset_latin_iso8859_4 =
2617 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2618 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2619 build_string ("Latin-4"),
2620 build_string ("ISO8859-4 (Latin-4)"),
2621 build_string ("ISO8859-4 (Latin-4)"),
2622 build_string ("iso8859-4"),
2624 staticpro (&Vcharset_thai_tis620);
2625 Vcharset_thai_tis620 =
2626 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2627 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2628 build_string ("TIS620"),
2629 build_string ("TIS620 (Thai)"),
2630 build_string ("TIS620.2529 (Thai)"),
2631 build_string ("tis620"),
2632 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2633 staticpro (&Vcharset_greek_iso8859_7);
2634 Vcharset_greek_iso8859_7 =
2635 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2636 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2637 build_string ("ISO8859-7"),
2638 build_string ("ISO8859-7 (Greek)"),
2639 build_string ("ISO8859-7 (Greek)"),
2640 build_string ("iso8859-7"),
2642 staticpro (&Vcharset_arabic_iso8859_6);
2643 Vcharset_arabic_iso8859_6 =
2644 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2645 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2646 build_string ("ISO8859-6"),
2647 build_string ("ISO8859-6 (Arabic)"),
2648 build_string ("ISO8859-6 (Arabic)"),
2649 build_string ("iso8859-6"),
2651 staticpro (&Vcharset_hebrew_iso8859_8);
2652 Vcharset_hebrew_iso8859_8 =
2653 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2654 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2655 build_string ("ISO8859-8"),
2656 build_string ("ISO8859-8 (Hebrew)"),
2657 build_string ("ISO8859-8 (Hebrew)"),
2658 build_string ("iso8859-8"),
2660 0 /* MIN_CHAR_HEBREW */,
2661 0 /* MAX_CHAR_HEBREW */, 0, 32);
2662 staticpro (&Vcharset_katakana_jisx0201);
2663 Vcharset_katakana_jisx0201 =
2664 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2665 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2666 build_string ("JISX0201 Kana"),
2667 build_string ("JISX0201.1976 (Japanese Kana)"),
2668 build_string ("JISX0201.1976 Japanese Kana"),
2669 build_string ("jisx0201\\.1976"),
2671 staticpro (&Vcharset_latin_jisx0201);
2672 Vcharset_latin_jisx0201 =
2673 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2674 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2675 build_string ("JISX0201 Roman"),
2676 build_string ("JISX0201.1976 (Japanese Roman)"),
2677 build_string ("JISX0201.1976 Japanese Roman"),
2678 build_string ("jisx0201\\.1976"),
2680 staticpro (&Vcharset_cyrillic_iso8859_5);
2681 Vcharset_cyrillic_iso8859_5 =
2682 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2683 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2684 build_string ("ISO8859-5"),
2685 build_string ("ISO8859-5 (Cyrillic)"),
2686 build_string ("ISO8859-5 (Cyrillic)"),
2687 build_string ("iso8859-5"),
2689 staticpro (&Vcharset_latin_iso8859_9);
2690 Vcharset_latin_iso8859_9 =
2691 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2692 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2693 build_string ("Latin-5"),
2694 build_string ("ISO8859-9 (Latin-5)"),
2695 build_string ("ISO8859-9 (Latin-5)"),
2696 build_string ("iso8859-9"),
2698 staticpro (&Vcharset_japanese_jisx0208_1978);
2699 Vcharset_japanese_jisx0208_1978 =
2700 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2701 Qjapanese_jisx0208_1978, 94, 2,
2702 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2703 build_string ("JIS X0208:1978"),
2704 build_string ("JIS X0208:1978 (Japanese)"),
2706 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2707 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2709 staticpro (&Vcharset_chinese_gb2312);
2710 Vcharset_chinese_gb2312 =
2711 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2712 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2713 build_string ("GB2312"),
2714 build_string ("GB2312)"),
2715 build_string ("GB2312 Chinese simplified"),
2716 build_string ("gb2312"),
2718 staticpro (&Vcharset_chinese_gb12345);
2719 Vcharset_chinese_gb12345 =
2720 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2721 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2722 build_string ("G1"),
2723 build_string ("GB 12345)"),
2724 build_string ("GB 12345-1990"),
2725 build_string ("GB12345\\(\\.1990\\)?-0"),
2727 staticpro (&Vcharset_japanese_jisx0208);
2728 Vcharset_japanese_jisx0208 =
2729 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2730 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2731 build_string ("JISX0208"),
2732 build_string ("JIS X0208:1983 (Japanese)"),
2733 build_string ("JIS X0208:1983 Japanese Kanji"),
2734 build_string ("jisx0208\\.1983"),
2737 staticpro (&Vcharset_japanese_jisx0208_1990);
2738 Vcharset_japanese_jisx0208_1990 =
2739 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2740 Qjapanese_jisx0208_1990, 94, 2,
2741 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2742 build_string ("JISX0208-1990"),
2743 build_string ("JIS X0208:1990 (Japanese)"),
2744 build_string ("JIS X0208:1990 Japanese Kanji"),
2745 build_string ("jisx0208\\.1990"),
2747 MIN_CHAR_JIS_X0208_1990,
2748 MAX_CHAR_JIS_X0208_1990, 0, 33);
2750 staticpro (&Vcharset_korean_ksc5601);
2751 Vcharset_korean_ksc5601 =
2752 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2753 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2754 build_string ("KSC5601"),
2755 build_string ("KSC5601 (Korean"),
2756 build_string ("KSC5601 Korean Hangul and Hanja"),
2757 build_string ("ksc5601"),
2759 staticpro (&Vcharset_japanese_jisx0212);
2760 Vcharset_japanese_jisx0212 =
2761 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2762 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2763 build_string ("JISX0212"),
2764 build_string ("JISX0212 (Japanese)"),
2765 build_string ("JISX0212 Japanese Supplement"),
2766 build_string ("jisx0212"),
2769 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2770 staticpro (&Vcharset_chinese_cns11643_1);
2771 Vcharset_chinese_cns11643_1 =
2772 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2773 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2774 build_string ("CNS11643-1"),
2775 build_string ("CNS11643-1 (Chinese traditional)"),
2777 ("CNS 11643 Plane 1 Chinese traditional"),
2778 build_string (CHINESE_CNS_PLANE_RE("1")),
2780 staticpro (&Vcharset_chinese_cns11643_2);
2781 Vcharset_chinese_cns11643_2 =
2782 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2783 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2784 build_string ("CNS11643-2"),
2785 build_string ("CNS11643-2 (Chinese traditional)"),
2787 ("CNS 11643 Plane 2 Chinese traditional"),
2788 build_string (CHINESE_CNS_PLANE_RE("2")),
2791 staticpro (&Vcharset_latin_tcvn5712);
2792 Vcharset_latin_tcvn5712 =
2793 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2794 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2795 build_string ("TCVN 5712"),
2796 build_string ("TCVN 5712 (VSCII-2)"),
2797 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2798 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2800 staticpro (&Vcharset_latin_viscii_lower);
2801 Vcharset_latin_viscii_lower =
2802 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2803 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2804 build_string ("VISCII lower"),
2805 build_string ("VISCII lower (Vietnamese)"),
2806 build_string ("VISCII lower (Vietnamese)"),
2807 build_string ("MULEVISCII-LOWER"),
2809 staticpro (&Vcharset_latin_viscii_upper);
2810 Vcharset_latin_viscii_upper =
2811 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2812 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2813 build_string ("VISCII upper"),
2814 build_string ("VISCII upper (Vietnamese)"),
2815 build_string ("VISCII upper (Vietnamese)"),
2816 build_string ("MULEVISCII-UPPER"),
2818 staticpro (&Vcharset_latin_viscii);
2819 Vcharset_latin_viscii =
2820 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2821 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2822 build_string ("VISCII"),
2823 build_string ("VISCII 1.1 (Vietnamese)"),
2824 build_string ("VISCII 1.1 (Vietnamese)"),
2825 build_string ("VISCII1\\.1"),
2827 staticpro (&Vcharset_chinese_big5);
2828 Vcharset_chinese_big5 =
2829 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2830 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2831 build_string ("Big5"),
2832 build_string ("Big5"),
2833 build_string ("Big5 Chinese traditional"),
2834 build_string ("big5"),
2836 0 /* MIN_CHAR_BIG5_CDP */,
2837 0 /* MAX_CHAR_BIG5_CDP */, 0, 0);
2838 staticpro (&Vcharset_chinese_big5_cdp);
2839 Vcharset_chinese_big5_cdp =
2840 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2841 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2842 build_string ("Big5-CDP"),
2843 build_string ("Big5 + CDP extension"),
2844 build_string ("Big5 with CDP extension"),
2845 build_string ("big5\\.cdp-0"),
2846 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2847 #define DEF_HANZIKU(n) \
2848 staticpro (&Vcharset_ideograph_hanziku_##n); \
2849 Vcharset_ideograph_hanziku_##n = \
2850 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2851 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2852 build_string ("HZK-"#n), \
2853 build_string ("HANZIKU-"#n), \
2854 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2856 ("hanziku-"#n"$"), \
2857 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2870 staticpro (&Vcharset_china3_jef);
2871 Vcharset_china3_jef =
2872 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2873 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2874 build_string ("JC3"),
2875 build_string ("JEF + CHINA3"),
2876 build_string ("JEF + CHINA3 private characters"),
2877 build_string ("china3jef-0"),
2878 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2879 staticpro (&Vcharset_ideograph_cbeta);
2880 Vcharset_ideograph_cbeta =
2881 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2882 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2883 build_string ("CB"),
2884 build_string ("CBETA"),
2885 build_string ("CBETA private characters"),
2886 build_string ("cbeta-0"),
2887 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2888 staticpro (&Vcharset_ideograph_gt);
2889 Vcharset_ideograph_gt =
2890 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2891 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2892 build_string ("GT"),
2893 build_string ("GT"),
2894 build_string ("GT"),
2896 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2897 #define DEF_GT_PJ(n) \
2898 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2899 Vcharset_ideograph_gt_pj_##n = \
2900 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2901 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2902 build_string ("GT-PJ-"#n), \
2903 build_string ("GT (pseudo JIS encoding) part "#n), \
2904 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2906 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2920 staticpro (&Vcharset_ideograph_daikanwa);
2921 Vcharset_ideograph_daikanwa =
2922 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2923 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2924 build_string ("Daikanwa"),
2925 build_string ("Morohashi's Daikanwa"),
2926 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2927 build_string ("Daikanwa"),
2928 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2929 staticpro (&Vcharset_mojikyo);
2931 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2932 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2933 build_string ("Mojikyo"),
2934 build_string ("Mojikyo"),
2935 build_string ("Konjaku-Mojikyo"),
2937 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2938 staticpro (&Vcharset_mojikyo_2022_1);
2939 Vcharset_mojikyo_2022_1 =
2940 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2941 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Mojikyo-2022-1"),
2943 build_string ("Mojikyo ISO-2022 Part 1"),
2944 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2948 #define DEF_MOJIKYO_PJ(n) \
2949 staticpro (&Vcharset_mojikyo_pj_##n); \
2950 Vcharset_mojikyo_pj_##n = \
2951 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2952 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2953 build_string ("Mojikyo-PJ-"#n), \
2954 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2956 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2958 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2970 DEF_MOJIKYO_PJ (10);
2971 DEF_MOJIKYO_PJ (11);
2972 DEF_MOJIKYO_PJ (12);
2973 DEF_MOJIKYO_PJ (13);
2974 DEF_MOJIKYO_PJ (14);
2975 DEF_MOJIKYO_PJ (15);
2976 DEF_MOJIKYO_PJ (16);
2977 DEF_MOJIKYO_PJ (17);
2978 DEF_MOJIKYO_PJ (18);
2979 DEF_MOJIKYO_PJ (19);
2980 DEF_MOJIKYO_PJ (20);
2981 DEF_MOJIKYO_PJ (21);
2983 staticpro (&Vcharset_ethiopic_ucs);
2984 Vcharset_ethiopic_ucs =
2985 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2986 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2987 build_string ("Ethiopic (UCS)"),
2988 build_string ("Ethiopic (UCS)"),
2989 build_string ("Ethiopic of UCS"),
2990 build_string ("Ethiopic-Unicode"),
2991 Qnil, 0x1200, 0x137F, 0x1200, 0);
2993 staticpro (&Vcharset_chinese_big5_1);
2994 Vcharset_chinese_big5_1 =
2995 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2996 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2997 build_string ("Big5"),
2998 build_string ("Big5 (Level-1)"),
3000 ("Big5 Level-1 Chinese traditional"),
3001 build_string ("big5"),
3003 staticpro (&Vcharset_chinese_big5_2);
3004 Vcharset_chinese_big5_2 =
3005 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3006 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3007 build_string ("Big5"),
3008 build_string ("Big5 (Level-2)"),
3010 ("Big5 Level-2 Chinese traditional"),
3011 build_string ("big5"),
3014 #ifdef ENABLE_COMPOSITE_CHARS
3015 /* #### For simplicity, we put composite chars into a 96x96 charset.
3016 This is going to lead to problems because you can run out of
3017 room, esp. as we don't yet recycle numbers. */
3018 staticpro (&Vcharset_composite);
3019 Vcharset_composite =
3020 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3021 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3022 build_string ("Composite"),
3023 build_string ("Composite characters"),
3024 build_string ("Composite characters"),
3027 /* #### not dumped properly */
3028 composite_char_row_next = 32;
3029 composite_char_col_next = 32;
3031 Vcomposite_char_string2char_hash_table =
3032 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3033 Vcomposite_char_char2string_hash_table =
3034 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3035 staticpro (&Vcomposite_char_string2char_hash_table);
3036 staticpro (&Vcomposite_char_char2string_hash_table);
3037 #endif /* ENABLE_COMPOSITE_CHARS */