1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_mojikyo;
110 Lisp_Object Vcharset_mojikyo_2022_1;
111 Lisp_Object Vcharset_mojikyo_pj_1;
112 Lisp_Object Vcharset_mojikyo_pj_2;
113 Lisp_Object Vcharset_mojikyo_pj_3;
114 Lisp_Object Vcharset_mojikyo_pj_4;
115 Lisp_Object Vcharset_mojikyo_pj_5;
116 Lisp_Object Vcharset_mojikyo_pj_6;
117 Lisp_Object Vcharset_mojikyo_pj_7;
118 Lisp_Object Vcharset_mojikyo_pj_8;
119 Lisp_Object Vcharset_mojikyo_pj_9;
120 Lisp_Object Vcharset_mojikyo_pj_10;
121 Lisp_Object Vcharset_mojikyo_pj_11;
122 Lisp_Object Vcharset_mojikyo_pj_12;
123 Lisp_Object Vcharset_mojikyo_pj_13;
124 Lisp_Object Vcharset_mojikyo_pj_14;
125 Lisp_Object Vcharset_mojikyo_pj_15;
126 Lisp_Object Vcharset_mojikyo_pj_16;
127 Lisp_Object Vcharset_mojikyo_pj_17;
128 Lisp_Object Vcharset_mojikyo_pj_18;
129 Lisp_Object Vcharset_mojikyo_pj_19;
130 Lisp_Object Vcharset_mojikyo_pj_20;
131 Lisp_Object Vcharset_mojikyo_pj_21;
132 Lisp_Object Vcharset_ethiopic_ucs;
134 Lisp_Object Vcharset_chinese_big5_1;
135 Lisp_Object Vcharset_chinese_big5_2;
137 #ifdef ENABLE_COMPOSITE_CHARS
138 Lisp_Object Vcharset_composite;
140 /* Hash tables for composite chars. One maps string representing
141 composed chars to their equivalent chars; one goes the
143 Lisp_Object Vcomposite_char_char2string_hash_table;
144 Lisp_Object Vcomposite_char_string2char_hash_table;
146 static int composite_char_row_next;
147 static int composite_char_col_next;
149 #endif /* ENABLE_COMPOSITE_CHARS */
151 struct charset_lookup *chlook;
153 static const struct lrecord_description charset_lookup_description_1[] = {
154 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
163 static const struct struct_description charset_lookup_description = {
164 sizeof (struct charset_lookup),
165 charset_lookup_description_1
169 /* Table of number of bytes in the string representation of a character
170 indexed by the first byte of that representation.
172 rep_bytes_by_first_byte(c) is more efficient than the equivalent
173 canonical computation:
175 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
177 const Bytecount rep_bytes_by_first_byte[0xA0] =
178 { /* 0x00 - 0x7f are for straight ASCII */
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
185 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
186 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
187 /* 0x80 - 0x8f are for Dimension-1 official charsets */
189 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
191 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
193 /* 0x90 - 0x9d are for Dimension-2 official charsets */
194 /* 0x9e is for Dimension-1 private charsets */
195 /* 0x9f is for Dimension-2 private charsets */
196 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
202 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
204 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
206 /* ad-hoc method for `ascii' */
207 if ((CHARSET_CHARS (cs) == 94) &&
208 (CHARSET_BYTE_OFFSET (cs) != 33))
209 return 128 - CHARSET_BYTE_OFFSET (cs);
211 return CHARSET_CHARS (cs);
214 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
216 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
218 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
222 if (XVECTOR_LENGTH (v) > ccs_len)
225 for (i = 0; i < XVECTOR_LENGTH (v); i++)
227 Lisp_Object c = XVECTOR_DATA(v)[i];
229 if (!NILP (c) && !CHARP (c))
233 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
245 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
248 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
258 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
259 nv = XVECTOR_DATA(v)[i];
265 XVECTOR_DATA(v)[i] = Qnil;
269 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
270 int code_point, Lisp_Object character);
272 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
273 int code_point, Lisp_Object character)
277 int ccs_len = XVECTOR_LENGTH (v);
282 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
283 nv = XVECTOR_DATA(v)[i];
287 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
293 XVECTOR_DATA(v)[i] = character;
297 put_char_ccs_code_point (Lisp_Object character,
298 Lisp_Object ccs, Lisp_Object value)
300 if (!EQ (XCHARSET_NAME (ccs), Qucs)
302 || (XCHAR (character) != XINT (value)))
304 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
305 int dim = XCHARSET_DIMENSION (ccs);
306 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
307 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
311 { /* obsolete representation: value must be a list of bytes */
312 Lisp_Object ret = Fcar (value);
316 signal_simple_error ("Invalid value for coded-charset", value);
317 code_point = XINT (ret);
318 if (XCHARSET_GRAPHIC (ccs) == 1)
326 signal_simple_error ("Invalid value for coded-charset",
330 signal_simple_error ("Invalid value for coded-charset",
333 if (XCHARSET_GRAPHIC (ccs) == 1)
335 code_point = (code_point << 8) | j;
338 value = make_int (code_point);
340 else if (INTP (value))
342 code_point = XINT (value);
343 if (XCHARSET_GRAPHIC (ccs) == 1)
345 code_point &= 0x7F7F7F7F;
346 value = make_int (code_point);
350 signal_simple_error ("Invalid value for coded-charset", value);
354 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
357 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
362 XCHARSET_DECODING_TABLE (ccs)
363 = v = make_older_vector (ccs_len, Qnil);
366 decoding_table_put_char (v, dim, byte_offset, code_point, character);
372 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
374 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
375 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
377 if (VECTORP (decoding_table))
379 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
383 decoding_table_remove_char (decoding_table,
384 XCHARSET_DIMENSION (ccs),
385 XCHARSET_BYTE_OFFSET (ccs),
389 if (CHAR_TABLEP (encoding_table))
391 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
399 int leading_code_private_11;
402 Lisp_Object Qcharsetp;
404 /* Qdoc_string, Qdimension, Qchars defined in general.c */
405 Lisp_Object Qregistry, Qfinal, Qgraphic;
406 Lisp_Object Qdirection;
407 Lisp_Object Qreverse_direction_charset;
408 Lisp_Object Qleading_byte;
409 Lisp_Object Qshort_name, Qlong_name;
425 Qjapanese_jisx0208_1978,
429 Qjapanese_jisx0208_1990,
447 Qvietnamese_viscii_lower,
448 Qvietnamese_viscii_upper,
451 Qideograph_hanziku_1,
452 Qideograph_hanziku_2,
453 Qideograph_hanziku_3,
454 Qideograph_hanziku_4,
455 Qideograph_hanziku_5,
456 Qideograph_hanziku_6,
457 Qideograph_hanziku_7,
458 Qideograph_hanziku_8,
459 Qideograph_hanziku_9,
460 Qideograph_hanziku_10,
461 Qideograph_hanziku_11,
462 Qideograph_hanziku_12,
465 Qideograph_daikanwa_2,
508 Lisp_Object Ql2r, Qr2l;
510 Lisp_Object Vcharset_hash_table;
512 /* Composite characters are characters constructed by overstriking two
513 or more regular characters.
515 1) The old Mule implementation involves storing composite characters
516 in a buffer as a tag followed by all of the actual characters
517 used to make up the composite character. I think this is a bad
518 idea; it greatly complicates code that wants to handle strings
519 one character at a time because it has to deal with the possibility
520 of great big ungainly characters. It's much more reasonable to
521 simply store an index into a table of composite characters.
523 2) The current implementation only allows for 16,384 separate
524 composite characters over the lifetime of the XEmacs process.
525 This could become a potential problem if the user
526 edited lots of different files that use composite characters.
527 Due to FSF bogosity, increasing the number of allowable
528 composite characters under Mule would decrease the number
529 of possible faces that can exist. Mule already has shrunk
530 this to 2048, and further shrinkage would become uncomfortable.
531 No such problems exist in XEmacs.
533 Composite characters could be represented as 0x80 C1 C2 C3,
534 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
535 for slightly under 2^20 (one million) composite characters
536 over the XEmacs process lifetime, and you only need to
537 increase the size of a Mule character from 19 to 21 bits.
538 Or you could use 0x80 C1 C2 C3 C4, allowing for about
539 85 million (slightly over 2^26) composite characters. */
542 /************************************************************************/
543 /* Basic Emchar functions */
544 /************************************************************************/
546 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
547 string in STR. Returns the number of bytes stored.
548 Do not call this directly. Use the macro set_charptr_emchar() instead.
552 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
567 else if ( c <= 0x7ff )
569 *p++ = (c >> 6) | 0xc0;
570 *p++ = (c & 0x3f) | 0x80;
572 else if ( c <= 0xffff )
574 *p++ = (c >> 12) | 0xe0;
575 *p++ = ((c >> 6) & 0x3f) | 0x80;
576 *p++ = (c & 0x3f) | 0x80;
578 else if ( c <= 0x1fffff )
580 *p++ = (c >> 18) | 0xf0;
581 *p++ = ((c >> 12) & 0x3f) | 0x80;
582 *p++ = ((c >> 6) & 0x3f) | 0x80;
583 *p++ = (c & 0x3f) | 0x80;
585 else if ( c <= 0x3ffffff )
587 *p++ = (c >> 24) | 0xf8;
588 *p++ = ((c >> 18) & 0x3f) | 0x80;
589 *p++ = ((c >> 12) & 0x3f) | 0x80;
590 *p++ = ((c >> 6) & 0x3f) | 0x80;
591 *p++ = (c & 0x3f) | 0x80;
595 *p++ = (c >> 30) | 0xfc;
596 *p++ = ((c >> 24) & 0x3f) | 0x80;
597 *p++ = ((c >> 18) & 0x3f) | 0x80;
598 *p++ = ((c >> 12) & 0x3f) | 0x80;
599 *p++ = ((c >> 6) & 0x3f) | 0x80;
600 *p++ = (c & 0x3f) | 0x80;
603 BREAKUP_CHAR (c, charset, c1, c2);
604 lb = CHAR_LEADING_BYTE (c);
605 if (LEADING_BYTE_PRIVATE_P (lb))
606 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
608 if (EQ (charset, Vcharset_control_1))
617 /* Return the first character from a Mule-encoded string in STR,
618 assuming it's non-ASCII. Do not call this directly.
619 Use the macro charptr_emchar() instead. */
622 non_ascii_charptr_emchar (const Bufbyte *str)
635 else if ( b >= 0xf8 )
640 else if ( b >= 0xf0 )
645 else if ( b >= 0xe0 )
650 else if ( b >= 0xc0 )
660 for( ; len > 0; len-- )
663 ch = ( ch << 6 ) | ( b & 0x3f );
667 Bufbyte i0 = *str, i1, i2 = 0;
670 if (i0 == LEADING_BYTE_CONTROL_1)
671 return (Emchar) (*++str - 0x20);
673 if (LEADING_BYTE_PREFIX_P (i0))
678 charset = CHARSET_BY_LEADING_BYTE (i0);
679 if (XCHARSET_DIMENSION (charset) == 2)
682 return MAKE_CHAR (charset, i1, i2);
686 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
687 Do not call this directly. Use the macro valid_char_p() instead. */
691 non_ascii_valid_char_p (Emchar ch)
695 /* Must have only lowest 19 bits set */
699 f1 = CHAR_FIELD1 (ch);
700 f2 = CHAR_FIELD2 (ch);
701 f3 = CHAR_FIELD3 (ch);
707 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
708 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
709 f2 > MAX_CHAR_FIELD2_PRIVATE)
714 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
715 f2 <= MAX_CHAR_FIELD2_PRIVATE))
719 NOTE: This takes advantage of the fact that
720 FIELD2_TO_OFFICIAL_LEADING_BYTE and
721 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
723 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
724 if (EQ (charset, Qnil))
726 return (XCHARSET_CHARS (charset) == 96);
732 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
733 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
734 f1 > MAX_CHAR_FIELD1_PRIVATE)
736 if (f2 < 0x20 || f3 < 0x20)
739 #ifdef ENABLE_COMPOSITE_CHARS
740 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
742 if (UNBOUNDP (Fgethash (make_int (ch),
743 Vcomposite_char_char2string_hash_table,
748 #endif /* ENABLE_COMPOSITE_CHARS */
750 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
751 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
754 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
756 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
759 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
761 if (EQ (charset, Qnil))
763 return (XCHARSET_CHARS (charset) == 96);
769 /************************************************************************/
770 /* Basic string functions */
771 /************************************************************************/
773 /* Copy the character pointed to by SRC into DST. Do not call this
774 directly. Use the macro charptr_copy_char() instead.
775 Return the number of bytes copied. */
778 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
780 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
782 for (i = bytes; i; i--, dst++, src++)
788 /************************************************************************/
789 /* streams of Emchars */
790 /************************************************************************/
792 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
793 The functions below are not meant to be called directly; use
794 the macros in insdel.h. */
797 Lstream_get_emchar_1 (Lstream *stream, int ch)
799 Bufbyte str[MAX_EMCHAR_LEN];
800 Bufbyte *strptr = str;
803 str[0] = (Bufbyte) ch;
805 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
807 int c = Lstream_getc (stream);
808 bufpos_checking_assert (c >= 0);
809 *++strptr = (Bufbyte) c;
811 return charptr_emchar (str);
815 Lstream_fput_emchar (Lstream *stream, Emchar ch)
817 Bufbyte str[MAX_EMCHAR_LEN];
818 Bytecount len = set_charptr_emchar (str, ch);
819 return Lstream_write (stream, str, len);
823 Lstream_funget_emchar (Lstream *stream, Emchar ch)
825 Bufbyte str[MAX_EMCHAR_LEN];
826 Bytecount len = set_charptr_emchar (str, ch);
827 Lstream_unread (stream, str, len);
831 /************************************************************************/
833 /************************************************************************/
836 mark_charset (Lisp_Object obj)
838 Lisp_Charset *cs = XCHARSET (obj);
840 mark_object (cs->short_name);
841 mark_object (cs->long_name);
842 mark_object (cs->doc_string);
843 mark_object (cs->registry);
844 mark_object (cs->ccl_program);
846 /* mark_object (cs->decoding_table); */
852 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
854 Lisp_Charset *cs = XCHARSET (obj);
858 error ("printing unreadable object #<charset %s 0x%x>",
859 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
862 write_c_string ("#<charset ", printcharfun);
863 print_internal (CHARSET_NAME (cs), printcharfun, 0);
864 write_c_string (" ", printcharfun);
865 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
868 write_c_string (" ", printcharfun);
869 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
870 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
872 CHARSET_DIMENSION (cs),
873 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
874 CHARSET_COLUMNS (cs),
875 CHARSET_GRAPHIC (cs),
877 write_c_string (buf, printcharfun);
878 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
879 sprintf (buf, " 0x%x>", cs->header.uid);
880 write_c_string (buf, printcharfun);
883 static const struct lrecord_description charset_description[] = {
884 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
885 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
889 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
892 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
897 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
898 mark_charset, print_charset, 0, 0, 0,
902 /* Make a new charset. */
903 /* #### SJT Should generic properties be allowed? */
905 make_charset (Charset_ID id, Lisp_Object name,
906 unsigned short chars, unsigned char dimension,
907 unsigned char columns, unsigned char graphic,
908 Bufbyte final, unsigned char direction, Lisp_Object short_name,
909 Lisp_Object long_name, Lisp_Object doc,
911 Lisp_Object decoding_table,
912 Emchar ucs_min, Emchar ucs_max,
913 Emchar code_offset, unsigned char byte_offset)
916 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
920 XSETCHARSET (obj, cs);
922 CHARSET_ID (cs) = id;
923 CHARSET_NAME (cs) = name;
924 CHARSET_SHORT_NAME (cs) = short_name;
925 CHARSET_LONG_NAME (cs) = long_name;
926 CHARSET_CHARS (cs) = chars;
927 CHARSET_DIMENSION (cs) = dimension;
928 CHARSET_DIRECTION (cs) = direction;
929 CHARSET_COLUMNS (cs) = columns;
930 CHARSET_GRAPHIC (cs) = graphic;
931 CHARSET_FINAL (cs) = final;
932 CHARSET_DOC_STRING (cs) = doc;
933 CHARSET_REGISTRY (cs) = reg;
934 CHARSET_CCL_PROGRAM (cs) = Qnil;
935 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
937 CHARSET_DECODING_TABLE(cs) = Qnil;
938 CHARSET_UCS_MIN(cs) = ucs_min;
939 CHARSET_UCS_MAX(cs) = ucs_max;
940 CHARSET_CODE_OFFSET(cs) = code_offset;
941 CHARSET_BYTE_OFFSET(cs) = byte_offset;
945 if (id == LEADING_BYTE_ASCII)
946 CHARSET_REP_BYTES (cs) = 1;
948 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
950 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
955 /* some charsets do not have final characters. This includes
956 ASCII, Control-1, Composite, and the two faux private
958 unsigned char iso2022_type
959 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
961 if (code_offset == 0)
963 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
964 chlook->charset_by_attributes[iso2022_type][final] = obj;
968 (chlook->charset_by_attributes[iso2022_type][final][direction]));
969 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
973 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
974 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
976 /* Some charsets are "faux" and don't have names or really exist at
977 all except in the leading-byte table. */
979 Fputhash (name, obj, Vcharset_hash_table);
984 get_unallocated_leading_byte (int dimension)
989 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
992 lb = chlook->next_allocated_leading_byte++;
996 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
999 lb = chlook->next_allocated_1_byte_leading_byte++;
1003 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1006 lb = chlook->next_allocated_2_byte_leading_byte++;
1012 ("No more character sets free for this dimension",
1013 make_int (dimension));
1019 /* Number of Big5 characters which have the same code in 1st byte. */
1021 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1024 decode_builtin_char (Lisp_Object charset, int code_point)
1028 if (EQ (charset, Vcharset_chinese_big5))
1030 int c1 = code_point >> 8;
1031 int c2 = code_point & 0xFF;
1034 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1036 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1037 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1039 I = (c1 - 0xA1) * BIG5_SAME_ROW
1040 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1044 charset = Vcharset_chinese_big5_1;
1048 charset = Vcharset_chinese_big5_2;
1049 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1051 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1054 if ((final = XCHARSET_FINAL (charset)) >= '0')
1056 if (XCHARSET_DIMENSION (charset) == 1)
1058 switch (XCHARSET_CHARS (charset))
1062 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1065 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1073 switch (XCHARSET_CHARS (charset))
1076 return MIN_CHAR_94x94
1077 + (final - '0') * 94 * 94
1078 + (((code_point >> 8) & 0x7F) - 33) * 94
1079 + ((code_point & 0x7F) - 33);
1081 return MIN_CHAR_96x96
1082 + (final - '0') * 96 * 96
1083 + (((code_point >> 8) & 0x7F) - 32) * 96
1084 + ((code_point & 0x7F) - 32);
1091 else if (XCHARSET_UCS_MAX (charset))
1094 = (XCHARSET_DIMENSION (charset) == 1
1096 code_point - XCHARSET_BYTE_OFFSET (charset)
1098 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1099 * XCHARSET_CHARS (charset)
1100 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1101 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1102 if ((cid < XCHARSET_UCS_MIN (charset))
1103 || (XCHARSET_UCS_MAX (charset) < cid))
1112 range_charset_code_point (Lisp_Object charset, Emchar ch)
1116 if ((XCHARSET_UCS_MIN (charset) <= ch)
1117 && (ch <= XCHARSET_UCS_MAX (charset)))
1119 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1121 if (XCHARSET_CHARS (charset) == 256)
1123 else if (XCHARSET_DIMENSION (charset) == 1)
1124 return d + XCHARSET_BYTE_OFFSET (charset);
1125 else if (XCHARSET_DIMENSION (charset) == 2)
1127 ((d / XCHARSET_CHARS (charset)
1128 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1129 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1130 else if (XCHARSET_DIMENSION (charset) == 3)
1132 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1133 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1134 | ((d / XCHARSET_CHARS (charset)
1135 % XCHARSET_CHARS (charset)
1136 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1137 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1138 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1140 ((d / (XCHARSET_CHARS (charset)
1141 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1142 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1143 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1144 % XCHARSET_CHARS (charset)
1145 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1146 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1147 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1148 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1150 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1152 if (XCHARSET_DIMENSION (charset) == 1)
1154 if (XCHARSET_CHARS (charset) == 94)
1156 if (((d = ch - (MIN_CHAR_94
1157 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1161 else if (XCHARSET_CHARS (charset) == 96)
1163 if (((d = ch - (MIN_CHAR_96
1164 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1171 else if (XCHARSET_DIMENSION (charset) == 2)
1173 if (XCHARSET_CHARS (charset) == 94)
1175 if (((d = ch - (MIN_CHAR_94x94
1176 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1179 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1181 else if (XCHARSET_CHARS (charset) == 96)
1183 if (((d = ch - (MIN_CHAR_96x96
1184 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1187 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1193 if (EQ (charset, Vcharset_mojikyo_2022_1)
1194 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1196 int m = ch - MIN_CHAR_MOJIKYO - 1;
1197 int byte1 = m / (94 * 60) + 33;
1198 int byte2 = (m % (94 * 60)) / 94;
1199 int byte3 = m % 94 + 33;
1205 return (byte1 << 16) | (byte2 << 8) | byte3;
1211 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1213 if (c <= MAX_CHAR_BASIC_LATIN)
1215 *charset = Vcharset_ascii;
1220 *charset = Vcharset_control_1;
1225 *charset = Vcharset_latin_iso8859_1;
1229 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1231 *charset = Vcharset_hebrew_iso8859_8;
1232 return c - MIN_CHAR_HEBREW + 0x20;
1235 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1237 *charset = Vcharset_thai_tis620;
1238 return c - MIN_CHAR_THAI + 0x20;
1241 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1242 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1244 return list2 (Vcharset_katakana_jisx0201,
1245 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1248 else if (c <= MAX_CHAR_BMP)
1250 *charset = Vcharset_ucs_bmp;
1253 else if (c <= MAX_CHAR_SMP)
1255 *charset = Vcharset_ucs_smp;
1256 return c - MIN_CHAR_SMP;
1258 else if (c <= MAX_CHAR_SIP)
1260 *charset = Vcharset_ucs_sip;
1261 return c - MIN_CHAR_SIP;
1263 else if (c < MIN_CHAR_DAIKANWA)
1265 *charset = Vcharset_ucs;
1268 else if (c <= MAX_CHAR_DAIKANWA)
1270 *charset = Vcharset_ideograph_daikanwa;
1271 return c - MIN_CHAR_DAIKANWA;
1274 else if (c <= MAX_CHAR_MOJIKYO_0)
1276 *charset = Vcharset_mojikyo;
1277 return c - MIN_CHAR_MOJIKYO_0;
1280 else if (c < MIN_CHAR_94)
1282 *charset = Vcharset_ucs;
1285 else if (c <= MAX_CHAR_94)
1287 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1288 ((c - MIN_CHAR_94) / 94) + '0',
1289 CHARSET_LEFT_TO_RIGHT);
1290 if (!NILP (*charset))
1291 return ((c - MIN_CHAR_94) % 94) + 33;
1294 *charset = Vcharset_ucs;
1298 else if (c <= MAX_CHAR_96)
1300 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1301 ((c - MIN_CHAR_96) / 96) + '0',
1302 CHARSET_LEFT_TO_RIGHT);
1303 if (!NILP (*charset))
1304 return ((c - MIN_CHAR_96) % 96) + 32;
1307 *charset = Vcharset_ucs;
1311 else if (c <= MAX_CHAR_94x94)
1314 = CHARSET_BY_ATTRIBUTES (94, 2,
1315 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1316 CHARSET_LEFT_TO_RIGHT);
1317 if (!NILP (*charset))
1318 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1319 | (((c - MIN_CHAR_94x94) % 94) + 33);
1322 *charset = Vcharset_ucs;
1326 else if (c <= MAX_CHAR_96x96)
1329 = CHARSET_BY_ATTRIBUTES (96, 2,
1330 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1331 CHARSET_LEFT_TO_RIGHT);
1332 if (!NILP (*charset))
1333 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1334 | (((c - MIN_CHAR_96x96) % 96) + 32);
1337 *charset = Vcharset_ucs;
1342 else if (c < MIN_CHAR_MOJIKYO)
1344 *charset = Vcharset_ucs;
1347 else if (c <= MAX_CHAR_MOJIKYO)
1349 *charset = Vcharset_mojikyo;
1350 return c - MIN_CHAR_MOJIKYO;
1352 else if (c < MIN_CHAR_CHINA3_JEF)
1354 *charset = Vcharset_ucs;
1357 else if (c <= MAX_CHAR_CHINA3_JEF)
1359 *charset = Vcharset_china3_jef;
1360 return c - MIN_CHAR_CHINA3_JEF;
1362 else if (c <= MAX_CHAR_CBETA)
1364 *charset = Vcharset_ideograph_cbeta;
1365 return c - MIN_CHAR_CBETA;
1370 *charset = Vcharset_ucs;
1375 Lisp_Object Vdefault_coded_charset_priority_list;
1379 /************************************************************************/
1380 /* Basic charset Lisp functions */
1381 /************************************************************************/
1383 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1384 Return non-nil if OBJECT is a charset.
1388 return CHARSETP (object) ? Qt : Qnil;
1391 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1392 Retrieve the charset of the given name.
1393 If CHARSET-OR-NAME is a charset object, it is simply returned.
1394 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1395 nil is returned. Otherwise the associated charset object is returned.
1399 if (CHARSETP (charset_or_name))
1400 return charset_or_name;
1402 CHECK_SYMBOL (charset_or_name);
1403 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1406 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1407 Retrieve the charset of the given name.
1408 Same as `find-charset' except an error is signalled if there is no such
1409 charset instead of returning nil.
1413 Lisp_Object charset = Ffind_charset (name);
1416 signal_simple_error ("No such charset", name);
1420 /* We store the charsets in hash tables with the names as the key and the
1421 actual charset object as the value. Occasionally we need to use them
1422 in a list format. These routines provide us with that. */
1423 struct charset_list_closure
1425 Lisp_Object *charset_list;
1429 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1430 void *charset_list_closure)
1432 /* This function can GC */
1433 struct charset_list_closure *chcl =
1434 (struct charset_list_closure*) charset_list_closure;
1435 Lisp_Object *charset_list = chcl->charset_list;
1437 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1441 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1442 Return a list of the names of all defined charsets.
1446 Lisp_Object charset_list = Qnil;
1447 struct gcpro gcpro1;
1448 struct charset_list_closure charset_list_closure;
1450 GCPRO1 (charset_list);
1451 charset_list_closure.charset_list = &charset_list;
1452 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1453 &charset_list_closure);
1456 return charset_list;
1459 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1460 Return the name of charset CHARSET.
1464 return XCHARSET_NAME (Fget_charset (charset));
1467 /* #### SJT Should generic properties be allowed? */
1468 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1469 Define a new character set.
1470 This function is for use with Mule support.
1471 NAME is a symbol, the name by which the character set is normally referred.
1472 DOC-STRING is a string describing the character set.
1473 PROPS is a property list, describing the specific nature of the
1474 character set. Recognized properties are:
1476 'short-name Short version of the charset name (ex: Latin-1)
1477 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1478 'registry A regular expression matching the font registry field for
1480 'dimension Number of octets used to index a character in this charset.
1481 Either 1 or 2. Defaults to 1.
1482 'columns Number of columns used to display a character in this charset.
1483 Only used in TTY mode. (Under X, the actual width of a
1484 character can be derived from the font used to display the
1485 characters.) If unspecified, defaults to the dimension
1486 (this is almost always the correct value).
1487 'chars Number of characters in each dimension (94 or 96).
1488 Defaults to 94. Note that if the dimension is 2, the
1489 character set thus described is 94x94 or 96x96.
1490 'final Final byte of ISO 2022 escape sequence. Must be
1491 supplied. Each combination of (DIMENSION, CHARS) defines a
1492 separate namespace for final bytes. Note that ISO
1493 2022 restricts the final byte to the range
1494 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1495 dimension == 2. Note also that final bytes in the range
1496 0x30 - 0x3F are reserved for user-defined (not official)
1498 'graphic 0 (use left half of font on output) or 1 (use right half
1499 of font on output). Defaults to 0. For example, for
1500 a font whose registry is ISO8859-1, the left half
1501 (octets 0x20 - 0x7F) is the `ascii' character set, while
1502 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1503 character set. With 'graphic set to 0, the octets
1504 will have their high bit cleared; with it set to 1,
1505 the octets will have their high bit set.
1506 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1508 'ccl-program A compiled CCL program used to convert a character in
1509 this charset into an index into the font. This is in
1510 addition to the 'graphic property. The CCL program
1511 is passed the octets of the character, with the high
1512 bit cleared and set depending upon whether the value
1513 of the 'graphic property is 0 or 1.
1515 (name, doc_string, props))
1517 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1518 int direction = CHARSET_LEFT_TO_RIGHT;
1519 Lisp_Object registry = Qnil;
1520 Lisp_Object charset;
1521 Lisp_Object ccl_program = Qnil;
1522 Lisp_Object short_name = Qnil, long_name = Qnil;
1523 int byte_offset = -1;
1525 CHECK_SYMBOL (name);
1526 if (!NILP (doc_string))
1527 CHECK_STRING (doc_string);
1529 charset = Ffind_charset (name);
1530 if (!NILP (charset))
1531 signal_simple_error ("Cannot redefine existing charset", name);
1534 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1536 if (EQ (keyword, Qshort_name))
1538 CHECK_STRING (value);
1542 if (EQ (keyword, Qlong_name))
1544 CHECK_STRING (value);
1548 else if (EQ (keyword, Qdimension))
1551 dimension = XINT (value);
1552 if (dimension < 1 || dimension > 2)
1553 signal_simple_error ("Invalid value for 'dimension", value);
1556 else if (EQ (keyword, Qchars))
1559 chars = XINT (value);
1560 if (chars != 94 && chars != 96)
1561 signal_simple_error ("Invalid value for 'chars", value);
1564 else if (EQ (keyword, Qcolumns))
1567 columns = XINT (value);
1568 if (columns != 1 && columns != 2)
1569 signal_simple_error ("Invalid value for 'columns", value);
1572 else if (EQ (keyword, Qgraphic))
1575 graphic = XINT (value);
1577 if (graphic < 0 || graphic > 2)
1579 if (graphic < 0 || graphic > 1)
1581 signal_simple_error ("Invalid value for 'graphic", value);
1584 else if (EQ (keyword, Qregistry))
1586 CHECK_STRING (value);
1590 else if (EQ (keyword, Qdirection))
1592 if (EQ (value, Ql2r))
1593 direction = CHARSET_LEFT_TO_RIGHT;
1594 else if (EQ (value, Qr2l))
1595 direction = CHARSET_RIGHT_TO_LEFT;
1597 signal_simple_error ("Invalid value for 'direction", value);
1600 else if (EQ (keyword, Qfinal))
1602 CHECK_CHAR_COERCE_INT (value);
1603 final = XCHAR (value);
1604 if (final < '0' || final > '~')
1605 signal_simple_error ("Invalid value for 'final", value);
1608 else if (EQ (keyword, Qccl_program))
1610 struct ccl_program test_ccl;
1612 if (setup_ccl_program (&test_ccl, value) < 0)
1613 signal_simple_error ("Invalid value for 'ccl-program", value);
1614 ccl_program = value;
1618 signal_simple_error ("Unrecognized property", keyword);
1623 error ("'final must be specified");
1624 if (dimension == 2 && final > 0x5F)
1626 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1629 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1630 CHARSET_LEFT_TO_RIGHT)) ||
1631 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1632 CHARSET_RIGHT_TO_LEFT)))
1634 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1636 id = get_unallocated_leading_byte (dimension);
1638 if (NILP (doc_string))
1639 doc_string = build_string ("");
1641 if (NILP (registry))
1642 registry = build_string ("");
1644 if (NILP (short_name))
1645 XSETSTRING (short_name, XSYMBOL (name)->name);
1647 if (NILP (long_name))
1648 long_name = doc_string;
1651 columns = dimension;
1653 if (byte_offset < 0)
1657 else if (chars == 96)
1663 charset = make_charset (id, name, chars, dimension, columns, graphic,
1664 final, direction, short_name, long_name,
1665 doc_string, registry,
1666 Qnil, 0, 0, 0, byte_offset);
1667 if (!NILP (ccl_program))
1668 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1672 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1674 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1675 NEW-NAME is the name of the new charset. Return the new charset.
1677 (charset, new_name))
1679 Lisp_Object new_charset = Qnil;
1680 int id, chars, dimension, columns, graphic, final;
1682 Lisp_Object registry, doc_string, short_name, long_name;
1685 charset = Fget_charset (charset);
1686 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1687 signal_simple_error ("Charset already has reverse-direction charset",
1690 CHECK_SYMBOL (new_name);
1691 if (!NILP (Ffind_charset (new_name)))
1692 signal_simple_error ("Cannot redefine existing charset", new_name);
1694 cs = XCHARSET (charset);
1696 chars = CHARSET_CHARS (cs);
1697 dimension = CHARSET_DIMENSION (cs);
1698 columns = CHARSET_COLUMNS (cs);
1699 id = get_unallocated_leading_byte (dimension);
1701 graphic = CHARSET_GRAPHIC (cs);
1702 final = CHARSET_FINAL (cs);
1703 direction = CHARSET_RIGHT_TO_LEFT;
1704 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1705 direction = CHARSET_LEFT_TO_RIGHT;
1706 doc_string = CHARSET_DOC_STRING (cs);
1707 short_name = CHARSET_SHORT_NAME (cs);
1708 long_name = CHARSET_LONG_NAME (cs);
1709 registry = CHARSET_REGISTRY (cs);
1711 new_charset = make_charset (id, new_name, chars, dimension, columns,
1712 graphic, final, direction, short_name, long_name,
1713 doc_string, registry,
1715 CHARSET_DECODING_TABLE(cs),
1716 CHARSET_UCS_MIN(cs),
1717 CHARSET_UCS_MAX(cs),
1718 CHARSET_CODE_OFFSET(cs),
1719 CHARSET_BYTE_OFFSET(cs)
1725 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1726 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1731 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1732 Define symbol ALIAS as an alias for CHARSET.
1736 CHECK_SYMBOL (alias);
1737 charset = Fget_charset (charset);
1738 return Fputhash (alias, charset, Vcharset_hash_table);
1741 /* #### Reverse direction charsets not yet implemented. */
1743 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1745 Return the reverse-direction charset parallel to CHARSET, if any.
1746 This is the charset with the same properties (in particular, the same
1747 dimension, number of characters per dimension, and final byte) as
1748 CHARSET but whose characters are displayed in the opposite direction.
1752 charset = Fget_charset (charset);
1753 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1757 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1758 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1759 If DIRECTION is omitted, both directions will be checked (left-to-right
1760 will be returned if character sets exist for both directions).
1762 (dimension, chars, final, direction))
1764 int dm, ch, fi, di = -1;
1765 Lisp_Object obj = Qnil;
1767 CHECK_INT (dimension);
1768 dm = XINT (dimension);
1769 if (dm < 1 || dm > 2)
1770 signal_simple_error ("Invalid value for DIMENSION", dimension);
1774 if (ch != 94 && ch != 96)
1775 signal_simple_error ("Invalid value for CHARS", chars);
1777 CHECK_CHAR_COERCE_INT (final);
1779 if (fi < '0' || fi > '~')
1780 signal_simple_error ("Invalid value for FINAL", final);
1782 if (EQ (direction, Ql2r))
1783 di = CHARSET_LEFT_TO_RIGHT;
1784 else if (EQ (direction, Qr2l))
1785 di = CHARSET_RIGHT_TO_LEFT;
1786 else if (!NILP (direction))
1787 signal_simple_error ("Invalid value for DIRECTION", direction);
1789 if (dm == 2 && fi > 0x5F)
1791 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1795 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1797 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1800 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1803 return XCHARSET_NAME (obj);
1807 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1808 Return short name of CHARSET.
1812 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1815 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1816 Return long name of CHARSET.
1820 return XCHARSET_LONG_NAME (Fget_charset (charset));
1823 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1824 Return description of CHARSET.
1828 return XCHARSET_DOC_STRING (Fget_charset (charset));
1831 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1832 Return dimension of CHARSET.
1836 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1839 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1840 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1841 Recognized properties are those listed in `make-charset', as well as
1842 'name and 'doc-string.
1848 charset = Fget_charset (charset);
1849 cs = XCHARSET (charset);
1851 CHECK_SYMBOL (prop);
1852 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1853 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1854 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1855 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1856 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1857 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1858 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1859 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1860 Qnil : make_char (CHARSET_FINAL (cs));
1861 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1862 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1863 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1864 if (EQ (prop, Qdirection))
1865 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1866 if (EQ (prop, Qreverse_direction_charset))
1868 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1869 /* #### Is this translation OK? If so, error checking sufficient? */
1870 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1872 signal_simple_error ("Unrecognized charset property name", prop);
1873 return Qnil; /* not reached */
1876 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1877 Return charset identification number of CHARSET.
1881 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1884 /* #### We need to figure out which properties we really want to
1887 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1888 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1890 (charset, ccl_program))
1892 struct ccl_program test_ccl;
1894 charset = Fget_charset (charset);
1895 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1896 signal_simple_error ("Invalid ccl-program", ccl_program);
1897 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1902 invalidate_charset_font_caches (Lisp_Object charset)
1904 /* Invalidate font cache entries for charset on all devices. */
1905 Lisp_Object devcons, concons, hash_table;
1906 DEVICE_LOOP_NO_BREAK (devcons, concons)
1908 struct device *d = XDEVICE (XCAR (devcons));
1909 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1910 if (!UNBOUNDP (hash_table))
1911 Fclrhash (hash_table);
1915 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1916 Set the 'registry property of CHARSET to REGISTRY.
1918 (charset, registry))
1920 charset = Fget_charset (charset);
1921 CHECK_STRING (registry);
1922 XCHARSET_REGISTRY (charset) = registry;
1923 invalidate_charset_font_caches (charset);
1924 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1929 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1930 Return mapping-table of CHARSET.
1934 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1937 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1938 Set mapping-table of CHARSET to TABLE.
1942 struct Lisp_Charset *cs;
1946 charset = Fget_charset (charset);
1947 cs = XCHARSET (charset);
1951 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1952 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1953 CHARSET_DECODING_TABLE(cs) = Qnil;
1956 else if (VECTORP (table))
1958 int ccs_len = CHARSET_BYTE_SIZE (cs);
1959 int ret = decoding_table_check_elements (table,
1960 CHARSET_DIMENSION (cs),
1965 signal_simple_error ("Too big table", table);
1967 signal_simple_error ("Invalid element is found", table);
1969 signal_simple_error ("Something wrong", table);
1971 CHARSET_DECODING_TABLE(cs) = Qnil;
1974 signal_error (Qwrong_type_argument,
1975 list2 (build_translated_string ("vector-or-nil-p"),
1978 byte_offset = CHARSET_BYTE_OFFSET (cs);
1979 switch (CHARSET_DIMENSION (cs))
1982 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1984 Lisp_Object c = XVECTOR_DATA(table)[i];
1987 Fput_char_attribute (c, XCHARSET_NAME (charset),
1988 make_int (i + byte_offset));
1992 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1994 Lisp_Object v = XVECTOR_DATA(table)[i];
2000 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2002 Lisp_Object c = XVECTOR_DATA(v)[j];
2006 (c, XCHARSET_NAME (charset),
2007 make_int ( ( (i + byte_offset) << 8 )
2013 Fput_char_attribute (v, XCHARSET_NAME (charset),
2014 make_int (i + byte_offset));
2023 /************************************************************************/
2024 /* Lisp primitives for working with characters */
2025 /************************************************************************/
2028 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2029 Make a character from CHARSET and code-point CODE.
2030 If DEFINED_ONLY is non-nil, builtin character is not returned.
2031 If corresponding character is not found, nil is returned.
2033 (charset, code, defined_only))
2037 charset = Fget_charset (charset);
2040 if (XCHARSET_GRAPHIC (charset) == 1)
2042 if (NILP (defined_only))
2043 c = DECODE_CHAR (charset, c);
2045 c = DECODE_DEFINED_CHAR (charset, c);
2046 return c >= 0 ? make_char (c) : Qnil;
2049 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2050 Make a builtin character from CHARSET and code-point CODE.
2056 charset = Fget_charset (charset);
2058 if (EQ (charset, Vcharset_latin_viscii))
2060 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2066 (ret = Fget_char_attribute (chr,
2067 Vcharset_latin_viscii_lower,
2070 charset = Vcharset_latin_viscii_lower;
2074 (ret = Fget_char_attribute (chr,
2075 Vcharset_latin_viscii_upper,
2078 charset = Vcharset_latin_viscii_upper;
2085 if (XCHARSET_GRAPHIC (charset) == 1)
2088 c = decode_builtin_char (charset, c);
2089 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2093 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2094 Make a character from CHARSET and octets ARG1 and ARG2.
2095 ARG2 is required only for characters from two-dimensional charsets.
2096 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2097 character s with caron.
2099 (charset, arg1, arg2))
2103 int lowlim, highlim;
2105 charset = Fget_charset (charset);
2106 cs = XCHARSET (charset);
2108 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2109 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2111 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2113 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2114 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2117 /* It is useful (and safe, according to Olivier Galibert) to strip
2118 the 8th bit off ARG1 and ARG2 because it allows programmers to
2119 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2120 Latin 2 code of the character. */
2128 if (a1 < lowlim || a1 > highlim)
2129 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2131 if (CHARSET_DIMENSION (cs) == 1)
2135 ("Charset is of dimension one; second octet must be nil", arg2);
2136 return make_char (MAKE_CHAR (charset, a1, 0));
2145 a2 = XINT (arg2) & 0x7f;
2147 if (a2 < lowlim || a2 > highlim)
2148 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2150 return make_char (MAKE_CHAR (charset, a1, a2));
2153 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2154 Return the character set of CHARACTER.
2158 CHECK_CHAR_COERCE_INT (character);
2160 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2163 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2164 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2165 N defaults to 0 if omitted.
2169 Lisp_Object charset;
2172 CHECK_CHAR_COERCE_INT (character);
2174 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2176 if (NILP (n) || EQ (n, Qzero))
2177 return make_int (octet0);
2178 else if (EQ (n, make_int (1)))
2179 return make_int (octet1);
2181 signal_simple_error ("Octet number must be 0 or 1", n);
2184 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2185 Return list of charset and one or two position-codes of CHARACTER.
2189 /* This function can GC */
2190 struct gcpro gcpro1, gcpro2;
2191 Lisp_Object charset = Qnil;
2192 Lisp_Object rc = Qnil;
2200 GCPRO2 (charset, rc);
2201 CHECK_CHAR_COERCE_INT (character);
2204 code_point = ENCODE_CHAR (XCHAR (character), charset);
2205 dimension = XCHARSET_DIMENSION (charset);
2206 while (dimension > 0)
2208 rc = Fcons (make_int (code_point & 255), rc);
2212 rc = Fcons (XCHARSET_NAME (charset), rc);
2214 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2216 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2218 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2222 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2231 #ifdef ENABLE_COMPOSITE_CHARS
2232 /************************************************************************/
2233 /* composite character functions */
2234 /************************************************************************/
2237 lookup_composite_char (Bufbyte *str, int len)
2239 Lisp_Object lispstr = make_string (str, len);
2240 Lisp_Object ch = Fgethash (lispstr,
2241 Vcomposite_char_string2char_hash_table,
2247 if (composite_char_row_next >= 128)
2248 signal_simple_error ("No more composite chars available", lispstr);
2249 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2250 composite_char_col_next);
2251 Fputhash (make_char (emch), lispstr,
2252 Vcomposite_char_char2string_hash_table);
2253 Fputhash (lispstr, make_char (emch),
2254 Vcomposite_char_string2char_hash_table);
2255 composite_char_col_next++;
2256 if (composite_char_col_next >= 128)
2258 composite_char_col_next = 32;
2259 composite_char_row_next++;
2268 composite_char_string (Emchar ch)
2270 Lisp_Object str = Fgethash (make_char (ch),
2271 Vcomposite_char_char2string_hash_table,
2273 assert (!UNBOUNDP (str));
2277 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2278 Convert a string into a single composite character.
2279 The character is the result of overstriking all the characters in
2284 CHECK_STRING (string);
2285 return make_char (lookup_composite_char (XSTRING_DATA (string),
2286 XSTRING_LENGTH (string)));
2289 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2290 Return a string of the characters comprising a composite character.
2298 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2299 signal_simple_error ("Must be composite char", ch);
2300 return composite_char_string (emch);
2302 #endif /* ENABLE_COMPOSITE_CHARS */
2305 /************************************************************************/
2306 /* initialization */
2307 /************************************************************************/
2310 syms_of_mule_charset (void)
2312 INIT_LRECORD_IMPLEMENTATION (charset);
2314 DEFSUBR (Fcharsetp);
2315 DEFSUBR (Ffind_charset);
2316 DEFSUBR (Fget_charset);
2317 DEFSUBR (Fcharset_list);
2318 DEFSUBR (Fcharset_name);
2319 DEFSUBR (Fmake_charset);
2320 DEFSUBR (Fmake_reverse_direction_charset);
2321 /* DEFSUBR (Freverse_direction_charset); */
2322 DEFSUBR (Fdefine_charset_alias);
2323 DEFSUBR (Fcharset_from_attributes);
2324 DEFSUBR (Fcharset_short_name);
2325 DEFSUBR (Fcharset_long_name);
2326 DEFSUBR (Fcharset_description);
2327 DEFSUBR (Fcharset_dimension);
2328 DEFSUBR (Fcharset_property);
2329 DEFSUBR (Fcharset_id);
2330 DEFSUBR (Fset_charset_ccl_program);
2331 DEFSUBR (Fset_charset_registry);
2333 DEFSUBR (Fcharset_mapping_table);
2334 DEFSUBR (Fset_charset_mapping_table);
2338 DEFSUBR (Fdecode_char);
2339 DEFSUBR (Fdecode_builtin_char);
2341 DEFSUBR (Fmake_char);
2342 DEFSUBR (Fchar_charset);
2343 DEFSUBR (Fchar_octet);
2344 DEFSUBR (Fsplit_char);
2346 #ifdef ENABLE_COMPOSITE_CHARS
2347 DEFSUBR (Fmake_composite_char);
2348 DEFSUBR (Fcomposite_char_string);
2351 defsymbol (&Qcharsetp, "charsetp");
2352 defsymbol (&Qregistry, "registry");
2353 defsymbol (&Qfinal, "final");
2354 defsymbol (&Qgraphic, "graphic");
2355 defsymbol (&Qdirection, "direction");
2356 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2357 defsymbol (&Qshort_name, "short-name");
2358 defsymbol (&Qlong_name, "long-name");
2360 defsymbol (&Ql2r, "l2r");
2361 defsymbol (&Qr2l, "r2l");
2363 /* Charsets, compatible with FSF 20.3
2364 Naming convention is Script-Charset[-Edition] */
2365 defsymbol (&Qascii, "ascii");
2366 defsymbol (&Qcontrol_1, "control-1");
2367 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2368 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2369 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2370 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2371 defsymbol (&Qthai_tis620, "thai-tis620");
2372 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2373 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2374 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2375 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2376 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2377 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2378 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2379 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2380 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2381 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2382 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2383 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2384 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2385 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2386 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2387 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2389 defsymbol (&Qucs, "ucs");
2390 defsymbol (&Qucs_bmp, "ucs-bmp");
2391 defsymbol (&Qucs_smp, "ucs-smp");
2392 defsymbol (&Qucs_sip, "ucs-sip");
2393 defsymbol (&Qucs_cns, "ucs-cns");
2394 defsymbol (&Qucs_jis, "ucs-jis");
2395 defsymbol (&Qucs_ks, "ucs-ks");
2396 defsymbol (&Qucs_big5, "ucs-big5");
2397 defsymbol (&Qlatin_viscii, "latin-viscii");
2398 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2399 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2400 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2401 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2402 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2403 defsymbol (&Qideograph_gt, "ideograph-gt");
2404 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2405 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2406 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2407 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2408 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2409 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2410 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2411 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2412 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2413 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2414 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2415 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2416 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2417 defsymbol (&Qchinese_big5, "chinese-big5");
2418 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2419 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2420 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2421 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2422 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2423 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2424 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2425 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2426 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2427 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2428 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2429 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2430 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2431 defsymbol (&Qchina3_jef, "china3-jef");
2432 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2433 defsymbol (&Qmojikyo, "mojikyo");
2434 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2435 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2436 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2437 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2438 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2439 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2440 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2441 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2442 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2443 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2444 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2445 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2446 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2447 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2448 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2449 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2450 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2451 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2452 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2453 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2454 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2455 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2456 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2458 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2459 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2461 defsymbol (&Qcomposite, "composite");
2465 vars_of_mule_charset (void)
2472 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2473 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2475 /* Table of charsets indexed by leading byte. */
2476 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2477 chlook->charset_by_leading_byte[i] = Qnil;
2480 /* Table of charsets indexed by type/final-byte. */
2481 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2482 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2483 chlook->charset_by_attributes[i][j] = Qnil;
2485 /* Table of charsets indexed by type/final-byte/direction. */
2486 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2487 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2488 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2489 chlook->charset_by_attributes[i][j][k] = Qnil;
2493 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2495 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2496 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2500 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2501 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2502 Leading-code of private TYPE9N charset of column-width 1.
2504 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2508 Vdefault_coded_charset_priority_list = Qnil;
2509 DEFVAR_LISP ("default-coded-charset-priority-list",
2510 &Vdefault_coded_charset_priority_list /*
2511 Default order of preferred coded-character-sets.
2517 complex_vars_of_mule_charset (void)
2519 staticpro (&Vcharset_hash_table);
2520 Vcharset_hash_table =
2521 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2523 /* Predefined character sets. We store them into variables for
2527 staticpro (&Vcharset_ucs);
2529 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2530 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2531 build_string ("UCS"),
2532 build_string ("UCS"),
2533 build_string ("ISO/IEC 10646"),
2535 Qnil, 0, 0xFFFFFFF, 0, 0);
2536 staticpro (&Vcharset_ucs_bmp);
2538 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2539 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2540 build_string ("BMP"),
2541 build_string ("UCS-BMP"),
2542 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2544 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2545 Qnil, 0, 0xFFFF, 0, 0);
2546 staticpro (&Vcharset_ucs_smp);
2548 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2549 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2550 build_string ("SMP"),
2551 build_string ("UCS-SMP"),
2552 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2553 build_string ("UCS00-1"),
2554 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0);
2555 staticpro (&Vcharset_ucs_sip);
2557 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2558 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2559 build_string ("SIP"),
2560 build_string ("UCS-SIP"),
2561 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2562 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2563 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0);
2564 staticpro (&Vcharset_ucs_cns);
2566 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2567 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2568 build_string ("UCS for CNS"),
2569 build_string ("UCS for CNS 11643"),
2570 build_string ("ISO/IEC 10646 for CNS 11643"),
2573 staticpro (&Vcharset_ucs_jis);
2575 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2576 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2577 build_string ("UCS for JIS"),
2578 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2579 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2582 staticpro (&Vcharset_ucs_ks);
2584 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2585 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2586 build_string ("UCS for KS"),
2587 build_string ("UCS for CCS defined by KS"),
2588 build_string ("ISO/IEC 10646 for Korean Standards"),
2591 staticpro (&Vcharset_ucs_big5);
2593 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2594 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2595 build_string ("UCS for Big5"),
2596 build_string ("UCS for Big5"),
2597 build_string ("ISO/IEC 10646 for Big5"),
2601 # define MIN_CHAR_THAI 0
2602 # define MAX_CHAR_THAI 0
2603 /* # define MIN_CHAR_HEBREW 0 */
2604 /* # define MAX_CHAR_HEBREW 0 */
2605 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2606 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2608 staticpro (&Vcharset_ascii);
2610 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2611 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2612 build_string ("ASCII"),
2613 build_string ("ASCII)"),
2614 build_string ("ASCII (ISO646 IRV)"),
2615 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2616 Qnil, 0, 0x7F, 0, 0);
2617 staticpro (&Vcharset_control_1);
2618 Vcharset_control_1 =
2619 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2620 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2621 build_string ("C1"),
2622 build_string ("Control characters"),
2623 build_string ("Control characters 128-191"),
2625 Qnil, 0x80, 0x9F, 0, 0);
2626 staticpro (&Vcharset_latin_iso8859_1);
2627 Vcharset_latin_iso8859_1 =
2628 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2629 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2630 build_string ("Latin-1"),
2631 build_string ("ISO8859-1 (Latin-1)"),
2632 build_string ("ISO8859-1 (Latin-1)"),
2633 build_string ("iso8859-1"),
2634 Qnil, 0xA0, 0xFF, 0, 32);
2635 staticpro (&Vcharset_latin_iso8859_2);
2636 Vcharset_latin_iso8859_2 =
2637 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2638 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2639 build_string ("Latin-2"),
2640 build_string ("ISO8859-2 (Latin-2)"),
2641 build_string ("ISO8859-2 (Latin-2)"),
2642 build_string ("iso8859-2"),
2644 staticpro (&Vcharset_latin_iso8859_3);
2645 Vcharset_latin_iso8859_3 =
2646 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2647 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2648 build_string ("Latin-3"),
2649 build_string ("ISO8859-3 (Latin-3)"),
2650 build_string ("ISO8859-3 (Latin-3)"),
2651 build_string ("iso8859-3"),
2653 staticpro (&Vcharset_latin_iso8859_4);
2654 Vcharset_latin_iso8859_4 =
2655 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2656 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2657 build_string ("Latin-4"),
2658 build_string ("ISO8859-4 (Latin-4)"),
2659 build_string ("ISO8859-4 (Latin-4)"),
2660 build_string ("iso8859-4"),
2662 staticpro (&Vcharset_thai_tis620);
2663 Vcharset_thai_tis620 =
2664 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2665 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2666 build_string ("TIS620"),
2667 build_string ("TIS620 (Thai)"),
2668 build_string ("TIS620.2529 (Thai)"),
2669 build_string ("tis620"),
2670 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2671 staticpro (&Vcharset_greek_iso8859_7);
2672 Vcharset_greek_iso8859_7 =
2673 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2674 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2675 build_string ("ISO8859-7"),
2676 build_string ("ISO8859-7 (Greek)"),
2677 build_string ("ISO8859-7 (Greek)"),
2678 build_string ("iso8859-7"),
2680 staticpro (&Vcharset_arabic_iso8859_6);
2681 Vcharset_arabic_iso8859_6 =
2682 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2683 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2684 build_string ("ISO8859-6"),
2685 build_string ("ISO8859-6 (Arabic)"),
2686 build_string ("ISO8859-6 (Arabic)"),
2687 build_string ("iso8859-6"),
2689 staticpro (&Vcharset_hebrew_iso8859_8);
2690 Vcharset_hebrew_iso8859_8 =
2691 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2692 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2693 build_string ("ISO8859-8"),
2694 build_string ("ISO8859-8 (Hebrew)"),
2695 build_string ("ISO8859-8 (Hebrew)"),
2696 build_string ("iso8859-8"),
2698 0 /* MIN_CHAR_HEBREW */,
2699 0 /* MAX_CHAR_HEBREW */, 0, 32);
2700 staticpro (&Vcharset_katakana_jisx0201);
2701 Vcharset_katakana_jisx0201 =
2702 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2703 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2704 build_string ("JISX0201 Kana"),
2705 build_string ("JISX0201.1976 (Japanese Kana)"),
2706 build_string ("JISX0201.1976 Japanese Kana"),
2707 build_string ("jisx0201\\.1976"),
2709 staticpro (&Vcharset_latin_jisx0201);
2710 Vcharset_latin_jisx0201 =
2711 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2712 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2713 build_string ("JISX0201 Roman"),
2714 build_string ("JISX0201.1976 (Japanese Roman)"),
2715 build_string ("JISX0201.1976 Japanese Roman"),
2716 build_string ("jisx0201\\.1976"),
2718 staticpro (&Vcharset_cyrillic_iso8859_5);
2719 Vcharset_cyrillic_iso8859_5 =
2720 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2721 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2722 build_string ("ISO8859-5"),
2723 build_string ("ISO8859-5 (Cyrillic)"),
2724 build_string ("ISO8859-5 (Cyrillic)"),
2725 build_string ("iso8859-5"),
2727 staticpro (&Vcharset_latin_iso8859_9);
2728 Vcharset_latin_iso8859_9 =
2729 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2730 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2731 build_string ("Latin-5"),
2732 build_string ("ISO8859-9 (Latin-5)"),
2733 build_string ("ISO8859-9 (Latin-5)"),
2734 build_string ("iso8859-9"),
2736 staticpro (&Vcharset_japanese_jisx0208_1978);
2737 Vcharset_japanese_jisx0208_1978 =
2738 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2739 Qjapanese_jisx0208_1978, 94, 2,
2740 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2741 build_string ("JIS X0208:1978"),
2742 build_string ("JIS X0208:1978 (Japanese)"),
2744 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2745 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2747 staticpro (&Vcharset_chinese_gb2312);
2748 Vcharset_chinese_gb2312 =
2749 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2750 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("GB2312"),
2752 build_string ("GB2312)"),
2753 build_string ("GB2312 Chinese simplified"),
2754 build_string ("gb2312"),
2756 staticpro (&Vcharset_chinese_gb12345);
2757 Vcharset_chinese_gb12345 =
2758 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2759 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2760 build_string ("G1"),
2761 build_string ("GB 12345)"),
2762 build_string ("GB 12345-1990"),
2763 build_string ("GB12345\\(\\.1990\\)?-0"),
2765 staticpro (&Vcharset_japanese_jisx0208);
2766 Vcharset_japanese_jisx0208 =
2767 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2768 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2769 build_string ("JISX0208"),
2770 build_string ("JIS X0208:1983 (Japanese)"),
2771 build_string ("JIS X0208:1983 Japanese Kanji"),
2772 build_string ("jisx0208\\.1983"),
2775 staticpro (&Vcharset_japanese_jisx0208_1990);
2776 Vcharset_japanese_jisx0208_1990 =
2777 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2778 Qjapanese_jisx0208_1990, 94, 2,
2779 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2780 build_string ("JISX0208-1990"),
2781 build_string ("JIS X0208:1990 (Japanese)"),
2782 build_string ("JIS X0208:1990 Japanese Kanji"),
2783 build_string ("jisx0208\\.1990"),
2785 MIN_CHAR_JIS_X0208_1990,
2786 MAX_CHAR_JIS_X0208_1990, 0, 33);
2788 staticpro (&Vcharset_korean_ksc5601);
2789 Vcharset_korean_ksc5601 =
2790 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2791 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2792 build_string ("KSC5601"),
2793 build_string ("KSC5601 (Korean"),
2794 build_string ("KSC5601 Korean Hangul and Hanja"),
2795 build_string ("ksc5601"),
2797 staticpro (&Vcharset_japanese_jisx0212);
2798 Vcharset_japanese_jisx0212 =
2799 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2800 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2801 build_string ("JISX0212"),
2802 build_string ("JISX0212 (Japanese)"),
2803 build_string ("JISX0212 Japanese Supplement"),
2804 build_string ("jisx0212"),
2807 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2808 staticpro (&Vcharset_chinese_cns11643_1);
2809 Vcharset_chinese_cns11643_1 =
2810 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2811 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2812 build_string ("CNS11643-1"),
2813 build_string ("CNS11643-1 (Chinese traditional)"),
2815 ("CNS 11643 Plane 1 Chinese traditional"),
2816 build_string (CHINESE_CNS_PLANE_RE("1")),
2818 staticpro (&Vcharset_chinese_cns11643_2);
2819 Vcharset_chinese_cns11643_2 =
2820 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2821 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("CNS11643-2"),
2823 build_string ("CNS11643-2 (Chinese traditional)"),
2825 ("CNS 11643 Plane 2 Chinese traditional"),
2826 build_string (CHINESE_CNS_PLANE_RE("2")),
2829 staticpro (&Vcharset_latin_tcvn5712);
2830 Vcharset_latin_tcvn5712 =
2831 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2832 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2833 build_string ("TCVN 5712"),
2834 build_string ("TCVN 5712 (VSCII-2)"),
2835 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2836 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2838 staticpro (&Vcharset_latin_viscii_lower);
2839 Vcharset_latin_viscii_lower =
2840 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2841 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2842 build_string ("VISCII lower"),
2843 build_string ("VISCII lower (Vietnamese)"),
2844 build_string ("VISCII lower (Vietnamese)"),
2845 build_string ("MULEVISCII-LOWER"),
2847 staticpro (&Vcharset_latin_viscii_upper);
2848 Vcharset_latin_viscii_upper =
2849 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2850 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2851 build_string ("VISCII upper"),
2852 build_string ("VISCII upper (Vietnamese)"),
2853 build_string ("VISCII upper (Vietnamese)"),
2854 build_string ("MULEVISCII-UPPER"),
2856 staticpro (&Vcharset_latin_viscii);
2857 Vcharset_latin_viscii =
2858 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2859 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2860 build_string ("VISCII"),
2861 build_string ("VISCII 1.1 (Vietnamese)"),
2862 build_string ("VISCII 1.1 (Vietnamese)"),
2863 build_string ("VISCII1\\.1"),
2865 staticpro (&Vcharset_chinese_big5);
2866 Vcharset_chinese_big5 =
2867 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2868 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2869 build_string ("Big5"),
2870 build_string ("Big5"),
2871 build_string ("Big5 Chinese traditional"),
2872 build_string ("big5"),
2874 0 /* MIN_CHAR_BIG5_CDP */,
2875 0 /* MAX_CHAR_BIG5_CDP */, 0, 0);
2876 staticpro (&Vcharset_chinese_big5_cdp);
2877 Vcharset_chinese_big5_cdp =
2878 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2879 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2880 build_string ("Big5-CDP"),
2881 build_string ("Big5 + CDP extension"),
2882 build_string ("Big5 with CDP extension"),
2883 build_string ("big5\\.cdp-0"),
2884 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2885 #define DEF_HANZIKU(n) \
2886 staticpro (&Vcharset_ideograph_hanziku_##n); \
2887 Vcharset_ideograph_hanziku_##n = \
2888 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2889 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2890 build_string ("HZK-"#n), \
2891 build_string ("HANZIKU-"#n), \
2892 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2894 ("hanziku-"#n"$"), \
2895 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2908 staticpro (&Vcharset_china3_jef);
2909 Vcharset_china3_jef =
2910 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2911 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2912 build_string ("JC3"),
2913 build_string ("JEF + CHINA3"),
2914 build_string ("JEF + CHINA3 private characters"),
2915 build_string ("china3jef-0"),
2916 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2917 staticpro (&Vcharset_ideograph_cbeta);
2918 Vcharset_ideograph_cbeta =
2919 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2920 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2921 build_string ("CB"),
2922 build_string ("CBETA"),
2923 build_string ("CBETA private characters"),
2924 build_string ("cbeta-0"),
2925 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2926 staticpro (&Vcharset_ideograph_gt);
2927 Vcharset_ideograph_gt =
2928 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2929 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2930 build_string ("GT"),
2931 build_string ("GT"),
2932 build_string ("GT"),
2934 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2935 #define DEF_GT_PJ(n) \
2936 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2937 Vcharset_ideograph_gt_pj_##n = \
2938 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2939 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2940 build_string ("GT-PJ-"#n), \
2941 build_string ("GT (pseudo JIS encoding) part "#n), \
2942 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2944 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2958 staticpro (&Vcharset_ideograph_daikanwa_2);
2959 Vcharset_ideograph_daikanwa_2 =
2960 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2961 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2962 build_string ("Daikanwa Rev."),
2963 build_string ("Morohashi's Daikanwa Rev."),
2965 ("Daikanwa dictionary (revised version)"),
2966 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
2968 staticpro (&Vcharset_ideograph_daikanwa);
2969 Vcharset_ideograph_daikanwa =
2970 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
2971 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2972 build_string ("Daikanwa"),
2973 build_string ("Morohashi's Daikanwa Rev.2"),
2975 ("Daikanwa dictionary (second revised version)"),
2976 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
2977 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2978 staticpro (&Vcharset_mojikyo);
2980 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2981 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2982 build_string ("Mojikyo"),
2983 build_string ("Mojikyo"),
2984 build_string ("Konjaku-Mojikyo"),
2986 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2987 staticpro (&Vcharset_mojikyo_2022_1);
2988 Vcharset_mojikyo_2022_1 =
2989 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2990 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2991 build_string ("Mojikyo-2022-1"),
2992 build_string ("Mojikyo ISO-2022 Part 1"),
2993 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2997 #define DEF_MOJIKYO_PJ(n) \
2998 staticpro (&Vcharset_mojikyo_pj_##n); \
2999 Vcharset_mojikyo_pj_##n = \
3000 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
3001 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3002 build_string ("Mojikyo-PJ-"#n), \
3003 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
3005 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
3007 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
3019 DEF_MOJIKYO_PJ (10);
3020 DEF_MOJIKYO_PJ (11);
3021 DEF_MOJIKYO_PJ (12);
3022 DEF_MOJIKYO_PJ (13);
3023 DEF_MOJIKYO_PJ (14);
3024 DEF_MOJIKYO_PJ (15);
3025 DEF_MOJIKYO_PJ (16);
3026 DEF_MOJIKYO_PJ (17);
3027 DEF_MOJIKYO_PJ (18);
3028 DEF_MOJIKYO_PJ (19);
3029 DEF_MOJIKYO_PJ (20);
3030 DEF_MOJIKYO_PJ (21);
3032 staticpro (&Vcharset_ethiopic_ucs);
3033 Vcharset_ethiopic_ucs =
3034 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3035 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3036 build_string ("Ethiopic (UCS)"),
3037 build_string ("Ethiopic (UCS)"),
3038 build_string ("Ethiopic of UCS"),
3039 build_string ("Ethiopic-Unicode"),
3040 Qnil, 0x1200, 0x137F, 0x1200, 0);
3042 staticpro (&Vcharset_chinese_big5_1);
3043 Vcharset_chinese_big5_1 =
3044 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3045 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3046 build_string ("Big5"),
3047 build_string ("Big5 (Level-1)"),
3049 ("Big5 Level-1 Chinese traditional"),
3050 build_string ("big5"),
3052 staticpro (&Vcharset_chinese_big5_2);
3053 Vcharset_chinese_big5_2 =
3054 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3055 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3056 build_string ("Big5"),
3057 build_string ("Big5 (Level-2)"),
3059 ("Big5 Level-2 Chinese traditional"),
3060 build_string ("big5"),
3063 #ifdef ENABLE_COMPOSITE_CHARS
3064 /* #### For simplicity, we put composite chars into a 96x96 charset.
3065 This is going to lead to problems because you can run out of
3066 room, esp. as we don't yet recycle numbers. */
3067 staticpro (&Vcharset_composite);
3068 Vcharset_composite =
3069 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3070 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3071 build_string ("Composite"),
3072 build_string ("Composite characters"),
3073 build_string ("Composite characters"),
3076 /* #### not dumped properly */
3077 composite_char_row_next = 32;
3078 composite_char_col_next = 32;
3080 Vcomposite_char_string2char_hash_table =
3081 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3082 Vcomposite_char_char2string_hash_table =
3083 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3084 staticpro (&Vcomposite_char_string2char_hash_table);
3085 staticpro (&Vcomposite_char_char2string_hash_table);
3086 #endif /* ENABLE_COMPOSITE_CHARS */