/* Functions to handle multilingual characters.
   Copyright (C) 1992, 1995 Free Software Foundation, Inc.
   Copyright (C) 1995 Sun Microsystems, Inc.
   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Rewritten by Ben Wing <ben@xemacs.org>. */

/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
/* One global Lisp_Object per pre-defined charset.  The variables are only
   defined here; nothing in this span assigns to them.  */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
/* NOTE(review): the embedded numbering skips a line before this group and
   again before the two big5_1/big5_2 variables below; those lines are
   presumably preprocessor conditionals that are not visible here --
   confirm against the full file.  */
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa;
108 Lisp_Object Vcharset_mojikyo;
109 Lisp_Object Vcharset_mojikyo_2022_1;
110 Lisp_Object Vcharset_mojikyo_pj_1;
111 Lisp_Object Vcharset_mojikyo_pj_2;
112 Lisp_Object Vcharset_mojikyo_pj_3;
113 Lisp_Object Vcharset_mojikyo_pj_4;
114 Lisp_Object Vcharset_mojikyo_pj_5;
115 Lisp_Object Vcharset_mojikyo_pj_6;
116 Lisp_Object Vcharset_mojikyo_pj_7;
117 Lisp_Object Vcharset_mojikyo_pj_8;
118 Lisp_Object Vcharset_mojikyo_pj_9;
119 Lisp_Object Vcharset_mojikyo_pj_10;
120 Lisp_Object Vcharset_mojikyo_pj_11;
121 Lisp_Object Vcharset_mojikyo_pj_12;
122 Lisp_Object Vcharset_mojikyo_pj_13;
123 Lisp_Object Vcharset_mojikyo_pj_14;
124 Lisp_Object Vcharset_mojikyo_pj_15;
125 Lisp_Object Vcharset_mojikyo_pj_16;
126 Lisp_Object Vcharset_mojikyo_pj_17;
127 Lisp_Object Vcharset_mojikyo_pj_18;
128 Lisp_Object Vcharset_mojikyo_pj_19;
129 Lisp_Object Vcharset_mojikyo_pj_20;
130 Lisp_Object Vcharset_mojikyo_pj_21;
131 Lisp_Object Vcharset_ethiopic_ucs;
133 Lisp_Object Vcharset_chinese_big5_1;
134 Lisp_Object Vcharset_chinese_big5_2;
/* The composite charset variable sits inside the ENABLE_COMPOSITE_CHARS
   conditional, whose matching #endif appears further down.  */
136 #ifdef ENABLE_COMPOSITE_CHARS
137 Lisp_Object Vcharset_composite;
/* Hash tables for composite chars.  One maps string representing
   composed chars to their equivalent chars; one goes the
   other way. */
/* The two composite-character lookup tables; per their names, one is keyed
   char -> string and the other string -> char.  The char2string table is
   the one consulted by non_ascii_valid_char_p via Fgethash.  */
142 Lisp_Object Vcomposite_char_char2string_hash_table;
143 Lisp_Object Vcomposite_char_string2char_hash_table;
/* File-local counters.  NOTE(review): the names suggest the row/column to
   hand out to the next composite character; the code that updates them is
   not in this view -- confirm.  */
145 static int composite_char_row_next;
146 static int composite_char_col_next;
148 #endif /* ENABLE_COMPOSITE_CHARS */
/* Pointer to the charset lookup structure.  Later code in this file reads
   and writes chlook->charset_by_leading_byte, chlook->charset_by_attributes
   and the chlook->next_allocated_* counters through it.  */
150 struct charset_lookup *chlook;
/* Description table for struct charset_lookup.  The only visible entry
   describes the charset_by_leading_byte member as an array of Lisp objects.
   NOTE(review): the rest of this entry, any further entries and the closing
   brace are not visible in this view.  */
152 static const struct lrecord_description charset_lookup_description_1[] = {
153 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Struct description pairing the size of struct charset_lookup with the
   table above.  NOTE(review): closing brace not visible in this view.  */
162 static const struct struct_description charset_lookup_description = {
163 sizeof (struct charset_lookup),
164 charset_lookup_description_1
168 /* Table of number of bytes in the string representation of a character
169 indexed by the first byte of that representation.
171 rep_bytes_by_first_byte(c) is more efficient than the equivalent
172 canonical computation:
174 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* NOTE(review): the array is declared with 0xA0 (160) elements, but the
   rows visible below supply 176 values (8 rows of ones, two rows under the
   0x80 - 0x8f heading, one row for 0x90 - 0x9f).  The two 0x80 - 0x8f rows
   are therefore presumably alternatives selected by a preprocessor
   conditional that is not visible in this view -- confirm against the full
   file.  The closing brace of the initializer is not visible either.  */
176 const Bytecount rep_bytes_by_first_byte[0xA0] =
177 { /* 0x00 - 0x7f are for straight ASCII */
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
185 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
186 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* First variant: 2 bytes for 0x80 - 0x8e, 3 bytes for 0x8f.  */
188 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
/* Second variant: 2 bytes for every first byte in 0x80 - 0x8f.  */
190 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
192 /* 0x90 - 0x9d are for Dimension-2 official charsets */
193 /* 0x9e is for Dimension-1 private charsets */
194 /* 0x9f is for Dimension-2 private charsets */
195 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* CHARSET_BYTE_SIZE (cs): per-level size used for charset CS.  Its Lisp
   wrapper below supplies the length of decoding-table vectors in
   put_char_ccs_code_point.
   Returns CHARSET_CHARS (cs), except that a 94-character set whose byte
   offset is not 33 yields 128 - CHARSET_BYTE_OFFSET (cs) instead.
   NOTE(review): the return-type line, braces and any `else' between the
   two returns are not visible in this view of the definition.  */
201 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
203 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
205 /* ad-hoc method for `ascii' */
206 if ((CHARSET_CHARS (cs) == 94) &&
207 (CHARSET_BYTE_OFFSET (cs) != 33))
208 return 128 - CHARSET_BYTE_OFFSET (cs);
210 return CHARSET_CHARS (cs);
/* Same computation for a charset passed as a Lisp_Object.  */
213 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the shape of decoding-table vector V.
   V's length is compared against CCS_LEN, then every element is visited;
   an element that is neither nil nor a character is, in the visible call,
   checked recursively with DIM - 1.
   NOTE(review): the return statements (and hence the meaning of the int
   result), the declaration of `i', and the test guarding the recursive
   call are not visible in this view.  */
215 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
217 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
221 if (XVECTOR_LENGTH (v) > ccs_len)
224 for (i = 0; i < XVECTOR_LENGTH (v); i++)
226 Lisp_Object c = XVECTOR_DATA(v)[i];
228 if (!NILP (c) && !CHARP (c))
232 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear the decoding-table slot addressed by CODE_POINT.
   The index into vector V is byte number DIM of CODE_POINT (shifted down
   by 8 * DIM and masked with 255) minus BYTE_OFFSET; the slot finally
   addressed is set to Qnil.
   NOTE(review): the surrounding loop, the descent into nested vectors and
   any guard on `i' are not visible in this view; nothing visible
   range-checks `i' before it indexes the vector.  The first line below is
   the start of the prototype, the second the start of the definition.  */
244 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
247 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
257 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
258 nv = XVECTOR_DATA(v)[i];
264 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER in decoding table V at the position given by CODE_POINT.
   The index at each level is byte number DIM of CODE_POINT minus
   BYTE_OFFSET.  A missing intermediate level is created with
   make_older_vector, using the length of the top-level vector (ccs_len),
   and the final slot receives CHARACTER.
   NOTE(review): the loop and the conditions around the sub-vector creation
   are not visible in this view; nothing visible range-checks `i'.  */
268 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
269 int code_point, Lisp_Object character);
271 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
272 int code_point, Lisp_Object character)
276 int ccs_len = XVECTOR_LENGTH (v);
281 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
282 nv = XVECTOR_DATA(v)[i];
286 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
292 XVECTOR_DATA(v)[i] = character;
/* Record VALUE as the code point of CHARACTER in coded charset CCS and
   update CCS's decoding table accordingly.
   VALUE is accepted in two forms: a list of bytes (marked obsolete below)
   or an integer.  Anything else signals "Invalid value for coded-charset".
   NOTE(review): the return type, return value, braces and several
   conditions are not visible in this view of the definition.  */
296 put_char_ccs_code_point (Lisp_Object character,
297 Lisp_Object ccs, Lisp_Object value)
/* The work below is guarded by: CCS is not named `ucs', or CHARACTER's
   code differs from VALUE (a middle clause of this condition is not
   visible).  */
299 if (!EQ (XCHARSET_NAME (ccs), Qucs)
301 || (XCHAR (character) != XINT (value)))
303 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
304 int dim = XCHARSET_DIMENSION (ccs);
305 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
306 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
310 { /* obsolete representation: value must be a list of bytes */
311 Lisp_Object ret = Fcar (value);
315 signal_simple_error ("Invalid value for coded-charset", value);
316 code_point = XINT (ret);
317 if (XCHARSET_GRAPHIC (ccs) == 1)
325 signal_simple_error ("Invalid value for coded-charset",
329 signal_simple_error ("Invalid value for coded-charset",
332 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Each further byte J is appended as the next lower 8 bits.  */
334 code_point = (code_point << 8) | j;
/* From here on VALUE is the integer form of the code point.  */
337 value = make_int (code_point);
339 else if (INTP (value))
341 code_point = XINT (value);
342 if (XCHARSET_GRAPHIC (ccs) == 1)
/* For a graphic-1 charset the top bit of every byte is cleared.  */
344 code_point &= 0x7F7F7F7F;
345 value = make_int (code_point);
349 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point CHARACTER currently has in CCS and drop that
   entry from the decoding table (the guard on cpos is not visible).  */
353 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
356 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
/* Install a fresh top-level vector as CCS's decoding table (the condition
   under which this happens is not visible).  */
361 XCHARSET_DECODING_TABLE (ccs)
362 = v = make_older_vector (ccs_len, Qnil);
365 decoding_table_put_char (v, dim, byte_offset, code_point, character);
/* Remove CHARACTER's mapping in coded charset CCS from both of CCS's
   tables: when the decoding table is a vector, the entry at CHARACTER's
   current code point is cleared; when the encoding table is a char table,
   CHARACTER's entry is set to Qnil.
   NOTE(review): the return type/value, braces, the guard on cpos and the
   last argument of the decoding_table_remove_char call are not visible in
   this view.  */
371 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
373 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
374 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
376 if (VECTORP (decoding_table))
378 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
382 decoding_table_remove_char (decoding_table,
383 XCHARSET_DIMENSION (ccs),
384 XCHARSET_BYTE_OFFSET (ccs),
388 if (CHAR_TABLEP (encoding_table))
390 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
/* File-level globals: a leading-code integer, Lisp symbol objects, and
   the table of defined charsets.  */
398 int leading_code_private_11;
401 Lisp_Object Qcharsetp;
403 /* Qdoc_string, Qdimension, Qchars defined in general.c */
404 Lisp_Object Qregistry, Qfinal, Qgraphic;
405 Lisp_Object Qdirection;
406 Lisp_Object Qreverse_direction_charset;
407 Lisp_Object Qleading_byte;
408 Lisp_Object Qshort_name, Qlong_name;
/* NOTE(review): the identifiers below are items of a comma-separated
   declarator list whose opening `Lisp_Object' line, remaining items and
   terminating item are not visible in this view.  */
424 Qjapanese_jisx0208_1978,
428 Qjapanese_jisx0208_1990,
446 Qvietnamese_viscii_lower,
447 Qvietnamese_viscii_upper,
450 Qideograph_hanziku_1,
451 Qideograph_hanziku_2,
452 Qideograph_hanziku_3,
453 Qideograph_hanziku_4,
454 Qideograph_hanziku_5,
455 Qideograph_hanziku_6,
456 Qideograph_hanziku_7,
457 Qideograph_hanziku_8,
458 Qideograph_hanziku_9,
459 Qideograph_hanziku_10,
460 Qideograph_hanziku_11,
461 Qideograph_hanziku_12,
506 Lisp_Object Ql2r, Qr2l;
/* Hash table of charsets: make_charset stores each charset under its name
   with Fputhash, and Ffind_charset looks names up with Fgethash.  */
508 Lisp_Object Vcharset_hash_table;
510 /* Composite characters are characters constructed by overstriking two
511 or more regular characters.
513 1) The old Mule implementation involves storing composite characters
514 in a buffer as a tag followed by all of the actual characters
515 used to make up the composite character. I think this is a bad
516 idea; it greatly complicates code that wants to handle strings
517 one character at a time because it has to deal with the possibility
518 of great big ungainly characters. It's much more reasonable to
519 simply store an index into a table of composite characters.
521 2) The current implementation only allows for 16,384 separate
522 composite characters over the lifetime of the XEmacs process.
523 This could become a potential problem if the user
524 edited lots of different files that use composite characters.
525 Due to FSF bogosity, increasing the number of allowable
526 composite characters under Mule would decrease the number
527 of possible faces that can exist. Mule already has shrunk
528 this to 2048, and further shrinkage would become uncomfortable.
529 No such problems exist in XEmacs.
531 Composite characters could be represented as 0x80 C1 C2 C3,
532 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
533 for slightly under 2^20 (one million) composite characters
534 over the XEmacs process lifetime, and you only need to
535 increase the size of a Mule character from 19 to 21 bits.
536 Or you could use 0x80 C1 C2 C3 C4, allowing for about
537 85 million (slightly over 2^26) composite characters. */
540 /************************************************************************/
541 /* Basic Emchar functions */
542 /************************************************************************/
/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
   string in STR.  Returns the number of bytes stored.
   Do not call this directly.  Use the macro set_charptr_emchar() instead.  */
/* Encode character C into the buffer at STR.
   The visible range tests (<= 0x7ff, <= 0xffff, <= 0x1fffff, <= 0x3ffffff,
   then a final case) write 2, 3, 4, 5 and 6 bytes respectively: a lead
   byte holding the top bits OR-ed with 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc,
   followed by bytes of the form 0x80 | (six bits).  That is the UTF-8 bit
   layout in its extended form of up to 6 bytes.
   NOTE(review): the return type, braces, the declaration of `p', the first
   (shortest) case, the final `else' and the return statement are not
   visible in this view.  */
550 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
565 else if ( c <= 0x7ff )
567 *p++ = (c >> 6) | 0xc0;
568 *p++ = (c & 0x3f) | 0x80;
570 else if ( c <= 0xffff )
572 *p++ = (c >> 12) | 0xe0;
573 *p++ = ((c >> 6) & 0x3f) | 0x80;
574 *p++ = (c & 0x3f) | 0x80;
576 else if ( c <= 0x1fffff )
578 *p++ = (c >> 18) | 0xf0;
579 *p++ = ((c >> 12) & 0x3f) | 0x80;
580 *p++ = ((c >> 6) & 0x3f) | 0x80;
581 *p++ = (c & 0x3f) | 0x80;
583 else if ( c <= 0x3ffffff )
585 *p++ = (c >> 24) | 0xf8;
586 *p++ = ((c >> 18) & 0x3f) | 0x80;
587 *p++ = ((c >> 12) & 0x3f) | 0x80;
588 *p++ = ((c >> 6) & 0x3f) | 0x80;
589 *p++ = (c & 0x3f) | 0x80;
593 *p++ = (c >> 30) | 0xfc;
594 *p++ = ((c >> 24) & 0x3f) | 0x80;
595 *p++ = ((c >> 18) & 0x3f) | 0x80;
596 *p++ = ((c >> 12) & 0x3f) | 0x80;
597 *p++ = ((c >> 6) & 0x3f) | 0x80;
598 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte based encoding: C is split into its charset and position
   codes, and a private leading byte is preceded by an extra prefix byte.
   NOTE(review): this is presumably the alternative implementation for
   builds without the multi-byte scheme above; the preprocessor conditional
   choosing between them is not visible -- confirm.  */
601 BREAKUP_CHAR (c, charset, c1, c2);
602 lb = CHAR_LEADING_BYTE (c);
603 if (LEADING_BYTE_PRIVATE_P (lb))
604 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
606 if (EQ (charset, Vcharset_control_1))
615 /* Return the first character from a Mule-encoded string in STR,
616 assuming it's non-ASCII. Do not call this directly.
617 Use the macro charptr_emchar() instead. */
/* NOTE(review): the return type, braces, local declarations and the body
   of each branch are not visible in this view.  */
620 non_ascii_charptr_emchar (const Bufbyte *str)
/* Classify the first byte B by its high bits (thresholds 0xf8, 0xf0, 0xe0,
   0xc0); what each branch sets is not visible here.  */
633 else if ( b >= 0xf8 )
638 else if ( b >= 0xf0 )
643 else if ( b >= 0xe0 )
648 else if ( b >= 0xc0 )
/* Fold LEN further bytes into CH, six bits at a time.  */
658 for( ; len > 0; len-- )
661 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte based decoding.  A control-1 leading byte yields the next
   byte minus 0x20; otherwise the charset is found from the leading byte,
   a second position byte is involved when the charset's dimension is 2,
   and the result is built with MAKE_CHAR.
   NOTE(review): presumably the alternative implementation for builds
   without the scheme above -- the conditional is not visible; confirm.  */
665 Bufbyte i0 = *str, i1, i2 = 0;
668 if (i0 == LEADING_BYTE_CONTROL_1)
669 return (Emchar) (*++str - 0x20);
671 if (LEADING_BYTE_PREFIX_P (i0))
676 charset = CHARSET_BY_LEADING_BYTE (i0);
677 if (XCHARSET_DIMENSION (charset) == 2)
680 return MAKE_CHAR (charset, i1, i2);
684 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
685 Do not call this directly. Use the macro valid_char_p() instead. */
/* NOTE(review): the return type, braces, local declarations, the tests
   that select between the two cases below and most return statements are
   not visible in this view.  */
689 non_ascii_valid_char_p (Emchar ch)
693 /* Must have only lowest 19 bits set */
/* Split CH into its three fields.  */
697 f1 = CHAR_FIELD1 (ch);
698 f2 = CHAR_FIELD2 (ch);
699 f3 = CHAR_FIELD3 (ch);
/* Case using f2 as the charset field: f2 must lie in the official or the
   private field-2 range.  */
705 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
706 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
707 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position codes other than 0x20 and 0x7F outside the private range take
   this branch (its body is not visible).  */
712 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
713 f2 <= MAX_CHAR_FIELD2_PRIVATE))
/* NOTE(review): the next three lines read as the text of a comment whose
   delimiters are not visible in this view.  */
717 NOTE: This takes advantage of the fact that
718 FIELD2_TO_OFFICIAL_LEADING_BYTE and
719 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
721 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
722 if (EQ (charset, Qnil))
/* Reached for the 0x20/0x7F position codes (or private charsets): valid
   only when the charset has 96 characters per dimension.  */
724 return (XCHARSET_CHARS (charset) == 96);
/* Case using f1 as the charset field: the same range test against the
   field-1 limits, and both position codes must be at least 0x20.  */
730 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
731 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
732 f1 > MAX_CHAR_FIELD1_PRIVATE)
734 if (f2 < 0x20 || f3 < 0x20)
737 #ifdef ENABLE_COMPOSITE_CHARS
/* A character with the composite leading byte is looked up in the
   char -> string hash table.  */
738 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
740 if (UNBOUNDP (Fgethash (make_int (ch),
741 Vcomposite_char_char2string_hash_table,
746 #endif /* ENABLE_COMPOSITE_CHARS */
748 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
749 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private field-1 values use different offsets to reach the
   leading byte.  */
752 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
754 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
757 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
759 if (EQ (charset, Qnil))
761 return (XCHARSET_CHARS (charset) == 96);
767 /************************************************************************/
768 /* Basic string functions */
769 /************************************************************************/
771 /* Copy the character pointed to by SRC into DST. Do not call this
772 directly. Use the macro charptr_copy_char() instead.
773 Return the number of bytes copied. */
/* The byte count comes from the first byte of SRC via
   REP_BYTES_BY_FIRST_BYTE; the loop then advances SRC and DST together
   that many times.
   NOTE(review): the return type, braces, the declaration of `i', the loop
   body and the return statement are not visible in this view.  */
776 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
778 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
780 for (i = bytes; i; i--, dst++, src++)
786 /************************************************************************/
787 /* streams of Emchars */
788 /************************************************************************/
790 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
791 The functions below are not meant to be called directly; use
792 the macros in insdel.h. */
/* Finish reading one character from STREAM whose first byte CH has already
   been obtained by the caller.  The remaining bytes (count taken from
   REP_BYTES_BY_FIRST_BYTE (ch) - 1) are read with Lstream_getc into a
   local buffer, which is then decoded with charptr_emchar.
   NOTE(review): the return type, braces and the declaration of `bytes' are
   not visible in this view.  */
795 Lstream_get_emchar_1 (Lstream *stream, int ch)
797 Bufbyte str[MAX_EMCHAR_LEN];
798 Bufbyte *strptr = str;
801 str[0] = (Bufbyte) ch;
803 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
805 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc in the middle of a character is
   caught only by this checking assertion.  */
806 bufpos_checking_assert (c >= 0);
807 *++strptr = (Bufbyte) c;
809 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, then pass the encoded bytes to Lstream_write and
   return that call's result.
   NOTE(review): the return type and braces are not visible in this
   view.  */
813 Lstream_fput_emchar (Lstream *stream, Emchar ch)
815 Bufbyte str[MAX_EMCHAR_LEN];
816 Bytecount len = set_charptr_emchar (str, ch);
817 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer with
   set_charptr_emchar and hand the encoded bytes to Lstream_unread.
   NOTE(review): the return type and braces are not visible in this
   view.  */
821 Lstream_funget_emchar (Lstream *stream, Emchar ch)
823 Bufbyte str[MAX_EMCHAR_LEN];
824 Bytecount len = set_charptr_emchar (str, ch);
825 Lstream_unread (stream, str, len);
829 /************************************************************************/
831 /************************************************************************/
/* Mark method for charset objects: marks the short name, long name, doc
   string, registry and CCL program of the charset OBJ.
   NOTE(review): the return type, braces and return statement are not
   visible in this view.  Marking of decoding_table is present only as a
   commented-out line; whether it is handled on a line not visible here
   cannot be told from this span.  */
834 mark_charset (Lisp_Object obj)
836 Lisp_Charset *cs = XCHARSET (obj);
838 mark_object (cs->short_name);
839 mark_object (cs->long_name);
840 mark_object (cs->doc_string);
841 mark_object (cs->registry);
842 mark_object (cs->ccl_program);
844 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Writes to PRINTCHARFUN a form
   starting "#<charset ", then the name, short name, long name and doc
   string, a formatted summary (chars^dimension, direction, columns,
   graphic, final byte), the registry, and finally the object's uid in
   hexadecimal followed by ">".
   NOTE(review): the return type, braces, the declaration of `buf', the
   condition guarding the error call and some sprintf arguments are not
   visible in this view.  */
850 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
852 Lisp_Charset *cs = XCHARSET (obj);
/* Error raised for the unreadable-object case.  */
856 error ("printing unreadable object #<charset %s 0x%x>",
857 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
860 write_c_string ("#<charset ", printcharfun);
861 print_internal (CHARSET_NAME (cs), printcharfun, 0);
862 write_c_string (" ", printcharfun);
863 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
864 write_c_string (" ", printcharfun);
865 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The numeric properties are formatted into `buf' with sprintf; the size
   of `buf' is not visible here.  */
868 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
870 CHARSET_DIMENSION (cs),
871 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
872 CHARSET_COLUMNS (cs),
873 CHARSET_GRAPHIC (cs),
875 write_c_string (buf, printcharfun);
876 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
877 sprintf (buf, " 0x%x>", cs->header.uid);
878 write_c_string (buf, printcharfun);
/* Description table listing the Lisp_Object members of Lisp_Charset:
   name, doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program and decoding_table.
   NOTE(review): the embedded numbering skips lines around the
   decoding_table entry (presumably a preprocessor conditional) and the
   table's terminating entry and closing brace are not visible in this
   view.  */
881 static const struct lrecord_description charset_description[] = {
882 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
883 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
884 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
885 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
/* Record-type implementation for "charset", wiring in mark_charset and
   print_charset.  NOTE(review): the remaining macro arguments are not
   visible in this view.  */
895 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
896 mark_charset, print_charset, 0, 0, 0,
900 /* Make a new charset. */
901 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset, fill in its properties from the arguments, and
   register it in the lookup tables and in Vcharset_hash_table.
   NOTE(review): the return type, braces, one parameter line (the one
   declaring `reg', which is used below), the declaration of `obj' and the
   return statement are not visible in this view.  */
903 make_charset (Charset_ID id, Lisp_Object name,
904 unsigned short chars, unsigned char dimension,
905 unsigned char columns, unsigned char graphic,
906 Bufbyte final, unsigned char direction, Lisp_Object short_name,
907 Lisp_Object long_name, Lisp_Object doc,
909 Lisp_Object decoding_table,
910 Emchar ucs_min, Emchar ucs_max,
911 Emchar code_offset, unsigned char byte_offset)
914 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
918 XSETCHARSET (obj, cs);
920 CHARSET_ID (cs) = id;
921 CHARSET_NAME (cs) = name;
922 CHARSET_SHORT_NAME (cs) = short_name;
923 CHARSET_LONG_NAME (cs) = long_name;
924 CHARSET_CHARS (cs) = chars;
925 CHARSET_DIMENSION (cs) = dimension;
926 CHARSET_DIRECTION (cs) = direction;
927 CHARSET_COLUMNS (cs) = columns;
928 CHARSET_GRAPHIC (cs) = graphic;
929 CHARSET_FINAL (cs) = final;
930 CHARSET_DOC_STRING (cs) = doc;
931 CHARSET_REGISTRY (cs) = reg;
932 CHARSET_CCL_PROGRAM (cs) = Qnil;
933 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* The decoding table starts out as Qnil; the DECODING_TABLE argument is
   not used on any line visible in this view.  */
935 CHARSET_DECODING_TABLE(cs) = Qnil;
936 CHARSET_UCS_MIN(cs) = ucs_min;
937 CHARSET_UCS_MAX(cs) = ucs_max;
938 CHARSET_CODE_OFFSET(cs) = code_offset;
939 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Representation length: 1 for the ASCII id; otherwise dimension + 1 or
   dimension + 2 (the conditions choosing between the last two are not
   visible).  */
943 if (id == LEADING_BYTE_ASCII)
944 CHARSET_REP_BYTES (cs) = 1;
946 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
948 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
/* Registration, as far as it is visible below: iso2022_type is
   (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1) and indexes
   chlook->charset_by_attributes together with FINAL (one visible variant
   also indexes by DIRECTION); the charset is also stored in
   chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE].  Each slot is
   asserted to be nil before it is filled.
   NOTE(review): the comment that starts on the next line has no visible
   terminator in this view, and the two charset_by_attributes variants are
   presumably preprocessor alternatives -- confirm.  */
953 /* some charsets do not have final characters. This includes
954 ASCII, Control-1, Composite, and the two faux private
956 unsigned char iso2022_type
957 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
959 if (code_offset == 0)
961 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
962 chlook->charset_by_attributes[iso2022_type][final] = obj;
966 (chlook->charset_by_attributes[iso2022_type][final][direction]));
967 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
971 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
972 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
974 /* Some charsets are "faux" and don't have names or really exist at
975 all except in the leading-byte table. */
/* Enter the charset under NAME (the guard implied by the comment above is
   not visible).  */
977 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the given
   DIMENSION.  Three allocation counters in chlook are visible: a single
   shared one, and separate ones for 1-byte and 2-byte charsets; each is
   compared against its MAX_LEADING_BYTE_PRIVATE* limit before being
   post-incremented.  When nothing is available an error with the message
   "No more character sets free for this dimension" is raised.
   NOTE(review): the return type, braces, the declaration of `lb', the
   branches taken when a limit is exceeded, the name of the
   error-signalling function and the return statement are not visible in
   this view; the shared counter and the per-dimension counters are
   presumably preprocessor alternatives -- confirm.  */
982 get_unallocated_leading_byte (int dimension)
987 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
990 lb = chlook->next_allocated_leading_byte++;
994 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
997 lb = chlook->next_allocated_1_byte_leading_byte++;
1001 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1004 lb = chlook->next_allocated_2_byte_leading_byte++;
1010 ("No more character sets free for this dimension",
1011 make_int (dimension));
/* Number of Big5 characters which have the same code in 1st byte.
   decode_builtin_char accepts second bytes in 0x40..0x7E and 0xA1..0xFE;
   (0x7F - 0x40) = 63 and (0xFF - 0xA1) = 94 are the sizes of those two
   ranges, so the value is 157.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character for CODE_POINT in CHARSET from the charset's
   built-in numeric properties.
   NOTE(review): the return type, braces, local declarations (`final', `I',
   `cid'), `case' labels and several return statements are not visible in
   this view.  */
1022 decode_builtin_char (Lisp_Object charset, int code_point)
/* Big5: a valid two-byte code is turned into a linear index I, assigned to
   one of the two big5 sub-charsets, and re-expressed as a 94x94 code point
   (row and column each offset by 33).  */
1026 if (EQ (charset, Vcharset_chinese_big5))
1028 int c1 = code_point >> 8;
1029 int c2 = code_point & 0xFF;
1032 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1034 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1035 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Second bytes 0x40..0x7E map to columns 0..62 and 0xA1..0xFE to columns
   63..156 (0xA1 - 0x62 == 63).  */
1037 I = (c1 - 0xA1) * BIG5_SAME_ROW
1038 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1042 charset = Vcharset_chinese_big5_1;
/* For the second sub-charset the index is rebased to the row of first
   byte 0xC9.  */
1046 charset = Vcharset_chinese_big5_2;
1047 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1049 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Charsets with a final byte of '0' or above: the character is placed in a
   block selected by (final - '0') after a MIN_CHAR_* base, using the low
   7 bits of each code-point byte minus 33 (94-sets) or 32 (96-sets).  */
1052 if ((final = XCHARSET_FINAL (charset)) >= '0')
1054 if (XCHARSET_DIMENSION (charset) == 1)
1056 switch (XCHARSET_CHARS (charset))
1060 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1063 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1071 switch (XCHARSET_CHARS (charset))
1074 return MIN_CHAR_94x94
1075 + (final - '0') * 94 * 94
1076 + (((code_point >> 8) & 0x7F) - 33) * 94
1077 + ((code_point & 0x7F) - 33);
1079 return MIN_CHAR_96x96
1080 + (final - '0') * 96 * 96
1081 + (((code_point >> 8) & 0x7F) - 32) * 96
1082 + ((code_point & 0x7F) - 32);
/* Charsets with a non-zero UCS_MAX: linearize the code point (one or two
   bytes, each reduced by the byte offset), subtract the code offset and
   add UCS_MIN; the result is then compared against [UCS_MIN, UCS_MAX].  */
1089 else if (XCHARSET_UCS_MAX (charset))
1092 = (XCHARSET_DIMENSION (charset) == 1
1094 code_point - XCHARSET_BYTE_OFFSET (charset)
1096 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1097 * XCHARSET_CHARS (charset)
1098 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1099 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1100 if ((cid < XCHARSET_UCS_MIN (charset))
1101 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET from the charset's
   numeric range properties (the reverse direction of
   decode_builtin_char).
   NOTE(review): the return type, braces, the declaration of `d', several
   `return' keywords and the failure return are not visible in this
   view.  */
1110 range_charset_code_point (Lisp_Object charset, Emchar ch)
/* CH inside [UCS_MIN, UCS_MAX]: d is the linear position within the
   charset, which is then split into one byte per dimension, each offset
   by the charset's byte offset.  */
1114 if ((XCHARSET_UCS_MIN (charset) <= ch)
1115 && (ch <= XCHARSET_UCS_MAX (charset)))
1117 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1119 if (XCHARSET_CHARS (charset) == 256)
1121 else if (XCHARSET_DIMENSION (charset) == 1)
1122 return d + XCHARSET_BYTE_OFFSET (charset);
1123 else if (XCHARSET_DIMENSION (charset) == 2)
1125 ((d / XCHARSET_CHARS (charset)
1126 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1127 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1128 else if (XCHARSET_DIMENSION (charset) == 3)
1130 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1131 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1132 | ((d / XCHARSET_CHARS (charset)
1133 % XCHARSET_CHARS (charset)
1134 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1135 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1136 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1138 ((d / (XCHARSET_CHARS (charset)
1139 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1140 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1141 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1142 % XCHARSET_CHARS (charset)
1143 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1144 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1145 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1146 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* Charsets with code offset 0: CH is located relative to the MIN_CHAR_*
   base for its kind (94, 96, 94x94, 96x96) plus the block selected by
   (final - '0').  The upper-bound half of each test is not visible.  */
1148 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1150 if (XCHARSET_DIMENSION (charset) == 1)
1152 if (XCHARSET_CHARS (charset) == 94)
1154 if (((d = ch - (MIN_CHAR_94
1155 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1159 else if (XCHARSET_CHARS (charset) == 96)
1161 if (((d = ch - (MIN_CHAR_96
1162 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1169 else if (XCHARSET_DIMENSION (charset) == 2)
1171 if (XCHARSET_CHARS (charset) == 94)
1173 if (((d = ch - (MIN_CHAR_94x94
1174 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1177 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1179 else if (XCHARSET_CHARS (charset) == 96)
1181 if (((d = ch - (MIN_CHAR_96x96
1182 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1185 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* mojikyo-2022-1: characters strictly above MIN_CHAR_MOJIKYO are split
   into three bytes using radices 94 * 60, 94 and 94.
   NOTE(review): byte2 is computed without an offset here; any adjustment
   made before the return is on lines not visible in this view.  */
1191 if (EQ (charset, Vcharset_mojikyo_2022_1)
1192 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1194 int m = ch - MIN_CHAR_MOJIKYO - 1;
1195 int byte1 = m / (94 * 60) + 33;
1196 int byte2 = (m % (94 * 60)) / 94;
1197 int byte3 = m % 94 + 33;
1203 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Find a built-in charset for character C by comparing C against a chain
   of character-code ranges.  The chosen charset is stored through CHARSET
   and, on the visible return lines, the code point within it is
   returned.
   NOTE(review): the return type, braces, several conditions and many
   return statements are not visible in this view.  */
1209 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1211 if (c <= MAX_CHAR_BASIC_LATIN)
1213 *charset = Vcharset_ascii;
1218 *charset = Vcharset_control_1;
1223 *charset = Vcharset_latin_iso8859_1;
1227 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1229 *charset = Vcharset_hebrew_iso8859_8;
1230 return c - MIN_CHAR_HEBREW + 0x20;
1233 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1235 *charset = Vcharset_thai_tis620;
1236 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike every other branch, this one returns the result of
   list2 and does not store through CHARSET.  The embedded numbering skips
   lines around this and the two preceding branches, so they may be
   commented out or conditionally compiled in the full file -- confirm
   before relying on them.  */
1239 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1240 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1242 return list2 (Vcharset_katakana_jisx0201,
1243 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1246 else if (c <= MAX_CHAR_BMP)
1248 *charset = Vcharset_ucs_bmp;
1251 else if (c <= MAX_CHAR_SMP)
1253 *charset = Vcharset_ucs_smp;
1254 return c - MIN_CHAR_SMP;
1256 else if (c <= MAX_CHAR_SIP)
1258 *charset = Vcharset_ucs_sip;
1259 return c - MIN_CHAR_SIP;
/* Codes that fall in a gap between two named ranges are assigned to the
   generic ucs charset, here and in the similar branches below.  */
1261 else if (c < MIN_CHAR_DAIKANWA)
1263 *charset = Vcharset_ucs;
1266 else if (c <= MAX_CHAR_DAIKANWA)
1268 *charset = Vcharset_ideograph_daikanwa;
1269 return c - MIN_CHAR_DAIKANWA;
1272 else if (c <= MAX_CHAR_MOJIKYO_0)
1274 *charset = Vcharset_mojikyo;
1275 return c - MIN_CHAR_MOJIKYO_0;
1278 else if (c < MIN_CHAR_94)
1280 *charset = Vcharset_ucs;
/* 94-, 96-, 94x94- and 96x96-character blocks: the block number (plus
   '0') is used as the final byte to look the charset up by attributes.
   When one is found the position inside the block is converted to a code
   point with offset 33 (94-sets) or 32 (96-sets); otherwise the ucs
   charset is used.  */
1283 else if (c <= MAX_CHAR_94)
1285 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1286 ((c - MIN_CHAR_94) / 94) + '0',
1287 CHARSET_LEFT_TO_RIGHT);
1288 if (!NILP (*charset))
1289 return ((c - MIN_CHAR_94) % 94) + 33;
1292 *charset = Vcharset_ucs;
1296 else if (c <= MAX_CHAR_96)
1298 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1299 ((c - MIN_CHAR_96) / 96) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return ((c - MIN_CHAR_96) % 96) + 32;
1305 *charset = Vcharset_ucs;
1309 else if (c <= MAX_CHAR_94x94)
1312 = CHARSET_BY_ATTRIBUTES (94, 2,
1313 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1314 CHARSET_LEFT_TO_RIGHT);
1315 if (!NILP (*charset))
1316 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1317 | (((c - MIN_CHAR_94x94) % 94) + 33);
1320 *charset = Vcharset_ucs;
1324 else if (c <= MAX_CHAR_96x96)
1327 = CHARSET_BY_ATTRIBUTES (96, 2,
1328 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1329 CHARSET_LEFT_TO_RIGHT);
1330 if (!NILP (*charset))
/* `<<' binds tighter than `|', so this is (row << 8) | column, the same
   shape as the 94x94 case above.  */
1331 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1332 | (((c - MIN_CHAR_96x96) % 96) + 32);
1335 *charset = Vcharset_ucs;
1340 else if (c < MIN_CHAR_MOJIKYO)
1342 *charset = Vcharset_ucs;
1345 else if (c <= MAX_CHAR_MOJIKYO)
1347 *charset = Vcharset_mojikyo;
1348 return c - MIN_CHAR_MOJIKYO;
1350 else if (c < MIN_CHAR_CHINA3_JEF)
1352 *charset = Vcharset_ucs;
1355 else if (c <= MAX_CHAR_CHINA3_JEF)
1357 *charset = Vcharset_china3_jef;
1358 return c - MIN_CHAR_CHINA3_JEF;
1360 else if (c <= MAX_CHAR_CBETA)
1362 *charset = Vcharset_ideograph_cbeta;
1363 return c - MIN_CHAR_CBETA;
/* Anything beyond the last named range falls back to ucs.  */
1368 *charset = Vcharset_ucs;
/* Global Lisp variable.  NOTE(review): by its name, a priority-ordered
   list of coded charsets; nothing in the visible part of the file reads or
   writes it -- confirm its use in the full file.  */
1373 Lisp_Object Vdefault_coded_charset_priority_list;
1377 /************************************************************************/
1378 /* Basic charset Lisp functions */
1379 /************************************************************************/
/* Lisp primitive `charsetp' (one required argument): yields Qt when
   CHARSETP holds for OBJECT and Qnil otherwise.
   NOTE(review): the end of the doc comment, the argument list and the
   braces are not visible in this view.  */
1381 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1382 Return non-nil if OBJECT is a charset.
1386 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (one required argument): a charset object
   is returned unchanged; otherwise the argument is checked to be a symbol
   and looked up in Vcharset_hash_table, with Qnil as the result when there
   is no entry.
   NOTE(review): the end of the doc comment, the argument list and the
   braces are not visible in this view.  */
1389 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1390 Retrieve the charset of the given name.
1391 If CHARSET-OR-NAME is a charset object, it is simply returned.
1392 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1393 nil is returned. Otherwise the associated charset object is returned.
1397 if (CHARSETP (charset_or_name))
1398 return charset_or_name;
1400 CHECK_SYMBOL (charset_or_name);
1401 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (one required argument): calls
   Ffind_charset on NAME and signals "No such charset" for a failed lookup.
   NOTE(review): the end of the doc comment, the argument list, the
   condition guarding the error, the return statement and the braces are
   not visible in this view.  */
1404 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1405 Retrieve the charset of the given name.
1406 Same as `find-charset' except an error is signalled if there is no such
1407 charset instead of returning nil.
1411 Lisp_Object charset = Ffind_charset (name);
1414 signal_simple_error ("No such charset", name);
1418 /* We store the charsets in hash tables with the names as the key and the
1419 actual charset object as the value. Occasionally we need to use them
1420 in a list format. These routines provide us with that. */
/* Closure passed to the hash-table mapper: holds a pointer to the list
   being accumulated.  NOTE(review): the braces of the struct are not
   visible in this view.  */
1421 struct charset_list_closure
1423 Lisp_Object *charset_list;
/* Mapper called for each hash-table entry: prepends the entry's KEY to
   the list referenced by the closure.  VALUE is not used on the visible
   lines.  NOTE(review): the return type, braces and return statement are
   not visible in this view.  */
1427 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1428 void *charset_list_closure)
1430 /* This function can GC */
1431 struct charset_list_closure *chcl =
1432 (struct charset_list_closure*) charset_list_closure;
1433 Lisp_Object *charset_list = chcl->charset_list;
1435 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments): walks Vcharset_hash_table
   with elisp_maphash, using add_charset_to_list_mapper to collect every
   key into a list, and returns that list.  The list variable is
   GC-protected with GCPRO1 while the mapping runs.
   NOTE(review): the end of the doc comment, the argument list, the braces
   and the matching un-protect call are not visible in this view.  */
1439 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1440 Return a list of the names of all defined charsets.
1444 Lisp_Object charset_list = Qnil;
1445 struct gcpro gcpro1;
1446 struct charset_list_closure charset_list_closure;
1448 GCPRO1 (charset_list);
1449 charset_list_closure.charset_list = &charset_list;
1450 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1451 &charset_list_closure);
1454 return charset_list;
1457 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1458 Return the name of charset CHARSET.
1462 return XCHARSET_NAME (Fget_charset (charset));
1465 /* #### SJT Should generic properties be allowed? */
1466 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1467 Define a new character set.
1468 This function is for use with Mule support.
1469 NAME is a symbol, the name by which the character set is normally referred.
1470 DOC-STRING is a string describing the character set.
1471 PROPS is a property list, describing the specific nature of the
1472 character set. Recognized properties are:
1474 'short-name Short version of the charset name (ex: Latin-1)
1475 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1476 'registry A regular expression matching the font registry field for
1478 'dimension Number of octets used to index a character in this charset.
1479 Either 1 or 2. Defaults to 1.
1480 'columns Number of columns used to display a character in this charset.
1481 Only used in TTY mode. (Under X, the actual width of a
1482 character can be derived from the font used to display the
1483 characters.) If unspecified, defaults to the dimension
1484 (this is almost always the correct value).
1485 'chars Number of characters in each dimension (94 or 96).
1486 Defaults to 94. Note that if the dimension is 2, the
1487 character set thus described is 94x94 or 96x96.
1488 'final Final byte of ISO 2022 escape sequence. Must be
1489 supplied. Each combination of (DIMENSION, CHARS) defines a
1490 separate namespace for final bytes. Note that ISO
1491 2022 restricts the final byte to the range
1492 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1493 dimension == 2. Note also that final bytes in the range
1494 0x30 - 0x3F are reserved for user-defined (not official)
1496 'graphic 0 (use left half of font on output) or 1 (use right half
1497 of font on output). Defaults to 0. For example, for
1498 a font whose registry is ISO8859-1, the left half
1499 (octets 0x20 - 0x7F) is the `ascii' character set, while
1500 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1501 character set. With 'graphic set to 0, the octets
1502 will have their high bit cleared; with it set to 1,
1503 the octets will have their high bit set.
1504 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1506 'ccl-program A compiled CCL program used to convert a character in
1507 this charset into an index into the font. This is in
1508 addition to the 'graphic property. The CCL program
1509 is passed the octets of the character, with the high
1510 bit cleared and set depending upon whether the value
1511 of the 'graphic property is 0 or 1.
1513 (name, doc_string, props))
1515 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1516 int direction = CHARSET_LEFT_TO_RIGHT;
1517 Lisp_Object registry = Qnil;
1518 Lisp_Object charset;
1519 Lisp_Object ccl_program = Qnil;
1520 Lisp_Object short_name = Qnil, long_name = Qnil;
1521 int byte_offset = -1;
1523 CHECK_SYMBOL (name);
1524 if (!NILP (doc_string))
1525 CHECK_STRING (doc_string);
1527 charset = Ffind_charset (name);
1528 if (!NILP (charset))
1529 signal_simple_error ("Cannot redefine existing charset", name);
1532 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1534 if (EQ (keyword, Qshort_name))
1536 CHECK_STRING (value);
1540 if (EQ (keyword, Qlong_name))
1542 CHECK_STRING (value);
1546 else if (EQ (keyword, Qdimension))
1549 dimension = XINT (value);
1550 if (dimension < 1 || dimension > 2)
1551 signal_simple_error ("Invalid value for 'dimension", value);
1554 else if (EQ (keyword, Qchars))
1557 chars = XINT (value);
1558 if (chars != 94 && chars != 96)
1559 signal_simple_error ("Invalid value for 'chars", value);
1562 else if (EQ (keyword, Qcolumns))
1565 columns = XINT (value);
1566 if (columns != 1 && columns != 2)
1567 signal_simple_error ("Invalid value for 'columns", value);
1570 else if (EQ (keyword, Qgraphic))
1573 graphic = XINT (value);
1575 if (graphic < 0 || graphic > 2)
1577 if (graphic < 0 || graphic > 1)
1579 signal_simple_error ("Invalid value for 'graphic", value);
1582 else if (EQ (keyword, Qregistry))
1584 CHECK_STRING (value);
1588 else if (EQ (keyword, Qdirection))
1590 if (EQ (value, Ql2r))
1591 direction = CHARSET_LEFT_TO_RIGHT;
1592 else if (EQ (value, Qr2l))
1593 direction = CHARSET_RIGHT_TO_LEFT;
1595 signal_simple_error ("Invalid value for 'direction", value);
1598 else if (EQ (keyword, Qfinal))
1600 CHECK_CHAR_COERCE_INT (value);
1601 final = XCHAR (value);
1602 if (final < '0' || final > '~')
1603 signal_simple_error ("Invalid value for 'final", value);
1606 else if (EQ (keyword, Qccl_program))
1608 struct ccl_program test_ccl;
1610 if (setup_ccl_program (&test_ccl, value) < 0)
1611 signal_simple_error ("Invalid value for 'ccl-program", value);
1612 ccl_program = value;
1616 signal_simple_error ("Unrecognized property", keyword);
1621 error ("'final must be specified");
1622 if (dimension == 2 && final > 0x5F)
1624 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1627 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1628 CHARSET_LEFT_TO_RIGHT)) ||
1629 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1630 CHARSET_RIGHT_TO_LEFT)))
1632 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1634 id = get_unallocated_leading_byte (dimension);
1636 if (NILP (doc_string))
1637 doc_string = build_string ("");
1639 if (NILP (registry))
1640 registry = build_string ("");
1642 if (NILP (short_name))
1643 XSETSTRING (short_name, XSYMBOL (name)->name);
1645 if (NILP (long_name))
1646 long_name = doc_string;
1649 columns = dimension;
1651 if (byte_offset < 0)
1655 else if (chars == 96)
1661 charset = make_charset (id, name, chars, dimension, columns, graphic,
1662 final, direction, short_name, long_name,
1663 doc_string, registry,
1664 Qnil, 0, 0, 0, byte_offset);
1665 if (!NILP (ccl_program))
1666 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1670 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1672 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1673 NEW-NAME is the name of the new charset. Return the new charset.
1675 (charset, new_name))
1677 Lisp_Object new_charset = Qnil;
1678 int id, chars, dimension, columns, graphic, final;
1680 Lisp_Object registry, doc_string, short_name, long_name;
1683 charset = Fget_charset (charset);
1684 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1685 signal_simple_error ("Charset already has reverse-direction charset",
1688 CHECK_SYMBOL (new_name);
1689 if (!NILP (Ffind_charset (new_name)))
1690 signal_simple_error ("Cannot redefine existing charset", new_name);
1692 cs = XCHARSET (charset);
1694 chars = CHARSET_CHARS (cs);
1695 dimension = CHARSET_DIMENSION (cs);
1696 columns = CHARSET_COLUMNS (cs);
1697 id = get_unallocated_leading_byte (dimension);
1699 graphic = CHARSET_GRAPHIC (cs);
1700 final = CHARSET_FINAL (cs);
1701 direction = CHARSET_RIGHT_TO_LEFT;
1702 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1703 direction = CHARSET_LEFT_TO_RIGHT;
1704 doc_string = CHARSET_DOC_STRING (cs);
1705 short_name = CHARSET_SHORT_NAME (cs);
1706 long_name = CHARSET_LONG_NAME (cs);
1707 registry = CHARSET_REGISTRY (cs);
1709 new_charset = make_charset (id, new_name, chars, dimension, columns,
1710 graphic, final, direction, short_name, long_name,
1711 doc_string, registry,
1713 CHARSET_DECODING_TABLE(cs),
1714 CHARSET_UCS_MIN(cs),
1715 CHARSET_UCS_MAX(cs),
1716 CHARSET_CODE_OFFSET(cs),
1717 CHARSET_BYTE_OFFSET(cs)
1723 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1724 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1729 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1730 Define symbol ALIAS as an alias for CHARSET.
1734 CHECK_SYMBOL (alias);
1735 charset = Fget_charset (charset);
1736 return Fputhash (alias, charset, Vcharset_hash_table);
/* #### Reverse direction charsets not yet implemented. */
/* NOTE(review): the `#if 0' guard is restored by inference (two short
   lines are missing here and no DEFSUBR registers this primitive) --
   confirm. */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
       1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  charset = Fget_charset (charset);
  return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
}
#endif
1755 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1756 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1757 If DIRECTION is omitted, both directions will be checked (left-to-right
1758 will be returned if character sets exist for both directions).
1760 (dimension, chars, final, direction))
1762 int dm, ch, fi, di = -1;
1763 Lisp_Object obj = Qnil;
1765 CHECK_INT (dimension);
1766 dm = XINT (dimension);
1767 if (dm < 1 || dm > 2)
1768 signal_simple_error ("Invalid value for DIMENSION", dimension);
1772 if (ch != 94 && ch != 96)
1773 signal_simple_error ("Invalid value for CHARS", chars);
1775 CHECK_CHAR_COERCE_INT (final);
1777 if (fi < '0' || fi > '~')
1778 signal_simple_error ("Invalid value for FINAL", final);
1780 if (EQ (direction, Ql2r))
1781 di = CHARSET_LEFT_TO_RIGHT;
1782 else if (EQ (direction, Qr2l))
1783 di = CHARSET_RIGHT_TO_LEFT;
1784 else if (!NILP (direction))
1785 signal_simple_error ("Invalid value for DIRECTION", direction);
1787 if (dm == 2 && fi > 0x5F)
1789 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1793 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1795 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1798 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1801 return XCHARSET_NAME (obj);
1805 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1806 Return short name of CHARSET.
1810 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1813 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1814 Return long name of CHARSET.
1818 return XCHARSET_LONG_NAME (Fget_charset (charset));
1821 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1822 Return description of CHARSET.
1826 return XCHARSET_DOC_STRING (Fget_charset (charset));
1829 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1830 Return dimension of CHARSET.
1834 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1837 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1838 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1839 Recognized properties are those listed in `make-charset', as well as
1840 'name and 'doc-string.
1846 charset = Fget_charset (charset);
1847 cs = XCHARSET (charset);
1849 CHECK_SYMBOL (prop);
1850 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1851 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1852 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1853 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1854 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1855 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1856 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1857 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1858 Qnil : make_char (CHARSET_FINAL (cs));
1859 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1860 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1861 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1862 if (EQ (prop, Qdirection))
1863 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1864 if (EQ (prop, Qreverse_direction_charset))
1866 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1867 /* #### Is this translation OK? If so, error checking sufficient? */
1868 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1870 signal_simple_error ("Unrecognized charset property name", prop);
1871 return Qnil; /* not reached */
1874 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1875 Return charset identification number of CHARSET.
1879 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1882 /* #### We need to figure out which properties we really want to
1885 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1886 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1888 (charset, ccl_program))
1890 struct ccl_program test_ccl;
1892 charset = Fget_charset (charset);
1893 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1894 signal_simple_error ("Invalid ccl-program", ccl_program);
1895 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1900 invalidate_charset_font_caches (Lisp_Object charset)
1902 /* Invalidate font cache entries for charset on all devices. */
1903 Lisp_Object devcons, concons, hash_table;
1904 DEVICE_LOOP_NO_BREAK (devcons, concons)
1906 struct device *d = XDEVICE (XCAR (devcons));
1907 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1908 if (!UNBOUNDP (hash_table))
1909 Fclrhash (hash_table);
1913 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1914 Set the 'registry property of CHARSET to REGISTRY.
1916 (charset, registry))
1918 charset = Fget_charset (charset);
1919 CHECK_STRING (registry);
1920 XCHARSET_REGISTRY (charset) = registry;
1921 invalidate_charset_font_caches (charset);
1922 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* NOTE(review): this primitive appears to be UTF-2000 only (its
   DEFSUBR sits between two dropped short lines); the guard is restored
   by inference -- confirm. */
#ifdef UTF2000
DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
Return mapping-table of CHARSET.
*/
       (charset))
{
  return XCHARSET_DECODING_TABLE (Fget_charset (charset));
}
#endif /* UTF2000 */
/* Install TABLE as the code-point -> character mapping of CHARSET.

   - nil: drop the current decoding table and return.
   - vector: validate it with decoding_table_check_elements, then record
     every character found through Fput_char_attribute, keyed by the
     charset name, with the code point built from the vector index plus
     the charset's byte offset (two indices packed as (i << 8) | j for
     dimension 2).
   - anything else: wrong-type-argument error.

   NOTE(review): the `#ifdef UTF2000' guard, the declarations of `i',
   `j' and `byte_offset', the CHARP/VECTORP element tests and the exact
   return codes tested (-1, -2) were dropped from the visible text and
   are restored by inference -- confirm against
   decoding_table_check_elements. */
#ifdef UTF2000
DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
Set mapping-table of CHARSET to TABLE.
*/
       (charset, table))
{
  struct Lisp_Charset *cs;
  size_t i;
  int byte_offset;

  charset = Fget_charset (charset);
  cs = XCHARSET (charset);

  if (NILP (table))
    {
      if (VECTORP (CHARSET_DECODING_TABLE(cs)))
        make_vector_newer (CHARSET_DECODING_TABLE(cs));
      CHARSET_DECODING_TABLE(cs) = Qnil;
      return table;
    }
  else if (VECTORP (table))
    {
      int ccs_len = CHARSET_BYTE_SIZE (cs);
      int ret = decoding_table_check_elements (table,
                                               CHARSET_DIMENSION (cs),
                                               ccs_len);
      if (ret)
        {
          if (ret == -1)
            signal_simple_error ("Too big table", table);
          else if (ret == -2)
            signal_simple_error ("Invalid element is found", table);
          else
            signal_simple_error ("Something wrong", table);
        }
      CHARSET_DECODING_TABLE(cs) = Qnil;
    }
  else
    signal_error (Qwrong_type_argument,
                  list2 (build_translated_string ("vector-or-nil-p"),
                         table));

  byte_offset = CHARSET_BYTE_OFFSET (cs);
  switch (CHARSET_DIMENSION (cs))
    {
    case 1:
      for (i = 0; i < XVECTOR_LENGTH (table); i++)
        {
          Lisp_Object c = XVECTOR_DATA(table)[i];

          if (CHARP (c))
            Fput_char_attribute (c, XCHARSET_NAME (charset),
                                 make_int (i + byte_offset));
        }
      break;
    case 2:
      for (i = 0; i < XVECTOR_LENGTH (table); i++)
        {
          Lisp_Object v = XVECTOR_DATA(table)[i];

          if (VECTORP (v))
            {
              size_t j;

              for (j = 0; j < XVECTOR_LENGTH (v); j++)
                {
                  Lisp_Object c = XVECTOR_DATA(v)[j];

                  if (CHARP (c))
                    Fput_char_attribute
                      (c, XCHARSET_NAME (charset),
                       make_int ( ( (i + byte_offset) << 8 )
                                  | (j + byte_offset)
                                  ) );
                }
            }
          else if (CHARP (v))
            Fput_char_attribute (v, XCHARSET_NAME (charset),
                                 make_int (i + byte_offset));
        }
      break;
    }
  return table;
}
#endif /* UTF2000 */
2021 /************************************************************************/
2022 /* Lisp primitives for working with characters */
2023 /************************************************************************/
/* NOTE(review): the `#ifdef UTF2000' guard is restored by inference
   (the matching DEFSUBR sits between two dropped short lines) --
   confirm. */
#ifdef UTF2000
DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
Make a character from CHARSET and code-point CODE.
If DEFINED_ONLY is non-nil, builtin character is not returned.
If corresponding character is not found, nil is returned.
*/
       (charset, code, defined_only))
{
  int c;

  charset = Fget_charset (charset);
  CHECK_INT (code);
  c = XINT (code);
  /* For charsets whose 'graphic is 1, clear the high bit of every
     octet of the code point before decoding. */
  if (XCHARSET_GRAPHIC (charset) == 1)
    c &= 0x7F7F7F7F;
  if (NILP (defined_only))
    c = DECODE_CHAR (charset, c);
  else
    c = DECODE_DEFINED_CHAR (charset, c);
  /* A negative decode result means "no such character". */
  return c >= 0 ? make_char (c) : Qnil;
}
#endif /* UTF2000 */
/* For `latin-viscii', CODE is first decoded to a character; if that
   character has a code point in latin-viscii-lower or
   latin-viscii-upper, the lookup is redirected to that charset and
   code.  The builtin decoder is then tried, falling back to
   Fdecode_char when it yields a negative result.

   NOTE(review): the `#ifdef UTF2000' guard and the `#if 0' around the
   graphic mask are restored by inference (only short lines are missing
   at those positions) -- confirm before relying on the mask being
   disabled. */
#ifdef UTF2000
DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
Make a builtin character from CHARSET and code-point CODE.
*/
       (charset, code))
{
  int c;

  charset = Fget_charset (charset);
  CHECK_INT (code);
  if (EQ (charset, Vcharset_latin_viscii))
    {
      Lisp_Object chr = Fdecode_char (charset, code, Qnil);
      Lisp_Object ret;

      if (!NILP (chr))
        {
          if (!NILP
              (ret = Fget_char_attribute (chr,
                                          Vcharset_latin_viscii_lower,
                                          Qnil)))
            {
              charset = Vcharset_latin_viscii_lower;
              code = ret;
            }
          else if (!NILP
                   (ret = Fget_char_attribute (chr,
                                               Vcharset_latin_viscii_upper,
                                               Qnil)))
            {
              charset = Vcharset_latin_viscii_upper;
              code = ret;
            }
        }
    }
  c = XINT (code);
#if 0
  if (XCHARSET_GRAPHIC (charset) == 1)
    c &= 0x7F7F7F7F;
#endif
  c = decode_builtin_char (charset, c);
  return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
}
#endif /* UTF2000 */
2091 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2092 Make a character from CHARSET and octets ARG1 and ARG2.
2093 ARG2 is required only for characters from two-dimensional charsets.
2094 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2095 character s with caron.
2097 (charset, arg1, arg2))
2101 int lowlim, highlim;
2103 charset = Fget_charset (charset);
2104 cs = XCHARSET (charset);
2106 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2107 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2109 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2111 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2112 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2115 /* It is useful (and safe, according to Olivier Galibert) to strip
2116 the 8th bit off ARG1 and ARG2 because it allows programmers to
2117 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2118 Latin 2 code of the character. */
2126 if (a1 < lowlim || a1 > highlim)
2127 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2129 if (CHARSET_DIMENSION (cs) == 1)
2133 ("Charset is of dimension one; second octet must be nil", arg2);
2134 return make_char (MAKE_CHAR (charset, a1, 0));
2143 a2 = XINT (arg2) & 0x7f;
2145 if (a2 < lowlim || a2 > highlim)
2146 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2148 return make_char (MAKE_CHAR (charset, a1, a2));
2151 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2152 Return the character set of CHARACTER.
2156 CHECK_CHAR_COERCE_INT (character);
2158 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2161 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2162 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2163 N defaults to 0 if omitted.
2167 Lisp_Object charset;
2170 CHECK_CHAR_COERCE_INT (character);
2172 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2174 if (NILP (n) || EQ (n, Qzero))
2175 return make_int (octet0);
2176 else if (EQ (n, make_int (1)))
2177 return make_int (octet1);
2179 signal_simple_error ("Octet number must be 0 or 1", n);
2182 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2183 Return list of charset and one or two position-codes of CHARACTER.
2187 /* This function can GC */
2188 struct gcpro gcpro1, gcpro2;
2189 Lisp_Object charset = Qnil;
2190 Lisp_Object rc = Qnil;
2198 GCPRO2 (charset, rc);
2199 CHECK_CHAR_COERCE_INT (character);
2202 code_point = ENCODE_CHAR (XCHAR (character), charset);
2203 dimension = XCHARSET_DIMENSION (charset);
2204 while (dimension > 0)
2206 rc = Fcons (make_int (code_point & 255), rc);
2210 rc = Fcons (XCHARSET_NAME (charset), rc);
2212 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2214 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2216 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2220 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2229 #ifdef ENABLE_COMPOSITE_CHARS
2230 /************************************************************************/
2231 /* composite character functions */
2232 /************************************************************************/
2235 lookup_composite_char (Bufbyte *str, int len)
2237 Lisp_Object lispstr = make_string (str, len);
2238 Lisp_Object ch = Fgethash (lispstr,
2239 Vcomposite_char_string2char_hash_table,
2245 if (composite_char_row_next >= 128)
2246 signal_simple_error ("No more composite chars available", lispstr);
2247 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2248 composite_char_col_next);
2249 Fputhash (make_char (emch), lispstr,
2250 Vcomposite_char_char2string_hash_table);
2251 Fputhash (lispstr, make_char (emch),
2252 Vcomposite_char_string2char_hash_table);
2253 composite_char_col_next++;
2254 if (composite_char_col_next >= 128)
2256 composite_char_col_next = 32;
2257 composite_char_row_next++;
2266 composite_char_string (Emchar ch)
2268 Lisp_Object str = Fgethash (make_char (ch),
2269 Vcomposite_char_char2string_hash_table,
2271 assert (!UNBOUNDP (str));
2275 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2276 Convert a string into a single composite character.
2277 The character is the result of overstriking all the characters in
2282 CHECK_STRING (string);
2283 return make_char (lookup_composite_char (XSTRING_DATA (string),
2284 XSTRING_LENGTH (string)));
2287 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2288 Return a string of the characters comprising a composite character.
2296 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2297 signal_simple_error ("Must be composite char", ch);
2298 return composite_char_string (emch);
2300 #endif /* ENABLE_COMPOSITE_CHARS */
2303 /************************************************************************/
2304 /* initialization */
2305 /************************************************************************/
2308 syms_of_mule_charset (void)
2310 INIT_LRECORD_IMPLEMENTATION (charset);
2312 DEFSUBR (Fcharsetp);
2313 DEFSUBR (Ffind_charset);
2314 DEFSUBR (Fget_charset);
2315 DEFSUBR (Fcharset_list);
2316 DEFSUBR (Fcharset_name);
2317 DEFSUBR (Fmake_charset);
2318 DEFSUBR (Fmake_reverse_direction_charset);
2319 /* DEFSUBR (Freverse_direction_charset); */
2320 DEFSUBR (Fdefine_charset_alias);
2321 DEFSUBR (Fcharset_from_attributes);
2322 DEFSUBR (Fcharset_short_name);
2323 DEFSUBR (Fcharset_long_name);
2324 DEFSUBR (Fcharset_description);
2325 DEFSUBR (Fcharset_dimension);
2326 DEFSUBR (Fcharset_property);
2327 DEFSUBR (Fcharset_id);
2328 DEFSUBR (Fset_charset_ccl_program);
2329 DEFSUBR (Fset_charset_registry);
2331 DEFSUBR (Fcharset_mapping_table);
2332 DEFSUBR (Fset_charset_mapping_table);
2336 DEFSUBR (Fdecode_char);
2337 DEFSUBR (Fdecode_builtin_char);
2339 DEFSUBR (Fmake_char);
2340 DEFSUBR (Fchar_charset);
2341 DEFSUBR (Fchar_octet);
2342 DEFSUBR (Fsplit_char);
2344 #ifdef ENABLE_COMPOSITE_CHARS
2345 DEFSUBR (Fmake_composite_char);
2346 DEFSUBR (Fcomposite_char_string);
2349 defsymbol (&Qcharsetp, "charsetp");
2350 defsymbol (&Qregistry, "registry");
2351 defsymbol (&Qfinal, "final");
2352 defsymbol (&Qgraphic, "graphic");
2353 defsymbol (&Qdirection, "direction");
2354 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2355 defsymbol (&Qshort_name, "short-name");
2356 defsymbol (&Qlong_name, "long-name");
2358 defsymbol (&Ql2r, "l2r");
2359 defsymbol (&Qr2l, "r2l");
2361 /* Charsets, compatible with FSF 20.3
2362 Naming convention is Script-Charset[-Edition] */
2363 defsymbol (&Qascii, "ascii");
2364 defsymbol (&Qcontrol_1, "control-1");
2365 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2366 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2367 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2368 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2369 defsymbol (&Qthai_tis620, "thai-tis620");
2370 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2371 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2372 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2373 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2374 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2375 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2376 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2377 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2378 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2379 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2380 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2381 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2382 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2383 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2384 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2385 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2387 defsymbol (&Qucs, "ucs");
2388 defsymbol (&Qucs_bmp, "ucs-bmp");
2389 defsymbol (&Qucs_smp, "ucs-smp");
2390 defsymbol (&Qucs_sip, "ucs-sip");
2391 defsymbol (&Qucs_cns, "ucs-cns");
2392 defsymbol (&Qucs_jis, "ucs-jis");
2393 defsymbol (&Qucs_ks, "ucs-ks");
2394 defsymbol (&Qucs_big5, "ucs-big5");
2395 defsymbol (&Qlatin_viscii, "latin-viscii");
2396 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2397 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2398 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2399 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2400 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2401 defsymbol (&Qideograph_gt, "ideograph-gt");
2402 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2403 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2404 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2405 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2406 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2407 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2408 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2409 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2410 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2411 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2412 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2413 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2414 defsymbol (&Qchinese_big5, "chinese-big5");
2415 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2416 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2417 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2418 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2419 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2420 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2421 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2422 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2423 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2424 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2425 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2426 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2427 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2428 defsymbol (&Qchina3_jef, "china3-jef");
2429 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2430 defsymbol (&Qmojikyo, "mojikyo");
2431 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2432 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2433 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2434 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2435 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2436 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2437 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2438 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2439 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2440 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2441 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2442 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2443 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2444 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2445 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2446 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2447 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2448 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2449 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2450 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2451 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2452 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2453 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2455 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2456 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2458 defsymbol (&Qcomposite, "composite");
2462 vars_of_mule_charset (void)
2469 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2470 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2472 /* Table of charsets indexed by leading byte. */
2473 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2474 chlook->charset_by_leading_byte[i] = Qnil;
2477 /* Table of charsets indexed by type/final-byte. */
2478 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2479 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2480 chlook->charset_by_attributes[i][j] = Qnil;
2482 /* Table of charsets indexed by type/final-byte/direction. */
2483 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2484 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2485 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2486 chlook->charset_by_attributes[i][j][k] = Qnil;
2490 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2492 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2493 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2497 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2498 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2499 Leading-code of private TYPE9N charset of column-width 1.
2501 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2505 Vdefault_coded_charset_priority_list = Qnil;
2506 DEFVAR_LISP ("default-coded-charset-priority-list",
2507 &Vdefault_coded_charset_priority_list /*
2508 Default order of preferred coded-character-sets.
2514 complex_vars_of_mule_charset (void)
2516 staticpro (&Vcharset_hash_table);
2517 Vcharset_hash_table =
2518 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2520 /* Predefined character sets. We store them into variables for
2524 staticpro (&Vcharset_ucs);
2526 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2527 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2528 build_string ("UCS"),
2529 build_string ("UCS"),
2530 build_string ("ISO/IEC 10646"),
2532 Qnil, 0, 0xFFFFFFF, 0, 0);
2533 staticpro (&Vcharset_ucs_bmp);
2535 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2536 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2537 build_string ("BMP"),
2538 build_string ("UCS-BMP"),
2539 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2541 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2542 Qnil, 0, 0xFFFF, 0, 0);
2543 staticpro (&Vcharset_ucs_smp);
2545 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2546 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2547 build_string ("SMP"),
2548 build_string ("UCS-SMP"),
2549 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2550 build_string ("UCS00-1"),
2551 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0);
2552 staticpro (&Vcharset_ucs_sip);
2554 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2555 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2556 build_string ("SIP"),
2557 build_string ("UCS-SIP"),
2558 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2559 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2560 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0);
2561 staticpro (&Vcharset_ucs_cns);
2563 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2564 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2565 build_string ("UCS for CNS"),
2566 build_string ("UCS for CNS 11643"),
2567 build_string ("ISO/IEC 10646 for CNS 11643"),
2570 staticpro (&Vcharset_ucs_jis);
2572 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2573 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2574 build_string ("UCS for JIS"),
2575 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2576 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2579 staticpro (&Vcharset_ucs_ks);
2581 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2582 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2583 build_string ("UCS for KS"),
2584 build_string ("UCS for CCS defined by KS"),
2585 build_string ("ISO/IEC 10646 for Korean Standards"),
2588 staticpro (&Vcharset_ucs_big5);
2590 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2591 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2592 build_string ("UCS for Big5"),
2593 build_string ("UCS for Big5"),
2594 build_string ("ISO/IEC 10646 for Big5"),
2598 # define MIN_CHAR_THAI 0
2599 # define MAX_CHAR_THAI 0
2600 /* # define MIN_CHAR_HEBREW 0 */
2601 /* # define MAX_CHAR_HEBREW 0 */
2602 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2603 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2605 staticpro (&Vcharset_ascii);
2607 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2608 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2609 build_string ("ASCII"),
2610 build_string ("ASCII)"),
2611 build_string ("ASCII (ISO646 IRV)"),
2612 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2613 Qnil, 0, 0x7F, 0, 0);
2614 staticpro (&Vcharset_control_1);
2615 Vcharset_control_1 =
2616 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2617 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2618 build_string ("C1"),
2619 build_string ("Control characters"),
2620 build_string ("Control characters 128-191"),
2622 Qnil, 0x80, 0x9F, 0, 0);
2623 staticpro (&Vcharset_latin_iso8859_1);
2624 Vcharset_latin_iso8859_1 =
2625 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2626 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2627 build_string ("Latin-1"),
2628 build_string ("ISO8859-1 (Latin-1)"),
2629 build_string ("ISO8859-1 (Latin-1)"),
2630 build_string ("iso8859-1"),
2631 Qnil, 0xA0, 0xFF, 0, 32);
2632 staticpro (&Vcharset_latin_iso8859_2);
2633 Vcharset_latin_iso8859_2 =
2634 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2635 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2636 build_string ("Latin-2"),
2637 build_string ("ISO8859-2 (Latin-2)"),
2638 build_string ("ISO8859-2 (Latin-2)"),
2639 build_string ("iso8859-2"),
2641 staticpro (&Vcharset_latin_iso8859_3);
2642 Vcharset_latin_iso8859_3 =
2643 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2644 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2645 build_string ("Latin-3"),
2646 build_string ("ISO8859-3 (Latin-3)"),
2647 build_string ("ISO8859-3 (Latin-3)"),
2648 build_string ("iso8859-3"),
2650 staticpro (&Vcharset_latin_iso8859_4);
2651 Vcharset_latin_iso8859_4 =
2652 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2653 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2654 build_string ("Latin-4"),
2655 build_string ("ISO8859-4 (Latin-4)"),
2656 build_string ("ISO8859-4 (Latin-4)"),
2657 build_string ("iso8859-4"),
2659 staticpro (&Vcharset_thai_tis620);
2660 Vcharset_thai_tis620 =
2661 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2662 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2663 build_string ("TIS620"),
2664 build_string ("TIS620 (Thai)"),
2665 build_string ("TIS620.2529 (Thai)"),
2666 build_string ("tis620"),
2667 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2668 staticpro (&Vcharset_greek_iso8859_7);
2669 Vcharset_greek_iso8859_7 =
2670 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2671 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2672 build_string ("ISO8859-7"),
2673 build_string ("ISO8859-7 (Greek)"),
2674 build_string ("ISO8859-7 (Greek)"),
2675 build_string ("iso8859-7"),
2677 staticpro (&Vcharset_arabic_iso8859_6);
2678 Vcharset_arabic_iso8859_6 =
2679 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2680 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2681 build_string ("ISO8859-6"),
2682 build_string ("ISO8859-6 (Arabic)"),
2683 build_string ("ISO8859-6 (Arabic)"),
2684 build_string ("iso8859-6"),
2686 staticpro (&Vcharset_hebrew_iso8859_8);
2687 Vcharset_hebrew_iso8859_8 =
2688 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2689 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2690 build_string ("ISO8859-8"),
2691 build_string ("ISO8859-8 (Hebrew)"),
2692 build_string ("ISO8859-8 (Hebrew)"),
2693 build_string ("iso8859-8"),
2695 0 /* MIN_CHAR_HEBREW */,
2696 0 /* MAX_CHAR_HEBREW */, 0, 32);
2697 staticpro (&Vcharset_katakana_jisx0201);
2698 Vcharset_katakana_jisx0201 =
2699 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2700 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2701 build_string ("JISX0201 Kana"),
2702 build_string ("JISX0201.1976 (Japanese Kana)"),
2703 build_string ("JISX0201.1976 Japanese Kana"),
2704 build_string ("jisx0201\\.1976"),
2706 staticpro (&Vcharset_latin_jisx0201);
2707 Vcharset_latin_jisx0201 =
2708 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2709 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2710 build_string ("JISX0201 Roman"),
2711 build_string ("JISX0201.1976 (Japanese Roman)"),
2712 build_string ("JISX0201.1976 Japanese Roman"),
2713 build_string ("jisx0201\\.1976"),
2715 staticpro (&Vcharset_cyrillic_iso8859_5);
2716 Vcharset_cyrillic_iso8859_5 =
2717 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2718 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2719 build_string ("ISO8859-5"),
2720 build_string ("ISO8859-5 (Cyrillic)"),
2721 build_string ("ISO8859-5 (Cyrillic)"),
2722 build_string ("iso8859-5"),
2724 staticpro (&Vcharset_latin_iso8859_9);
2725 Vcharset_latin_iso8859_9 =
2726 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2727 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("Latin-5"),
2729 build_string ("ISO8859-9 (Latin-5)"),
2730 build_string ("ISO8859-9 (Latin-5)"),
2731 build_string ("iso8859-9"),
2733 staticpro (&Vcharset_japanese_jisx0208_1978);
2734 Vcharset_japanese_jisx0208_1978 =
2735 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2736 Qjapanese_jisx0208_1978, 94, 2,
2737 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2738 build_string ("JIS X0208:1978"),
2739 build_string ("JIS X0208:1978 (Japanese)"),
2741 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2742 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2744 staticpro (&Vcharset_chinese_gb2312);
2745 Vcharset_chinese_gb2312 =
2746 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2747 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2748 build_string ("GB2312"),
2749 build_string ("GB2312)"),
2750 build_string ("GB2312 Chinese simplified"),
2751 build_string ("gb2312"),
2753 staticpro (&Vcharset_chinese_gb12345);
2754 Vcharset_chinese_gb12345 =
2755 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2756 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2757 build_string ("G1"),
2758 build_string ("GB 12345)"),
2759 build_string ("GB 12345-1990"),
2760 build_string ("GB12345\\(\\.1990\\)?-0"),
2762 staticpro (&Vcharset_japanese_jisx0208);
2763 Vcharset_japanese_jisx0208 =
2764 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2765 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2766 build_string ("JISX0208"),
2767 build_string ("JIS X0208:1983 (Japanese)"),
2768 build_string ("JIS X0208:1983 Japanese Kanji"),
2769 build_string ("jisx0208\\.1983"),
2772 staticpro (&Vcharset_japanese_jisx0208_1990);
2773 Vcharset_japanese_jisx0208_1990 =
2774 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2775 Qjapanese_jisx0208_1990, 94, 2,
2776 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2777 build_string ("JISX0208-1990"),
2778 build_string ("JIS X0208:1990 (Japanese)"),
2779 build_string ("JIS X0208:1990 Japanese Kanji"),
2780 build_string ("jisx0208\\.1990"),
2782 MIN_CHAR_JIS_X0208_1990,
2783 MAX_CHAR_JIS_X0208_1990, 0, 33);
2785 staticpro (&Vcharset_korean_ksc5601);
2786 Vcharset_korean_ksc5601 =
2787 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2788 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2789 build_string ("KSC5601"),
2790 build_string ("KSC5601 (Korean"),
2791 build_string ("KSC5601 Korean Hangul and Hanja"),
2792 build_string ("ksc5601"),
2794 staticpro (&Vcharset_japanese_jisx0212);
2795 Vcharset_japanese_jisx0212 =
2796 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2797 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2798 build_string ("JISX0212"),
2799 build_string ("JISX0212 (Japanese)"),
2800 build_string ("JISX0212 Japanese Supplement"),
2801 build_string ("jisx0212"),
2804 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2805 staticpro (&Vcharset_chinese_cns11643_1);
2806 Vcharset_chinese_cns11643_1 =
2807 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2808 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2809 build_string ("CNS11643-1"),
2810 build_string ("CNS11643-1 (Chinese traditional)"),
2812 ("CNS 11643 Plane 1 Chinese traditional"),
2813 build_string (CHINESE_CNS_PLANE_RE("1")),
2815 staticpro (&Vcharset_chinese_cns11643_2);
2816 Vcharset_chinese_cns11643_2 =
2817 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2818 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2819 build_string ("CNS11643-2"),
2820 build_string ("CNS11643-2 (Chinese traditional)"),
2822 ("CNS 11643 Plane 2 Chinese traditional"),
2823 build_string (CHINESE_CNS_PLANE_RE("2")),
2826 staticpro (&Vcharset_latin_tcvn5712);
2827 Vcharset_latin_tcvn5712 =
2828 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2829 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("TCVN 5712"),
2831 build_string ("TCVN 5712 (VSCII-2)"),
2832 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2833 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2835 staticpro (&Vcharset_latin_viscii_lower);
2836 Vcharset_latin_viscii_lower =
2837 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2838 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2839 build_string ("VISCII lower"),
2840 build_string ("VISCII lower (Vietnamese)"),
2841 build_string ("VISCII lower (Vietnamese)"),
2842 build_string ("MULEVISCII-LOWER"),
2844 staticpro (&Vcharset_latin_viscii_upper);
2845 Vcharset_latin_viscii_upper =
2846 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2847 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2848 build_string ("VISCII upper"),
2849 build_string ("VISCII upper (Vietnamese)"),
2850 build_string ("VISCII upper (Vietnamese)"),
2851 build_string ("MULEVISCII-UPPER"),
2853 staticpro (&Vcharset_latin_viscii);
2854 Vcharset_latin_viscii =
2855 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2856 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2857 build_string ("VISCII"),
2858 build_string ("VISCII 1.1 (Vietnamese)"),
2859 build_string ("VISCII 1.1 (Vietnamese)"),
2860 build_string ("VISCII1\\.1"),
2862 staticpro (&Vcharset_chinese_big5);
2863 Vcharset_chinese_big5 =
2864 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2865 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2866 build_string ("Big5"),
2867 build_string ("Big5"),
2868 build_string ("Big5 Chinese traditional"),
2869 build_string ("big5"),
2871 0 /* MIN_CHAR_BIG5_CDP */,
2872 0 /* MAX_CHAR_BIG5_CDP */, 0, 0);
2873 staticpro (&Vcharset_chinese_big5_cdp);
2874 Vcharset_chinese_big5_cdp =
2875 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2876 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2877 build_string ("Big5-CDP"),
2878 build_string ("Big5 + CDP extension"),
2879 build_string ("Big5 with CDP extension"),
2880 build_string ("big5\\.cdp-0"),
2881 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2882 #define DEF_HANZIKU(n) \
2883 staticpro (&Vcharset_ideograph_hanziku_##n); \
2884 Vcharset_ideograph_hanziku_##n = \
2885 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2886 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2887 build_string ("HZK-"#n), \
2888 build_string ("HANZIKU-"#n), \
2889 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2891 ("hanziku-"#n"$"), \
2892 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2905 staticpro (&Vcharset_china3_jef);
2906 Vcharset_china3_jef =
2907 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2908 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2909 build_string ("JC3"),
2910 build_string ("JEF + CHINA3"),
2911 build_string ("JEF + CHINA3 private characters"),
2912 build_string ("china3jef-0"),
2913 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2914 staticpro (&Vcharset_ideograph_cbeta);
2915 Vcharset_ideograph_cbeta =
2916 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2917 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2918 build_string ("CB"),
2919 build_string ("CBETA"),
2920 build_string ("CBETA private characters"),
2921 build_string ("cbeta-0"),
2922 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2923 staticpro (&Vcharset_ideograph_gt);
2924 Vcharset_ideograph_gt =
2925 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2926 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2927 build_string ("GT"),
2928 build_string ("GT"),
2929 build_string ("GT"),
2931 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2932 #define DEF_GT_PJ(n) \
2933 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2934 Vcharset_ideograph_gt_pj_##n = \
2935 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2936 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2937 build_string ("GT-PJ-"#n), \
2938 build_string ("GT (pseudo JIS encoding) part "#n), \
2939 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2941 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2955 staticpro (&Vcharset_ideograph_daikanwa);
2956 Vcharset_ideograph_daikanwa =
2957 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2958 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2959 build_string ("Daikanwa"),
2960 build_string ("Morohashi's Daikanwa Rev.2"),
2962 ("Daikanwa dictionary (second revised version)"),
2963 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
2964 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2965 staticpro (&Vcharset_mojikyo);
2967 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2968 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2969 build_string ("Mojikyo"),
2970 build_string ("Mojikyo"),
2971 build_string ("Konjaku-Mojikyo"),
2973 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2974 staticpro (&Vcharset_mojikyo_2022_1);
2975 Vcharset_mojikyo_2022_1 =
2976 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2977 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2978 build_string ("Mojikyo-2022-1"),
2979 build_string ("Mojikyo ISO-2022 Part 1"),
2980 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2984 #define DEF_MOJIKYO_PJ(n) \
2985 staticpro (&Vcharset_mojikyo_pj_##n); \
2986 Vcharset_mojikyo_pj_##n = \
2987 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2988 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2989 build_string ("Mojikyo-PJ-"#n), \
2990 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2992 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2994 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
3006 DEF_MOJIKYO_PJ (10);
3007 DEF_MOJIKYO_PJ (11);
3008 DEF_MOJIKYO_PJ (12);
3009 DEF_MOJIKYO_PJ (13);
3010 DEF_MOJIKYO_PJ (14);
3011 DEF_MOJIKYO_PJ (15);
3012 DEF_MOJIKYO_PJ (16);
3013 DEF_MOJIKYO_PJ (17);
3014 DEF_MOJIKYO_PJ (18);
3015 DEF_MOJIKYO_PJ (19);
3016 DEF_MOJIKYO_PJ (20);
3017 DEF_MOJIKYO_PJ (21);
3019 staticpro (&Vcharset_ethiopic_ucs);
3020 Vcharset_ethiopic_ucs =
3021 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3022 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3023 build_string ("Ethiopic (UCS)"),
3024 build_string ("Ethiopic (UCS)"),
3025 build_string ("Ethiopic of UCS"),
3026 build_string ("Ethiopic-Unicode"),
3027 Qnil, 0x1200, 0x137F, 0x1200, 0);
3029 staticpro (&Vcharset_chinese_big5_1);
3030 Vcharset_chinese_big5_1 =
3031 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3032 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3033 build_string ("Big5"),
3034 build_string ("Big5 (Level-1)"),
3036 ("Big5 Level-1 Chinese traditional"),
3037 build_string ("big5"),
3039 staticpro (&Vcharset_chinese_big5_2);
3040 Vcharset_chinese_big5_2 =
3041 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3042 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3043 build_string ("Big5"),
3044 build_string ("Big5 (Level-2)"),
3046 ("Big5 Level-2 Chinese traditional"),
3047 build_string ("big5"),
3050 #ifdef ENABLE_COMPOSITE_CHARS
3051 /* #### For simplicity, we put composite chars into a 96x96 charset.
3052 This is going to lead to problems because you can run out of
3053 room, esp. as we don't yet recycle numbers. */
3054 staticpro (&Vcharset_composite);
3055 Vcharset_composite =
3056 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3057 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3058 build_string ("Composite"),
3059 build_string ("Composite characters"),
3060 build_string ("Composite characters"),
3063 /* #### not dumped properly */
3064 composite_char_row_next = 32;
3065 composite_char_col_next = 32;
3067 Vcomposite_char_string2char_hash_table =
3068 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3069 Vcomposite_char_char2string_hash_table =
3070 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3071 staticpro (&Vcomposite_char_string2char_hash_table);
3072 staticpro (&Vcomposite_char_char2string_hash_table);
3073 #endif /* ENABLE_COMPOSITE_CHARS */