1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
312 code_point = (code_point << 8) | j;
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
376 int leading_code_private_11;
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
389 Lisp_Object Qmother, Qconversion, Q94x60;
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
504 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
505 string in STR. Returns the number of bytes stored.
506 Do not call this directly. Use the macro set_charptr_emchar() instead.
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
580 non_ascii_charptr_emchar (const Bufbyte *str)
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
625 Bufbyte i0 = *str, i1, i2 = 0;
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
677 NOTE: This takes advantage of the fact that
678 FIELD2_TO_OFFICIAL_LEADING_BYTE and
679 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
684 return (XCHARSET_CHARS (charset) == 96);
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
791 /************************************************************************/
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset. */
863 /* #### SJT Should generic properties be allowed? */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
982 /* Number of Big5 characters which have the same code in 1st byte. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
987 decode_builtin_char (Lisp_Object charset, int code_point)
991 if (EQ (charset, Vcharset_chinese_big5))
993 int c1 = code_point >> 8;
994 int c2 = code_point & 0xFF;
997 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
999 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1000 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1002 I = (c1 - 0xA1) * BIG5_SAME_ROW
1003 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1007 charset = Vcharset_chinese_big5_1;
1011 charset = Vcharset_chinese_big5_2;
1012 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1014 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1017 if ((final = XCHARSET_FINAL (charset)) >= '0')
1019 if (XCHARSET_DIMENSION (charset) == 1)
1021 switch (XCHARSET_CHARS (charset))
1025 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1028 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1036 switch (XCHARSET_CHARS (charset))
1039 return MIN_CHAR_94x94
1040 + (final - '0') * 94 * 94
1041 + (((code_point >> 8) & 0x7F) - 33) * 94
1042 + ((code_point & 0x7F) - 33);
1044 return MIN_CHAR_96x96
1045 + (final - '0') * 96 * 96
1046 + (((code_point >> 8) & 0x7F) - 32) * 96
1047 + ((code_point & 0x7F) - 32);
1054 else if (XCHARSET_MAX_CODE (charset))
1057 = (XCHARSET_DIMENSION (charset) == 1
1059 code_point - XCHARSET_BYTE_OFFSET (charset)
1061 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1062 * XCHARSET_CHARS (charset)
1063 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1064 + XCHARSET_CODE_OFFSET (charset);
1065 if ((cid < XCHARSET_MIN_CODE (charset))
1066 || (XCHARSET_MAX_CODE (charset) < cid))
1075 charset_code_point (Lisp_Object charset, Emchar ch)
1077 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1080 if ( CHAR_TABLEP (encoding_table)
1081 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1086 Lisp_Object mother = XCHARSET_MOTHER (charset);
1087 int min = XCHARSET_MIN_CODE (charset);
1088 int max = XCHARSET_MAX_CODE (charset);
1091 if ( CHARSETP (mother) )
1092 code = charset_code_point (mother, ch);
1095 if ( (min <= code) && (code <= max) )
1097 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1101 int cell = m % 94 + 33;
1107 return (row << 8) | cell;
1109 else if (XCHARSET_CHARS (charset) == 94)
1111 int d =code - XCHARSET_CODE_OFFSET (charset);
1113 if (XCHARSET_DIMENSION (charset) == 1)
1115 else if (XCHARSET_DIMENSION (charset) == 2)
1116 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1117 else if (XCHARSET_DIMENSION (charset) == 3)
1119 ( (d / (94 * 94) + 33) << 16)
1120 | ((d / 94 % 94 + 33) << 8)
1122 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1124 ( (d / (94 * 94 * 94) + 33) << 24)
1125 | ((d / (94 * 94) % 94 + 33) << 16)
1126 | ((d / 94 % 94 + 33) << 8)
1129 else if (XCHARSET_CHARS (charset) == 96)
1131 int d =code - XCHARSET_CODE_OFFSET (charset);
1133 if (XCHARSET_DIMENSION (charset) == 1)
1135 else if (XCHARSET_DIMENSION (charset) == 2)
1136 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1137 else if (XCHARSET_DIMENSION (charset) == 3)
1139 ( (d / (96 * 96) + 32) << 16)
1140 | ((d / 96 % 96 + 32) << 8)
1142 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1144 ( (d / (96 * 96 * 96) + 32) << 24)
1145 | ((d / (96 * 96) % 96 + 32) << 16)
1146 | ((d / 96 % 96 + 32) << 8)
1150 return code - XCHARSET_CODE_OFFSET (charset);
1152 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1153 (XCHARSET_CODE_OFFSET (charset)
1154 == XCHARSET_MIN_CODE (charset)) )
1158 if (XCHARSET_DIMENSION (charset) == 1)
1160 if (XCHARSET_CHARS (charset) == 94)
1162 if (((d = ch - (MIN_CHAR_94
1163 + (XCHARSET_FINAL (charset) - '0') * 94))
1168 else if (XCHARSET_CHARS (charset) == 96)
1170 if (((d = ch - (MIN_CHAR_96
1171 + (XCHARSET_FINAL (charset) - '0') * 96))
1179 else if (XCHARSET_DIMENSION (charset) == 2)
1181 if (XCHARSET_CHARS (charset) == 94)
1183 if (((d = ch - (MIN_CHAR_94x94
1185 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1188 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1190 else if (XCHARSET_CHARS (charset) == 96)
1192 if (((d = ch - (MIN_CHAR_96x96
1194 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1197 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1208 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1210 if (c <= MAX_CHAR_BASIC_LATIN)
1212 *charset = Vcharset_ascii;
1217 *charset = Vcharset_control_1;
1222 *charset = Vcharset_latin_iso8859_1;
1226 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1228 *charset = Vcharset_hebrew_iso8859_8;
1229 return c - MIN_CHAR_HEBREW + 0x20;
1232 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1234 *charset = Vcharset_thai_tis620;
1235 return c - MIN_CHAR_THAI + 0x20;
1238 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1239 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1241 return list2 (Vcharset_katakana_jisx0201,
1242 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1245 else if (c <= MAX_CHAR_BMP)
1247 *charset = Vcharset_ucs_bmp;
1250 else if (c <= MAX_CHAR_SMP)
1252 *charset = Vcharset_ucs_smp;
1253 return c - MIN_CHAR_SMP;
1255 else if (c <= MAX_CHAR_SIP)
1257 *charset = Vcharset_ucs_sip;
1258 return c - MIN_CHAR_SIP;
1260 else if (c < MIN_CHAR_DAIKANWA)
1262 *charset = Vcharset_ucs;
1265 else if (c <= MAX_CHAR_DAIKANWA)
1267 *charset = Vcharset_ideograph_daikanwa;
1268 return c - MIN_CHAR_DAIKANWA;
1270 else if (c < MIN_CHAR_94)
1272 *charset = Vcharset_ucs;
1275 else if (c <= MAX_CHAR_94)
1277 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1278 ((c - MIN_CHAR_94) / 94) + '0',
1279 CHARSET_LEFT_TO_RIGHT);
1280 if (!NILP (*charset))
1281 return ((c - MIN_CHAR_94) % 94) + 33;
1284 *charset = Vcharset_ucs;
1288 else if (c <= MAX_CHAR_96)
1290 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1291 ((c - MIN_CHAR_96) / 96) + '0',
1292 CHARSET_LEFT_TO_RIGHT);
1293 if (!NILP (*charset))
1294 return ((c - MIN_CHAR_96) % 96) + 32;
1297 *charset = Vcharset_ucs;
1301 else if (c <= MAX_CHAR_94x94)
1304 = CHARSET_BY_ATTRIBUTES (94, 2,
1305 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1306 CHARSET_LEFT_TO_RIGHT);
1307 if (!NILP (*charset))
1308 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1309 | (((c - MIN_CHAR_94x94) % 94) + 33);
1312 *charset = Vcharset_ucs;
1316 else if (c <= MAX_CHAR_96x96)
1319 = CHARSET_BY_ATTRIBUTES (96, 2,
1320 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1321 CHARSET_LEFT_TO_RIGHT);
1322 if (!NILP (*charset))
1323 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1324 | (((c - MIN_CHAR_96x96) % 96) + 32);
1327 *charset = Vcharset_ucs;
1333 *charset = Vcharset_ucs;
1338 Lisp_Object Vdefault_coded_charset_priority_list;
1342 /************************************************************************/
1343 /* Basic charset Lisp functions */
1344 /************************************************************************/
1346 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1347 Return non-nil if OBJECT is a charset.
1351 return CHARSETP (object) ? Qt : Qnil;
1354 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1355 Retrieve the charset of the given name.
1356 If CHARSET-OR-NAME is a charset object, it is simply returned.
1357 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1358 nil is returned. Otherwise the associated charset object is returned.
1362 if (CHARSETP (charset_or_name))
1363 return charset_or_name;
1365 CHECK_SYMBOL (charset_or_name);
1366 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1369 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1370 Retrieve the charset of the given name.
1371 Same as `find-charset' except an error is signalled if there is no such
1372 charset instead of returning nil.
1376 Lisp_Object charset = Ffind_charset (name);
1379 signal_simple_error ("No such charset", name);
1383 /* We store the charsets in hash tables with the names as the key and the
1384 actual charset object as the value. Occasionally we need to use them
1385 in a list format. These routines provide us with that. */
1386 struct charset_list_closure
1388 Lisp_Object *charset_list;
1392 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1393 void *charset_list_closure)
1395 /* This function can GC */
1396 struct charset_list_closure *chcl =
1397 (struct charset_list_closure*) charset_list_closure;
1398 Lisp_Object *charset_list = chcl->charset_list;
1400 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1404 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1405 Return a list of the names of all defined charsets.
1409 Lisp_Object charset_list = Qnil;
1410 struct gcpro gcpro1;
1411 struct charset_list_closure charset_list_closure;
1413 GCPRO1 (charset_list);
1414 charset_list_closure.charset_list = &charset_list;
1415 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1416 &charset_list_closure);
1419 return charset_list;
1422 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1423 Return the name of charset CHARSET.
1427 return XCHARSET_NAME (Fget_charset (charset));
1430 /* #### SJT Should generic properties be allowed? */
1431 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1432 Define a new character set.
1433 This function is for use with Mule support.
1434 NAME is a symbol, the name by which the character set is normally referred.
1435 DOC-STRING is a string describing the character set.
1436 PROPS is a property list, describing the specific nature of the
1437 character set. Recognized properties are:
1439 'short-name Short version of the charset name (ex: Latin-1)
1440 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1441 'registry A regular expression matching the font registry field for
1443 'dimension Number of octets used to index a character in this charset.
1444 Either 1 or 2. Defaults to 1.
1445 'columns Number of columns used to display a character in this charset.
1446 Only used in TTY mode. (Under X, the actual width of a
1447 character can be derived from the font used to display the
1448 characters.) If unspecified, defaults to the dimension
1449 (this is almost always the correct value).
1450 'chars Number of characters in each dimension (94 or 96).
1451 Defaults to 94. Note that if the dimension is 2, the
1452 character set thus described is 94x94 or 96x96.
1453 'final Final byte of ISO 2022 escape sequence. Must be
1454 supplied. Each combination of (DIMENSION, CHARS) defines a
1455 separate namespace for final bytes. Note that ISO
1456 2022 restricts the final byte to the range
1457 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1458 dimension == 2. Note also that final bytes in the range
1459 0x30 - 0x3F are reserved for user-defined (not official)
1461 'graphic 0 (use left half of font on output) or 1 (use right half
1462 of font on output). Defaults to 0. For example, for
1463 a font whose registry is ISO8859-1, the left half
1464 (octets 0x20 - 0x7F) is the `ascii' character set, while
1465 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1466 character set. With 'graphic set to 0, the octets
1467 will have their high bit cleared; with it set to 1,
1468 the octets will have their high bit set.
1469 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1471 'ccl-program A compiled CCL program used to convert a character in
1472 this charset into an index into the font. This is in
1473 addition to the 'graphic property. The CCL program
1474 is passed the octets of the character, with the high
1475 bit cleared and set depending upon whether the value
1476 of the 'graphic property is 0 or 1.
1478 (name, doc_string, props))
1480 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1481 int direction = CHARSET_LEFT_TO_RIGHT;
1482 Lisp_Object registry = Qnil;
1483 Lisp_Object charset;
1484 Lisp_Object ccl_program = Qnil;
1485 Lisp_Object short_name = Qnil, long_name = Qnil;
1486 Lisp_Object mother = Qnil;
1487 int min_code = 0, max_code = 0, code_offset = 0;
1488 int byte_offset = -1;
1491 CHECK_SYMBOL (name);
1492 if (!NILP (doc_string))
1493 CHECK_STRING (doc_string);
1495 charset = Ffind_charset (name);
1496 if (!NILP (charset))
1497 signal_simple_error ("Cannot redefine existing charset", name);
1500 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1502 if (EQ (keyword, Qshort_name))
1504 CHECK_STRING (value);
1508 if (EQ (keyword, Qlong_name))
1510 CHECK_STRING (value);
1514 else if (EQ (keyword, Qdimension))
1517 dimension = XINT (value);
1518 if (dimension < 1 ||
1525 signal_simple_error ("Invalid value for 'dimension", value);
1528 else if (EQ (keyword, Qchars))
1531 chars = XINT (value);
1532 if (chars != 94 && chars != 96
1534 && chars != 128 && chars != 256
1537 signal_simple_error ("Invalid value for 'chars", value);
1540 else if (EQ (keyword, Qcolumns))
1543 columns = XINT (value);
1544 if (columns != 1 && columns != 2)
1545 signal_simple_error ("Invalid value for 'columns", value);
1548 else if (EQ (keyword, Qgraphic))
1551 graphic = XINT (value);
1559 signal_simple_error ("Invalid value for 'graphic", value);
1562 else if (EQ (keyword, Qregistry))
1564 CHECK_STRING (value);
1568 else if (EQ (keyword, Qdirection))
1570 if (EQ (value, Ql2r))
1571 direction = CHARSET_LEFT_TO_RIGHT;
1572 else if (EQ (value, Qr2l))
1573 direction = CHARSET_RIGHT_TO_LEFT;
1575 signal_simple_error ("Invalid value for 'direction", value);
1578 else if (EQ (keyword, Qfinal))
1580 CHECK_CHAR_COERCE_INT (value);
1581 final = XCHAR (value);
1582 if (final < '0' || final > '~')
1583 signal_simple_error ("Invalid value for 'final", value);
1587 else if (EQ (keyword, Qmother))
1589 mother = Fget_charset (value);
1592 else if (EQ (keyword, Qmin_code))
1595 min_code = XUINT (value);
1598 else if (EQ (keyword, Qmax_code))
1601 max_code = XUINT (value);
1604 else if (EQ (keyword, Qcode_offset))
1607 code_offset = XUINT (value);
1610 else if (EQ (keyword, Qconversion))
1612 if (EQ (value, Q94x60))
1613 conversion = CONVERSION_94x60;
1617 else if (EQ (keyword, Qccl_program))
1619 struct ccl_program test_ccl;
1621 if (setup_ccl_program (&test_ccl, value) < 0)
1622 signal_simple_error ("Invalid value for 'ccl-program", value);
1623 ccl_program = value;
1627 signal_simple_error ("Unrecognized property", keyword);
1633 error ("'final must be specified");
1635 if (dimension == 2 && final > 0x5F)
1637 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1640 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1641 CHARSET_LEFT_TO_RIGHT)) ||
1642 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1643 CHARSET_RIGHT_TO_LEFT)))
1645 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1647 id = get_unallocated_leading_byte (dimension);
1649 if (NILP (doc_string))
1650 doc_string = build_string ("");
1652 if (NILP (registry))
1653 registry = build_string ("");
1655 if (NILP (short_name))
1656 XSETSTRING (short_name, XSYMBOL (name)->name);
1658 if (NILP (long_name))
1659 long_name = doc_string;
1662 columns = dimension;
1664 if (byte_offset < 0)
1668 else if (chars == 96)
1674 charset = make_charset (id, name, chars, dimension, columns, graphic,
1675 final, direction, short_name, long_name,
1676 doc_string, registry,
1677 Qnil, min_code, max_code, code_offset, byte_offset,
1678 mother, conversion);
1679 if (!NILP (ccl_program))
1680 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1684 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1686 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1687 NEW-NAME is the name of the new charset. Return the new charset.
1689 (charset, new_name))
1691 Lisp_Object new_charset = Qnil;
1692 int id, chars, dimension, columns, graphic, final;
1694 Lisp_Object registry, doc_string, short_name, long_name;
1697 charset = Fget_charset (charset);
1698 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1699 signal_simple_error ("Charset already has reverse-direction charset",
1702 CHECK_SYMBOL (new_name);
1703 if (!NILP (Ffind_charset (new_name)))
1704 signal_simple_error ("Cannot redefine existing charset", new_name);
1706 cs = XCHARSET (charset);
1708 chars = CHARSET_CHARS (cs);
1709 dimension = CHARSET_DIMENSION (cs);
1710 columns = CHARSET_COLUMNS (cs);
1711 id = get_unallocated_leading_byte (dimension);
1713 graphic = CHARSET_GRAPHIC (cs);
1714 final = CHARSET_FINAL (cs);
1715 direction = CHARSET_RIGHT_TO_LEFT;
1716 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1717 direction = CHARSET_LEFT_TO_RIGHT;
1718 doc_string = CHARSET_DOC_STRING (cs);
1719 short_name = CHARSET_SHORT_NAME (cs);
1720 long_name = CHARSET_LONG_NAME (cs);
1721 registry = CHARSET_REGISTRY (cs);
1723 new_charset = make_charset (id, new_name, chars, dimension, columns,
1724 graphic, final, direction, short_name, long_name,
1725 doc_string, registry,
1727 CHARSET_DECODING_TABLE(cs),
1728 CHARSET_MIN_CODE(cs),
1729 CHARSET_MAX_CODE(cs),
1730 CHARSET_CODE_OFFSET(cs),
1731 CHARSET_BYTE_OFFSET(cs),
1733 CHARSET_CONVERSION (cs)
1735 Qnil, 0, 0, 0, 0, Qnil, 0
1739 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1740 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1745 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1746 Define symbol ALIAS as an alias for CHARSET.
1750 CHECK_SYMBOL (alias);
1751 charset = Fget_charset (charset);
1752 return Fputhash (alias, charset, Vcharset_hash_table);
1755 /* #### Reverse direction charsets not yet implemented. */
1757 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1759 Return the reverse-direction charset parallel to CHARSET, if any.
1760 This is the charset with the same properties (in particular, the same
1761 dimension, number of characters per dimension, and final byte) as
1762 CHARSET but whose characters are displayed in the opposite direction.
1766 charset = Fget_charset (charset);
1767 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1771 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1772 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1773 If DIRECTION is omitted, both directions will be checked (left-to-right
1774 will be returned if character sets exist for both directions).
1776 (dimension, chars, final, direction))
1778 int dm, ch, fi, di = -1;
1779 Lisp_Object obj = Qnil;
1781 CHECK_INT (dimension);
1782 dm = XINT (dimension);
1783 if (dm < 1 || dm > 2)
1784 signal_simple_error ("Invalid value for DIMENSION", dimension);
1788 if (ch != 94 && ch != 96)
1789 signal_simple_error ("Invalid value for CHARS", chars);
1791 CHECK_CHAR_COERCE_INT (final);
1793 if (fi < '0' || fi > '~')
1794 signal_simple_error ("Invalid value for FINAL", final);
1796 if (EQ (direction, Ql2r))
1797 di = CHARSET_LEFT_TO_RIGHT;
1798 else if (EQ (direction, Qr2l))
1799 di = CHARSET_RIGHT_TO_LEFT;
1800 else if (!NILP (direction))
1801 signal_simple_error ("Invalid value for DIRECTION", direction);
1803 if (dm == 2 && fi > 0x5F)
1805 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1809 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1811 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1814 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1817 return XCHARSET_NAME (obj);
1821 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1822 Return short name of CHARSET.
1826 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1829 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1830 Return long name of CHARSET.
1834 return XCHARSET_LONG_NAME (Fget_charset (charset));
1837 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1838 Return description of CHARSET.
1842 return XCHARSET_DOC_STRING (Fget_charset (charset));
1845 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1846 Return dimension of CHARSET.
1850 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1853 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1854 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1855 Recognized properties are those listed in `make-charset', as well as
1856 'name and 'doc-string.
1862 charset = Fget_charset (charset);
1863 cs = XCHARSET (charset);
1865 CHECK_SYMBOL (prop);
1866 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1867 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1868 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1869 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1870 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1871 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1872 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1873 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1874 Qnil : make_char (CHARSET_FINAL (cs));
1875 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1876 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1877 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1878 if (EQ (prop, Qdirection))
1879 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1880 if (EQ (prop, Qreverse_direction_charset))
1882 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1883 /* #### Is this translation OK? If so, error checking sufficient? */
1884 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1887 if (EQ (prop, Qmother))
1888 return CHARSET_MOTHER (cs);
1889 if (EQ (prop, Qmin_code))
1890 return make_int (CHARSET_MIN_CODE (cs));
1891 if (EQ (prop, Qmax_code))
1892 return make_int (CHARSET_MAX_CODE (cs));
1894 signal_simple_error ("Unrecognized charset property name", prop);
1895 return Qnil; /* not reached */
1898 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1899 Return charset identification number of CHARSET.
1903 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1906 /* #### We need to figure out which properties we really want to
1909 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1910 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1912 (charset, ccl_program))
1914 struct ccl_program test_ccl;
1916 charset = Fget_charset (charset);
1917 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1918 signal_simple_error ("Invalid ccl-program", ccl_program);
1919 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1924 invalidate_charset_font_caches (Lisp_Object charset)
1926 /* Invalidate font cache entries for charset on all devices. */
1927 Lisp_Object devcons, concons, hash_table;
1928 DEVICE_LOOP_NO_BREAK (devcons, concons)
1930 struct device *d = XDEVICE (XCAR (devcons));
1931 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1932 if (!UNBOUNDP (hash_table))
1933 Fclrhash (hash_table);
1937 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1938 Set the 'registry property of CHARSET to REGISTRY.
1940 (charset, registry))
1942 charset = Fget_charset (charset);
1943 CHECK_STRING (registry);
1944 XCHARSET_REGISTRY (charset) = registry;
1945 invalidate_charset_font_caches (charset);
1946 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1951 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1952 Return mapping-table of CHARSET.
1956 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1959 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1960 Set mapping-table of CHARSET to TABLE.
1964 struct Lisp_Charset *cs;
1968 charset = Fget_charset (charset);
1969 cs = XCHARSET (charset);
1973 CHARSET_DECODING_TABLE(cs) = Qnil;
1976 else if (VECTORP (table))
1978 int ccs_len = CHARSET_BYTE_SIZE (cs);
1979 int ret = decoding_table_check_elements (table,
1980 CHARSET_DIMENSION (cs),
1985 signal_simple_error ("Too big table", table);
1987 signal_simple_error ("Invalid element is found", table);
1989 signal_simple_error ("Something wrong", table);
1991 CHARSET_DECODING_TABLE(cs) = Qnil;
1994 signal_error (Qwrong_type_argument,
1995 list2 (build_translated_string ("vector-or-nil-p"),
1998 byte_offset = CHARSET_BYTE_OFFSET (cs);
1999 switch (CHARSET_DIMENSION (cs))
2002 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2004 Lisp_Object c = XVECTOR_DATA(table)[i];
2007 Fput_char_attribute (c, XCHARSET_NAME (charset),
2008 make_int (i + byte_offset));
2012 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2014 Lisp_Object v = XVECTOR_DATA(table)[i];
2020 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2022 Lisp_Object c = XVECTOR_DATA(v)[j];
2026 (c, XCHARSET_NAME (charset),
2027 make_int ( ( (i + byte_offset) << 8 )
2033 Fput_char_attribute (v, XCHARSET_NAME (charset),
2034 make_int (i + byte_offset));
2043 /************************************************************************/
2044 /* Lisp primitives for working with characters */
2045 /************************************************************************/
2048 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2049 Make a character from CHARSET and code-point CODE.
2050 If DEFINED_ONLY is non-nil, builtin character is not returned.
2051 If corresponding character is not found, nil is returned.
2053 (charset, code, defined_only))
2057 charset = Fget_charset (charset);
2060 if (XCHARSET_GRAPHIC (charset) == 1)
2062 if (NILP (defined_only))
2063 c = DECODE_CHAR (charset, c);
2065 c = DECODE_DEFINED_CHAR (charset, c);
2066 return c >= 0 ? make_char (c) : Qnil;
2069 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2070 Make a builtin character from CHARSET and code-point CODE.
2076 charset = Fget_charset (charset);
2078 if (EQ (charset, Vcharset_latin_viscii))
2080 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2086 (ret = Fget_char_attribute (chr,
2087 Vcharset_latin_viscii_lower,
2090 charset = Vcharset_latin_viscii_lower;
2094 (ret = Fget_char_attribute (chr,
2095 Vcharset_latin_viscii_upper,
2098 charset = Vcharset_latin_viscii_upper;
2105 if (XCHARSET_GRAPHIC (charset) == 1)
2108 c = decode_builtin_char (charset, c);
2109 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2113 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2114 Make a character from CHARSET and octets ARG1 and ARG2.
2115 ARG2 is required only for characters from two-dimensional charsets.
2116 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2117 character s with caron.
2119 (charset, arg1, arg2))
2123 int lowlim, highlim;
2125 charset = Fget_charset (charset);
2126 cs = XCHARSET (charset);
2128 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2129 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2131 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2133 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2134 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2137 /* It is useful (and safe, according to Olivier Galibert) to strip
2138 the 8th bit off ARG1 and ARG2 because it allows programmers to
2139 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2140 Latin 2 code of the character. */
2148 if (a1 < lowlim || a1 > highlim)
2149 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2151 if (CHARSET_DIMENSION (cs) == 1)
2155 ("Charset is of dimension one; second octet must be nil", arg2);
2156 return make_char (MAKE_CHAR (charset, a1, 0));
2165 a2 = XINT (arg2) & 0x7f;
2167 if (a2 < lowlim || a2 > highlim)
2168 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2170 return make_char (MAKE_CHAR (charset, a1, a2));
2173 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2174 Return the character set of CHARACTER.
2178 CHECK_CHAR_COERCE_INT (character);
2180 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2183 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2184 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2185 N defaults to 0 if omitted.
2189 Lisp_Object charset;
2192 CHECK_CHAR_COERCE_INT (character);
2194 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2196 if (NILP (n) || EQ (n, Qzero))
2197 return make_int (octet0);
2198 else if (EQ (n, make_int (1)))
2199 return make_int (octet1);
2201 signal_simple_error ("Octet number must be 0 or 1", n);
2205 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2206 Return code-point of CHARACTER in specified CHARSET.
2208 (character, charset))
2212 CHECK_CHAR_COERCE_INT (character);
2213 charset = Fget_charset (charset);
2214 code_point = charset_code_point (charset, XCHAR (character));
2215 if (code_point >= 0)
2216 return make_int (code_point);
2222 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2223 Return list of charset and one or two position-codes of CHARACTER.
2227 /* This function can GC */
2228 struct gcpro gcpro1, gcpro2;
2229 Lisp_Object charset = Qnil;
2230 Lisp_Object rc = Qnil;
2238 GCPRO2 (charset, rc);
2239 CHECK_CHAR_COERCE_INT (character);
2242 code_point = ENCODE_CHAR (XCHAR (character), charset);
2243 dimension = XCHARSET_DIMENSION (charset);
2244 while (dimension > 0)
2246 rc = Fcons (make_int (code_point & 255), rc);
2250 rc = Fcons (XCHARSET_NAME (charset), rc);
2252 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2254 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2256 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2260 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2269 #ifdef ENABLE_COMPOSITE_CHARS
2270 /************************************************************************/
2271 /* composite character functions */
2272 /************************************************************************/
2275 lookup_composite_char (Bufbyte *str, int len)
2277 Lisp_Object lispstr = make_string (str, len);
2278 Lisp_Object ch = Fgethash (lispstr,
2279 Vcomposite_char_string2char_hash_table,
2285 if (composite_char_row_next >= 128)
2286 signal_simple_error ("No more composite chars available", lispstr);
2287 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2288 composite_char_col_next);
2289 Fputhash (make_char (emch), lispstr,
2290 Vcomposite_char_char2string_hash_table);
2291 Fputhash (lispstr, make_char (emch),
2292 Vcomposite_char_string2char_hash_table);
2293 composite_char_col_next++;
2294 if (composite_char_col_next >= 128)
2296 composite_char_col_next = 32;
2297 composite_char_row_next++;
2306 composite_char_string (Emchar ch)
2308 Lisp_Object str = Fgethash (make_char (ch),
2309 Vcomposite_char_char2string_hash_table,
2311 assert (!UNBOUNDP (str));
2315 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2316 Convert a string into a single composite character.
2317 The character is the result of overstriking all the characters in
2322 CHECK_STRING (string);
2323 return make_char (lookup_composite_char (XSTRING_DATA (string),
2324 XSTRING_LENGTH (string)));
2327 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2328 Return a string of the characters comprising a composite character.
2336 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2337 signal_simple_error ("Must be composite char", ch);
2338 return composite_char_string (emch);
2340 #endif /* ENABLE_COMPOSITE_CHARS */
2343 /************************************************************************/
2344 /* initialization */
2345 /************************************************************************/
2348 syms_of_mule_charset (void)
2350 INIT_LRECORD_IMPLEMENTATION (charset);
2352 DEFSUBR (Fcharsetp);
2353 DEFSUBR (Ffind_charset);
2354 DEFSUBR (Fget_charset);
2355 DEFSUBR (Fcharset_list);
2356 DEFSUBR (Fcharset_name);
2357 DEFSUBR (Fmake_charset);
2358 DEFSUBR (Fmake_reverse_direction_charset);
2359 /* DEFSUBR (Freverse_direction_charset); */
2360 DEFSUBR (Fdefine_charset_alias);
2361 DEFSUBR (Fcharset_from_attributes);
2362 DEFSUBR (Fcharset_short_name);
2363 DEFSUBR (Fcharset_long_name);
2364 DEFSUBR (Fcharset_description);
2365 DEFSUBR (Fcharset_dimension);
2366 DEFSUBR (Fcharset_property);
2367 DEFSUBR (Fcharset_id);
2368 DEFSUBR (Fset_charset_ccl_program);
2369 DEFSUBR (Fset_charset_registry);
2371 DEFSUBR (Fcharset_mapping_table);
2372 DEFSUBR (Fset_charset_mapping_table);
2376 DEFSUBR (Fdecode_char);
2377 DEFSUBR (Fdecode_builtin_char);
2378 DEFSUBR (Fencode_char);
2380 DEFSUBR (Fmake_char);
2381 DEFSUBR (Fchar_charset);
2382 DEFSUBR (Fchar_octet);
2383 DEFSUBR (Fsplit_char);
2385 #ifdef ENABLE_COMPOSITE_CHARS
2386 DEFSUBR (Fmake_composite_char);
2387 DEFSUBR (Fcomposite_char_string);
2390 defsymbol (&Qcharsetp, "charsetp");
2391 defsymbol (&Qregistry, "registry");
2392 defsymbol (&Qfinal, "final");
2393 defsymbol (&Qgraphic, "graphic");
2394 defsymbol (&Qdirection, "direction");
2395 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2396 defsymbol (&Qshort_name, "short-name");
2397 defsymbol (&Qlong_name, "long-name");
2399 defsymbol (&Qmother, "mother");
2400 defsymbol (&Qmin_code, "min-code");
2401 defsymbol (&Qmax_code, "max-code");
2402 defsymbol (&Qcode_offset, "code-offset");
2403 defsymbol (&Qconversion, "conversion");
2404 defsymbol (&Q94x60, "94x60");
2407 defsymbol (&Ql2r, "l2r");
2408 defsymbol (&Qr2l, "r2l");
2410 /* Charsets, compatible with FSF 20.3
2411 Naming convention is Script-Charset[-Edition] */
2412 defsymbol (&Qascii, "ascii");
2413 defsymbol (&Qcontrol_1, "control-1");
2414 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2415 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2416 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2417 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2418 defsymbol (&Qthai_tis620, "thai-tis620");
2419 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2420 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2421 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2422 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2423 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2424 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2425 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2426 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2427 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2428 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2429 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2430 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2431 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2432 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2433 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2434 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2436 defsymbol (&Qucs, "ucs");
2437 defsymbol (&Qucs_bmp, "ucs-bmp");
2438 defsymbol (&Qucs_smp, "ucs-smp");
2439 defsymbol (&Qucs_sip, "ucs-sip");
2440 defsymbol (&Qucs_cns, "ucs-cns");
2441 defsymbol (&Qucs_jis, "ucs-jis");
2442 defsymbol (&Qucs_ks, "ucs-ks");
2443 defsymbol (&Qucs_big5, "ucs-big5");
2444 defsymbol (&Qlatin_viscii, "latin-viscii");
2445 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2446 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2447 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2448 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2449 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2450 defsymbol (&Qideograph_gt, "ideograph-gt");
2451 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2452 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2453 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2454 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2455 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2456 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2457 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2458 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2459 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2460 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2461 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2462 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2463 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2464 defsymbol (&Qchinese_big5, "chinese-big5");
2465 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2466 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2467 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2468 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2469 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2470 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2471 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2472 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2473 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2474 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2475 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2476 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2477 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2478 defsymbol (&Qchina3_jef, "china3-jef");
2479 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2480 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2482 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2483 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2485 defsymbol (&Qcomposite, "composite");
2489 vars_of_mule_charset (void)
2496 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2497 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2499 /* Table of charsets indexed by leading byte. */
2500 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2501 chlook->charset_by_leading_byte[i] = Qnil;
2504 /* Table of charsets indexed by type/final-byte. */
2505 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2506 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2507 chlook->charset_by_attributes[i][j] = Qnil;
2509 /* Table of charsets indexed by type/final-byte/direction. */
2510 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2511 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2512 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2513 chlook->charset_by_attributes[i][j][k] = Qnil;
2517 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2519 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2520 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2524 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2525 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2526 Leading-code of private TYPE9N charset of column-width 1.
2528 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2532 Vdefault_coded_charset_priority_list = Qnil;
2533 DEFVAR_LISP ("default-coded-charset-priority-list",
2534 &Vdefault_coded_charset_priority_list /*
2535 Default order of preferred coded-character-sets.
2541 complex_vars_of_mule_charset (void)
2543 staticpro (&Vcharset_hash_table);
2544 Vcharset_hash_table =
2545 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2547 /* Predefined character sets. We store them into variables for
2551 staticpro (&Vcharset_ucs);
2553 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2554 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2555 build_string ("UCS"),
2556 build_string ("UCS"),
2557 build_string ("ISO/IEC 10646"),
2559 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2560 staticpro (&Vcharset_ucs_bmp);
2562 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2563 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2564 build_string ("BMP"),
2565 build_string ("UCS-BMP"),
2566 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2568 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2569 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2570 staticpro (&Vcharset_ucs_smp);
2572 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2573 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2574 build_string ("SMP"),
2575 build_string ("UCS-SMP"),
2576 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2577 build_string ("UCS00-1"),
2578 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2579 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2580 staticpro (&Vcharset_ucs_sip);
2582 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2583 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2584 build_string ("SIP"),
2585 build_string ("UCS-SIP"),
2586 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2587 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2588 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2589 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2590 staticpro (&Vcharset_ucs_cns);
2592 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2593 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2594 build_string ("UCS for CNS"),
2595 build_string ("UCS for CNS 11643"),
2596 build_string ("ISO/IEC 10646 for CNS 11643"),
2599 Qnil, CONVERSION_IDENTICAL);
2600 staticpro (&Vcharset_ucs_jis);
2602 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2603 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2604 build_string ("UCS for JIS"),
2605 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2606 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2608 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2609 staticpro (&Vcharset_ucs_ks);
2611 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2612 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2613 build_string ("UCS for KS"),
2614 build_string ("UCS for CCS defined by KS"),
2615 build_string ("ISO/IEC 10646 for Korean Standards"),
2617 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2618 staticpro (&Vcharset_ucs_big5);
2620 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2621 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2622 build_string ("UCS for Big5"),
2623 build_string ("UCS for Big5"),
2624 build_string ("ISO/IEC 10646 for Big5"),
2626 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2628 # define MIN_CHAR_THAI 0
2629 # define MAX_CHAR_THAI 0
2630 /* # define MIN_CHAR_HEBREW 0 */
2631 /* # define MAX_CHAR_HEBREW 0 */
2632 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2633 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2635 staticpro (&Vcharset_ascii);
2637 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2638 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2639 build_string ("ASCII"),
2640 build_string ("ASCII)"),
2641 build_string ("ASCII (ISO646 IRV)"),
2642 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2643 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2644 staticpro (&Vcharset_control_1);
2645 Vcharset_control_1 =
2646 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2647 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2648 build_string ("C1"),
2649 build_string ("Control characters"),
2650 build_string ("Control characters 128-191"),
2652 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2653 staticpro (&Vcharset_latin_iso8859_1);
2654 Vcharset_latin_iso8859_1 =
2655 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2656 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2657 build_string ("Latin-1"),
2658 build_string ("ISO8859-1 (Latin-1)"),
2659 build_string ("ISO8859-1 (Latin-1)"),
2660 build_string ("iso8859-1"),
2661 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2662 staticpro (&Vcharset_latin_iso8859_2);
2663 Vcharset_latin_iso8859_2 =
2664 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2665 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2666 build_string ("Latin-2"),
2667 build_string ("ISO8859-2 (Latin-2)"),
2668 build_string ("ISO8859-2 (Latin-2)"),
2669 build_string ("iso8859-2"),
2670 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2671 staticpro (&Vcharset_latin_iso8859_3);
2672 Vcharset_latin_iso8859_3 =
2673 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2674 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2675 build_string ("Latin-3"),
2676 build_string ("ISO8859-3 (Latin-3)"),
2677 build_string ("ISO8859-3 (Latin-3)"),
2678 build_string ("iso8859-3"),
2679 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2680 staticpro (&Vcharset_latin_iso8859_4);
2681 Vcharset_latin_iso8859_4 =
2682 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2683 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2684 build_string ("Latin-4"),
2685 build_string ("ISO8859-4 (Latin-4)"),
2686 build_string ("ISO8859-4 (Latin-4)"),
2687 build_string ("iso8859-4"),
2688 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2689 staticpro (&Vcharset_thai_tis620);
2690 Vcharset_thai_tis620 =
2691 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2692 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2693 build_string ("TIS620"),
2694 build_string ("TIS620 (Thai)"),
2695 build_string ("TIS620.2529 (Thai)"),
2696 build_string ("tis620"),
2697 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2698 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2699 staticpro (&Vcharset_greek_iso8859_7);
2700 Vcharset_greek_iso8859_7 =
2701 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2702 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2703 build_string ("ISO8859-7"),
2704 build_string ("ISO8859-7 (Greek)"),
2705 build_string ("ISO8859-7 (Greek)"),
2706 build_string ("iso8859-7"),
2707 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2708 staticpro (&Vcharset_arabic_iso8859_6);
2709 Vcharset_arabic_iso8859_6 =
2710 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2711 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2712 build_string ("ISO8859-6"),
2713 build_string ("ISO8859-6 (Arabic)"),
2714 build_string ("ISO8859-6 (Arabic)"),
2715 build_string ("iso8859-6"),
2716 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2717 staticpro (&Vcharset_hebrew_iso8859_8);
2718 Vcharset_hebrew_iso8859_8 =
2719 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2720 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2721 build_string ("ISO8859-8"),
2722 build_string ("ISO8859-8 (Hebrew)"),
2723 build_string ("ISO8859-8 (Hebrew)"),
2724 build_string ("iso8859-8"),
2726 0 /* MIN_CHAR_HEBREW */,
2727 0 /* MAX_CHAR_HEBREW */, 0, 32,
2728 Qnil, CONVERSION_IDENTICAL);
2729 staticpro (&Vcharset_katakana_jisx0201);
2730 Vcharset_katakana_jisx0201 =
2731 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2732 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2733 build_string ("JISX0201 Kana"),
2734 build_string ("JISX0201.1976 (Japanese Kana)"),
2735 build_string ("JISX0201.1976 Japanese Kana"),
2736 build_string ("jisx0201\\.1976"),
2737 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2738 staticpro (&Vcharset_latin_jisx0201);
2739 Vcharset_latin_jisx0201 =
2740 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2741 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2742 build_string ("JISX0201 Roman"),
2743 build_string ("JISX0201.1976 (Japanese Roman)"),
2744 build_string ("JISX0201.1976 Japanese Roman"),
2745 build_string ("jisx0201\\.1976"),
2746 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2747 staticpro (&Vcharset_cyrillic_iso8859_5);
2748 Vcharset_cyrillic_iso8859_5 =
2749 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2750 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("ISO8859-5"),
2752 build_string ("ISO8859-5 (Cyrillic)"),
2753 build_string ("ISO8859-5 (Cyrillic)"),
2754 build_string ("iso8859-5"),
2755 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2756 staticpro (&Vcharset_latin_iso8859_9);
2757 Vcharset_latin_iso8859_9 =
2758 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2759 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2760 build_string ("Latin-5"),
2761 build_string ("ISO8859-9 (Latin-5)"),
2762 build_string ("ISO8859-9 (Latin-5)"),
2763 build_string ("iso8859-9"),
2764 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2765 staticpro (&Vcharset_japanese_jisx0208_1978);
2766 Vcharset_japanese_jisx0208_1978 =
2767 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2768 Qjapanese_jisx0208_1978, 94, 2,
2769 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2770 build_string ("JIS X0208:1978"),
2771 build_string ("JIS X0208:1978 (Japanese)"),
2773 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2774 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2775 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2776 staticpro (&Vcharset_chinese_gb2312);
2777 Vcharset_chinese_gb2312 =
2778 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2779 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2780 build_string ("GB2312"),
2781 build_string ("GB2312)"),
2782 build_string ("GB2312 Chinese simplified"),
2783 build_string ("gb2312"),
2784 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2785 staticpro (&Vcharset_chinese_gb12345);
2786 Vcharset_chinese_gb12345 =
2787 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2788 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2789 build_string ("G1"),
2790 build_string ("GB 12345)"),
2791 build_string ("GB 12345-1990"),
2792 build_string ("GB12345\\(\\.1990\\)?-0"),
2793 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2794 staticpro (&Vcharset_japanese_jisx0208);
2795 Vcharset_japanese_jisx0208 =
2796 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2797 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2798 build_string ("JISX0208"),
2799 build_string ("JIS X0208:1983 (Japanese)"),
2800 build_string ("JIS X0208:1983 Japanese Kanji"),
2801 build_string ("jisx0208\\.1983"),
2802 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2804 staticpro (&Vcharset_japanese_jisx0208_1990);
2805 Vcharset_japanese_jisx0208_1990 =
2806 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2807 Qjapanese_jisx0208_1990, 94, 2,
2808 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2809 build_string ("JISX0208-1990"),
2810 build_string ("JIS X0208:1990 (Japanese)"),
2811 build_string ("JIS X0208:1990 Japanese Kanji"),
2812 build_string ("jisx0208\\.1990"),
2814 MIN_CHAR_JIS_X0208_1990,
2815 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2816 Qnil, CONVERSION_IDENTICAL);
2818 staticpro (&Vcharset_korean_ksc5601);
2819 Vcharset_korean_ksc5601 =
2820 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2821 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("KSC5601"),
2823 build_string ("KSC5601 (Korean"),
2824 build_string ("KSC5601 Korean Hangul and Hanja"),
2825 build_string ("ksc5601"),
2826 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_japanese_jisx0212);
2828 Vcharset_japanese_jisx0212 =
2829 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2830 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2831 build_string ("JISX0212"),
2832 build_string ("JISX0212 (Japanese)"),
2833 build_string ("JISX0212 Japanese Supplement"),
2834 build_string ("jisx0212"),
2835 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2837 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2838 staticpro (&Vcharset_chinese_cns11643_1);
2839 Vcharset_chinese_cns11643_1 =
2840 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2841 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2842 build_string ("CNS11643-1"),
2843 build_string ("CNS11643-1 (Chinese traditional)"),
2845 ("CNS 11643 Plane 1 Chinese traditional"),
2846 build_string (CHINESE_CNS_PLANE_RE("1")),
2847 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2848 staticpro (&Vcharset_chinese_cns11643_2);
2849 Vcharset_chinese_cns11643_2 =
2850 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2851 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2852 build_string ("CNS11643-2"),
2853 build_string ("CNS11643-2 (Chinese traditional)"),
2855 ("CNS 11643 Plane 2 Chinese traditional"),
2856 build_string (CHINESE_CNS_PLANE_RE("2")),
2857 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2859 staticpro (&Vcharset_latin_tcvn5712);
2860 Vcharset_latin_tcvn5712 =
2861 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2862 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2863 build_string ("TCVN 5712"),
2864 build_string ("TCVN 5712 (VSCII-2)"),
2865 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2866 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2867 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2868 staticpro (&Vcharset_latin_viscii_lower);
2869 Vcharset_latin_viscii_lower =
2870 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2871 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2872 build_string ("VISCII lower"),
2873 build_string ("VISCII lower (Vietnamese)"),
2874 build_string ("VISCII lower (Vietnamese)"),
2875 build_string ("MULEVISCII-LOWER"),
2876 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2877 staticpro (&Vcharset_latin_viscii_upper);
2878 Vcharset_latin_viscii_upper =
2879 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2880 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2881 build_string ("VISCII upper"),
2882 build_string ("VISCII upper (Vietnamese)"),
2883 build_string ("VISCII upper (Vietnamese)"),
2884 build_string ("MULEVISCII-UPPER"),
2885 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2886 staticpro (&Vcharset_latin_viscii);
2887 Vcharset_latin_viscii =
2888 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2889 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2890 build_string ("VISCII"),
2891 build_string ("VISCII 1.1 (Vietnamese)"),
2892 build_string ("VISCII 1.1 (Vietnamese)"),
2893 build_string ("VISCII1\\.1"),
2894 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2895 staticpro (&Vcharset_chinese_big5);
2896 Vcharset_chinese_big5 =
2897 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2898 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2899 build_string ("Big5"),
2900 build_string ("Big5"),
2901 build_string ("Big5 Chinese traditional"),
2902 build_string ("big5"),
2904 0 /* MIN_CHAR_BIG5_CDP */,
2905 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2906 Qnil, CONVERSION_IDENTICAL);
2907 staticpro (&Vcharset_chinese_big5_cdp);
2908 Vcharset_chinese_big5_cdp =
2909 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2910 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2911 build_string ("Big5-CDP"),
2912 build_string ("Big5 + CDP extension"),
2913 build_string ("Big5 with CDP extension"),
2914 build_string ("big5\\.cdp-0"),
2915 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2916 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2917 #define DEF_HANZIKU(n) \
2918 staticpro (&Vcharset_ideograph_hanziku_##n); \
2919 Vcharset_ideograph_hanziku_##n = \
2920 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2921 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2922 build_string ("HZK-"#n), \
2923 build_string ("HANZIKU-"#n), \
2924 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2926 ("hanziku-"#n"$"), \
2927 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2928 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2941 staticpro (&Vcharset_china3_jef);
2942 Vcharset_china3_jef =
2943 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2944 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2945 build_string ("JC3"),
2946 build_string ("JEF + CHINA3"),
2947 build_string ("JEF + CHINA3 private characters"),
2948 build_string ("china3jef-0"),
2949 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2950 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2951 staticpro (&Vcharset_ideograph_cbeta);
2952 Vcharset_ideograph_cbeta =
2953 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2954 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2955 build_string ("CB"),
2956 build_string ("CBETA"),
2957 build_string ("CBETA private characters"),
2958 build_string ("cbeta-0"),
2959 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2960 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2961 staticpro (&Vcharset_ideograph_gt);
2962 Vcharset_ideograph_gt =
2963 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2964 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2965 build_string ("GT"),
2966 build_string ("GT"),
2967 build_string ("GT"),
2969 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2970 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
2971 #define DEF_GT_PJ(n) \
2972 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2973 Vcharset_ideograph_gt_pj_##n = \
2974 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2975 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2976 build_string ("GT-PJ-"#n), \
2977 build_string ("GT (pseudo JIS encoding) part "#n), \
2978 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2980 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2981 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2994 staticpro (&Vcharset_ideograph_daikanwa_2);
2995 Vcharset_ideograph_daikanwa_2 =
2996 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2997 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2998 build_string ("Daikanwa Rev."),
2999 build_string ("Morohashi's Daikanwa Rev."),
3001 ("Daikanwa dictionary (revised version)"),
3002 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3003 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3004 staticpro (&Vcharset_ideograph_daikanwa);
3005 Vcharset_ideograph_daikanwa =
3006 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3007 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3008 build_string ("Daikanwa"),
3009 build_string ("Morohashi's Daikanwa Rev.2"),
3011 ("Daikanwa dictionary (second revised version)"),
3012 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3013 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3014 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3016 staticpro (&Vcharset_ethiopic_ucs);
3017 Vcharset_ethiopic_ucs =
3018 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3019 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3020 build_string ("Ethiopic (UCS)"),
3021 build_string ("Ethiopic (UCS)"),
3022 build_string ("Ethiopic of UCS"),
3023 build_string ("Ethiopic-Unicode"),
3024 Qnil, 0x1200, 0x137F, 0, 0,
3025 Qnil, CONVERSION_IDENTICAL);
3027 staticpro (&Vcharset_chinese_big5_1);
3028 Vcharset_chinese_big5_1 =
3029 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3030 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Big5"),
3032 build_string ("Big5 (Level-1)"),
3034 ("Big5 Level-1 Chinese traditional"),
3035 build_string ("big5"),
3036 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3037 staticpro (&Vcharset_chinese_big5_2);
3038 Vcharset_chinese_big5_2 =
3039 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3040 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3041 build_string ("Big5"),
3042 build_string ("Big5 (Level-2)"),
3044 ("Big5 Level-2 Chinese traditional"),
3045 build_string ("big5"),
3046 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3048 #ifdef ENABLE_COMPOSITE_CHARS
3049 /* #### For simplicity, we put composite chars into a 96x96 charset.
3050 This is going to lead to problems because you can run out of
3051 room, esp. as we don't yet recycle numbers. */
3052 staticpro (&Vcharset_composite);
3053 Vcharset_composite =
3054 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3055 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3056 build_string ("Composite"),
3057 build_string ("Composite characters"),
3058 build_string ("Composite characters"),
3061 /* #### not dumped properly */
3062 composite_char_row_next = 32;
3063 composite_char_col_next = 32;
3065 Vcomposite_char_string2char_hash_table =
3066 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3067 Vcomposite_char_char2string_hash_table =
3068 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3069 staticpro (&Vcomposite_char_string2char_hash_table);
3070 staticpro (&Vcomposite_char_char2string_hash_table);
3071 #endif /* ENABLE_COMPOSITE_CHARS */