1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
312 code_point = (code_point << 8) | j;
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
376 int leading_code_private_11;
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code;
389 Lisp_Object Qmother, Qconversion, Q94x60;
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
504 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
505 string in STR. Returns the number of bytes stored.
506 Do not call this directly. Use the macro set_charptr_emchar() instead.
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
580 non_ascii_charptr_emchar (const Bufbyte *str)
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
625 Bufbyte i0 = *str, i1, i2 = 0;
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
677 NOTE: This takes advantage of the fact that
678 FIELD2_TO_OFFICIAL_LEADING_BYTE and
679 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
684 return (XCHARSET_CHARS (charset) == 96);
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
791 /************************************************************************/
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset. */
863 /* #### SJT Should generic properties be allowed? */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
982 /* Number of Big5 characters which have the same code in 1st byte. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
987 decode_builtin_char (Lisp_Object charset, int code_point)
991 if (EQ (charset, Vcharset_chinese_big5))
993 int c1 = code_point >> 8;
994 int c2 = code_point & 0xFF;
997 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
999 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1000 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1002 I = (c1 - 0xA1) * BIG5_SAME_ROW
1003 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1007 charset = Vcharset_chinese_big5_1;
1011 charset = Vcharset_chinese_big5_2;
1012 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1014 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1017 if ((final = XCHARSET_FINAL (charset)) >= '0')
1019 if (XCHARSET_DIMENSION (charset) == 1)
1021 switch (XCHARSET_CHARS (charset))
1025 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1028 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1036 switch (XCHARSET_CHARS (charset))
1039 return MIN_CHAR_94x94
1040 + (final - '0') * 94 * 94
1041 + (((code_point >> 8) & 0x7F) - 33) * 94
1042 + ((code_point & 0x7F) - 33);
1044 return MIN_CHAR_96x96
1045 + (final - '0') * 96 * 96
1046 + (((code_point >> 8) & 0x7F) - 32) * 96
1047 + ((code_point & 0x7F) - 32);
1054 else if (XCHARSET_MAX_CODE (charset))
1057 = (XCHARSET_DIMENSION (charset) == 1
1059 code_point - XCHARSET_BYTE_OFFSET (charset)
1061 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1062 * XCHARSET_CHARS (charset)
1063 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1064 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_MIN_CODE (charset);
1065 if ((cid < XCHARSET_MIN_CODE (charset))
1066 || (XCHARSET_MAX_CODE (charset) < cid))
1075 charset_code_point (Lisp_Object charset, Emchar ch)
1077 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1080 if ( CHAR_TABLEP (encoding_table)
1081 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1086 Lisp_Object mother = XCHARSET_MOTHER (charset);
1088 if ( CHARSETP (mother) )
1090 int min = XCHARSET_MIN_CODE (charset);
1091 int max = XCHARSET_MAX_CODE (charset);
1092 int code = charset_code_point (mother, ch);
1094 if ( (min <= code) && (code <= max) )
1096 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1100 int cell = m % 94 + 33;
1106 return (row << 8) | cell;
1109 return code - min + XCHARSET_CODE_OFFSET (charset);
1113 return range_charset_code_point (charset, ch);
1117 range_charset_code_point (Lisp_Object charset, Emchar ch)
1121 if ((XCHARSET_MIN_CODE (charset) <= ch)
1122 && (ch <= XCHARSET_MAX_CODE (charset)))
1124 d = ch - XCHARSET_MIN_CODE (charset) + XCHARSET_CODE_OFFSET (charset);
1126 if (XCHARSET_CHARS (charset) == 256)
1128 else if (XCHARSET_DIMENSION (charset) == 1)
1129 return d + XCHARSET_BYTE_OFFSET (charset);
1130 else if (XCHARSET_DIMENSION (charset) == 2)
1132 ((d / XCHARSET_CHARS (charset)
1133 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1134 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1135 else if (XCHARSET_DIMENSION (charset) == 3)
1137 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1138 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1139 | ((d / XCHARSET_CHARS (charset)
1140 % XCHARSET_CHARS (charset)
1141 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1142 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1143 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1145 ((d / (XCHARSET_CHARS (charset)
1146 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1147 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1148 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1149 % XCHARSET_CHARS (charset)
1150 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1151 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1152 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1153 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1155 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1157 if (XCHARSET_DIMENSION (charset) == 1)
1159 if (XCHARSET_CHARS (charset) == 94)
1161 if (((d = ch - (MIN_CHAR_94
1162 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1166 else if (XCHARSET_CHARS (charset) == 96)
1168 if (((d = ch - (MIN_CHAR_96
1169 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1176 else if (XCHARSET_DIMENSION (charset) == 2)
1178 if (XCHARSET_CHARS (charset) == 94)
1180 if (((d = ch - (MIN_CHAR_94x94
1181 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1184 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1186 else if (XCHARSET_CHARS (charset) == 96)
1188 if (((d = ch - (MIN_CHAR_96x96
1189 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1192 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1202 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1204 if (c <= MAX_CHAR_BASIC_LATIN)
1206 *charset = Vcharset_ascii;
1211 *charset = Vcharset_control_1;
1216 *charset = Vcharset_latin_iso8859_1;
1220 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1222 *charset = Vcharset_hebrew_iso8859_8;
1223 return c - MIN_CHAR_HEBREW + 0x20;
1226 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1228 *charset = Vcharset_thai_tis620;
1229 return c - MIN_CHAR_THAI + 0x20;
1232 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1233 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1235 return list2 (Vcharset_katakana_jisx0201,
1236 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1239 else if (c <= MAX_CHAR_BMP)
1241 *charset = Vcharset_ucs_bmp;
1244 else if (c <= MAX_CHAR_SMP)
1246 *charset = Vcharset_ucs_smp;
1247 return c - MIN_CHAR_SMP;
1249 else if (c <= MAX_CHAR_SIP)
1251 *charset = Vcharset_ucs_sip;
1252 return c - MIN_CHAR_SIP;
1254 else if (c < MIN_CHAR_DAIKANWA)
1256 *charset = Vcharset_ucs;
1259 else if (c <= MAX_CHAR_DAIKANWA)
1261 *charset = Vcharset_ideograph_daikanwa;
1262 return c - MIN_CHAR_DAIKANWA;
1264 else if (c < MIN_CHAR_94)
1266 *charset = Vcharset_ucs;
1269 else if (c <= MAX_CHAR_94)
1271 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1272 ((c - MIN_CHAR_94) / 94) + '0',
1273 CHARSET_LEFT_TO_RIGHT);
1274 if (!NILP (*charset))
1275 return ((c - MIN_CHAR_94) % 94) + 33;
1278 *charset = Vcharset_ucs;
1282 else if (c <= MAX_CHAR_96)
1284 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1285 ((c - MIN_CHAR_96) / 96) + '0',
1286 CHARSET_LEFT_TO_RIGHT);
1287 if (!NILP (*charset))
1288 return ((c - MIN_CHAR_96) % 96) + 32;
1291 *charset = Vcharset_ucs;
1295 else if (c <= MAX_CHAR_94x94)
1298 = CHARSET_BY_ATTRIBUTES (94, 2,
1299 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1303 | (((c - MIN_CHAR_94x94) % 94) + 33);
1306 *charset = Vcharset_ucs;
1310 else if (c <= MAX_CHAR_96x96)
1313 = CHARSET_BY_ATTRIBUTES (96, 2,
1314 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1315 CHARSET_LEFT_TO_RIGHT);
1316 if (!NILP (*charset))
1317 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1318 | (((c - MIN_CHAR_96x96) % 96) + 32);
1321 *charset = Vcharset_ucs;
1327 *charset = Vcharset_ucs;
1332 Lisp_Object Vdefault_coded_charset_priority_list;
1336 /************************************************************************/
1337 /* Basic charset Lisp functions */
1338 /************************************************************************/
1340 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1341 Return non-nil if OBJECT is a charset.
1345 return CHARSETP (object) ? Qt : Qnil;
1348 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1349 Retrieve the charset of the given name.
1350 If CHARSET-OR-NAME is a charset object, it is simply returned.
1351 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1352 nil is returned. Otherwise the associated charset object is returned.
1356 if (CHARSETP (charset_or_name))
1357 return charset_or_name;
1359 CHECK_SYMBOL (charset_or_name);
1360 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1363 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 Same as `find-charset' except an error is signalled if there is no such
1366 charset instead of returning nil.
1370 Lisp_Object charset = Ffind_charset (name);
1373 signal_simple_error ("No such charset", name);
1377 /* We store the charsets in hash tables with the names as the key and the
1378 actual charset object as the value. Occasionally we need to use them
1379 in a list format. These routines provide us with that. */
1380 struct charset_list_closure
1382 Lisp_Object *charset_list;
1386 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1387 void *charset_list_closure)
1389 /* This function can GC */
1390 struct charset_list_closure *chcl =
1391 (struct charset_list_closure*) charset_list_closure;
1392 Lisp_Object *charset_list = chcl->charset_list;
1394 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1398 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1399 Return a list of the names of all defined charsets.
1403 Lisp_Object charset_list = Qnil;
1404 struct gcpro gcpro1;
1405 struct charset_list_closure charset_list_closure;
1407 GCPRO1 (charset_list);
1408 charset_list_closure.charset_list = &charset_list;
1409 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1410 &charset_list_closure);
1413 return charset_list;
1416 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1417 Return the name of charset CHARSET.
1421 return XCHARSET_NAME (Fget_charset (charset));
1424 /* #### SJT Should generic properties be allowed? */
1425 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1426 Define a new character set.
1427 This function is for use with Mule support.
1428 NAME is a symbol, the name by which the character set is normally referred.
1429 DOC-STRING is a string describing the character set.
1430 PROPS is a property list, describing the specific nature of the
1431 character set. Recognized properties are:
1433 'short-name Short version of the charset name (ex: Latin-1)
1434 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1435 'registry A regular expression matching the font registry field for
1437 'dimension Number of octets used to index a character in this charset.
1438 Either 1 or 2. Defaults to 1.
1439 'columns Number of columns used to display a character in this charset.
1440 Only used in TTY mode. (Under X, the actual width of a
1441 character can be derived from the font used to display the
1442 characters.) If unspecified, defaults to the dimension
1443 (this is almost always the correct value).
1444 'chars Number of characters in each dimension (94 or 96).
1445 Defaults to 94. Note that if the dimension is 2, the
1446 character set thus described is 94x94 or 96x96.
1447 'final Final byte of ISO 2022 escape sequence. Must be
1448 supplied. Each combination of (DIMENSION, CHARS) defines a
1449 separate namespace for final bytes. Note that ISO
1450 2022 restricts the final byte to the range
1451 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1452 dimension == 2. Note also that final bytes in the range
1453 0x30 - 0x3F are reserved for user-defined (not official)
1455 'graphic 0 (use left half of font on output) or 1 (use right half
1456 of font on output). Defaults to 0. For example, for
1457 a font whose registry is ISO8859-1, the left half
1458 (octets 0x20 - 0x7F) is the `ascii' character set, while
1459 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1460 character set. With 'graphic set to 0, the octets
1461 will have their high bit cleared; with it set to 1,
1462 the octets will have their high bit set.
1463 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1465 'ccl-program A compiled CCL program used to convert a character in
1466 this charset into an index into the font. This is in
1467 addition to the 'graphic property. The CCL program
1468 is passed the octets of the character, with the high
1469 bit cleared and set depending upon whether the value
1470 of the 'graphic property is 0 or 1.
1472 (name, doc_string, props))
1474 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1475 int direction = CHARSET_LEFT_TO_RIGHT;
1476 Lisp_Object registry = Qnil;
1477 Lisp_Object charset;
1478 Lisp_Object ccl_program = Qnil;
1479 Lisp_Object short_name = Qnil, long_name = Qnil;
1480 int min_code = 0, max_code = 0;
1481 Lisp_Object mother = Qnil;
1482 int byte_offset = -1;
1485 CHECK_SYMBOL (name);
1486 if (!NILP (doc_string))
1487 CHECK_STRING (doc_string);
1489 charset = Ffind_charset (name);
1490 if (!NILP (charset))
1491 signal_simple_error ("Cannot redefine existing charset", name);
1494 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1496 if (EQ (keyword, Qshort_name))
1498 CHECK_STRING (value);
1502 if (EQ (keyword, Qlong_name))
1504 CHECK_STRING (value);
1508 else if (EQ (keyword, Qdimension))
1511 dimension = XINT (value);
1512 if (dimension < 1 ||
1519 signal_simple_error ("Invalid value for 'dimension", value);
1522 else if (EQ (keyword, Qchars))
1525 chars = XINT (value);
1526 if (chars != 94 && chars != 96
1528 && chars != 128 && chars != 256
1531 signal_simple_error ("Invalid value for 'chars", value);
1534 else if (EQ (keyword, Qcolumns))
1537 columns = XINT (value);
1538 if (columns != 1 && columns != 2)
1539 signal_simple_error ("Invalid value for 'columns", value);
1542 else if (EQ (keyword, Qgraphic))
1545 graphic = XINT (value);
1553 signal_simple_error ("Invalid value for 'graphic", value);
1556 else if (EQ (keyword, Qregistry))
1558 CHECK_STRING (value);
1562 else if (EQ (keyword, Qdirection))
1564 if (EQ (value, Ql2r))
1565 direction = CHARSET_LEFT_TO_RIGHT;
1566 else if (EQ (value, Qr2l))
1567 direction = CHARSET_RIGHT_TO_LEFT;
1569 signal_simple_error ("Invalid value for 'direction", value);
1572 else if (EQ (keyword, Qfinal))
1574 CHECK_CHAR_COERCE_INT (value);
1575 final = XCHAR (value);
1576 if (final < '0' || final > '~')
1577 signal_simple_error ("Invalid value for 'final", value);
1581 else if (EQ (keyword, Qmin_code))
1584 min_code = XINT (value);
1587 min_code = (~(-1 - min_code)) & 0x7FFFFFFF;
1591 else if (EQ (keyword, Qmax_code))
1594 max_code = XINT (value);
1597 max_code = (~(-1 - max_code)) & 0x7FFFFFFF;
1601 else if (EQ (keyword, Qmother))
1603 mother = Fget_charset (value);
1606 else if (EQ (keyword, Qconversion))
1608 if (EQ (value, Q94x60))
1609 conversion = CONVERSION_94x60;
1613 else if (EQ (keyword, Qccl_program))
1615 struct ccl_program test_ccl;
1617 if (setup_ccl_program (&test_ccl, value) < 0)
1618 signal_simple_error ("Invalid value for 'ccl-program", value);
1619 ccl_program = value;
1623 signal_simple_error ("Unrecognized property", keyword);
1629 error ("'final must be specified");
1631 if (dimension == 2 && final > 0x5F)
1633 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1636 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1637 CHARSET_LEFT_TO_RIGHT)) ||
1638 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1639 CHARSET_RIGHT_TO_LEFT)))
1641 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1643 id = get_unallocated_leading_byte (dimension);
1645 if (NILP (doc_string))
1646 doc_string = build_string ("");
1648 if (NILP (registry))
1649 registry = build_string ("");
1651 if (NILP (short_name))
1652 XSETSTRING (short_name, XSYMBOL (name)->name);
1654 if (NILP (long_name))
1655 long_name = doc_string;
1658 columns = dimension;
1660 if (byte_offset < 0)
1664 else if (chars == 96)
1670 charset = make_charset (id, name, chars, dimension, columns, graphic,
1671 final, direction, short_name, long_name,
1672 doc_string, registry,
1673 Qnil, min_code, max_code, 0, byte_offset,
1674 mother, conversion);
1675 if (!NILP (ccl_program))
1676 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1680 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1682 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1683 NEW-NAME is the name of the new charset. Return the new charset.
1685 (charset, new_name))
1687 Lisp_Object new_charset = Qnil;
1688 int id, chars, dimension, columns, graphic, final;
1690 Lisp_Object registry, doc_string, short_name, long_name;
1693 charset = Fget_charset (charset);
1694 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1695 signal_simple_error ("Charset already has reverse-direction charset",
1698 CHECK_SYMBOL (new_name);
1699 if (!NILP (Ffind_charset (new_name)))
1700 signal_simple_error ("Cannot redefine existing charset", new_name);
1702 cs = XCHARSET (charset);
1704 chars = CHARSET_CHARS (cs);
1705 dimension = CHARSET_DIMENSION (cs);
1706 columns = CHARSET_COLUMNS (cs);
1707 id = get_unallocated_leading_byte (dimension);
1709 graphic = CHARSET_GRAPHIC (cs);
1710 final = CHARSET_FINAL (cs);
1711 direction = CHARSET_RIGHT_TO_LEFT;
1712 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1713 direction = CHARSET_LEFT_TO_RIGHT;
1714 doc_string = CHARSET_DOC_STRING (cs);
1715 short_name = CHARSET_SHORT_NAME (cs);
1716 long_name = CHARSET_LONG_NAME (cs);
1717 registry = CHARSET_REGISTRY (cs);
1719 new_charset = make_charset (id, new_name, chars, dimension, columns,
1720 graphic, final, direction, short_name, long_name,
1721 doc_string, registry,
1723 CHARSET_DECODING_TABLE(cs),
1724 CHARSET_MIN_CODE(cs),
1725 CHARSET_MAX_CODE(cs),
1726 CHARSET_CODE_OFFSET(cs),
1727 CHARSET_BYTE_OFFSET(cs),
1729 CHARSET_CONVERSION (cs)
1731 Qnil, 0, 0, 0, 0, Qnil, 0
1735 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1736 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1741 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1742 Define symbol ALIAS as an alias for CHARSET.
1746 CHECK_SYMBOL (alias);
1747 charset = Fget_charset (charset);
1748 return Fputhash (alias, charset, Vcharset_hash_table);
1751 /* #### Reverse direction charsets not yet implemented. */
1753 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1755 Return the reverse-direction charset parallel to CHARSET, if any.
1756 This is the charset with the same properties (in particular, the same
1757 dimension, number of characters per dimension, and final byte) as
1758 CHARSET but whose characters are displayed in the opposite direction.
1762 charset = Fget_charset (charset);
1763 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1767 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1768 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1769 If DIRECTION is omitted, both directions will be checked (left-to-right
1770 will be returned if character sets exist for both directions).
1772 (dimension, chars, final, direction))
1774 int dm, ch, fi, di = -1;
1775 Lisp_Object obj = Qnil;
1777 CHECK_INT (dimension);
1778 dm = XINT (dimension);
1779 if (dm < 1 || dm > 2)
1780 signal_simple_error ("Invalid value for DIMENSION", dimension);
1784 if (ch != 94 && ch != 96)
1785 signal_simple_error ("Invalid value for CHARS", chars);
1787 CHECK_CHAR_COERCE_INT (final);
1789 if (fi < '0' || fi > '~')
1790 signal_simple_error ("Invalid value for FINAL", final);
1792 if (EQ (direction, Ql2r))
1793 di = CHARSET_LEFT_TO_RIGHT;
1794 else if (EQ (direction, Qr2l))
1795 di = CHARSET_RIGHT_TO_LEFT;
1796 else if (!NILP (direction))
1797 signal_simple_error ("Invalid value for DIRECTION", direction);
1799 if (dm == 2 && fi > 0x5F)
1801 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1805 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1807 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1810 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1813 return XCHARSET_NAME (obj);
1817 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1818 Return short name of CHARSET.
1822 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1825 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1826 Return long name of CHARSET.
1830 return XCHARSET_LONG_NAME (Fget_charset (charset));
1833 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1834 Return description of CHARSET.
1838 return XCHARSET_DOC_STRING (Fget_charset (charset));
1841 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1842 Return dimension of CHARSET.
1846 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1849 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1850 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1851 Recognized properties are those listed in `make-charset', as well as
1852 'name and 'doc-string.
1858 charset = Fget_charset (charset);
1859 cs = XCHARSET (charset);
1861 CHECK_SYMBOL (prop);
1862 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1863 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1864 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1865 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1866 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1867 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1868 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1869 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1870 Qnil : make_char (CHARSET_FINAL (cs));
1871 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1872 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1873 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1874 if (EQ (prop, Qdirection))
1875 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1876 if (EQ (prop, Qreverse_direction_charset))
1878 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1879 /* #### Is this translation OK? If so, error checking sufficient? */
1880 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1883 if (EQ (prop, Qmother))
1884 return CHARSET_MOTHER (cs);
1885 if (EQ (prop, Qmin_code))
1886 return make_int (CHARSET_MIN_CODE (cs));
1887 if (EQ (prop, Qmax_code))
1888 return make_int (CHARSET_MAX_CODE (cs));
1890 signal_simple_error ("Unrecognized charset property name", prop);
1891 return Qnil; /* not reached */
1894 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1895 Return charset identification number of CHARSET.
1899 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1902 /* #### We need to figure out which properties we really want to
1905 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1906 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1908 (charset, ccl_program))
1910 struct ccl_program test_ccl;
1912 charset = Fget_charset (charset);
1913 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1914 signal_simple_error ("Invalid ccl-program", ccl_program);
1915 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1920 invalidate_charset_font_caches (Lisp_Object charset)
1922 /* Invalidate font cache entries for charset on all devices. */
1923 Lisp_Object devcons, concons, hash_table;
1924 DEVICE_LOOP_NO_BREAK (devcons, concons)
1926 struct device *d = XDEVICE (XCAR (devcons));
1927 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1928 if (!UNBOUNDP (hash_table))
1929 Fclrhash (hash_table);
1933 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1934 Set the 'registry property of CHARSET to REGISTRY.
1936 (charset, registry))
1938 charset = Fget_charset (charset);
1939 CHECK_STRING (registry);
1940 XCHARSET_REGISTRY (charset) = registry;
1941 invalidate_charset_font_caches (charset);
1942 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1947 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1948 Return mapping-table of CHARSET.
1952 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1955 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1956 Set mapping-table of CHARSET to TABLE.
1960 struct Lisp_Charset *cs;
1964 charset = Fget_charset (charset);
1965 cs = XCHARSET (charset);
1969 CHARSET_DECODING_TABLE(cs) = Qnil;
1972 else if (VECTORP (table))
1974 int ccs_len = CHARSET_BYTE_SIZE (cs);
1975 int ret = decoding_table_check_elements (table,
1976 CHARSET_DIMENSION (cs),
1981 signal_simple_error ("Too big table", table);
1983 signal_simple_error ("Invalid element is found", table);
1985 signal_simple_error ("Something wrong", table);
1987 CHARSET_DECODING_TABLE(cs) = Qnil;
1990 signal_error (Qwrong_type_argument,
1991 list2 (build_translated_string ("vector-or-nil-p"),
1994 byte_offset = CHARSET_BYTE_OFFSET (cs);
1995 switch (CHARSET_DIMENSION (cs))
1998 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2000 Lisp_Object c = XVECTOR_DATA(table)[i];
2003 Fput_char_attribute (c, XCHARSET_NAME (charset),
2004 make_int (i + byte_offset));
2008 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2010 Lisp_Object v = XVECTOR_DATA(table)[i];
2016 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2018 Lisp_Object c = XVECTOR_DATA(v)[j];
2022 (c, XCHARSET_NAME (charset),
2023 make_int ( ( (i + byte_offset) << 8 )
2029 Fput_char_attribute (v, XCHARSET_NAME (charset),
2030 make_int (i + byte_offset));
2039 /************************************************************************/
2040 /* Lisp primitives for working with characters */
2041 /************************************************************************/
2044 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2045 Make a character from CHARSET and code-point CODE.
2046 If DEFINED_ONLY is non-nil, builtin character is not returned.
2047 If corresponding character is not found, nil is returned.
2049 (charset, code, defined_only))
2053 charset = Fget_charset (charset);
2056 if (XCHARSET_GRAPHIC (charset) == 1)
2058 if (NILP (defined_only))
2059 c = DECODE_CHAR (charset, c);
2061 c = DECODE_DEFINED_CHAR (charset, c);
2062 return c >= 0 ? make_char (c) : Qnil;
2065 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2066 Make a builtin character from CHARSET and code-point CODE.
2072 charset = Fget_charset (charset);
2074 if (EQ (charset, Vcharset_latin_viscii))
2076 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2082 (ret = Fget_char_attribute (chr,
2083 Vcharset_latin_viscii_lower,
2086 charset = Vcharset_latin_viscii_lower;
2090 (ret = Fget_char_attribute (chr,
2091 Vcharset_latin_viscii_upper,
2094 charset = Vcharset_latin_viscii_upper;
2101 if (XCHARSET_GRAPHIC (charset) == 1)
2104 c = decode_builtin_char (charset, c);
2105 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2109 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2110 Make a character from CHARSET and octets ARG1 and ARG2.
2111 ARG2 is required only for characters from two-dimensional charsets.
2112 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2113 character s with caron.
2115 (charset, arg1, arg2))
2119 int lowlim, highlim;
2121 charset = Fget_charset (charset);
2122 cs = XCHARSET (charset);
2124 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2125 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2127 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2129 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2130 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2133 /* It is useful (and safe, according to Olivier Galibert) to strip
2134 the 8th bit off ARG1 and ARG2 because it allows programmers to
2135 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2136 Latin 2 code of the character. */
2144 if (a1 < lowlim || a1 > highlim)
2145 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2147 if (CHARSET_DIMENSION (cs) == 1)
2151 ("Charset is of dimension one; second octet must be nil", arg2);
2152 return make_char (MAKE_CHAR (charset, a1, 0));
2161 a2 = XINT (arg2) & 0x7f;
2163 if (a2 < lowlim || a2 > highlim)
2164 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2166 return make_char (MAKE_CHAR (charset, a1, a2));
2169 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2170 Return the character set of CHARACTER.
2174 CHECK_CHAR_COERCE_INT (character);
2176 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2179 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2180 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2181 N defaults to 0 if omitted.
2185 Lisp_Object charset;
2188 CHECK_CHAR_COERCE_INT (character);
2190 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2192 if (NILP (n) || EQ (n, Qzero))
2193 return make_int (octet0);
2194 else if (EQ (n, make_int (1)))
2195 return make_int (octet1);
2197 signal_simple_error ("Octet number must be 0 or 1", n);
2201 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2202 Return code-point of CHARACTER in specified CHARSET.
2204 (character, charset))
2208 CHECK_CHAR_COERCE_INT (character);
2209 charset = Fget_charset (charset);
2210 code_point = charset_code_point (charset, XCHAR (character));
2211 if (code_point >= 0)
2212 return make_int (code_point);
2218 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2219 Return list of charset and one or two position-codes of CHARACTER.
2223 /* This function can GC */
2224 struct gcpro gcpro1, gcpro2;
2225 Lisp_Object charset = Qnil;
2226 Lisp_Object rc = Qnil;
2234 GCPRO2 (charset, rc);
2235 CHECK_CHAR_COERCE_INT (character);
2238 code_point = ENCODE_CHAR (XCHAR (character), charset);
2239 dimension = XCHARSET_DIMENSION (charset);
2240 while (dimension > 0)
2242 rc = Fcons (make_int (code_point & 255), rc);
2246 rc = Fcons (XCHARSET_NAME (charset), rc);
2248 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2250 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2252 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2256 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2265 #ifdef ENABLE_COMPOSITE_CHARS
2266 /************************************************************************/
2267 /* composite character functions */
2268 /************************************************************************/
2271 lookup_composite_char (Bufbyte *str, int len)
2273 Lisp_Object lispstr = make_string (str, len);
2274 Lisp_Object ch = Fgethash (lispstr,
2275 Vcomposite_char_string2char_hash_table,
2281 if (composite_char_row_next >= 128)
2282 signal_simple_error ("No more composite chars available", lispstr);
2283 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2284 composite_char_col_next);
2285 Fputhash (make_char (emch), lispstr,
2286 Vcomposite_char_char2string_hash_table);
2287 Fputhash (lispstr, make_char (emch),
2288 Vcomposite_char_string2char_hash_table);
2289 composite_char_col_next++;
2290 if (composite_char_col_next >= 128)
2292 composite_char_col_next = 32;
2293 composite_char_row_next++;
2302 composite_char_string (Emchar ch)
2304 Lisp_Object str = Fgethash (make_char (ch),
2305 Vcomposite_char_char2string_hash_table,
2307 assert (!UNBOUNDP (str));
2311 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2312 Convert a string into a single composite character.
2313 The character is the result of overstriking all the characters in
2318 CHECK_STRING (string);
2319 return make_char (lookup_composite_char (XSTRING_DATA (string),
2320 XSTRING_LENGTH (string)));
2323 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2324 Return a string of the characters comprising a composite character.
2332 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2333 signal_simple_error ("Must be composite char", ch);
2334 return composite_char_string (emch);
2336 #endif /* ENABLE_COMPOSITE_CHARS */
2339 /************************************************************************/
2340 /* initialization */
2341 /************************************************************************/
2344 syms_of_mule_charset (void)
2346 INIT_LRECORD_IMPLEMENTATION (charset);
2348 DEFSUBR (Fcharsetp);
2349 DEFSUBR (Ffind_charset);
2350 DEFSUBR (Fget_charset);
2351 DEFSUBR (Fcharset_list);
2352 DEFSUBR (Fcharset_name);
2353 DEFSUBR (Fmake_charset);
2354 DEFSUBR (Fmake_reverse_direction_charset);
2355 /* DEFSUBR (Freverse_direction_charset); */
2356 DEFSUBR (Fdefine_charset_alias);
2357 DEFSUBR (Fcharset_from_attributes);
2358 DEFSUBR (Fcharset_short_name);
2359 DEFSUBR (Fcharset_long_name);
2360 DEFSUBR (Fcharset_description);
2361 DEFSUBR (Fcharset_dimension);
2362 DEFSUBR (Fcharset_property);
2363 DEFSUBR (Fcharset_id);
2364 DEFSUBR (Fset_charset_ccl_program);
2365 DEFSUBR (Fset_charset_registry);
2367 DEFSUBR (Fcharset_mapping_table);
2368 DEFSUBR (Fset_charset_mapping_table);
2372 DEFSUBR (Fdecode_char);
2373 DEFSUBR (Fdecode_builtin_char);
2374 DEFSUBR (Fencode_char);
2376 DEFSUBR (Fmake_char);
2377 DEFSUBR (Fchar_charset);
2378 DEFSUBR (Fchar_octet);
2379 DEFSUBR (Fsplit_char);
2381 #ifdef ENABLE_COMPOSITE_CHARS
2382 DEFSUBR (Fmake_composite_char);
2383 DEFSUBR (Fcomposite_char_string);
2386 defsymbol (&Qcharsetp, "charsetp");
2387 defsymbol (&Qregistry, "registry");
2388 defsymbol (&Qfinal, "final");
2389 defsymbol (&Qgraphic, "graphic");
2390 defsymbol (&Qdirection, "direction");
2391 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2392 defsymbol (&Qshort_name, "short-name");
2393 defsymbol (&Qlong_name, "long-name");
2395 defsymbol (&Qmin_code, "min-code");
2396 defsymbol (&Qmax_code, "max-code");
2397 defsymbol (&Qmother, "mother");
2398 defsymbol (&Qconversion, "conversion");
2399 defsymbol (&Q94x60, "94x60");
2402 defsymbol (&Ql2r, "l2r");
2403 defsymbol (&Qr2l, "r2l");
2405 /* Charsets, compatible with FSF 20.3
2406 Naming convention is Script-Charset[-Edition] */
2407 defsymbol (&Qascii, "ascii");
2408 defsymbol (&Qcontrol_1, "control-1");
2409 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2410 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2411 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2412 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2413 defsymbol (&Qthai_tis620, "thai-tis620");
2414 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2415 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2416 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2417 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2418 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2419 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2420 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2421 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2422 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2423 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2424 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2425 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2426 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2427 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2428 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2429 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2431 defsymbol (&Qucs, "ucs");
2432 defsymbol (&Qucs_bmp, "ucs-bmp");
2433 defsymbol (&Qucs_smp, "ucs-smp");
2434 defsymbol (&Qucs_sip, "ucs-sip");
2435 defsymbol (&Qucs_cns, "ucs-cns");
2436 defsymbol (&Qucs_jis, "ucs-jis");
2437 defsymbol (&Qucs_ks, "ucs-ks");
2438 defsymbol (&Qucs_big5, "ucs-big5");
2439 defsymbol (&Qlatin_viscii, "latin-viscii");
2440 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2441 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2442 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2443 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2444 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2445 defsymbol (&Qideograph_gt, "ideograph-gt");
2446 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2447 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2448 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2449 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2450 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2451 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2452 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2453 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2454 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2455 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2456 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2457 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2458 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2459 defsymbol (&Qchinese_big5, "chinese-big5");
2460 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2461 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2462 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2463 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2464 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2465 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2466 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2467 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2468 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2469 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2470 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2471 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2472 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2473 defsymbol (&Qchina3_jef, "china3-jef");
2474 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2475 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2477 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2478 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2480 defsymbol (&Qcomposite, "composite");
2484 vars_of_mule_charset (void)
2491 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2492 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2494 /* Table of charsets indexed by leading byte. */
2495 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2496 chlook->charset_by_leading_byte[i] = Qnil;
2499 /* Table of charsets indexed by type/final-byte. */
2500 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2501 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2502 chlook->charset_by_attributes[i][j] = Qnil;
2504 /* Table of charsets indexed by type/final-byte/direction. */
2505 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2506 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2507 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2508 chlook->charset_by_attributes[i][j][k] = Qnil;
2512 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2514 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2515 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2519 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2520 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2521 Leading-code of private TYPE9N charset of column-width 1.
2523 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2527 Vdefault_coded_charset_priority_list = Qnil;
2528 DEFVAR_LISP ("default-coded-charset-priority-list",
2529 &Vdefault_coded_charset_priority_list /*
2530 Default order of preferred coded-character-sets.
2536 complex_vars_of_mule_charset (void)
2538 staticpro (&Vcharset_hash_table);
2539 Vcharset_hash_table =
2540 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2542 /* Predefined character sets. We store them into variables for
2546 staticpro (&Vcharset_ucs);
2548 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2549 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2550 build_string ("UCS"),
2551 build_string ("UCS"),
2552 build_string ("ISO/IEC 10646"),
2554 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2555 staticpro (&Vcharset_ucs_bmp);
2557 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2558 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2559 build_string ("BMP"),
2560 build_string ("UCS-BMP"),
2561 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2563 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2564 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2565 staticpro (&Vcharset_ucs_smp);
2567 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2568 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("SMP"),
2570 build_string ("UCS-SMP"),
2571 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2572 build_string ("UCS00-1"),
2573 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0,
2574 Qnil, CONVERSION_IDENTICAL);
2575 staticpro (&Vcharset_ucs_sip);
2577 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2578 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2579 build_string ("SIP"),
2580 build_string ("UCS-SIP"),
2581 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2582 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2583 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0,
2584 Qnil, CONVERSION_IDENTICAL);
2585 staticpro (&Vcharset_ucs_cns);
2587 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2588 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2589 build_string ("UCS for CNS"),
2590 build_string ("UCS for CNS 11643"),
2591 build_string ("ISO/IEC 10646 for CNS 11643"),
2594 Qnil, CONVERSION_IDENTICAL);
2595 staticpro (&Vcharset_ucs_jis);
2597 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2598 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2599 build_string ("UCS for JIS"),
2600 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2601 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2603 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_ucs_ks);
2606 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2607 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2608 build_string ("UCS for KS"),
2609 build_string ("UCS for CCS defined by KS"),
2610 build_string ("ISO/IEC 10646 for Korean Standards"),
2612 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2613 staticpro (&Vcharset_ucs_big5);
2615 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2616 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2617 build_string ("UCS for Big5"),
2618 build_string ("UCS for Big5"),
2619 build_string ("ISO/IEC 10646 for Big5"),
2621 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2623 # define MIN_CHAR_THAI 0
2624 # define MAX_CHAR_THAI 0
2625 /* # define MIN_CHAR_HEBREW 0 */
2626 /* # define MAX_CHAR_HEBREW 0 */
2627 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2628 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2630 staticpro (&Vcharset_ascii);
2632 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2633 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2634 build_string ("ASCII"),
2635 build_string ("ASCII)"),
2636 build_string ("ASCII (ISO646 IRV)"),
2637 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2638 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2639 staticpro (&Vcharset_control_1);
2640 Vcharset_control_1 =
2641 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2642 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2643 build_string ("C1"),
2644 build_string ("Control characters"),
2645 build_string ("Control characters 128-191"),
2647 Qnil, 0x80, 0x9F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2648 staticpro (&Vcharset_latin_iso8859_1);
2649 Vcharset_latin_iso8859_1 =
2650 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2651 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2652 build_string ("Latin-1"),
2653 build_string ("ISO8859-1 (Latin-1)"),
2654 build_string ("ISO8859-1 (Latin-1)"),
2655 build_string ("iso8859-1"),
2656 Qnil, 0xA0, 0xFF, 0, 32, Qnil, CONVERSION_IDENTICAL);
2657 staticpro (&Vcharset_latin_iso8859_2);
2658 Vcharset_latin_iso8859_2 =
2659 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2660 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2661 build_string ("Latin-2"),
2662 build_string ("ISO8859-2 (Latin-2)"),
2663 build_string ("ISO8859-2 (Latin-2)"),
2664 build_string ("iso8859-2"),
2665 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2666 staticpro (&Vcharset_latin_iso8859_3);
2667 Vcharset_latin_iso8859_3 =
2668 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2669 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2670 build_string ("Latin-3"),
2671 build_string ("ISO8859-3 (Latin-3)"),
2672 build_string ("ISO8859-3 (Latin-3)"),
2673 build_string ("iso8859-3"),
2674 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2675 staticpro (&Vcharset_latin_iso8859_4);
2676 Vcharset_latin_iso8859_4 =
2677 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2678 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2679 build_string ("Latin-4"),
2680 build_string ("ISO8859-4 (Latin-4)"),
2681 build_string ("ISO8859-4 (Latin-4)"),
2682 build_string ("iso8859-4"),
2683 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2684 staticpro (&Vcharset_thai_tis620);
2685 Vcharset_thai_tis620 =
2686 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2687 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2688 build_string ("TIS620"),
2689 build_string ("TIS620 (Thai)"),
2690 build_string ("TIS620.2529 (Thai)"),
2691 build_string ("tis620"),
2692 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32,
2693 Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_greek_iso8859_7);
2695 Vcharset_greek_iso8859_7 =
2696 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2697 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("ISO8859-7"),
2699 build_string ("ISO8859-7 (Greek)"),
2700 build_string ("ISO8859-7 (Greek)"),
2701 build_string ("iso8859-7"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_arabic_iso8859_6);
2704 Vcharset_arabic_iso8859_6 =
2705 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2706 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2707 build_string ("ISO8859-6"),
2708 build_string ("ISO8859-6 (Arabic)"),
2709 build_string ("ISO8859-6 (Arabic)"),
2710 build_string ("iso8859-6"),
2711 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_hebrew_iso8859_8);
2713 Vcharset_hebrew_iso8859_8 =
2714 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2715 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2716 build_string ("ISO8859-8"),
2717 build_string ("ISO8859-8 (Hebrew)"),
2718 build_string ("ISO8859-8 (Hebrew)"),
2719 build_string ("iso8859-8"),
2721 0 /* MIN_CHAR_HEBREW */,
2722 0 /* MAX_CHAR_HEBREW */, 0, 32,
2723 Qnil, CONVERSION_IDENTICAL);
2724 staticpro (&Vcharset_katakana_jisx0201);
2725 Vcharset_katakana_jisx0201 =
2726 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2727 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("JISX0201 Kana"),
2729 build_string ("JISX0201.1976 (Japanese Kana)"),
2730 build_string ("JISX0201.1976 Japanese Kana"),
2731 build_string ("jisx0201\\.1976"),
2732 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2733 staticpro (&Vcharset_latin_jisx0201);
2734 Vcharset_latin_jisx0201 =
2735 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2736 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2737 build_string ("JISX0201 Roman"),
2738 build_string ("JISX0201.1976 (Japanese Roman)"),
2739 build_string ("JISX0201.1976 Japanese Roman"),
2740 build_string ("jisx0201\\.1976"),
2741 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2742 staticpro (&Vcharset_cyrillic_iso8859_5);
2743 Vcharset_cyrillic_iso8859_5 =
2744 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2745 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2746 build_string ("ISO8859-5"),
2747 build_string ("ISO8859-5 (Cyrillic)"),
2748 build_string ("ISO8859-5 (Cyrillic)"),
2749 build_string ("iso8859-5"),
2750 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2751 staticpro (&Vcharset_latin_iso8859_9);
2752 Vcharset_latin_iso8859_9 =
2753 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2754 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2755 build_string ("Latin-5"),
2756 build_string ("ISO8859-9 (Latin-5)"),
2757 build_string ("ISO8859-9 (Latin-5)"),
2758 build_string ("iso8859-9"),
2759 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2760 staticpro (&Vcharset_japanese_jisx0208_1978);
2761 Vcharset_japanese_jisx0208_1978 =
2762 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2763 Qjapanese_jisx0208_1978, 94, 2,
2764 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2765 build_string ("JIS X0208:1978"),
2766 build_string ("JIS X0208:1978 (Japanese)"),
2768 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2769 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2770 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2771 staticpro (&Vcharset_chinese_gb2312);
2772 Vcharset_chinese_gb2312 =
2773 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2774 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2775 build_string ("GB2312"),
2776 build_string ("GB2312)"),
2777 build_string ("GB2312 Chinese simplified"),
2778 build_string ("gb2312"),
2779 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2780 staticpro (&Vcharset_chinese_gb12345);
2781 Vcharset_chinese_gb12345 =
2782 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2783 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2784 build_string ("G1"),
2785 build_string ("GB 12345)"),
2786 build_string ("GB 12345-1990"),
2787 build_string ("GB12345\\(\\.1990\\)?-0"),
2788 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2789 staticpro (&Vcharset_japanese_jisx0208);
2790 Vcharset_japanese_jisx0208 =
2791 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2792 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2793 build_string ("JISX0208"),
2794 build_string ("JIS X0208:1983 (Japanese)"),
2795 build_string ("JIS X0208:1983 Japanese Kanji"),
2796 build_string ("jisx0208\\.1983"),
2797 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2799 staticpro (&Vcharset_japanese_jisx0208_1990);
2800 Vcharset_japanese_jisx0208_1990 =
2801 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2802 Qjapanese_jisx0208_1990, 94, 2,
2803 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2804 build_string ("JISX0208-1990"),
2805 build_string ("JIS X0208:1990 (Japanese)"),
2806 build_string ("JIS X0208:1990 Japanese Kanji"),
2807 build_string ("jisx0208\\.1990"),
2809 MIN_CHAR_JIS_X0208_1990,
2810 MAX_CHAR_JIS_X0208_1990, 0, 33,
2811 Qnil, CONVERSION_IDENTICAL);
2813 staticpro (&Vcharset_korean_ksc5601);
2814 Vcharset_korean_ksc5601 =
2815 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2816 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2817 build_string ("KSC5601"),
2818 build_string ("KSC5601 (Korean"),
2819 build_string ("KSC5601 Korean Hangul and Hanja"),
2820 build_string ("ksc5601"),
2821 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2822 staticpro (&Vcharset_japanese_jisx0212);
2823 Vcharset_japanese_jisx0212 =
2824 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2825 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2826 build_string ("JISX0212"),
2827 build_string ("JISX0212 (Japanese)"),
2828 build_string ("JISX0212 Japanese Supplement"),
2829 build_string ("jisx0212"),
2830 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2832 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2833 staticpro (&Vcharset_chinese_cns11643_1);
2834 Vcharset_chinese_cns11643_1 =
2835 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2836 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2837 build_string ("CNS11643-1"),
2838 build_string ("CNS11643-1 (Chinese traditional)"),
2840 ("CNS 11643 Plane 1 Chinese traditional"),
2841 build_string (CHINESE_CNS_PLANE_RE("1")),
2842 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2843 staticpro (&Vcharset_chinese_cns11643_2);
2844 Vcharset_chinese_cns11643_2 =
2845 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2846 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2847 build_string ("CNS11643-2"),
2848 build_string ("CNS11643-2 (Chinese traditional)"),
2850 ("CNS 11643 Plane 2 Chinese traditional"),
2851 build_string (CHINESE_CNS_PLANE_RE("2")),
2852 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2854 staticpro (&Vcharset_latin_tcvn5712);
2855 Vcharset_latin_tcvn5712 =
2856 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2857 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2858 build_string ("TCVN 5712"),
2859 build_string ("TCVN 5712 (VSCII-2)"),
2860 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2861 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2862 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2863 staticpro (&Vcharset_latin_viscii_lower);
2864 Vcharset_latin_viscii_lower =
2865 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2866 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2867 build_string ("VISCII lower"),
2868 build_string ("VISCII lower (Vietnamese)"),
2869 build_string ("VISCII lower (Vietnamese)"),
2870 build_string ("MULEVISCII-LOWER"),
2871 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_latin_viscii_upper);
2873 Vcharset_latin_viscii_upper =
2874 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2875 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2876 build_string ("VISCII upper"),
2877 build_string ("VISCII upper (Vietnamese)"),
2878 build_string ("VISCII upper (Vietnamese)"),
2879 build_string ("MULEVISCII-UPPER"),
2880 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_latin_viscii);
2882 Vcharset_latin_viscii =
2883 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2884 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2885 build_string ("VISCII"),
2886 build_string ("VISCII 1.1 (Vietnamese)"),
2887 build_string ("VISCII 1.1 (Vietnamese)"),
2888 build_string ("VISCII1\\.1"),
2889 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2890 staticpro (&Vcharset_chinese_big5);
2891 Vcharset_chinese_big5 =
2892 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2893 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2894 build_string ("Big5"),
2895 build_string ("Big5"),
2896 build_string ("Big5 Chinese traditional"),
2897 build_string ("big5"),
2899 0 /* MIN_CHAR_BIG5_CDP */,
2900 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2901 Qnil, CONVERSION_IDENTICAL);
2902 staticpro (&Vcharset_chinese_big5_cdp);
2903 Vcharset_chinese_big5_cdp =
2904 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2905 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2906 build_string ("Big5-CDP"),
2907 build_string ("Big5 + CDP extension"),
2908 build_string ("Big5 with CDP extension"),
2909 build_string ("big5\\.cdp-0"),
2910 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0,
2911 Qnil, CONVERSION_IDENTICAL);
2912 #define DEF_HANZIKU(n) \
2913 staticpro (&Vcharset_ideograph_hanziku_##n); \
2914 Vcharset_ideograph_hanziku_##n = \
2915 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2916 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2917 build_string ("HZK-"#n), \
2918 build_string ("HANZIKU-"#n), \
2919 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2921 ("hanziku-"#n"$"), \
2922 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0, \
2923 Qnil, CONVERSION_IDENTICAL);
2936 staticpro (&Vcharset_china3_jef);
2937 Vcharset_china3_jef =
2938 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2939 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2940 build_string ("JC3"),
2941 build_string ("JEF + CHINA3"),
2942 build_string ("JEF + CHINA3 private characters"),
2943 build_string ("china3jef-0"),
2944 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0,
2945 Qnil, CONVERSION_IDENTICAL);
2946 staticpro (&Vcharset_ideograph_cbeta);
2947 Vcharset_ideograph_cbeta =
2948 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2949 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2950 build_string ("CB"),
2951 build_string ("CBETA"),
2952 build_string ("CBETA private characters"),
2953 build_string ("cbeta-0"),
2954 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0,
2955 Qnil, CONVERSION_IDENTICAL);
2956 staticpro (&Vcharset_ideograph_gt);
2957 Vcharset_ideograph_gt =
2958 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2959 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2960 build_string ("GT"),
2961 build_string ("GT"),
2962 build_string ("GT"),
2964 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0,
2965 Qnil, CONVERSION_IDENTICAL);
2966 #define DEF_GT_PJ(n) \
2967 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2968 Vcharset_ideograph_gt_pj_##n = \
2969 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2970 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2971 build_string ("GT-PJ-"#n), \
2972 build_string ("GT (pseudo JIS encoding) part "#n), \
2973 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2975 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2976 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2989 staticpro (&Vcharset_ideograph_daikanwa_2);
2990 Vcharset_ideograph_daikanwa_2 =
2991 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2992 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2993 build_string ("Daikanwa Rev."),
2994 build_string ("Morohashi's Daikanwa Rev."),
2996 ("Daikanwa dictionary (revised version)"),
2997 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
2998 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2999 staticpro (&Vcharset_ideograph_daikanwa);
3000 Vcharset_ideograph_daikanwa =
3001 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3002 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3003 build_string ("Daikanwa"),
3004 build_string ("Morohashi's Daikanwa Rev.2"),
3006 ("Daikanwa dictionary (second revised version)"),
3007 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3008 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0,
3009 Qnil, CONVERSION_IDENTICAL);
3011 staticpro (&Vcharset_ethiopic_ucs);
3012 Vcharset_ethiopic_ucs =
3013 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3014 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3015 build_string ("Ethiopic (UCS)"),
3016 build_string ("Ethiopic (UCS)"),
3017 build_string ("Ethiopic of UCS"),
3018 build_string ("Ethiopic-Unicode"),
3019 Qnil, 0x1200, 0x137F, 0x1200, 0,
3020 Qnil, CONVERSION_IDENTICAL);
3022 staticpro (&Vcharset_chinese_big5_1);
3023 Vcharset_chinese_big5_1 =
3024 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3025 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3026 build_string ("Big5"),
3027 build_string ("Big5 (Level-1)"),
3029 ("Big5 Level-1 Chinese traditional"),
3030 build_string ("big5"),
3031 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3032 staticpro (&Vcharset_chinese_big5_2);
3033 Vcharset_chinese_big5_2 =
3034 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3035 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3036 build_string ("Big5"),
3037 build_string ("Big5 (Level-2)"),
3039 ("Big5 Level-2 Chinese traditional"),
3040 build_string ("big5"),
3041 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3043 #ifdef ENABLE_COMPOSITE_CHARS
3044 /* #### For simplicity, we put composite chars into a 96x96 charset.
3045 This is going to lead to problems because you can run out of
3046 room, esp. as we don't yet recycle numbers. */
3047 staticpro (&Vcharset_composite);
3048 Vcharset_composite =
3049 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3050 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3051 build_string ("Composite"),
3052 build_string ("Composite characters"),
3053 build_string ("Composite characters"),
3056 /* #### not dumped properly */
3057 composite_char_row_next = 32;
3058 composite_char_col_next = 32;
3060 Vcomposite_char_string2char_hash_table =
3061 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3062 Vcomposite_char_char2string_hash_table =
3063 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3064 staticpro (&Vcomposite_char_string2char_hash_table);
3065 staticpro (&Vcomposite_char_char2string_hash_table);
3066 #endif /* ENABLE_COMPOSITE_CHARS */