1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets.
   Each variable below is a global Lisp_Object slot for one built-in
   charset; none of these declarations carries an initializer. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
/* NOTE(review): the UCS, VISCII/TCVN, Big5, Hanziku, JEF, CBETA, GT,
   Daikanwa and Ethiopic slots below are presumably specific to the
   UTF-2000 build; no preprocessor guard is visible here -- confirm. */
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
/* State that exists only when ENABLE_COMPOSITE_CHARS is defined: the
   composite charset object, the two composite-char hash tables, and the
   file-local next row/column counters. */
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
/* Global pointer to the charset lookup tables (struct charset_lookup). */
128 struct charset_lookup *chlook;
/* Description table for struct charset_lookup; the entry visible here
   registers the charset_by_leading_byte member as an array of Lisp
   objects.  The remaining entries are not visible in this extract. */
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Pairs the size of struct charset_lookup with the description table
   declared just before it. */
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* One entry per possible first byte 0x00-0x9F (0xA0 entries declared). */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two candidate rows follow for this range (last entry 3
   versus 2); presumably a preprocessor conditional that is not visible
   here selects exactly one of them -- confirm. */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Byte size of charset CS; used in this file as the length of
   decoding-table vectors.  A charset with 94 chars whose byte offset is
   not 33 yields 128 - CHARSET_BYTE_OFFSET (cs); every other charset
   yields CHARSET_CHARS (cs). */
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
/* Convenience form taking a Lisp charset object instead of a
   Lisp_Charset pointer. */
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the contents of decoding-table vector V against CCS_LEN.
   Visible steps: the length of V is compared with CCS_LEN, each element
   is tested for being neither nil nor a character, and the function
   calls itself on an element with DIM - 1.
   NOTE(review): the return codes and the conditions guarding each step
   are on lines not visible here -- confirm before relying on them. */
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
/* Recurse one dimension down into element C. */
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear the decoding-table slot that corresponds to code_point.
   The slot index at this level is byte number DIM of code_point (shift
   right by 8 * DIM, mask with 255) minus BYTE_OFFSET; the visible store
   writes Qnil into that slot.
   NOTE(review): what is done with the fetched element NV (presumably a
   descent into a nested vector while DIM > 0) is not visible -- confirm. */
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER in decoding table V at the slot for CODE_POINT.
   The slot index at this level is byte number DIM of CODE_POINT (shift
   right by 8 * DIM, mask with 255) minus BYTE_OFFSET.
   NOTE(review): the conditions choosing between descending into NV and
   storing CHARACTER directly are not visible here -- confirm. */
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
/* Sub-vectors created below get the same length as V. */
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
/* Install a fresh nil-filled sub-vector in slot I and continue with it. */
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
/* Register VALUE as the code point of CHARACTER in coded charset CCS.
   VALUE is accepted as an integer or, in the obsolete form, as a list of
   bytes; the visible error lines signal the invalid-value error for
   anything else.  When XCHARSET_GRAPHIC (ccs) is 1 the integer form is
   masked with 0x7F7F7F7F.  The code point already recorded for CHARACTER
   (looked up with Fget_char_attribute) is removed from the decoding
   table, a table of XCHARSET_BYTE_SIZE (ccs) nil entries is created on
   the visible make_vector line, and CHARACTER is stored at the new code
   point.
   NOTE(review): several guarding conditions and the return statement are
   on lines not visible here -- confirm the exact control flow. */
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
/* The table update is conditional on CCS not being named ucs, or on
   CHARACTER differing from VALUE (one operand of this test is not
   visible here). */
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J to the code point built so far. */
312 code_point = (code_point << 8) | j;
/* From here on VALUE is the integer form of the code point. */
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the top bit of every byte of the code point. */
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the mapping that CHARACTER previously had in this charset. */
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
/* Remove CHARACTER from coded charset CCS.
   If the decoding table is a vector, the code point currently recorded
   for CHARACTER is fetched with Fget_char_attribute and
   decoding_table_remove_char is called with the table, dimension and
   byte offset of CCS.  If the encoding table is a char table, the entry
   for CHARACTER is set to Qnil.
   NOTE(review): the check on the fetched code point, the last argument
   of the removal call and the return value are not visible here. */
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
/* NOTE(review): the meaning of this counter is not established by the
   visible code -- confirm against its users. */
376 int leading_code_private_11;
/* Q-prefixed Lisp objects (presumably interned symbols; their
   initialization is not visible here). */
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
389 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
/* NOTE(review): the names below are fragments of a longer
   comma-separated list; its opening declaration and the other entries
   are not visible here. */
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
431 /* Qchinese_big5_cdp, */
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
/* Hash table keyed by charset name with the charset object as value
   (written by Fputhash in make_charset, read by Ffind_charset). */
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
504 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
505 string in STR. Returns the number of bytes stored.
506 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encoder body.  Each visible branch stores C as a lead byte holding the
   top bits (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc) followed by continuation
   bytes of the form 0x80 | (six bits) -- the UTF-8 layout, extended to
   sequences of up to six bytes.
   NOTE(review): the final lines use BREAKUP_CHAR and leading bytes
   instead; presumably they belong to an alternative build of this
   function selected by a conditional not visible here -- confirm. */
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: C fits in 11 bits. */
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: C fits in 16 bits. */
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: C fits in 21 bits. */
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: C fits in 26 bits. */
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte representation: split C into charset and position
   codes, and emit the private prefix byte first when the leading byte is
   a private one. */
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
580 non_ascii_charptr_emchar (const Bufbyte *str)
/* The first byte B is compared against descending lead-byte thresholds.
   NOTE(review): the bodies of these branches (presumably setting the
   payload bits and LEN, the continuation-byte count) are not visible. */
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
/* Fold six payload bits from each following byte into CH. */
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
/* NOTE(review): the lines below decode by leading byte instead and
   presumably belong to an alternative build of this function. */
625 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the byte after the leading byte, minus 0x20. */
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
/* Split CH into its three fields; F1 and F2 select the charset. */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
/* First group of checks: F2 must lie in the official or the private
   FIELD2 range.  NOTE(review): the condition selecting this group
   (presumably F1 being zero) is not visible here. */
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
677 NOTE: This takes advantage of the fact that
678 FIELD2_TO_OFFICIAL_LEADING_BYTE and
679 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
/* Reached when the test on F3 above did not already decide the answer:
   the result then depends on the charset having 96 chars. */
684 return (XCHARSET_CHARS (charset) == 96);
/* Second group of checks: the same scheme with F1 selecting the charset
   and both F2 and F3 acting as position codes. */
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash table. */
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private FIELD1 values use different leading-byte offsets. */
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The length of the character is taken from its first byte. */
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Advance both pointers once per byte; the loop body (the byte copy
   itself) is not visible here. */
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
/* Read the rest of a multi-byte character from STREAM, given that its
   first byte CH has already been read, and return the decoded Emchar. */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes belong to the character. */
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc is not expected at this point. */
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM; returns whatever Lstream_write returns. */
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand the
   resulting bytes to Lstream_unread on STREAM. */
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
791 /************************************************************************/
/* Mark method for charset objects: calls mark_object on the Lisp-valued
   fields of the charset listed below.
   NOTE(review): the return statement and any conditionally compiled
   fields are on lines not visible here. */
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
/* Print method for charset objects.  Writes the charset header text, the
   name, short name, long name and doc string, a formatted summary of the
   numeric attributes, the registry, and finally the header uid, all to
   PRINTCHARFUN. */
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
/* NOTE(review): this error is presumably guarded by a readable-printing
   check on a line not visible here -- confirm. */
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Format the size as chars^dimension, then direction, column count,
   graphic register and final byte.  Two of the arguments are on lines
   not visible here. */
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp-valued member listed below.  The table terminator and any
   conditionally compiled entries are not visible here. */
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Declare the charset lrecord type, passing mark_charset and
   print_charset as its mark and print methods.  The trailing arguments
   of the macro call are on lines not visible here. */
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset.
   Allocates a Lisp_Charset record, copies the arguments into its fields,
   sets CHARSET_REP_BYTES, and registers the object in chlook (by
   attributes and by leading byte) and in Vcharset_hash_table under NAME.
   NOTE(review): the guards around the registration steps and the return
   statement are on lines not visible here. */
863 /* #### SJT Should generic properties be allowed? */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
/* The CCL program and the reverse-direction charset start out as Qnil. */
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding table slot is set to Qnil here; the
   DECODING_TABLE argument is not used on any visible line -- confirm
   whether that is intended. */
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
/* ASCII is represented in one byte.  The two other assignments use
   dimension + 1 and dimension + 2; NOTE(review): the conditions
   selecting between them are not visible here. */
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte from the counters kept in
   chlook.  Each visible counter is first compared with its upper limit
   and then post-incremented to produce LB; the visible error line
   reports that no character sets are free for DIMENSION.
   NOTE(review): the conditions selecting among the three counters, the
   action taken when a limit is exceeded and the return statement are on
   lines not visible here. */
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
982 /* Number of Big5 code points that share one first byte: second bytes
   0x40-0x7E (63 values) plus 0xA1-0xFE (94 values), 157 in total. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Map CODE_POINT of CHARSET to a character code by arithmetic rules; no
   decoding table is consulted on the visible lines.  Visible cases:
   - CHARSET has a mother charset: CODE_POINT is converted according to
     XCHARSET_CONVERSION and the result, plus XCHARSET_CODE_OFFSET, is
     decoded through the mother;
   - CHARSET is chinese-big5: the code point is linearized and
     re-expressed as a 94x94 code point of big5-1 or big5-2;
   - the final byte is at least '0': the code is placed arithmetically
     inside a per-final range;
   - XCHARSET_MAX_CODE is nonzero: a linear index plus the code offset is
     checked against the min and max codes.
   NOTE(review): many guards, assignments and return statements are on
   lines not visible here -- confirm the exact results. */
987 decode_builtin_char (Lisp_Object charset, int code_point)
989 Lisp_Object mother = XCHARSET_MOTHER (charset);
992 if ( CHARSETP (mother) )
994 int code = code_point;
996 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
998 int row = code_point >> 8;
999 int cell = code_point & 255;
/* Rows are accepted in two windows of 30 rows each; rows in the first
   window are counted from 16 + 32, rows in the second from 18 + 32. */
1003 else if (row < 16 + 32 + 30)
1004 code = (row - (16 + 32)) * 94 + cell - 33;
1005 else if (row < 18 + 32 + 30)
1007 else if (row < 18 + 32 + 60)
1008 code = (row - (18 + 32)) * 94 + cell - 33;
1010 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1012 int plane = code_point >> 16;
1013 int row = (code_point >> 8) & 255;
1014 int cell = code_point & 255;
/* Same row windows as above, with a plane term of 94 * 60 per plane. */
1018 else if (row < 16 + 32 + 30)
1020 = (plane - 33) * 94 * 60
1021 + (row - (16 + 32)) * 94
1023 else if (row < 18 + 32 + 30)
1025 else if (row < 18 + 32 + 60)
1027 = (plane - 33) * 94 * 60
1028 + (row - (18 + 32)) * 94
/* Delegate to the mother charset with the converted, offset code. */
1032 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
1035 else if (EQ (charset, Vcharset_chinese_big5))
1037 int c1 = code_point >> 8;
1038 int c2 = code_point & 0xFF;
/* Accept only first bytes 0xA1-0xFE with second bytes 0x40-0x7E or
   0xA1-0xFE. */
1041 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1043 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1044 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Linear index: BIG5_SAME_ROW entries per first byte; the second-byte
   correction of 0x62 closes the gap between 0x7E and 0xA1. */
1046 I = (c1 - 0xA1) * BIG5_SAME_ROW
1047 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1051 charset = Vcharset_chinese_big5_1;
1055 charset = Vcharset_chinese_big5_2;
/* big5-2 indices are counted from first byte 0xC9. */
1056 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express the linear index as a two-byte 94x94 code point. */
1058 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1062 if ((final = XCHARSET_FINAL (charset)) >= '0')
1064 if (XCHARSET_DIMENSION (charset) == 1)
1066 switch (XCHARSET_CHARS (charset))
1070 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1073 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1081 switch (XCHARSET_CHARS (charset))
1084 return MIN_CHAR_94x94
1085 + (final - '0') * 94 * 94
1086 + (((code_point >> 8) & 0x7F) - 33) * 94
1087 + ((code_point & 0x7F) - 33);
1089 return MIN_CHAR_96x96
1090 + (final - '0') * 96 * 96
1091 + (((code_point >> 8) & 0x7F) - 32) * 96
1092 + ((code_point & 0x7F) - 32);
1099 else if (XCHARSET_MAX_CODE (charset))
/* Linear index of the code point within the charset, shifted by the
   code offset; the result is then range-checked against the min and max
   codes. */
1102 = (XCHARSET_DIMENSION (charset) == 1
1104 code_point - XCHARSET_BYTE_OFFSET (charset)
1106 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1107 * XCHARSET_CHARS (charset)
1108 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1109 + XCHARSET_CODE_OFFSET (charset);
1110 if ((cid < XCHARSET_MIN_CODE (charset))
1111 || (XCHARSET_MAX_CODE (charset) < cid))
/* Compute the code point of character CH in CHARSET.  Visible steps:
   - an integer found for CH in the encoding table (when it is a char
     table) is tested first;
   - otherwise CODE is taken from the mother charset when there is one,
     and when it lies within the min and max codes the offset
     D = CODE - XCHARSET_CODE_OFFSET is converted according to the
     conversion type or to the 94/96 chars-per-dimension layout;
   - otherwise, when the code offset is 0 or equals the min code, the
     code point is derived from the final byte for 94 and 96 charsets of
     dimension 1 or 2.
   NOTE(review): several conditions, adjustments and return statements
   are on lines not visible here -- confirm the exact results. */
1120 charset_code_point (Lisp_Object charset, Emchar ch)
1122 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1125 if ( CHAR_TABLEP (encoding_table)
1126 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1131 Lisp_Object mother = XCHARSET_MOTHER (charset);
1132 int min = XCHARSET_MIN_CODE (charset);
1133 int max = XCHARSET_MAX_CODE (charset);
1136 if ( CHARSETP (mother) )
1137 code = charset_code_point (mother, ch);
1140 if ( (min <= code) && (code <= max) )
1142 int d = code - XCHARSET_CODE_OFFSET (charset);
1144 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1147 int cell = d % 94 + 33;
1153 return (row << 8) | cell;
1155 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1157 int plane = d / (94 * 60) + 33;
1158 int row = (d % (94 * 60)) / 94;
1159 int cell = d % 94 + 33;
1165 return (plane << 16) | (row << 8) | cell;
/* 94-char charsets: each byte of the code point is a base-94 digit of D
   plus 33. */
1167 else if (XCHARSET_CHARS (charset) == 94)
1169 if (XCHARSET_DIMENSION (charset) == 1)
1171 else if (XCHARSET_DIMENSION (charset) == 2)
1172 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1173 else if (XCHARSET_DIMENSION (charset) == 3)
1175 ( (d / (94 * 94) + 33) << 16)
1176 | ((d / 94 % 94 + 33) << 8)
1178 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1180 ( (d / (94 * 94 * 94) + 33) << 24)
1181 | ((d / (94 * 94) % 94 + 33) << 16)
1182 | ((d / 94 % 94 + 33) << 8)
/* 96-char charsets: each byte is a base-96 digit of D plus 32. */
1185 else if (XCHARSET_CHARS (charset) == 96)
1187 if (XCHARSET_DIMENSION (charset) == 1)
1189 else if (XCHARSET_DIMENSION (charset) == 2)
1190 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1191 else if (XCHARSET_DIMENSION (charset) == 3)
1193 ( (d / (96 * 96) + 32) << 16)
1194 | ((d / 96 % 96 + 32) << 8)
1196 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1198 ( (d / (96 * 96 * 96) + 32) << 24)
1199 | ((d / (96 * 96) % 96 + 32) << 16)
1200 | ((d / 96 % 96 + 32) << 8)
1204 return code - XCHARSET_CODE_OFFSET (charset);
1206 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1207 (XCHARSET_CODE_OFFSET (charset)
1208 == XCHARSET_MIN_CODE (charset)) )
1212 if (XCHARSET_DIMENSION (charset) == 1)
1214 if (XCHARSET_CHARS (charset) == 94)
/* D is the offset of CH from the start of the range reserved for this
   final byte. */
1216 if (((d = ch - (MIN_CHAR_94
1217 + (XCHARSET_FINAL (charset) - '0') * 94))
1222 else if (XCHARSET_CHARS (charset) == 96)
1224 if (((d = ch - (MIN_CHAR_96
1225 + (XCHARSET_FINAL (charset) - '0') * 96))
1233 else if (XCHARSET_DIMENSION (charset) == 2)
1235 if (XCHARSET_CHARS (charset) == 94)
1237 if (((d = ch - (MIN_CHAR_94x94
1239 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1242 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1244 else if (XCHARSET_CHARS (charset) == 96)
1246 if (((d = ch - (MIN_CHAR_96x96
1248 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1251 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Classify character C by range: store the matching charset through
   CHARSET and return the code point of C within it.  The ranges are
   tested in ascending order.  For the MIN_CHAR_94 .. MAX_CHAR_96x96
   ranges the charset is looked up by its ISO 2022 attributes, and
   Vcharset_ucs is assigned on the visible lines following each non-nil
   test.
   NOTE(review): several conditions and return statements are on lines
   not visible here -- confirm the exact results. */
1262 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1264 if (c <= MAX_CHAR_BASIC_LATIN)
1266 *charset = Vcharset_ascii;
1271 *charset = Vcharset_control_1;
1276 *charset = Vcharset_latin_iso8859_1;
1280 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1282 *charset = Vcharset_hebrew_iso8859_8;
1283 return c - MIN_CHAR_HEBREW + 0x20;
1286 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1288 *charset = Vcharset_thai_tis620;
1289 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike its neighbours, this branch returns a list2
   value and does not store through CHARSET; presumably it is compiled
   out by a conditional not visible here -- confirm. */
1292 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1293 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1295 return list2 (Vcharset_katakana_jisx0201,
1296 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1299 else if (c <= MAX_CHAR_BMP)
1301 *charset = Vcharset_ucs_bmp;
1304 else if (c <= MAX_CHAR_SMP)
1306 *charset = Vcharset_ucs_smp;
1307 return c - MIN_CHAR_SMP;
1309 else if (c <= MAX_CHAR_SIP)
1311 *charset = Vcharset_ucs_sip;
1312 return c - MIN_CHAR_SIP;
1314 else if (c < MIN_CHAR_DAIKANWA)
1316 *charset = Vcharset_ucs;
1319 else if (c <= MAX_CHAR_DAIKANWA)
1321 *charset = Vcharset_ideograph_daikanwa;
1322 return c - MIN_CHAR_DAIKANWA;
1324 else if (c < MIN_CHAR_94)
1326 *charset = Vcharset_ucs;
/* 94-char, dimension-1 range: the final byte is derived from the offset
   of C divided by 94, the position from the remainder plus 33. */
1329 else if (c <= MAX_CHAR_94)
1331 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1332 ((c - MIN_CHAR_94) / 94) + '0',
1333 CHARSET_LEFT_TO_RIGHT);
1334 if (!NILP (*charset))
1335 return ((c - MIN_CHAR_94) % 94) + 33;
1338 *charset = Vcharset_ucs;
/* 96-char, dimension-1 range: same scheme with base 96 and offset 32. */
1342 else if (c <= MAX_CHAR_96)
1344 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1345 ((c - MIN_CHAR_96) / 96) + '0',
1346 CHARSET_LEFT_TO_RIGHT);
1347 if (!NILP (*charset))
1348 return ((c - MIN_CHAR_96) % 96) + 32;
1351 *charset = Vcharset_ucs;
/* 94x94 range: two base-94 digits, each plus 33, packed into two bytes. */
1355 else if (c <= MAX_CHAR_94x94)
1358 = CHARSET_BY_ATTRIBUTES (94, 2,
1359 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1360 CHARSET_LEFT_TO_RIGHT);
1361 if (!NILP (*charset))
1362 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1363 | (((c - MIN_CHAR_94x94) % 94) + 33);
1366 *charset = Vcharset_ucs;
/* 96x96 range: two base-96 digits, each plus 32, packed into two bytes. */
1370 else if (c <= MAX_CHAR_96x96)
1373 = CHARSET_BY_ATTRIBUTES (96, 2,
1374 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1375 CHARSET_LEFT_TO_RIGHT);
1376 if (!NILP (*charset))
1377 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1378 | (((c - MIN_CHAR_96x96) % 96) + 32);
1381 *charset = Vcharset_ucs;
1387 *charset = Vcharset_ucs;
/* NOTE(review): presumably the default priority order of coded charsets;
   neither its initialization nor its readers are visible here -- confirm. */
1392 Lisp_Object Vdefault_coded_charset_priority_list;
1396 /************************************************************************/
1397 /* Basic charset Lisp functions */
1398 /************************************************************************/
/* Lisp primitive charsetp: the visible body returns Qt when OBJECT
   satisfies CHARSETP and Qnil otherwise. */
1400 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1401 Return non-nil if OBJECT is a charset.
1405 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive find-charset: a charset object is returned unchanged;
   otherwise the argument must pass CHECK_SYMBOL and is looked up in
   Vcharset_hash_table, with Qnil as the result when it is absent. */
1408 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1409 Retrieve the charset of the given name.
1410 If CHARSET-OR-NAME is a charset object, it is simply returned.
1411 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1412 nil is returned. Otherwise the associated charset object is returned.
1416 if (CHARSETP (charset_or_name))
1417 return charset_or_name;
1419 CHECK_SYMBOL (charset_or_name);
1420 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive get-charset: looks NAME up with Ffind_charset and
   signals the No such charset error on the visible error line.
   NOTE(review): the test guarding that error and the return statement
   are on lines not visible here. */
1423 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1424 Retrieve the charset of the given name.
1425 Same as `find-charset' except an error is signalled if there is no such
1426 charset instead of returning nil.
1430 Lisp_Object charset = Ffind_charset (name);
1433 signal_simple_error ("No such charset", name);
1437 /* We store the charsets in hash tables with the names as the key and the
1438 actual charset object as the value. Occasionally we need to use them
1439 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash to add_charset_to_list_mapper. */
1440 struct charset_list_closure
/* Address of the list variable that the mapper conses onto. */
1442 Lisp_Object *charset_list;
/* elisp_maphash callback: conses KEY onto the front of the list whose
   address is stored in the closure.  VALUE is not used on the visible
   lines.  NOTE(review): the return value is not visible here. */
1446 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1447 void *charset_list_closure)
1449 /* This function can GC */
1450 struct charset_list_closure *chcl =
1451 (struct charset_list_closure*) charset_list_closure;
1452 Lisp_Object *charset_list = chcl->charset_list;
1454 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive charset-list: builds a list of the keys of
   Vcharset_hash_table by mapping add_charset_to_list_mapper over it.
   The accumulator is GC-protected with GCPRO1; NOTE(review): the
   matching UNGCPRO is presumably on a line not visible here. */
1458 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1459 Return a list of the names of all defined charsets.
1463 Lisp_Object charset_list = Qnil;
1464 struct gcpro gcpro1;
1465 struct charset_list_closure charset_list_closure;
1467 GCPRO1 (charset_list);
1468 charset_list_closure.charset_list = &charset_list;
1469 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1470 &charset_list_closure);
1473 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset
   (which signals for an unknown charset) and returns its XCHARSET_NAME.  */
1476 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1477 Return the name of charset CHARSET.
1481 return XCHARSET_NAME (Fget_charset (charset));
1484 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.

   Validates NAME (a symbol) and DOC-STRING (a string or nil), refuses to
   redefine a name already known to Ffind_charset, then walks PROPS,
   checking each recognized property and signalling "Unrecognized
   property" for anything else.  Missing values are defaulted (empty doc
   string and registry, short name taken from the symbol name, long name
   taken from the doc string, columns taken from the dimension) before
   make_charset is called; a supplied CCL program is stored on the new
   charset afterwards.

   The property tests form a single if / else-if chain.  The 'long-name
   test used to open a fresh `if', so once 'short-name had been handled
   control still ran down the rest of the chain and ended in the
   "Unrecognized property" signal; the test is now part of the chain.
   The doc string names the code-range properties 'min-code and
   'max-code, matching the symbols Qmin_code and Qmax_code that the loop
   actually compares against.  */
1485 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1486 Define a new character set.
1487 This function is for use with Mule support.
1488 NAME is a symbol, the name by which the character set is normally referred.
1489 DOC-STRING is a string describing the character set.
1490 PROPS is a property list, describing the specific nature of the
1491 character set. Recognized properties are:
1493 'short-name Short version of the charset name (ex: Latin-1)
1494 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1495 'registry A regular expression matching the font registry field for
1497 'dimension Number of octets used to index a character in this charset.
1498 Either 1 or 2. Defaults to 1.
1499 If UTF-2000 feature is enabled, 3 or 4 are also available.
1500 'columns Number of columns used to display a character in this charset.
1501 Only used in TTY mode. (Under X, the actual width of a
1502 character can be derived from the font used to display the
1503 characters.) If unspecified, defaults to the dimension
1504 (this is almost always the correct value).
1505 'chars Number of characters in each dimension (94 or 96).
1506 Defaults to 94. Note that if the dimension is 2, the
1507 character set thus described is 94x94 or 96x96.
1508 If UTF-2000 feature is enabled, 128 or 256 are also available.
1509 'final Final byte of ISO 2022 escape sequence. Must be
1510 supplied. Each combination of (DIMENSION, CHARS) defines a
1511 separate namespace for final bytes. Note that ISO
1512 2022 restricts the final byte to the range
1513 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1514 dimension == 2. Note also that final bytes in the range
1515 0x30 - 0x3F are reserved for user-defined (not official)
1517 'graphic 0 (use left half of font on output) or 1 (use right half
1518 of font on output). Defaults to 0. For example, for
1519 a font whose registry is ISO8859-1, the left half
1520 (octets 0x20 - 0x7F) is the `ascii' character set, while
1521 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1522 character set. With 'graphic set to 0, the octets
1523 will have their high bit cleared; with it set to 1,
1524 the octets will have their high bit set.
1525 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1527 'ccl-program A compiled CCL program used to convert a character in
1528 this charset into an index into the font. This is in
1529 addition to the 'graphic property. The CCL program
1530 is passed the octets of the character, with the high
1531 bit cleared and set depending upon whether the value
1532 of the 'graphic property is 0 or 1.
1533 'mother [UTF-2000 only] Base coded-charset.
1534 'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
1535 'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
1536 'code-offset [UTF-2000 only] Offset for a code-point of a base
1538 'conversion [UTF-2000 only] Conversion for a code-point of a base
1539 coded-charset (94x60 or 94x94x60).
1541 (name, doc_string, props))
1543 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1544 int direction = CHARSET_LEFT_TO_RIGHT;
1545 Lisp_Object registry = Qnil;
1546 Lisp_Object charset;
1547 Lisp_Object ccl_program = Qnil;
1548 Lisp_Object short_name = Qnil, long_name = Qnil;
1549 Lisp_Object mother = Qnil;
1550 int min_code = 0, max_code = 0, code_offset = 0;
1551 int byte_offset = -1;
1554 CHECK_SYMBOL (name);
1555 if (!NILP (doc_string))
1556 CHECK_STRING (doc_string);
1558 charset = Ffind_charset (name);
1559 if (!NILP (charset))
1560 signal_simple_error ("Cannot redefine existing charset", name);
1563 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1565 if (EQ (keyword, Qshort_name))
1567 CHECK_STRING (value);
1571 else if (EQ (keyword, Qlong_name))
1573 CHECK_STRING (value);
1577 else if (EQ (keyword, Qdimension))
1580 dimension = XINT (value);
1581 if (dimension < 1 ||
1588 signal_simple_error ("Invalid value for 'dimension", value);
1591 else if (EQ (keyword, Qchars))
1594 chars = XINT (value);
1595 if (chars != 94 && chars != 96
1597 && chars != 128 && chars != 256
1600 signal_simple_error ("Invalid value for 'chars", value);
1603 else if (EQ (keyword, Qcolumns))
1606 columns = XINT (value);
1607 if (columns != 1 && columns != 2)
1608 signal_simple_error ("Invalid value for 'columns", value);
1611 else if (EQ (keyword, Qgraphic))
1614 graphic = XINT (value);
1622 signal_simple_error ("Invalid value for 'graphic", value);
1625 else if (EQ (keyword, Qregistry))
1627 CHECK_STRING (value);
1631 else if (EQ (keyword, Qdirection))
1633 if (EQ (value, Ql2r))
1634 direction = CHARSET_LEFT_TO_RIGHT;
1635 else if (EQ (value, Qr2l))
1636 direction = CHARSET_RIGHT_TO_LEFT;
1638 signal_simple_error ("Invalid value for 'direction", value);
1641 else if (EQ (keyword, Qfinal))
1643 CHECK_CHAR_COERCE_INT (value);
1644 final = XCHAR (value);
1645 if (final < '0' || final > '~')
1646 signal_simple_error ("Invalid value for 'final", value);
1650 else if (EQ (keyword, Qmother))
1652 mother = Fget_charset (value);
1655 else if (EQ (keyword, Qmin_code))
1658 min_code = XUINT (value);
1661 else if (EQ (keyword, Qmax_code))
1664 max_code = XUINT (value);
1667 else if (EQ (keyword, Qcode_offset))
1670 code_offset = XUINT (value);
1673 else if (EQ (keyword, Qconversion))
1675 if (EQ (value, Q94x60))
1676 conversion = CONVERSION_94x60;
1677 else if (EQ (value, Q94x94x60))
1678 conversion = CONVERSION_94x94x60;
1680 signal_simple_error ("Unrecognized conversion", value);
1684 else if (EQ (keyword, Qccl_program))
1686 struct ccl_program test_ccl;
1688 if (setup_ccl_program (&test_ccl, value) < 0)
1689 signal_simple_error ("Invalid value for 'ccl-program", value);
1690 ccl_program = value;
1694 signal_simple_error ("Unrecognized property", keyword);
1700 error ("'final must be specified");
1702 if (dimension == 2 && final > 0x5F)
1704 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1707 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1708 CHARSET_LEFT_TO_RIGHT)) ||
1709 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1710 CHARSET_RIGHT_TO_LEFT)))
1712 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1714 id = get_unallocated_leading_byte (dimension);
1716 if (NILP (doc_string))
1717 doc_string = build_string ("");
1719 if (NILP (registry))
1720 registry = build_string ("");
1722 if (NILP (short_name))
1723 XSETSTRING (short_name, XSYMBOL (name)->name);
1725 if (NILP (long_name))
1726 long_name = doc_string;
1729 columns = dimension;
1731 if (byte_offset < 0)
1735 else if (chars == 96)
1741 charset = make_charset (id, name, chars, dimension, columns, graphic,
1742 final, direction, short_name, long_name,
1743 doc_string, registry,
1744 Qnil, min_code, max_code, code_offset, byte_offset,
1745 mother, conversion);
1746 if (!NILP (ccl_program))
1747 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Signals if CHARSET already has a reverse-direction partner or if
   NEW-NAME already names a charset.  Otherwise the chars, dimension,
   columns, graphic, final, doc string, short/long names and registry of
   CHARSET are copied, the direction is flipped (right-to-left unless the
   source is already right-to-left), a fresh id is obtained from
   get_unallocated_leading_byte, and make_charset builds the new charset.
   Finally the two charsets are linked to each other through their
   reverse-direction-charset slots.  */
1751 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1753 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1754 NEW-NAME is the name of the new charset. Return the new charset.
1756 (charset, new_name))
1758 Lisp_Object new_charset = Qnil;
1759 int id, chars, dimension, columns, graphic, final;
1761 Lisp_Object registry, doc_string, short_name, long_name;
1764 charset = Fget_charset (charset);
1765 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1766 signal_simple_error ("Charset already has reverse-direction charset",
1769 CHECK_SYMBOL (new_name);
1770 if (!NILP (Ffind_charset (new_name)))
1771 signal_simple_error ("Cannot redefine existing charset", new_name);
1773 cs = XCHARSET (charset);
1775 chars = CHARSET_CHARS (cs);
1776 dimension = CHARSET_DIMENSION (cs);
1777 columns = CHARSET_COLUMNS (cs);
1778 id = get_unallocated_leading_byte (dimension);
1780 graphic = CHARSET_GRAPHIC (cs);
1781 final = CHARSET_FINAL (cs);
1782 direction = CHARSET_RIGHT_TO_LEFT;
1783 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1784 direction = CHARSET_LEFT_TO_RIGHT;
1785 doc_string = CHARSET_DOC_STRING (cs);
1786 short_name = CHARSET_SHORT_NAME (cs);
1787 long_name = CHARSET_LONG_NAME (cs);
1788 registry = CHARSET_REGISTRY (cs);
1790 new_charset = make_charset (id, new_name, chars, dimension, columns,
1791 graphic, final, direction, short_name, long_name,
1792 doc_string, registry,
1794 CHARSET_DECODING_TABLE(cs),
1795 CHARSET_MIN_CODE(cs),
1796 CHARSET_MAX_CODE(cs),
1797 CHARSET_CODE_OFFSET(cs),
1798 CHARSET_BYTE_OFFSET(cs),
1800 CHARSET_CONVERSION (cs)
1802 Qnil, 0, 0, 0, 0, Qnil, 0
1806 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1807 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias': ALIAS must pass CHECK_SYMBOL,
   CHARSET is resolved with Fget_charset, and the pair is entered into
   Vcharset_hash_table.  The value of Fputhash is returned.  */
1812 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1813 Define symbol ALIAS as an alias for CHARSET.
1817 CHECK_SYMBOL (alias);
1818 charset = Fget_charset (charset);
1819 return Fputhash (alias, charset, Vcharset_hash_table);
1822 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   with Fget_charset and returns the contents of its
   reverse-direction-charset slot.  */
1824 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1826 Return the reverse-direction charset parallel to CHARSET, if any.
1827 This is the charset with the same properties (in particular, the same
1828 dimension, number of characters per dimension, and final byte) as
1829 CHARSET but whose characters are displayed in the opposite direction.
1833 charset = Fget_charset (charset);
1834 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Argument checks, each signalling on failure: DIMENSION must be 1 or 2,
   CHARS must be 94 or 96, FINAL must lie in '0' .. '~' (and not exceed
   0x5F when DIMENSION is 2), and DIRECTION must be 'l2r, 'r2l or nil.
   The lookup goes through CHARSET_BY_ATTRIBUTES, either for the one
   requested direction or for left-to-right and right-to-left in turn.
   Note that the value handed back for a found charset is its
   XCHARSET_NAME.  */
1838 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1839 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1840 If DIRECTION is omitted, both directions will be checked (left-to-right
1841 will be returned if character sets exist for both directions).
1843 (dimension, chars, final, direction))
1845 int dm, ch, fi, di = -1;
1846 Lisp_Object obj = Qnil;
1848 CHECK_INT (dimension);
1849 dm = XINT (dimension);
1850 if (dm < 1 || dm > 2)
1851 signal_simple_error ("Invalid value for DIMENSION", dimension);
1855 if (ch != 94 && ch != 96)
1856 signal_simple_error ("Invalid value for CHARS", chars);
1858 CHECK_CHAR_COERCE_INT (final);
1860 if (fi < '0' || fi > '~')
1861 signal_simple_error ("Invalid value for FINAL", final);
1863 if (EQ (direction, Ql2r))
1864 di = CHARSET_LEFT_TO_RIGHT;
1865 else if (EQ (direction, Qr2l))
1866 di = CHARSET_RIGHT_TO_LEFT;
1867 else if (!NILP (direction))
1868 signal_simple_error ("Invalid value for DIRECTION", direction);
1870 if (dm == 2 && fi > 0x5F)
1872 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1876 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1878 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1881 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1884 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': XCHARSET_SHORT_NAME of the charset
   that Fget_charset resolves CHARSET to.  */
1888 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1889 Return short name of CHARSET.
1893 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': XCHARSET_LONG_NAME of the charset
   that Fget_charset resolves CHARSET to.  */
1896 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1897 Return long name of CHARSET.
1901 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': XCHARSET_DOC_STRING of the
   charset that Fget_charset resolves CHARSET to.  */
1904 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1905 Return description of CHARSET.
1909 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': XCHARSET_DIMENSION of the resolved
   charset, wrapped as a Lisp integer with make_int.  */
1912 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1913 Return dimension of CHARSET.
1917 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   CHARSET is resolved with Fget_charset and PROP must pass CHECK_SYMBOL.
   PROP is then compared with EQ against each supported property symbol
   and the matching slot is returned: integer slots are wrapped with
   make_int, 'final yields Qnil when the stored final byte is 0 and a
   character otherwise, 'direction is mapped to Ql2r / Qr2l, and
   'reverse-direction-charset is translated to the partner's name when
   the slot holds a charset.  Any other symbol signals "Unrecognized
   charset property name".  */
1920 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1921 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1922 Recognized properties are those listed in `make-charset', as well as
1923 'name and 'doc-string.
1929 charset = Fget_charset (charset);
1930 cs = XCHARSET (charset);
1932 CHECK_SYMBOL (prop);
1933 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1934 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1935 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1936 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1937 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1938 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1939 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1940 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1941 Qnil : make_char (CHARSET_FINAL (cs));
1942 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1943 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1944 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1945 if (EQ (prop, Qdirection))
1946 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1947 if (EQ (prop, Qreverse_direction_charset))
1949 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1950 /* #### Is this translation OK? If so, error checking sufficient? */
1951 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1954 if (EQ (prop, Qmother))
1955 return CHARSET_MOTHER (cs);
1956 if (EQ (prop, Qmin_code))
1957 return make_int (CHARSET_MIN_CODE (cs));
1958 if (EQ (prop, Qmax_code))
1959 return make_int (CHARSET_MAX_CODE (cs));
1961 signal_simple_error ("Unrecognized charset property name", prop);
1962 return Qnil; /* not reached */
/* Lisp primitive `charset-id': XCHARSET_LEADING_BYTE of the resolved
   charset, wrapped as a Lisp integer with make_int.  */
1965 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1966 Return charset identification number of CHARSET.
1970 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1973 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program': resolves CHARSET, validates
   CCL-PROGRAM by running setup_ccl_program on a scratch struct
   (a negative result signals "Invalid ccl-program"), and only then
   stores the program in the charset's ccl-program slot.  */
1976 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1977 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1979 (charset, ccl_program))
1981 struct ccl_program test_ccl;
1983 charset = Fget_charset (charset);
1984 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1985 signal_simple_error ("Invalid ccl-program", ccl_program);
1986 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Walks every device with DEVICE_LOOP_NO_BREAK, looks CHARSET up in that
   device's charset_font_cache, and clears the per-charset hash table
   with Fclrhash when an entry exists (Qunbound marks "no entry").  */
1991 invalidate_charset_font_caches (Lisp_Object charset)
1993 /* Invalidate font cache entries for charset on all devices. */
1994 Lisp_Object devcons, concons, hash_table;
1995 DEVICE_LOOP_NO_BREAK (devcons, concons)
1997 struct device *d = XDEVICE (XCAR (devcons));
1998 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1999 if (!UNBOUNDP (hash_table))
2000 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry': resolves CHARSET, requires
   REGISTRY to be a string, stores it in the registry slot, then drops
   the cached fonts for the charset via invalidate_charset_font_caches
   and reports a global change of the default face's font property
   through face_property_was_changed.  */
2004 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2005 Set the 'registry property of CHARSET to REGISTRY.
2007 (charset, registry))
2009 charset = Fget_charset (charset);
2010 CHECK_STRING (registry);
2011 XCHARSET_REGISTRY (charset) = registry;
2012 invalidate_charset_font_caches (charset);
2013 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': XCHARSET_DECODING_TABLE of the
   charset that Fget_charset resolves CHARSET to.  */
2018 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2019 Return mapping-table of CHARSET.
2023 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   CHARSET is resolved with Fget_charset.  A vector TABLE is validated by
   decoding_table_check_elements against the charset's dimension; its
   failure cases are reported as "Too big table", "Invalid element is
   found" or "Something wrong".  A TABLE of an unsupported type raises
   wrong-type-argument tagged "vector-or-nil-p".  The charset's
   decoding-table slot is reset to Qnil, and the table contents are
   recorded through Fput_char_attribute under the charset's name: the
   code point is the vector index plus CHARSET_BYTE_OFFSET, and for a
   nested (two-level) table the outer index plus offset is shifted left
   by 8 bits as part of the code point.  */
2026 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2027 Set mapping-table of CHARSET to TABLE.
2031 struct Lisp_Charset *cs;
2035 charset = Fget_charset (charset);
2036 cs = XCHARSET (charset);
2040 CHARSET_DECODING_TABLE(cs) = Qnil;
2043 else if (VECTORP (table))
2045 int ccs_len = CHARSET_BYTE_SIZE (cs);
2046 int ret = decoding_table_check_elements (table,
2047 CHARSET_DIMENSION (cs),
2052 signal_simple_error ("Too big table", table);
2054 signal_simple_error ("Invalid element is found", table);
2056 signal_simple_error ("Something wrong", table);
2058 CHARSET_DECODING_TABLE(cs) = Qnil;
2061 signal_error (Qwrong_type_argument,
2062 list2 (build_translated_string ("vector-or-nil-p"),
2065 byte_offset = CHARSET_BYTE_OFFSET (cs);
2066 switch (CHARSET_DIMENSION (cs))
2069 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2071 Lisp_Object c = XVECTOR_DATA(table)[i];
2074 Fput_char_attribute (c, XCHARSET_NAME (charset),
2075 make_int (i + byte_offset));
2079 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2081 Lisp_Object v = XVECTOR_DATA(table)[i];
2087 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2089 Lisp_Object c = XVECTOR_DATA(v)[j];
2093 (c, XCHARSET_NAME (charset),
2094 make_int ( ( (i + byte_offset) << 8 )
2100 Fput_char_attribute (v, XCHARSET_NAME (charset),
2101 make_int (i + byte_offset));
2110 /************************************************************************/
2111 /* Lisp primitives for working with characters */
2112 /************************************************************************/
/* Lisp primitive `decode-char': resolves CHARSET, then converts the code
   point with DECODE_CHAR when DEFINED_ONLY is nil and with
   DECODE_DEFINED_CHAR otherwise.  A negative conversion result is
   reported to Lisp as Qnil; anything else is returned as a character.  */
2115 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2116 Make a character from CHARSET and code-point CODE.
2117 If DEFINED_ONLY is non-nil, builtin character is not returned.
2118 If corresponding character is not found, nil is returned.
2120 (charset, code, defined_only))
2124 charset = Fget_charset (charset);
2127 if (XCHARSET_GRAPHIC (charset) == 1)
2129 if (NILP (defined_only))
2130 c = DECODE_CHAR (charset, c);
2132 c = DECODE_DEFINED_CHAR (charset, c);
2133 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.

   For Vcharset_latin_viscii the character is first decoded with
   Fdecode_char and its attributes for the viscii-lower and viscii-upper
   charsets are consulted, which can redirect CHARSET to one of those
   two.  The code point is then passed to decode_builtin_char; when that
   yields a negative value the function falls back to
   Fdecode_char (charset, code, Qnil).  */
2136 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2137 Make a builtin character from CHARSET and code-point CODE.
2143 charset = Fget_charset (charset);
2145 if (EQ (charset, Vcharset_latin_viscii))
2147 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2153 (ret = Fget_char_attribute (chr,
2154 Vcharset_latin_viscii_lower,
2157 charset = Vcharset_latin_viscii_lower;
2161 (ret = Fget_char_attribute (chr,
2162 Vcharset_latin_viscii_upper,
2165 charset = Vcharset_latin_viscii_upper;
2172 if (XCHARSET_GRAPHIC (charset) == 1)
2175 c = decode_builtin_char (charset, c);
2176 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.

   The permitted octet range depends on the charset: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character charsets, 33..126 for
   94-character charsets and 32..127 otherwise.  An octet outside that
   range is reported with args_out_of_range_3.  For a one-dimensional
   charset a second octet is rejected ("second octet must be nil") and
   the character is built from ARG1 alone; otherwise ARG2 is masked with
   0x7f, range-checked the same way, and both octets go to MAKE_CHAR.  */
2180 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2181 Make a character from CHARSET and octets ARG1 and ARG2.
2182 ARG2 is required only for characters from two-dimensional charsets.
2183 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2184 character s with caron.
2186 (charset, arg1, arg2))
2190 int lowlim, highlim;
2192 charset = Fget_charset (charset);
2193 cs = XCHARSET (charset);
2195 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2196 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2198 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2200 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2201 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2204 /* It is useful (and safe, according to Olivier Galibert) to strip
2205 the 8th bit off ARG1 and ARG2 because it allows programmers to
2206 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2207 Latin 2 code of the character. */
2215 if (a1 < lowlim || a1 > highlim)
2216 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2218 if (CHARSET_DIMENSION (cs) == 1)
2222 ("Charset is of dimension one; second octet must be nil", arg2);
2223 return make_char (MAKE_CHAR (charset, a1, 0));
2232 a2 = XINT (arg2) & 0x7f;
2234 if (a2 < lowlim || a2 > highlim)
2235 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2237 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': CHARACTER is checked (integers are
   coerced) with CHECK_CHAR_COERCE_INT, and the name of the charset that
   CHAR_CHARSET reports for it is returned.  */
2240 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2241 Return the character set of CHARACTER.
2245 CHECK_CHAR_COERCE_INT (character);
2247 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet': BREAKUP_CHAR splits CHARACTER into its
   charset and two octets.  N of nil or 0 selects the first octet, N of 1
   the second; any other N signals "Octet number must be 0 or 1".  */
2250 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2251 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2252 N defaults to 0 if omitted.
2256 Lisp_Object charset;
2259 CHECK_CHAR_COERCE_INT (character);
2261 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2263 if (NILP (n) || EQ (n, Qzero))
2264 return make_int (octet0);
2265 else if (EQ (n, make_int (1)))
2266 return make_int (octet1);
2268 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char': checks CHARACTER, resolves CHARSET, and
   asks charset_code_point for the code point.  A non-negative result is
   returned as a Lisp integer.  */
2272 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2273 Return code-point of CHARACTER in specified CHARSET.
2275 (character, charset))
2279 CHECK_CHAR_COERCE_INT (character);
2280 charset = Fget_charset (charset);
2281 code_point = charset_code_point (charset, XCHAR (character));
2282 if (code_point >= 0)
2283 return make_int (code_point);
/* Lisp primitive `split-char'.

   Two ways of building the result are present.  One obtains a code
   point from ENCODE_CHAR, conses its low 8 bits onto the result once
   per charset dimension, and finally prepends the charset name.  The
   other uses BREAKUP_CHAR and returns a three-element list for
   two-dimensional charsets and a two-element list otherwise.
   NOTE(review): presumably these are alternative preprocessor
   branches -- confirm.  CHARSET and the result list are GC-protected
   with GCPRO2 while the list is built.  */
2289 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2290 Return list of charset and one or two position-codes of CHARACTER.
2294 /* This function can GC */
2295 struct gcpro gcpro1, gcpro2;
2296 Lisp_Object charset = Qnil;
2297 Lisp_Object rc = Qnil;
2305 GCPRO2 (charset, rc);
2306 CHECK_CHAR_COERCE_INT (character);
2309 code_point = ENCODE_CHAR (XCHAR (character), charset);
2310 dimension = XCHARSET_DIMENSION (charset);
2311 while (dimension > 0)
2313 rc = Fcons (make_int (code_point & 255), rc);
2317 rc = Fcons (XCHARSET_NAME (charset), rc);
2319 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2321 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2323 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2327 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2336 #ifdef ENABLE_COMPOSITE_CHARS
2337 /************************************************************************/
2338 /* composite character functions */
2339 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path signals
   "No more composite chars available" once composite_char_row_next has
   reached 128; otherwise it builds a character in Vcharset_composite
   from the next free row/column, records it in both the char->string
   and string->char tables, and advances the column counter, wrapping it
   to 32 and bumping the row when it reaches 128.  */
2342 lookup_composite_char (Bufbyte *str, int len)
2344 Lisp_Object lispstr = make_string (str, len);
2345 Lisp_Object ch = Fgethash (lispstr,
2346 Vcomposite_char_string2char_hash_table,
2352 if (composite_char_row_next >= 128)
2353 signal_simple_error ("No more composite chars available", lispstr);
2354 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2355 composite_char_col_next);
2356 Fputhash (make_char (emch), lispstr,
2357 Vcomposite_char_char2string_hash_table);
2358 Fputhash (lispstr, make_char (emch),
2359 Vcomposite_char_string2char_hash_table);
2360 composite_char_col_next++;
2361 if (composite_char_col_next >= 128)
2363 composite_char_col_next = 32;
2364 composite_char_row_next++;
/* Looks CH up in Vcomposite_char_char2string_hash_table and asserts
   that an entry was found (the looked-up value must not be unbound).  */
2373 composite_char_string (Emchar ch)
2375 Lisp_Object str = Fgethash (make_char (ch),
2376 Vcomposite_char_char2string_hash_table,
2378 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN): STRING must pass
   CHECK_STRING; its data and length are handed to lookup_composite_char
   and the resulting character is returned.  */
2382 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2383 Convert a string into a single composite character.
2384 The character is the result of overstriking all the characters in
2389 CHECK_STRING (string);
2390 return make_char (lookup_composite_char (XSTRING_DATA (string),
2391 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN): signals "Must be
   composite char" unless the character's leading byte is
   LEADING_BYTE_COMPOSITE, then returns the string that
   composite_char_string has on record for it.  */
2394 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2395 Return a string of the characters comprising a composite character.
2403 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2404 signal_simple_error ("Must be composite char", ch);
2405 return composite_char_string (emch);
2407 #endif /* ENABLE_COMPOSITE_CHARS */
2410 /************************************************************************/
2411 /* initialization */
2412 /************************************************************************/
/* Symbol-level initialization for this file: registers the charset
   lrecord implementation, declares every Lisp primitive defined here
   with DEFSUBR (the composite-char ones only under
   ENABLE_COMPOSITE_CHARS), and interns the property, direction and
   charset-name symbols with defsymbol.  The DEFSUBR for
   Freverse_direction_charset is commented out.  */
2415 syms_of_mule_charset (void)
2417 INIT_LRECORD_IMPLEMENTATION (charset);
2419 DEFSUBR (Fcharsetp);
2420 DEFSUBR (Ffind_charset);
2421 DEFSUBR (Fget_charset);
2422 DEFSUBR (Fcharset_list);
2423 DEFSUBR (Fcharset_name);
2424 DEFSUBR (Fmake_charset);
2425 DEFSUBR (Fmake_reverse_direction_charset);
2426 /* DEFSUBR (Freverse_direction_charset); */
2427 DEFSUBR (Fdefine_charset_alias);
2428 DEFSUBR (Fcharset_from_attributes);
2429 DEFSUBR (Fcharset_short_name);
2430 DEFSUBR (Fcharset_long_name);
2431 DEFSUBR (Fcharset_description);
2432 DEFSUBR (Fcharset_dimension);
2433 DEFSUBR (Fcharset_property);
2434 DEFSUBR (Fcharset_id);
2435 DEFSUBR (Fset_charset_ccl_program);
2436 DEFSUBR (Fset_charset_registry);
2438 DEFSUBR (Fcharset_mapping_table);
2439 DEFSUBR (Fset_charset_mapping_table);
2443 DEFSUBR (Fdecode_char);
2444 DEFSUBR (Fdecode_builtin_char);
2445 DEFSUBR (Fencode_char);
2447 DEFSUBR (Fmake_char);
2448 DEFSUBR (Fchar_charset);
2449 DEFSUBR (Fchar_octet);
2450 DEFSUBR (Fsplit_char);
2452 #ifdef ENABLE_COMPOSITE_CHARS
2453 DEFSUBR (Fmake_composite_char);
2454 DEFSUBR (Fcomposite_char_string);
2457 defsymbol (&Qcharsetp, "charsetp");
2458 defsymbol (&Qregistry, "registry");
2459 defsymbol (&Qfinal, "final");
2460 defsymbol (&Qgraphic, "graphic");
2461 defsymbol (&Qdirection, "direction");
2462 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2463 defsymbol (&Qshort_name, "short-name");
2464 defsymbol (&Qlong_name, "long-name");
2466 defsymbol (&Qmother, "mother");
2467 defsymbol (&Qmin_code, "min-code");
2468 defsymbol (&Qmax_code, "max-code");
2469 defsymbol (&Qcode_offset, "code-offset");
2470 defsymbol (&Qconversion, "conversion");
2471 defsymbol (&Q94x60, "94x60");
2472 defsymbol (&Q94x94x60, "94x94x60");
2475 defsymbol (&Ql2r, "l2r");
2476 defsymbol (&Qr2l, "r2l");
2478 /* Charsets, compatible with FSF 20.3
2479 Naming convention is Script-Charset[-Edition] */
2480 defsymbol (&Qascii, "ascii");
2481 defsymbol (&Qcontrol_1, "control-1");
2482 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2483 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2484 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2485 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2486 defsymbol (&Qthai_tis620, "thai-tis620");
2487 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2488 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2489 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2490 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2491 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2492 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2493 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2494 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2495 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2496 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2497 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2498 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2499 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2500 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2501 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2502 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2504 defsymbol (&Qucs, "ucs");
2505 defsymbol (&Qucs_bmp, "ucs-bmp");
2506 defsymbol (&Qucs_smp, "ucs-smp");
2507 defsymbol (&Qucs_sip, "ucs-sip");
2508 defsymbol (&Qucs_cns, "ucs-cns");
2509 defsymbol (&Qucs_jis, "ucs-jis");
2510 defsymbol (&Qucs_ks, "ucs-ks");
2511 defsymbol (&Qucs_big5, "ucs-big5");
2512 defsymbol (&Qlatin_viscii, "latin-viscii");
2513 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2514 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2515 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2516 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2517 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2518 defsymbol (&Qideograph_gt, "ideograph-gt");
2519 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2520 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2521 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2522 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2523 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2524 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2525 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2526 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2527 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2528 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2529 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2530 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2531 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2532 defsymbol (&Qchinese_big5, "chinese-big5");
2533 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2534 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2535 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2536 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2537 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2538 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2539 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2540 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2541 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2542 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2543 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2544 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2545 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2546 defsymbol (&Qchina3_jef, "china3-jef");
2547 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2548 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2550 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2551 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2553 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates a zeroed
   charset_lookup structure, registers it as a dump root, fills the
   by-leading-byte and by-attributes tables with Qnil, seeds the
   next-allocated leading-byte counters, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.
   NOTE(review): two differently shaped initialization loops for
   charset_by_attributes and two sets of leading-byte counters appear
   here; presumably they are alternative preprocessor branches --
   confirm.  leading_code_private_11 is assigned the same value both
   before and after its DEFVAR_INT.  */
2557 vars_of_mule_charset (void)
2564 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2565 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2567 /* Table of charsets indexed by leading byte. */
2568 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2569 chlook->charset_by_leading_byte[i] = Qnil;
2572 /* Table of charsets indexed by type/final-byte. */
2573 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2574 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2575 chlook->charset_by_attributes[i][j] = Qnil;
2577 /* Table of charsets indexed by type/final-byte/direction. */
2578 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2579 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2580 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2581 chlook->charset_by_attributes[i][j][k] = Qnil;
2585 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2587 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2588 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2592 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2593 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2594 Leading-code of private TYPE9N charset of column-width 1.
2596 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2600 Vdefault_coded_charset_priority_list = Qnil;
2601 DEFVAR_LISP ("default-coded-charset-priority-list",
2602 &Vdefault_coded_charset_priority_list /*
2603 Default order of preferred coded-character-sets.
2609 complex_vars_of_mule_charset (void)
2611 staticpro (&Vcharset_hash_table);
2612 Vcharset_hash_table =
2613 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2615 /* Predefined character sets. We store them into variables for
2619 staticpro (&Vcharset_ucs);
2621 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2622 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2623 build_string ("UCS"),
2624 build_string ("UCS"),
2625 build_string ("ISO/IEC 10646"),
2627 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2628 staticpro (&Vcharset_ucs_bmp);
2630 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2631 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2632 build_string ("BMP"),
2633 build_string ("UCS-BMP"),
2634 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2636 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2637 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2638 staticpro (&Vcharset_ucs_smp);
2640 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2641 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2642 build_string ("SMP"),
2643 build_string ("UCS-SMP"),
2644 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2645 build_string ("UCS00-1"),
2646 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2647 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2648 staticpro (&Vcharset_ucs_sip);
2650 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2651 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2652 build_string ("SIP"),
2653 build_string ("UCS-SIP"),
2654 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2655 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2656 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2657 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_ucs_cns);
2660 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2661 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2662 build_string ("UCS for CNS"),
2663 build_string ("UCS for CNS 11643"),
2664 build_string ("ISO/IEC 10646 for CNS 11643"),
2667 Qnil, CONVERSION_IDENTICAL);
2668 staticpro (&Vcharset_ucs_jis);
2670 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2671 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2672 build_string ("UCS for JIS"),
2673 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2674 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2676 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2677 staticpro (&Vcharset_ucs_ks);
2679 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2680 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2681 build_string ("UCS for KS"),
2682 build_string ("UCS for CCS defined by KS"),
2683 build_string ("ISO/IEC 10646 for Korean Standards"),
2685 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2686 staticpro (&Vcharset_ucs_big5);
2688 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2689 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2690 build_string ("UCS for Big5"),
2691 build_string ("UCS for Big5"),
2692 build_string ("ISO/IEC 10646 for Big5"),
2694 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Placeholder code-point range limits, all defined as 0 here.
   MIN_CHAR_THAI / MAX_CHAR_THAI are passed to make_charset () for
   Vcharset_thai_tis620 below.  The HEBREW pair is disabled; the
   hebrew-iso8859-8 call below passes literal 0s and keeps the names
   only as inline comments.  The HALFWIDTH_KATAKANA pair is not
   referenced in the part of this function visible here --
   NOTE(review): confirm whether it is used elsewhere before removing.  */
2696 # define MIN_CHAR_THAI 0
2697 # define MAX_CHAR_THAI 0
2698 /* # define MIN_CHAR_HEBREW 0 */
2699 /* # define MAX_CHAR_HEBREW 0 */
2700 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2701 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2703 staticpro (&Vcharset_ascii);
2705 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2706 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("ASCII"),
2708 build_string ("ASCII)"),
2709 build_string ("ASCII (ISO646 IRV)"),
2710 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2711 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_control_1);
2713 Vcharset_control_1 =
2714 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2715 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2716 build_string ("C1"),
2717 build_string ("Control characters"),
2718 build_string ("Control characters 128-191"),
2720 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2721 staticpro (&Vcharset_latin_iso8859_1);
2722 Vcharset_latin_iso8859_1 =
2723 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2724 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2725 build_string ("Latin-1"),
2726 build_string ("ISO8859-1 (Latin-1)"),
2727 build_string ("ISO8859-1 (Latin-1)"),
2728 build_string ("iso8859-1"),
2729 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2730 staticpro (&Vcharset_latin_iso8859_2);
2731 Vcharset_latin_iso8859_2 =
2732 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2733 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2734 build_string ("Latin-2"),
2735 build_string ("ISO8859-2 (Latin-2)"),
2736 build_string ("ISO8859-2 (Latin-2)"),
2737 build_string ("iso8859-2"),
2738 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2739 staticpro (&Vcharset_latin_iso8859_3);
2740 Vcharset_latin_iso8859_3 =
2741 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2742 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2743 build_string ("Latin-3"),
2744 build_string ("ISO8859-3 (Latin-3)"),
2745 build_string ("ISO8859-3 (Latin-3)"),
2746 build_string ("iso8859-3"),
2747 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2748 staticpro (&Vcharset_latin_iso8859_4);
2749 Vcharset_latin_iso8859_4 =
2750 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2751 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2752 build_string ("Latin-4"),
2753 build_string ("ISO8859-4 (Latin-4)"),
2754 build_string ("ISO8859-4 (Latin-4)"),
2755 build_string ("iso8859-4"),
2756 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2757 staticpro (&Vcharset_thai_tis620);
2758 Vcharset_thai_tis620 =
2759 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2760 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2761 build_string ("TIS620"),
2762 build_string ("TIS620 (Thai)"),
2763 build_string ("TIS620.2529 (Thai)"),
2764 build_string ("tis620"),
2765 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2766 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2767 staticpro (&Vcharset_greek_iso8859_7);
2768 Vcharset_greek_iso8859_7 =
2769 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2770 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2771 build_string ("ISO8859-7"),
2772 build_string ("ISO8859-7 (Greek)"),
2773 build_string ("ISO8859-7 (Greek)"),
2774 build_string ("iso8859-7"),
2775 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2776 staticpro (&Vcharset_arabic_iso8859_6);
2777 Vcharset_arabic_iso8859_6 =
2778 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2779 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2780 build_string ("ISO8859-6"),
2781 build_string ("ISO8859-6 (Arabic)"),
2782 build_string ("ISO8859-6 (Arabic)"),
2783 build_string ("iso8859-6"),
2784 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2785 staticpro (&Vcharset_hebrew_iso8859_8);
2786 Vcharset_hebrew_iso8859_8 =
2787 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2788 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2789 build_string ("ISO8859-8"),
2790 build_string ("ISO8859-8 (Hebrew)"),
2791 build_string ("ISO8859-8 (Hebrew)"),
2792 build_string ("iso8859-8"),
2794 0 /* MIN_CHAR_HEBREW */,
2795 0 /* MAX_CHAR_HEBREW */, 0, 32,
2796 Qnil, CONVERSION_IDENTICAL);
2797 staticpro (&Vcharset_katakana_jisx0201);
2798 Vcharset_katakana_jisx0201 =
2799 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2800 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2801 build_string ("JISX0201 Kana"),
2802 build_string ("JISX0201.1976 (Japanese Kana)"),
2803 build_string ("JISX0201.1976 Japanese Kana"),
2804 build_string ("jisx0201\\.1976"),
2805 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2806 staticpro (&Vcharset_latin_jisx0201);
2807 Vcharset_latin_jisx0201 =
2808 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2809 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2810 build_string ("JISX0201 Roman"),
2811 build_string ("JISX0201.1976 (Japanese Roman)"),
2812 build_string ("JISX0201.1976 Japanese Roman"),
2813 build_string ("jisx0201\\.1976"),
2814 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2815 staticpro (&Vcharset_cyrillic_iso8859_5);
2816 Vcharset_cyrillic_iso8859_5 =
2817 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2818 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2819 build_string ("ISO8859-5"),
2820 build_string ("ISO8859-5 (Cyrillic)"),
2821 build_string ("ISO8859-5 (Cyrillic)"),
2822 build_string ("iso8859-5"),
2823 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2824 staticpro (&Vcharset_latin_iso8859_9);
2825 Vcharset_latin_iso8859_9 =
2826 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2827 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2828 build_string ("Latin-5"),
2829 build_string ("ISO8859-9 (Latin-5)"),
2830 build_string ("ISO8859-9 (Latin-5)"),
2831 build_string ("iso8859-9"),
2832 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2833 staticpro (&Vcharset_japanese_jisx0208_1978);
2834 Vcharset_japanese_jisx0208_1978 =
2835 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2836 Qjapanese_jisx0208_1978, 94, 2,
2837 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2838 build_string ("JIS X0208:1978"),
2839 build_string ("JIS X0208:1978 (Japanese)"),
2841 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2842 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2843 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2844 staticpro (&Vcharset_chinese_gb2312);
2845 Vcharset_chinese_gb2312 =
2846 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2847 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2848 build_string ("GB2312"),
2849 build_string ("GB2312)"),
2850 build_string ("GB2312 Chinese simplified"),
2851 build_string ("gb2312"),
2852 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_chinese_gb12345);
2854 Vcharset_chinese_gb12345 =
2855 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2856 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2857 build_string ("G1"),
2858 build_string ("GB 12345)"),
2859 build_string ("GB 12345-1990"),
2860 build_string ("GB12345\\(\\.1990\\)?-0"),
2861 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_japanese_jisx0208);
2863 Vcharset_japanese_jisx0208 =
2864 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2865 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2866 build_string ("JISX0208"),
2867 build_string ("JIS X0208:1983 (Japanese)"),
2868 build_string ("JIS X0208:1983 Japanese Kanji"),
2869 build_string ("jisx0208\\.1983"),
2870 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_japanese_jisx0208_1990);
2873 Vcharset_japanese_jisx0208_1990 =
2874 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2875 Qjapanese_jisx0208_1990, 94, 2,
2876 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2877 build_string ("JISX0208-1990"),
2878 build_string ("JIS X0208:1990 (Japanese)"),
2879 build_string ("JIS X0208:1990 Japanese Kanji"),
2880 build_string ("jisx0208\\.1990"),
2882 MIN_CHAR_JIS_X0208_1990,
2883 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2884 Qnil, CONVERSION_IDENTICAL);
2886 staticpro (&Vcharset_korean_ksc5601);
2887 Vcharset_korean_ksc5601 =
2888 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2889 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2890 build_string ("KSC5601"),
2891 build_string ("KSC5601 (Korean"),
2892 build_string ("KSC5601 Korean Hangul and Hanja"),
2893 build_string ("ksc5601"),
2894 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2895 staticpro (&Vcharset_japanese_jisx0212);
2896 Vcharset_japanese_jisx0212 =
2897 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2898 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2899 build_string ("JISX0212"),
2900 build_string ("JISX0212 (Japanese)"),
2901 build_string ("JISX0212 Japanese Supplement"),
2902 build_string ("jisx0212"),
2903 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Build the pattern string for CNS 11643 plane N.  The pieces are
   joined by adjacent string-literal concatenation, so N must itself
   be a string literal (it is used below with "1" and "2").  The
   result is handed to build_string () as one of make_charset ()'s
   string arguments; presumably this is the font-registry regexp --
   NOTE(review): confirm against make_charset ()'s parameter list.  */
2905 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2906 staticpro (&Vcharset_chinese_cns11643_1);
2907 Vcharset_chinese_cns11643_1 =
2908 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2909 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2910 build_string ("CNS11643-1"),
2911 build_string ("CNS11643-1 (Chinese traditional)"),
2913 ("CNS 11643 Plane 1 Chinese traditional"),
2914 build_string (CHINESE_CNS_PLANE_RE("1")),
2915 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2916 staticpro (&Vcharset_chinese_cns11643_2);
2917 Vcharset_chinese_cns11643_2 =
2918 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2919 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2920 build_string ("CNS11643-2"),
2921 build_string ("CNS11643-2 (Chinese traditional)"),
2923 ("CNS 11643 Plane 2 Chinese traditional"),
2924 build_string (CHINESE_CNS_PLANE_RE("2")),
2925 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2927 staticpro (&Vcharset_latin_tcvn5712);
2928 Vcharset_latin_tcvn5712 =
2929 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2930 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2931 build_string ("TCVN 5712"),
2932 build_string ("TCVN 5712 (VSCII-2)"),
2933 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2934 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2935 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2936 staticpro (&Vcharset_latin_viscii_lower);
2937 Vcharset_latin_viscii_lower =
2938 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2939 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2940 build_string ("VISCII lower"),
2941 build_string ("VISCII lower (Vietnamese)"),
2942 build_string ("VISCII lower (Vietnamese)"),
2943 build_string ("MULEVISCII-LOWER"),
2944 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2945 staticpro (&Vcharset_latin_viscii_upper);
2946 Vcharset_latin_viscii_upper =
2947 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2948 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2949 build_string ("VISCII upper"),
2950 build_string ("VISCII upper (Vietnamese)"),
2951 build_string ("VISCII upper (Vietnamese)"),
2952 build_string ("MULEVISCII-UPPER"),
2953 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2954 staticpro (&Vcharset_latin_viscii);
2955 Vcharset_latin_viscii =
2956 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2957 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2958 build_string ("VISCII"),
2959 build_string ("VISCII 1.1 (Vietnamese)"),
2960 build_string ("VISCII 1.1 (Vietnamese)"),
2961 build_string ("VISCII1\\.1"),
2962 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2963 staticpro (&Vcharset_chinese_big5);
2964 Vcharset_chinese_big5 =
2965 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2966 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2967 build_string ("Big5"),
2968 build_string ("Big5"),
2969 build_string ("Big5 Chinese traditional"),
2970 build_string ("big5-0"),
2972 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2973 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2975 staticpro (&Vcharset_chinese_big5_cdp);
2976 Vcharset_chinese_big5_cdp =
2977 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2979 build_string ("Big5-CDP"),
2980 build_string ("Big5 + CDP extension"),
2981 build_string ("Big5 with CDP extension"),
2982 build_string ("big5\\.cdp-0"),
2983 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2984 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* Define the predefined charset object for HANZIKU part N.
   The expansion (1) registers Vcharset_ideograph_hanziku_N with
   staticpro () and (2) assigns it the result of make_charset ()
   called with LEADING_BYTE_HANZIKU_N, Qideograph_hanziku_N, the
   name strings "HZK-N", "HANZIKU-N", ..., and the range arguments
   MIN_CHAR_HANZIKU_N / MAX_CHAR_HANZIKU_N.

   N is token-pasted (##) into identifiers and stringized (#) into
   the name strings, so it must be a bare token such as 1, not an
   expression or a macro expected to expand first.

   The expansion is two full statements and already ends in ';'.  It
   is not wrapped in do { } while (0), so do not use it as the body
   of an unbraced if/else.  */
2986 #define DEF_HANZIKU(n) \
2987 staticpro (&Vcharset_ideograph_hanziku_##n); \
2988 Vcharset_ideograph_hanziku_##n = \
2989 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2990 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2991 build_string ("HZK-"#n), \
2992 build_string ("HANZIKU-"#n), \
2993 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2995 ("hanziku-"#n"$"), \
2996 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2997 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3010 staticpro (&Vcharset_china3_jef);
3011 Vcharset_china3_jef =
3012 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3013 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3014 build_string ("JC3"),
3015 build_string ("JEF + CHINA3"),
3016 build_string ("JEF + CHINA3 private characters"),
3017 build_string ("china3jef-0"),
3018 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3019 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3020 staticpro (&Vcharset_ideograph_cbeta);
3021 Vcharset_ideograph_cbeta =
3022 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3023 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3024 build_string ("CB"),
3025 build_string ("CBETA"),
3026 build_string ("CBETA private characters"),
3027 build_string ("cbeta-0"),
3028 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3029 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3030 staticpro (&Vcharset_ideograph_gt);
3031 Vcharset_ideograph_gt =
3032 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3033 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3034 build_string ("GT"),
3035 build_string ("GT"),
3036 build_string ("GT"),
3038 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3039 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* Define the predefined charset object for GT "pseudo JIS" part N.
   The expansion (1) registers Vcharset_ideograph_gt_pj_N with
   staticpro () and (2) assigns it the result of make_charset ()
   called with LEADING_BYTE_GT_PJ_N, Qideograph_gt_pj_N, the size
   arguments 94, 2, and name strings built from N ("GT-PJ-N", ...).
   The numeric range arguments are all 0 and the following argument
   is 33, the same values the other 94, 2 charsets in this function
   pass.

   N is token-pasted (##) into identifiers and stringized (#) into
   the strings, so it must be a bare token such as 1.

   The expansion is two full statements and already ends in ';'.  It
   is not wrapped in do { } while (0), so do not use it as the body
   of an unbraced if/else.  */
3040 #define DEF_GT_PJ(n) \
3041 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3042 Vcharset_ideograph_gt_pj_##n = \
3043 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3044 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3045 build_string ("GT-PJ-"#n), \
3046 build_string ("GT (pseudo JIS encoding) part "#n), \
3047 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3049 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3050 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3063 staticpro (&Vcharset_ideograph_daikanwa_2);
3064 Vcharset_ideograph_daikanwa_2 =
3065 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3066 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3067 build_string ("Daikanwa Rev."),
3068 build_string ("Morohashi's Daikanwa Rev."),
3070 ("Daikanwa dictionary (revised version)"),
3071 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3072 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3073 staticpro (&Vcharset_ideograph_daikanwa);
3074 Vcharset_ideograph_daikanwa =
3075 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3076 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3077 build_string ("Daikanwa"),
3078 build_string ("Morohashi's Daikanwa Rev.2"),
3080 ("Daikanwa dictionary (second revised version)"),
3081 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3082 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3083 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3085 staticpro (&Vcharset_ethiopic_ucs);
3086 Vcharset_ethiopic_ucs =
3087 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3088 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3089 build_string ("Ethiopic (UCS)"),
3090 build_string ("Ethiopic (UCS)"),
3091 build_string ("Ethiopic of UCS"),
3092 build_string ("Ethiopic-Unicode"),
3093 Qnil, 0x1200, 0x137F, 0, 0,
3094 Qnil, CONVERSION_IDENTICAL);
3096 staticpro (&Vcharset_chinese_big5_1);
3097 Vcharset_chinese_big5_1 =
3098 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3099 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("Big5"),
3101 build_string ("Big5 (Level-1)"),
3103 ("Big5 Level-1 Chinese traditional"),
3104 build_string ("big5"),
3105 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3106 staticpro (&Vcharset_chinese_big5_2);
3107 Vcharset_chinese_big5_2 =
3108 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3109 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3110 build_string ("Big5"),
3111 build_string ("Big5 (Level-2)"),
3113 ("Big5 Level-2 Chinese traditional"),
3114 build_string ("big5"),
3115 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3117 #ifdef ENABLE_COMPOSITE_CHARS
3118 /* #### For simplicity, we put composite chars into a 96x96 charset.
3119 This is going to lead to problems because you can run out of
3120 room, esp. as we don't yet recycle numbers. */
3121 staticpro (&Vcharset_composite);
3122 Vcharset_composite =
3123 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3124 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3125 build_string ("Composite"),
3126 build_string ("Composite characters"),
3127 build_string ("Composite characters"),
3130 /* #### not dumped properly */
3131 composite_char_row_next = 32;
3132 composite_char_col_next = 32;
3134 Vcomposite_char_string2char_hash_table =
3135 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3136 Vcomposite_char_char2string_hash_table =
3137 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3138 staticpro (&Vcomposite_char_string2char_hash_table);
3139 staticpro (&Vcomposite_char_char2string_hash_table);
3140 #endif /* ENABLE_COMPOSITE_CHARS */