1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
/* Global Lisp_Object handles, one per pre-defined charset object.
   NOTE(review): this listing skips source lines 66 and 110, which sit
   around the ucs / viscii / ideograph group; any conditional-compilation
   guard that applies to only part of this list is therefore not visible
   here -- confirm against the full source before reordering.  */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
/* State that exists only when ENABLE_COMPOSITE_CHARS is defined: the
   composite charset object, two hash tables translating between a
   composite character and its string form (char2string / string2char),
   and two file-local counters, composite_char_row_next and
   composite_char_col_next.  NOTE(review): the code that advances the
   counters is not part of this chunk.  */
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at the charset lookup tables (struct charset_lookup).
   The two descriptions that follow describe that structure: the first
   visible entry registers the charset_by_leading_byte member as an array
   of Lisp objects, and charset_lookup_description pairs the structure
   size with that entry list.  NOTE(review): the remaining entries and
   the initializer terminators are missing from this listing.  */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* NOTE(review): two candidate rows are listed for 0x80 - 0x8f (one ending
   in 3, one consisting only of 2s).  Counting them together with the
   eight ASCII rows and the final 0x90 - 0x9f row gives more than the
   0xA0 entries declared, so the two rows are presumably alternatives
   selected by a preprocessor conditional that this listing does not
   show -- confirm against the full source before editing the table.  */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the per-byte size used for charset CS.
   For a charset with 94 characters whose byte offset is not 33 (the
   ad-hoc `ascii' case mentioned below) the result is 128 minus the byte
   offset; in every other case it is CHARSET_CHARS (cs).
   XCHARSET_BYTE_SIZE applies the same computation to a Lisp_Object
   charset by unwrapping it with XCHARSET first.  */
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the shape of a (possibly nested) decoding-table vector V.
   DIM is the nesting depth still to be checked and CCS_LEN the length
   limit applied at every level.  Visible checks: the length of V is
   compared against CCS_LEN; each element is inspected, with elements
   that are neither nil nor a character singled out; and elements are
   checked recursively with DIM - 1.
   NOTE(review): the return statements and the conditions guarding the
   recursion are missing from this listing, so the status codes are not
   documented here.  */
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for a code point from decoding table V.
   The slot index at the current level is the byte of code_point selected
   by DIM (code_point shifted right by 8 * DIM, masked to 8 bits) minus
   BYTE_OFFSET.  The visible code reads that slot and can reset it to
   Qnil.  NOTE(review): the branch structure (descending into a
   sub-vector versus clearing a leaf) and any bounds check on the
   computed index are not visible in this listing.  */
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER in decoding table V at the position named by
   CODE_POINT.  The slot index at the current level is the byte of
   CODE_POINT selected by DIM (shifted right by 8 * DIM, masked to 8
   bits) minus BYTE_OFFSET.  The visible code can install a fresh vector
   of XVECTOR_LENGTH (v) nil entries in a slot, and can store CHARACTER
   directly in a slot.  NOTE(review): the conditions choosing between
   those two actions, and any recursion, are missing from this listing.  */
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in coded charset CCS by
   updating the decoding table of CCS.
   VALUE may be an integer or, in the obsolete representation, a list of
   bytes that is folded into one integer (each further byte is shifted in
   as the low 8 bits); other values signal
   "Invalid value for coded-charset".  For a charset whose graphic is 1
   the integer form is masked with 0x7F7F7F7F.  A code point that
   CHARACTER already has in CCS (looked up with Fget_char_attribute) can
   be removed from the table, the table is created with
   XCHARSET_BYTE_SIZE (ccs) nil entries when needed, and the new mapping
   is stored with decoding_table_put_char.
   NOTE(review): several conditions and the return statement are missing
   from this listing; this summary covers the visible statements only.  */
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
312 code_point = (code_point << 8) | j;
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
/* Remove CHARACTER's mapping in coded charset CCS.
   When the decoding table of CCS is a vector, CHARACTER's current code
   point is looked up with Fget_char_attribute and handed to
   decoding_table_remove_char; when the encoding table is a char table,
   CHARACTER's entry in it is set to Qnil.
   NOTE(review): the check applied to the looked-up code point and the
   return value are not visible in this listing.  */
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
/* File-level globals used by the charset code: leading_code_private_11,
   the Qcharsetp symbol, symbols naming charset properties (Qregistry,
   Qfinal, Qgraphic, ...), a list of charset-name symbols, the direction
   symbols Ql2r / Qr2l, and Vcharset_hash_table.
   NOTE(review): the charset-name symbol list is only partly visible in
   this listing (its opening declaration and many entries are missing),
   so it must not be edited from this view alone.  */
376 int leading_code_private_11;
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
389 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
431 /* Qchinese_big5_cdp, */
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
504 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
505 string in STR. Returns the number of bytes stored.
506 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* NOTE(review): two encodings are visible in this function.
   The first chain of comparisons on C writes a UTF-8 style sequence: a
   lead byte tagged 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc followed by one to
   five continuation bytes of the form 0x80 | (6 bits of C), for C up to
   0x7ff, 0xffff, 0x1fffff, 0x3ffffff, and a final six-byte form.
   The second part splits C with BREAKUP_CHAR and emits leading-byte
   based output, writing a prefix byte first when the leading byte is a
   private one.  The two parts are presumably selected by a preprocessor
   conditional that this listing does not show; the return statement is
   not visible either.  */
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
/* NOTE(review): two decoding paths are visible.  One classifies the
   first byte B against 0xf8, 0xf0, 0xe0 and 0xc0 and then accumulates CH
   six bits per loop iteration (ch << 6 | low 6 bits of B).  The other
   handles LEADING_BYTE_CONTROL_1 by returning the following byte minus
   0x20, recognises prefix leading bytes, looks the charset up by leading
   byte, and builds the result with MAKE_CHAR, treating dimension-2
   charsets separately.  The statements between the visible lines, and
   whatever selects between the two paths, are missing from this
   listing.  */
580 non_ascii_charptr_emchar (const Bufbyte *str)
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
625 Bufbyte i0 = *str, i1, i2 = 0;
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
/* NOTE(review): summary of the visible checks only.
   CH is split into three fields with CHAR_FIELD1/2/3.  One group of
   tests range-checks F2 against the official and private FIELD2 limits,
   looks the charset up by leading byte and ends by testing whether that
   charset has 96 characters; a second group does the same with F1,
   additionally requiring F2 and F3 to be at least 0x20 and, under
   ENABLE_COMPOSITE_CHARS, consulting the composite char2string hash
   table.  The byte values 0x20 and 0x7F get special treatment in both
   groups.  The return statements attached to most tests are missing
   from this listing, so individual outcomes are not documented here.  */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
677 NOTE: This takes advantage of the fact that
678 FIELD2_TO_OFFICIAL_LEADING_BYTE and
679 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
684 return (XCHARSET_CHARS (charset) == 96);
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
/* The byte count comes from REP_BYTES_BY_FIRST_BYTE applied to the first
   source byte; the loop runs once per byte and advances both SRC and
   DST.  NOTE(review): the loop body and the return statement are not
   visible in this listing.  */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
/* Finish reading a multi-byte character whose first byte CH has already
   been obtained, and return it as decoded by charptr_emchar.
   CH is stored as byte 0 of a local buffer of MAX_EMCHAR_LEN bytes; the
   remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are fetched from
   STREAM one at a time with Lstream_getc.  A negative result from
   Lstream_getc is checked only through bufpos_checking_assert before the
   value is stored.  */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM.  Returns whatever Lstream_write returns.  */
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and push the
   resulting bytes back onto STREAM with Lstream_unread.  */
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
791 /************************************************************************/
/* Mark method registered for charset objects (see the
   DEFINE_LRECORD_IMPLEMENTATION for "charset" in this file).  Marks the
   Lisp_Object members of the charset: short_name, long_name, doc_string,
   registry, ccl_program, decoding_table and mother.
   NOTE(review): the return statement and source lines 803 and 806-808
   are missing from this listing, so further marked members may exist.  */
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
/* Print method registered for charset objects.
   Output has the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the middle part is formatted by the sprintf below (chars and
   dimension, direction as "l2r" or "r2l", columns, graphic, final).
   An error "printing unreadable object" can be raised first; the
   condition for it is not visible in this listing.
   NOTE(review): text is formatted into buf with sprintf, and the
   declaration of buf is not visible here -- confirm it is large enough
   for the longest formatted string.  */
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
/* Description of the Lisp_Object members of Lisp_Charset (name,
   doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table, mother),
   followed by the lrecord implementation for "charset", which registers
   mark_charset and print_charset.
   NOTE(review): source lines 850 and 853-856 of the table and the tail
   of the DEFINE_LRECORD_IMPLEMENTATION call are missing from this
   listing.  */
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset. */
863 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, fill in its fields from the
   arguments, and register it in the lookup tables.
   Visible behaviour:
   - ccl_program and reverse_direction_charset start out as Qnil.
   - REP_BYTES is 1 for LEADING_BYTE_ASCII; the other two visible
     assignments use dimension + 1 and dimension + 2.
   - iso2022_type is 0 or 1 for dimension 1 and 2 or 3 otherwise, the
     odd values being used when CHARS is not 94.
   - The new object is stored in chlook->charset_by_attributes and in
     chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE], each after
     an assert that the slot is still nil, and entered in
     Vcharset_hash_table under NAME.
   NOTE(review): the visible code assigns Qnil to the decoding table
   even though a decoding_table argument is passed -- confirm this is
   intended.  REG is used below but its parameter line (source line 870)
   is missing from this listing, as are the conditions selecting among
   the REP_BYTES assignments and the two charset_by_attributes forms.  */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte.
   Three counters in chlook are visible (next_allocated_leading_byte,
   next_allocated_1_byte_leading_byte and
   next_allocated_2_byte_leading_byte); each is compared with its maximum
   and otherwise post-incremented to produce LB.  The error
   "No more character sets free for this dimension" is signalled with
   DIMENSION as its datum.
   NOTE(review): the lines choosing which counter applies (by build
   configuration or by DIMENSION), the exhausted-case handling and the
   return statement are missing from this listing.  */
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
/* The value is (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157, matching
   the two second-byte ranges accepted by decode_builtin_char in this
   file: 0xA1 - 0xFE and 0x40 - 0x7E.  */
982 /* Number of Big5 characters which have the same code in 1st byte. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character for CODE_POINT in CHARSET without consulting a
   decoding table.  Visible cases, in order:
   - CHARSET has a mother charset: CODE_POINT is optionally re-linearised
     (CONVERSION_94x60 uses row/cell, CONVERSION_94x94x60 uses
     plane/row/cell, both with 94 cells per row) and then decoded in the
     mother charset after adding CHARSET's code offset.
   - CHARSET is Vcharset_chinese_big5: a valid two-byte code is turned
     into a linear index using BIG5_SAME_ROW, reassigned to
     Vcharset_chinese_big5_1 or Vcharset_chinese_big5_2, and re-encoded
     as a 94x94 code point before the following cases run.
   - The final byte is at least '0': the result is an offset from
     MIN_CHAR_94 / MIN_CHAR_96 (visible for 96) / MIN_CHAR_94x94 /
     MIN_CHAR_96x96 depending on dimension and number of characters.
   - CHARSET has a non-zero max code: a linear id is computed from the
     byte offset, number of characters and code offset and compared
     against the charset's min and max codes.
   NOTE(review): many return statements, switch labels and the
   fall-through result are missing from this listing.  */
987 decode_builtin_char (Lisp_Object charset, int code_point)
989 Lisp_Object mother = XCHARSET_MOTHER (charset);
992 if ( CHARSETP (mother) )
994 int code = code_point;
996 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
998 int row = code_point >> 8;
999 int cell = code_point & 255;
1003 else if (row < 16 + 32 + 30)
1004 code = (row - (16 + 32)) * 94 + cell - 33;
1005 else if (row < 18 + 32 + 30)
1007 else if (row < 18 + 32 + 60)
1008 code = (row - (18 + 32)) * 94 + cell - 33;
1010 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1012 int plane = code_point >> 16;
1013 int row = (code_point >> 8) & 255;
1014 int cell = code_point & 255;
1018 else if (row < 16 + 32 + 30)
1020 = (plane - 33) * 94 * 60
1021 + (row - (16 + 32)) * 94
1023 else if (row < 18 + 32 + 30)
1025 else if (row < 18 + 32 + 60)
1027 = (plane - 33) * 94 * 60
1028 + (row - (18 + 32)) * 94
1031 return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
1033 else if (EQ (charset, Vcharset_chinese_big5))
1035 int c1 = code_point >> 8;
1036 int c2 = code_point & 0xFF;
1039 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1041 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1042 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1044 I = (c1 - 0xA1) * BIG5_SAME_ROW
1045 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1049 charset = Vcharset_chinese_big5_1;
1053 charset = Vcharset_chinese_big5_2;
1054 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1056 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1059 if ((final = XCHARSET_FINAL (charset)) >= '0')
1061 if (XCHARSET_DIMENSION (charset) == 1)
1063 switch (XCHARSET_CHARS (charset))
1067 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1070 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1078 switch (XCHARSET_CHARS (charset))
1081 return MIN_CHAR_94x94
1082 + (final - '0') * 94 * 94
1083 + (((code_point >> 8) & 0x7F) - 33) * 94
1084 + ((code_point & 0x7F) - 33);
1086 return MIN_CHAR_96x96
1087 + (final - '0') * 96 * 96
1088 + (((code_point >> 8) & 0x7F) - 32) * 96
1089 + ((code_point & 0x7F) - 32);
1096 else if (XCHARSET_MAX_CODE (charset))
1099 = (XCHARSET_DIMENSION (charset) == 1
1101 code_point - XCHARSET_BYTE_OFFSET (charset)
1103 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1104 * XCHARSET_CHARS (charset)
1105 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1106 + XCHARSET_CODE_OFFSET (charset);
1107 if ((cid < XCHARSET_MIN_CODE (charset))
1108 || (XCHARSET_MAX_CODE (charset) < cid))
/* Compute the code point of character CH in CHARSET.
   Visible order of attempts:
   1. If CHARSET has a char-table encoding table holding an integer for
      CH, that entry is used.
   2. Otherwise, when CHARSET has a mother charset, CH is first converted
      to a code point in the mother (recursively).  If the resulting code
      lies within [min code, max code], the distance D from the code
      offset is re-encoded: row/cell for CONVERSION_94x60,
      plane/row/cell for CONVERSION_94x94x60, base-94 digits offset by 33
      for 94-character sets, base-96 digits offset by 32 for 96-character
      sets (one digit per dimension, 8 bits each), or returned as the
      plain difference.
   3. Otherwise, when the code offset is 0 or equals the min code, CH is
      matched against the MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 /
      MIN_CHAR_96x96 blocks indexed by the final byte.
   NOTE(review): several conditions, return statements and the failure
   result are missing from this listing.  */
1117 charset_code_point (Lisp_Object charset, Emchar ch)
1119 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1122 if ( CHAR_TABLEP (encoding_table)
1123 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1128 Lisp_Object mother = XCHARSET_MOTHER (charset);
1129 int min = XCHARSET_MIN_CODE (charset);
1130 int max = XCHARSET_MAX_CODE (charset);
1133 if ( CHARSETP (mother) )
1134 code = charset_code_point (mother, ch);
1137 if ( (min <= code) && (code <= max) )
1139 int d = code - XCHARSET_CODE_OFFSET (charset);
1141 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1144 int cell = d % 94 + 33;
1150 return (row << 8) | cell;
1152 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1154 int plane = d / (94 * 60) + 33;
1155 int row = (d % (94 * 60)) / 94;
1156 int cell = d % 94 + 33;
1162 return (plane << 16) | (row << 8) | cell;
1164 else if (XCHARSET_CHARS (charset) == 94)
1166 if (XCHARSET_DIMENSION (charset) == 1)
1168 else if (XCHARSET_DIMENSION (charset) == 2)
1169 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1170 else if (XCHARSET_DIMENSION (charset) == 3)
1172 ( (d / (94 * 94) + 33) << 16)
1173 | ((d / 94 % 94 + 33) << 8)
1175 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1177 ( (d / (94 * 94 * 94) + 33) << 24)
1178 | ((d / (94 * 94) % 94 + 33) << 16)
1179 | ((d / 94 % 94 + 33) << 8)
1182 else if (XCHARSET_CHARS (charset) == 96)
1184 if (XCHARSET_DIMENSION (charset) == 1)
1186 else if (XCHARSET_DIMENSION (charset) == 2)
1187 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1188 else if (XCHARSET_DIMENSION (charset) == 3)
1190 ( (d / (96 * 96) + 32) << 16)
1191 | ((d / 96 % 96 + 32) << 8)
1193 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1195 ( (d / (96 * 96 * 96) + 32) << 24)
1196 | ((d / (96 * 96) % 96 + 32) << 16)
1197 | ((d / 96 % 96 + 32) << 8)
1201 return code - XCHARSET_CODE_OFFSET (charset);
1203 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1204 (XCHARSET_CODE_OFFSET (charset)
1205 == XCHARSET_MIN_CODE (charset)) )
1209 if (XCHARSET_DIMENSION (charset) == 1)
1211 if (XCHARSET_CHARS (charset) == 94)
1213 if (((d = ch - (MIN_CHAR_94
1214 + (XCHARSET_FINAL (charset) - '0') * 94))
1219 else if (XCHARSET_CHARS (charset) == 96)
1221 if (((d = ch - (MIN_CHAR_96
1222 + (XCHARSET_FINAL (charset) - '0') * 96))
1230 else if (XCHARSET_DIMENSION (charset) == 2)
1232 if (XCHARSET_CHARS (charset) == 94)
1234 if (((d = ch - (MIN_CHAR_94x94
1236 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1239 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1241 else if (XCHARSET_CHARS (charset) == 96)
1243 if (((d = ch - (MIN_CHAR_96x96
1245 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1248 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a charset for character C from its numeric range, store it
   through CHARSET, and return C's code point within that charset.
   Visible ranges: basic Latin (ascii), control-1, latin-iso8859-1,
   Hebrew, Thai, the BMP / SMP / SIP planes, the Daikanwa block, and the
   MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks.  For the last four the
   charset is looked up with CHARSET_BY_ATTRIBUTES using a final byte
   derived from C; when that lookup yields nil, and in the gaps between
   blocks, Vcharset_ucs is used.
   NOTE(review): the half-width katakana branch returns list2 (...)
   while every other visible branch returns an integer code point; it is
   presumably disabled (commented out or conditionally compiled) in the
   full source -- confirm.  Several conditions and return statements are
   missing from this listing.  */
1259 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1261 if (c <= MAX_CHAR_BASIC_LATIN)
1263 *charset = Vcharset_ascii;
1268 *charset = Vcharset_control_1;
1273 *charset = Vcharset_latin_iso8859_1;
1277 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1279 *charset = Vcharset_hebrew_iso8859_8;
1280 return c - MIN_CHAR_HEBREW + 0x20;
1283 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1285 *charset = Vcharset_thai_tis620;
1286 return c - MIN_CHAR_THAI + 0x20;
1289 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1290 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1292 return list2 (Vcharset_katakana_jisx0201,
1293 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1296 else if (c <= MAX_CHAR_BMP)
1298 *charset = Vcharset_ucs_bmp;
1301 else if (c <= MAX_CHAR_SMP)
1303 *charset = Vcharset_ucs_smp;
1304 return c - MIN_CHAR_SMP;
1306 else if (c <= MAX_CHAR_SIP)
1308 *charset = Vcharset_ucs_sip;
1309 return c - MIN_CHAR_SIP;
1311 else if (c < MIN_CHAR_DAIKANWA)
1313 *charset = Vcharset_ucs;
1316 else if (c <= MAX_CHAR_DAIKANWA)
1318 *charset = Vcharset_ideograph_daikanwa;
1319 return c - MIN_CHAR_DAIKANWA;
1321 else if (c < MIN_CHAR_94)
1323 *charset = Vcharset_ucs;
1326 else if (c <= MAX_CHAR_94)
1328 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1329 ((c - MIN_CHAR_94) / 94) + '0',
1330 CHARSET_LEFT_TO_RIGHT);
1331 if (!NILP (*charset))
1332 return ((c - MIN_CHAR_94) % 94) + 33;
1335 *charset = Vcharset_ucs;
1339 else if (c <= MAX_CHAR_96)
1341 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1342 ((c - MIN_CHAR_96) / 96) + '0',
1343 CHARSET_LEFT_TO_RIGHT);
1344 if (!NILP (*charset))
1345 return ((c - MIN_CHAR_96) % 96) + 32;
1348 *charset = Vcharset_ucs;
1352 else if (c <= MAX_CHAR_94x94)
1355 = CHARSET_BY_ATTRIBUTES (94, 2,
1356 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1357 CHARSET_LEFT_TO_RIGHT);
1358 if (!NILP (*charset))
1359 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1360 | (((c - MIN_CHAR_94x94) % 94) + 33);
1363 *charset = Vcharset_ucs;
1367 else if (c <= MAX_CHAR_96x96)
1370 = CHARSET_BY_ATTRIBUTES (96, 2,
1371 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1372 CHARSET_LEFT_TO_RIGHT);
1373 if (!NILP (*charset))
1374 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1375 | (((c - MIN_CHAR_96x96) % 96) + 32);
1378 *charset = Vcharset_ucs;
1384 *charset = Vcharset_ucs;
/* NOTE(review): not referenced anywhere else in this chunk; presumably
   the default priority order of coded charsets -- confirm against the
   code that initialises it.  */
1389 Lisp_Object Vdefault_coded_charset_priority_list;
1393 /************************************************************************/
1394 /* Basic charset Lisp functions */
1395 /************************************************************************/
/* Lisp predicate: yields Qt when OBJECT satisfies CHARSETP and Qnil
   otherwise.  */
1397 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1398 Return non-nil if OBJECT is a charset.
1402 return CHARSETP (object) ? Qt : Qnil;
/* Implementation note: a charset object argument is returned unchanged;
   any other argument must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table with Qnil as the default result.  */
1405 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1406 Retrieve the charset of the given name.
1407 If CHARSET-OR-NAME is a charset object, it is simply returned.
1408 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1409 nil is returned. Otherwise the associated charset object is returned.
1413 if (CHARSETP (charset_or_name))
1414 return charset_or_name;
1416 CHECK_SYMBOL (charset_or_name);
1417 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Implementation note: delegates to Ffind_charset and signals
   "No such charset" with NAME as its datum.  NOTE(review): the test
   guarding the error and the return statement are missing from this
   listing.  */
1420 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1421 Retrieve the charset of the given name.
1422 Same as `find-charset' except an error is signalled if there is no such
1423 charset instead of returning nil.
1427 Lisp_Object charset = Ffind_charset (name);
1430 signal_simple_error ("No such charset", name);
1434 /* We store the charsets in hash tables with the names as the key and the
1435 actual charset object as the value. Occasionally we need to use them
1436 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: charset_list points at the
   Lisp list being accumulated by the caller.  */
1437 struct charset_list_closure
1439 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list: pushes KEY (the
   charset name under which VALUE is stored) onto the list referenced by
   the closure.  VALUE itself is not used by the visible code.
   NOTE(review): the return statement is missing from this listing.  */
1443 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1444 void *charset_list_closure)
1446 /* This function can GC */
1447 struct charset_list_closure *chcl =
1448 (struct charset_list_closure*) charset_list_closure;
1449 Lisp_Object *charset_list = chcl->charset_list;
1451 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Implementation note: starts from an empty list protected with GCPRO1,
   maps add_charset_to_list_mapper over Vcharset_hash_table so that every
   key is consed onto the list, and returns the list.  NOTE(review): the
   matching UNGCPRO is presumably on one of the source lines missing
   from this listing (1468-1469) -- confirm.  */
1455 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1456 Return a list of the names of all defined charsets.
1460 Lisp_Object charset_list = Qnil;
1461 struct gcpro gcpro1;
1462 struct charset_list_closure charset_list_closure;
1464 GCPRO1 (charset_list);
1465 charset_list_closure.charset_list = &charset_list;
1466 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1467 &charset_list_closure);
1470 return charset_list;
/* Resolves CHARSET through Fget_charset and returns the XCHARSET_NAME
   of the resulting charset object. */
1473 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1474 Return the name of charset CHARSET.
1478 return XCHARSET_NAME (Fget_charset (charset));
1481 /* #### SJT Should generic properties be allowed? */
/* Implementation outline: check NAME (symbol) and DOC-STRING (string
   unless nil), refuse to redefine a name Ffind_charset already knows,
   parse PROPS keyword by keyword, validate FINAL and the
   DIMENSION/CHARS/FINAL namespace via CHARSET_BY_ATTRIBUTES, obtain an
   id from get_unallocated_leading_byte, fill in defaults for the
   doc string, registry, short and long names, and finally call
   make_charset.  A supplied ccl-program is stored afterwards.

   #### NOTE(review): in the property loop, the test for Qlong_name is a
   plain `if' rather than `else if', so it starts a new if/else chain.
   A 'short-name keyword is handled by the first `if' and then appears
   to fall through that second chain to the final
   "Unrecognized property" error.  Confirm, and make it `else if' if
   so. */
1482 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1483 Define a new character set.
1484 This function is for use with Mule support.
1485 NAME is a symbol, the name by which the character set is normally referred.
1486 DOC-STRING is a string describing the character set.
1487 PROPS is a property list, describing the specific nature of the
1488 character set. Recognized properties are:
1490 'short-name Short version of the charset name (ex: Latin-1)
1491 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1492 'registry A regular expression matching the font registry field for
1494 'dimension Number of octets used to index a character in this charset.
1495 Either 1 or 2. Defaults to 1.
1496 If UTF-2000 feature is enabled, 3 or 4 are also available.
1497 'columns Number of columns used to display a character in this charset.
1498 Only used in TTY mode. (Under X, the actual width of a
1499 character can be derived from the font used to display the
1500 characters.) If unspecified, defaults to the dimension
1501 (this is almost always the correct value).
1502 'chars Number of characters in each dimension (94 or 96).
1503 Defaults to 94. Note that if the dimension is 2, the
1504 character set thus described is 94x94 or 96x96.
1505 If UTF-2000 feature is enabled, 128 or 256 are also available.
1506 'final Final byte of ISO 2022 escape sequence. Must be
1507 supplied. Each combination of (DIMENSION, CHARS) defines a
1508 separate namespace for final bytes. Note that ISO
1509 2022 restricts the final byte to the range
1510 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1511 dimension == 2. Note also that final bytes in the range
1512 0x30 - 0x3F are reserved for user-defined (not official)
1514 'graphic 0 (use left half of font on output) or 1 (use right half
1515 of font on output). Defaults to 0. For example, for
1516 a font whose registry is ISO8859-1, the left half
1517 (octets 0x20 - 0x7F) is the `ascii' character set, while
1518 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1519 character set. With 'graphic set to 0, the octets
1520 will have their high bit cleared; with it set to 1,
1521 the octets will have their high bit set.
1522 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1524 'ccl-program A compiled CCL program used to convert a character in
1525 this charset into an index into the font. This is in
1526 addition to the 'graphic property. The CCL program
1527 is passed the octets of the character, with the high
1528 bit cleared and set depending upon whether the value
1529 of the 'graphic property is 0 or 1.
1530 'mother [UTF-2000 only] Base coded-charset.
1531 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1532 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1533 'code-offset [UTF-2000 only] Offset for a code-point of a base
1535 'conversion [UTF-2000 only] Conversion for a code-point of a base
1536 coded-charset (94x60 or 94x94x60).
1538 (name, doc_string, props))
1540 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1541 int direction = CHARSET_LEFT_TO_RIGHT;
1542 Lisp_Object registry = Qnil;
1543 Lisp_Object charset;
1544 Lisp_Object ccl_program = Qnil;
1545 Lisp_Object short_name = Qnil, long_name = Qnil;
1546 Lisp_Object mother = Qnil;
1547 int min_code = 0, max_code = 0, code_offset = 0;
1548 int byte_offset = -1;
1551 CHECK_SYMBOL (name);
1552 if (!NILP (doc_string))
1553 CHECK_STRING (doc_string);
1555 charset = Ffind_charset (name);
1556 if (!NILP (charset))
1557 signal_simple_error ("Cannot redefine existing charset", name);
1560 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1562 if (EQ (keyword, Qshort_name))
1564 CHECK_STRING (value);
1568 if (EQ (keyword, Qlong_name))
1570 CHECK_STRING (value);
1574 else if (EQ (keyword, Qdimension))
1577 dimension = XINT (value);
1578 if (dimension < 1 ||
1585 signal_simple_error ("Invalid value for 'dimension", value);
1588 else if (EQ (keyword, Qchars))
1591 chars = XINT (value);
1592 if (chars != 94 && chars != 96
1594 && chars != 128 && chars != 256
1597 signal_simple_error ("Invalid value for 'chars", value);
1600 else if (EQ (keyword, Qcolumns))
1603 columns = XINT (value);
1604 if (columns != 1 && columns != 2)
1605 signal_simple_error ("Invalid value for 'columns", value);
1608 else if (EQ (keyword, Qgraphic))
1611 graphic = XINT (value);
1619 signal_simple_error ("Invalid value for 'graphic", value);
1622 else if (EQ (keyword, Qregistry))
1624 CHECK_STRING (value);
1628 else if (EQ (keyword, Qdirection))
1630 if (EQ (value, Ql2r))
1631 direction = CHARSET_LEFT_TO_RIGHT;
1632 else if (EQ (value, Qr2l))
1633 direction = CHARSET_RIGHT_TO_LEFT;
1635 signal_simple_error ("Invalid value for 'direction", value);
1638 else if (EQ (keyword, Qfinal))
1640 CHECK_CHAR_COERCE_INT (value);
1641 final = XCHAR (value);
1642 if (final < '0' || final > '~')
1643 signal_simple_error ("Invalid value for 'final", value);
1647 else if (EQ (keyword, Qmother))
1649 mother = Fget_charset (value);
1652 else if (EQ (keyword, Qmin_code))
1655 min_code = XUINT (value);
1658 else if (EQ (keyword, Qmax_code))
1661 max_code = XUINT (value);
1664 else if (EQ (keyword, Qcode_offset))
1667 code_offset = XUINT (value);
1670 else if (EQ (keyword, Qconversion))
1672 if (EQ (value, Q94x60))
1673 conversion = CONVERSION_94x60;
1674 else if (EQ (value, Q94x94x60))
1675 conversion = CONVERSION_94x94x60;
1677 signal_simple_error ("Unrecognized conversion", value);
1681 else if (EQ (keyword, Qccl_program))
1683 struct ccl_program test_ccl;
1685 if (setup_ccl_program (&test_ccl, value) < 0)
1686 signal_simple_error ("Invalid value for 'ccl-program", value);
1687 ccl_program = value;
1691 signal_simple_error ("Unrecognized property", keyword);
1697 error ("'final must be specified");
1699 if (dimension == 2 && final > 0x5F)
1701 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1704 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1705 CHARSET_LEFT_TO_RIGHT)) ||
1706 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1707 CHARSET_RIGHT_TO_LEFT)))
1709 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1711 id = get_unallocated_leading_byte (dimension);
1713 if (NILP (doc_string))
1714 doc_string = build_string ("");
1716 if (NILP (registry))
1717 registry = build_string ("");
1719 if (NILP (short_name))
1720 XSETSTRING (short_name, XSYMBOL (name)->name);
1722 if (NILP (long_name))
1723 long_name = doc_string;
1726 columns = dimension;
1728 if (byte_offset < 0)
1732 else if (chars == 96)
1738 charset = make_charset (id, name, chars, dimension, columns, graphic,
1739 final, direction, short_name, long_name,
1740 doc_string, registry,
1741 Qnil, min_code, max_code, code_offset, byte_offset,
1742 mother, conversion);
1743 if (!NILP (ccl_program))
1744 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Creates NEW-NAME from the attributes read off CHARSET (chars,
   dimension, columns, graphic, final, names, doc string, registry) with
   the direction flipped, using a fresh id from
   get_unallocated_leading_byte, and links the two charsets to each
   other through their reverse-direction slots.  Errors are signalled
   when CHARSET already has a reverse-direction charset or when NEW-NAME
   already names a charset. */
1748 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1750 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1751 NEW-NAME is the name of the new charset. Return the new charset.
1753 (charset, new_name))
1755 Lisp_Object new_charset = Qnil;
1756 int id, chars, dimension, columns, graphic, final;
1758 Lisp_Object registry, doc_string, short_name, long_name;
1761 charset = Fget_charset (charset);
1762 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1763 signal_simple_error ("Charset already has reverse-direction charset",
1766 CHECK_SYMBOL (new_name);
1767 if (!NILP (Ffind_charset (new_name)))
1768 signal_simple_error ("Cannot redefine existing charset", new_name);
1770 cs = XCHARSET (charset);
1772 chars = CHARSET_CHARS (cs);
1773 dimension = CHARSET_DIMENSION (cs);
1774 columns = CHARSET_COLUMNS (cs);
1775 id = get_unallocated_leading_byte (dimension);
1777 graphic = CHARSET_GRAPHIC (cs);
1778 final = CHARSET_FINAL (cs);
/* Flip the direction: start from right-to-left and switch to
   left-to-right when the source charset is itself right-to-left. */
1779 direction = CHARSET_RIGHT_TO_LEFT;
1780 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1781 direction = CHARSET_LEFT_TO_RIGHT;
1782 doc_string = CHARSET_DOC_STRING (cs);
1783 short_name = CHARSET_SHORT_NAME (cs);
1784 long_name = CHARSET_LONG_NAME (cs);
1785 registry = CHARSET_REGISTRY (cs);
1787 new_charset = make_charset (id, new_name, chars, dimension, columns,
1788 graphic, final, direction, short_name, long_name,
1789 doc_string, registry,
/* NOTE(review): two alternative tails for the argument list follow (one
   copying the decoding table, code range and offsets from CS, one
   passing empty defaults); presumably a preprocessor conditional picks
   one of them -- confirm. */
1791 CHARSET_DECODING_TABLE(cs),
1792 CHARSET_MIN_CODE(cs),
1793 CHARSET_MAX_CODE(cs),
1794 CHARSET_CODE_OFFSET(cs),
1795 CHARSET_BYTE_OFFSET(cs),
1797 CHARSET_CONVERSION (cs)
1799 Qnil, 0, 0, 0, 0, Qnil, 0
/* Cross-link the pair so each records the other as its
   reverse-direction charset. */
1803 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1804 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Checks that ALIAS is a symbol, resolves CHARSET with Fget_charset and
   stores the charset object under ALIAS in Vcharset_hash_table.  The
   return value is whatever Fputhash returns. */
1809 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1810 Define symbol ALIAS as an alias for CHARSET.
1814 CHECK_SYMBOL (alias);
1815 charset = Fget_charset (charset);
1816 return Fputhash (alias, charset, Vcharset_hash_table);
1819 /* #### Reverse direction charsets not yet implemented. */
/* Accessor: resolves CHARSET with Fget_charset and returns the contents
   of its reverse-direction slot unchanged. */
1821 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1823 Return the reverse-direction charset parallel to CHARSET, if any.
1824 This is the charset with the same properties (in particular, the same
1825 dimension, number of characters per dimension, and final byte) as
1826 CHARSET but whose characters are displayed in the opposite direction.
1830 charset = Fget_charset (charset);
1831 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Validates each argument before the lookup: DIMENSION must be 1 or 2,
   CHARS 94 or 96, FINAL between '0' and '~' (and at most 0x5F when
   DIMENSION is 2), DIRECTION l2r, r2l or nil.  The charset is then
   fetched with CHARSET_BY_ATTRIBUTES, trying left-to-right and
   right-to-left entries when no direction was given.

   NOTE(review): the visible return statement yields
   XCHARSET_NAME (obj), i.e. the charset's name, whereas the doc string
   speaks of returning a charset -- confirm which is intended. */
1835 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1836 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1837 If DIRECTION is omitted, both directions will be checked (left-to-right
1838 will be returned if character sets exist for both directions).
1840 (dimension, chars, final, direction))
1842 int dm, ch, fi, di = -1;
1843 Lisp_Object obj = Qnil;
1845 CHECK_INT (dimension);
1846 dm = XINT (dimension);
1847 if (dm < 1 || dm > 2)
1848 signal_simple_error ("Invalid value for DIMENSION", dimension);
1852 if (ch != 94 && ch != 96)
1853 signal_simple_error ("Invalid value for CHARS", chars);
1855 CHECK_CHAR_COERCE_INT (final);
1857 if (fi < '0' || fi > '~')
1858 signal_simple_error ("Invalid value for FINAL", final);
1860 if (EQ (direction, Ql2r))
1861 di = CHARSET_LEFT_TO_RIGHT;
1862 else if (EQ (direction, Qr2l))
1863 di = CHARSET_RIGHT_TO_LEFT;
1864 else if (!NILP (direction))
1865 signal_simple_error ("Invalid value for DIRECTION", direction);
1867 if (dm == 2 && fi > 0x5F)
1869 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1873 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1875 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1878 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1881 return XCHARSET_NAME (obj);
/* Accessor for the short-name slot; CHARSET is resolved through
   Fget_charset first. */
1885 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1886 Return short name of CHARSET.
1890 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Accessor for the long-name slot; CHARSET is resolved through
   Fget_charset first. */
1893 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1894 Return long name of CHARSET.
1898 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* The "description" is the charset's doc-string slot
   (XCHARSET_DOC_STRING); CHARSET is resolved through Fget_charset. */
1901 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1902 Return description of CHARSET.
1906 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Returns XCHARSET_DIMENSION of the resolved charset as a Lisp integer
   (make_int). */
1909 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1910 Return dimension of CHARSET.
1914 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property reader.  CHARSET is resolved with Fget_charset, PROP
   must be a symbol, and PROP is then compared against each known
   property symbol in turn.  Integer slots are returned through
   make_int; 'final comes back as a character, or nil when the stored
   final byte is 0; 'direction comes back as l2r or r2l; a
   reverse-direction charset is reported by name.  A PROP that matches
   nothing signals "Unrecognized charset property name". */
1917 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1918 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1919 Recognized properties are those listed in `make-charset', as well as
1920 'name and 'doc-string.
1926 charset = Fget_charset (charset);
1927 cs = XCHARSET (charset);
1929 CHECK_SYMBOL (prop);
1930 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1931 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1932 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1933 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1934 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1935 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1936 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1937 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1938 Qnil : make_char (CHARSET_FINAL (cs));
1939 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1940 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1941 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1942 if (EQ (prop, Qdirection))
1943 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1944 if (EQ (prop, Qreverse_direction_charset))
1946 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1947 /* #### Is this translation OK? If so, error checking sufficient? */
1948 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
/* NOTE(review): 'mother, 'min-code and 'max-code can be read below, but
   no branch for 'code-offset or 'conversion is visible, although
   make-charset accepts both -- check whether that is intentional. */
1951 if (EQ (prop, Qmother))
1952 return CHARSET_MOTHER (cs);
1953 if (EQ (prop, Qmin_code))
1954 return make_int (CHARSET_MIN_CODE (cs));
1955 if (EQ (prop, Qmax_code))
1956 return make_int (CHARSET_MAX_CODE (cs));
1958 signal_simple_error ("Unrecognized charset property name", prop);
1959 return Qnil; /* not reached */
/* The identification number is XCHARSET_LEADING_BYTE of the resolved
   charset, returned as a Lisp integer. */
1962 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1963 Return charset identification number of CHARSET.
1967 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* Fset_charset_ccl_program vets CCL-PROGRAM by running
   setup_ccl_program on a scratch struct ccl_program; a negative result
   signals "Invalid ccl-program".  Only after that check is the program
   stored in the charset's ccl-program slot. */
1970 /* #### We need to figure out which properties we really want to
1973 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1974 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1976 (charset, ccl_program))
1978 struct ccl_program test_ccl;
1980 charset = Fget_charset (charset);
1981 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1982 signal_simple_error ("Invalid ccl-program", ccl_program);
1983 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device, looks CHARSET up in that device's
   charset_font_cache and, when an entry exists, empties the hash table
   found there with Fclrhash.  Called from Fset_charset_registry. */
1988 invalidate_charset_font_caches (Lisp_Object charset)
1990 /* Invalidate font cache entries for charset on all devices. */
1991 Lisp_Object devcons, concons, hash_table;
1992 DEVICE_LOOP_NO_BREAK (devcons, concons)
1994 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound serves as the not-found marker, so devices without a cache
   entry for this charset are simply skipped. */
1995 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1996 if (!UNBOUNDP (hash_table))
1997 Fclrhash (hash_table);
/* After checking that REGISTRY is a string, stores it in the charset,
   invalidates the per-device font caches for that charset, and reports
   a change of the default face's font property (locale Qglobal) via
   face_property_was_changed. */
2001 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2002 Set the 'registry property of CHARSET to REGISTRY.
2004 (charset, registry))
2006 charset = Fget_charset (charset);
2007 CHECK_STRING (registry);
2008 XCHARSET_REGISTRY (charset) = registry;
2009 invalidate_charset_font_caches (charset);
2010 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* The mapping table is the charset's decoding-table slot
   (XCHARSET_DECODING_TABLE); CHARSET is resolved through
   Fget_charset. */
2015 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2016 Return mapping-table of CHARSET.
2020 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* TABLE must be nil or a vector; anything else signals
   wrong-type-argument with "vector-or-nil-p".  A vector is first vetted
   by decoding_table_check_elements, whose failures surface as
   "Too big table", "Invalid element is found" or "Something wrong".
   The table is then walked according to CHARSET_DIMENSION, and the
   entries are registered through Fput_char_attribute under the
   charset's name.  Code points are built from the vector index plus
   CHARSET_BYTE_OFFSET; for nested vectors the outer index is shifted
   left by 8 bits. */
2023 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2024 Set mapping-table of CHARSET to TABLE.
2028 struct Lisp_Charset *cs;
2032 charset = Fget_charset (charset);
2033 cs = XCHARSET (charset);
2037 CHARSET_DECODING_TABLE(cs) = Qnil;
2040 else if (VECTORP (table))
2042 int ccs_len = CHARSET_BYTE_SIZE (cs);
2043 int ret = decoding_table_check_elements (table,
2044 CHARSET_DIMENSION (cs),
2049 signal_simple_error ("Too big table", table);
2051 signal_simple_error ("Invalid element is found", table);
2053 signal_simple_error ("Something wrong", table);
2055 CHARSET_DECODING_TABLE(cs) = Qnil;
2058 signal_error (Qwrong_type_argument,
2059 list2 (build_translated_string ("vector-or-nil-p"),
2062 byte_offset = CHARSET_BYTE_OFFSET (cs);
2063 switch (CHARSET_DIMENSION (cs))
2066 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2068 Lisp_Object c = XVECTOR_DATA(table)[i];
2071 Fput_char_attribute (c, XCHARSET_NAME (charset),
2072 make_int (i + byte_offset));
2076 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2078 Lisp_Object v = XVECTOR_DATA(table)[i];
2084 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2086 Lisp_Object c = XVECTOR_DATA(v)[j];
2090 (c, XCHARSET_NAME (charset),
2091 make_int ( ( (i + byte_offset) << 8 )
2097 Fput_char_attribute (v, XCHARSET_NAME (charset),
2098 make_int (i + byte_offset));
2107 /************************************************************************/
2108 /* Lisp primitives for working with characters */
2109 /************************************************************************/
/* Resolves CHARSET, then decodes the code point with DECODE_CHAR when
   DEFINED_ONLY is nil and with DECODE_DEFINED_CHAR otherwise.  A
   negative decoding result is turned into nil.  Charsets whose graphic
   value is 1 get an extra adjustment of the code point first --
   presumably clearing the high bits; TODO confirm. */
2112 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2113 Make a character from CHARSET and code-point CODE.
2114 If DEFINED_ONLY is non-nil, builtin character is not returned.
2115 If corresponding character is not found, nil is returned.
2117 (charset, code, defined_only))
2121 charset = Fget_charset (charset);
2124 if (XCHARSET_GRAPHIC (charset) == 1)
2126 if (NILP (defined_only))
2127 c = DECODE_CHAR (charset, c);
2129 c = DECODE_DEFINED_CHAR (charset, c);
2130 return c >= 0 ? make_char (c) : Qnil;
/* latin-viscii is special-cased: the character is decoded with
   Fdecode_char and then looked up, via Fget_char_attribute, in
   latin-viscii-lower and latin-viscii-upper; CHARSET is switched to
   whichever of the two applies.  The actual decoding is done by
   decode_builtin_char; when that yields a negative value the function
   falls back to Fdecode_char. */
2133 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2134 Make a builtin character from CHARSET and code-point CODE.
2140 charset = Fget_charset (charset);
2142 if (EQ (charset, Vcharset_latin_viscii))
2144 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2150 (ret = Fget_char_attribute (chr,
2151 Vcharset_latin_viscii_lower,
2154 charset = Vcharset_latin_viscii_lower;
2158 (ret = Fget_char_attribute (chr,
2159 Vcharset_latin_viscii_upper,
2162 charset = Vcharset_latin_viscii_upper;
2169 if (XCHARSET_GRAPHIC (charset) == 1)
2172 c = decode_builtin_char (charset, c);
2173 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* The valid octet range depends on the charset: ascii 0..127,
   control-1 0..31, 256-character sets 0..255, 94-character sets
   33..126, everything else 32..127.  Octets outside that range are
   rejected with args_out_of_range_3, and the character is assembled
   with MAKE_CHAR.  For a charset of dimension one, ARG2 is expected to
   be nil (see the error message) and 0 is passed as the second
   octet. */
2177 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2178 Make a character from CHARSET and octets ARG1 and ARG2.
2179 ARG2 is required only for characters from two-dimensional charsets.
2180 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2181 character s with caron.
2183 (charset, arg1, arg2))
2187 int lowlim, highlim;
2189 charset = Fget_charset (charset);
2190 cs = XCHARSET (charset);
2192 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2193 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2195 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2197 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2198 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2201 /* It is useful (and safe, according to Olivier Galibert) to strip
2202 the 8th bit off ARG1 and ARG2 because it allows programmers to
2203 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2204 Latin 2 code of the character. */
2212 if (a1 < lowlim || a1 > highlim)
2213 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2215 if (CHARSET_DIMENSION (cs) == 1)
2219 ("Charset is of dimension one; second octet must be nil", arg2);
2220 return make_char (MAKE_CHAR (charset, a1, 0));
/* Only the low seven bits of ARG2 are kept before the range check. */
2229 a2 = XINT (arg2) & 0x7f;
2231 if (a2 < lowlim || a2 > highlim)
2232 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2234 return make_char (MAKE_CHAR (charset, a1, a2));
/* Coerces CHARACTER to a character and returns XCHARSET_NAME of the
   CHAR_CHARSET result, i.e. the charset's name rather than the charset
   object. */
2237 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2238 Return the character set of CHARACTER.
2242 CHECK_CHAR_COERCE_INT (character);
2244 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Splits CHARACTER with BREAKUP_CHAR and returns octet0 when N is nil
   or 0 and octet1 when N is 1.  Any other N signals
   "Octet number must be 0 or 1". */
2247 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2248 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2249 N defaults to 0 if omitted.
2253 Lisp_Object charset;
2256 CHECK_CHAR_COERCE_INT (character);
2258 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2260 if (NILP (n) || EQ (n, Qzero))
2261 return make_int (octet0);
2262 else if (EQ (n, make_int (1)))
2263 return make_int (octet1);
2265 signal_simple_error ("Octet number must be 0 or 1", n);
/* Coerces CHARACTER, resolves CHARSET and asks charset_code_point for
   the code point; a non-negative result is returned as a Lisp
   integer. */
2269 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2270 Return code-point of CHARACTER in specified CHARSET.
2272 (character, charset))
2276 CHECK_CHAR_COERCE_INT (character);
2277 charset = Fget_charset (charset);
2278 code_point = charset_code_point (charset, XCHAR (character));
2279 if (code_point >= 0)
2280 return make_int (code_point);
/* Two implementations are visible in Fsplit_char, presumably selected
   by a preprocessor conditional (TODO confirm).  The first obtains a
   code point with ENCODE_CHAR, conses its bytes (code_point & 255) onto
   RC while the dimension counter is positive, and finally prepends the
   charset name.  The second uses BREAKUP_CHAR and builds a two- or
   three-element list depending on the charset's dimension. */
2286 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2287 Return list of charset and one or two position-codes of CHARACTER.
2291 /* This function can GC */
2292 struct gcpro gcpro1, gcpro2;
2293 Lisp_Object charset = Qnil;
2294 Lisp_Object rc = Qnil;
2302 GCPRO2 (charset, rc);
2303 CHECK_CHAR_COERCE_INT (character);
2306 code_point = ENCODE_CHAR (XCHAR (character), charset);
2307 dimension = XCHARSET_DIMENSION (charset);
2308 while (dimension > 0)
2310 rc = Fcons (make_int (code_point & 255), rc);
2314 rc = Fcons (XCHARSET_NAME (charset), rc);
/* BREAKUP_CHAR-based variant: octets C1 (and C2 for dimension two)
   follow the charset name in the result. */
2316 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2318 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2320 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2324 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2333 #ifdef ENABLE_COMPOSITE_CHARS
2334 /************************************************************************/
2335 /* composite character functions */
2336 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path shown
   here builds a new character in Vcharset_composite from the next free
   row/column pair, records it in both the char-to-string and the
   string-to-char tables, and advances the column counter, wrapping to
   column 32 of the next row once the column reaches 128.  When the row
   counter has reached 128, "No more composite chars available" is
   signalled. */
2339 lookup_composite_char (Bufbyte *str, int len)
2341 Lisp_Object lispstr = make_string (str, len);
2342 Lisp_Object ch = Fgethash (lispstr,
2343 Vcomposite_char_string2char_hash_table,
2349 if (composite_char_row_next >= 128)
2350 signal_simple_error ("No more composite chars available", lispstr);
2351 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2352 composite_char_col_next);
/* Register the new character in both directions. */
2353 Fputhash (make_char (emch), lispstr,
2354 Vcomposite_char_char2string_hash_table);
2355 Fputhash (lispstr, make_char (emch),
2356 Vcomposite_char_string2char_hash_table);
2357 composite_char_col_next++;
2358 if (composite_char_col_next >= 128)
2360 composite_char_col_next = 32;
2361 composite_char_row_next++;
/* Reverse lookup: fetches the string recorded for CH in
   Vcomposite_char_char2string_hash_table and asserts that an entry was
   found. */
2370 composite_char_string (Emchar ch)
2372 Lisp_Object str = Fgethash (make_char (ch),
2373 Vcomposite_char_char2string_hash_table,
2375 assert (!UNBOUNDP (str));
/* Thin wrapper: type-checks STRING and hands its data and length to
   lookup_composite_char, returning the result as a Lisp character. */
2379 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2380 Convert a string into a single composite character.
2381 The character is the result of overstriking all the characters in
2386 CHECK_STRING (string);
2387 return make_char (lookup_composite_char (XSTRING_DATA (string),
2388 XSTRING_LENGTH (string)));
/* Rejects characters whose leading byte is not LEADING_BYTE_COMPOSITE
   with "Must be composite char", then delegates to
   composite_char_string. */
2391 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2392 Return a string of the characters comprising a composite character.
2400 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2401 signal_simple_error ("Must be composite char", ch);
2402 return composite_char_string (emch);
2404 #endif /* ENABLE_COMPOSITE_CHARS */
2407 /************************************************************************/
2408 /* initialization */
2409 /************************************************************************/
/* Symbol-level initialization: registers the charset lrecord
   implementation, the Lisp primitives defined in this file (DEFSUBR),
   and the symbols used as property names, direction values and charset
   names (defsymbol). */
2412 syms_of_mule_charset (void)
2414 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives defined in this file. */
2416 DEFSUBR (Fcharsetp);
2417 DEFSUBR (Ffind_charset);
2418 DEFSUBR (Fget_charset);
2419 DEFSUBR (Fcharset_list);
2420 DEFSUBR (Fcharset_name);
2421 DEFSUBR (Fmake_charset);
2422 DEFSUBR (Fmake_reverse_direction_charset);
2423 /* DEFSUBR (Freverse_direction_charset); */
2424 DEFSUBR (Fdefine_charset_alias);
2425 DEFSUBR (Fcharset_from_attributes);
2426 DEFSUBR (Fcharset_short_name);
2427 DEFSUBR (Fcharset_long_name);
2428 DEFSUBR (Fcharset_description);
2429 DEFSUBR (Fcharset_dimension);
2430 DEFSUBR (Fcharset_property);
2431 DEFSUBR (Fcharset_id);
2432 DEFSUBR (Fset_charset_ccl_program);
2433 DEFSUBR (Fset_charset_registry);
2435 DEFSUBR (Fcharset_mapping_table);
2436 DEFSUBR (Fset_charset_mapping_table);
2440 DEFSUBR (Fdecode_char);
2441 DEFSUBR (Fdecode_builtin_char);
2442 DEFSUBR (Fencode_char);
2444 DEFSUBR (Fmake_char);
2445 DEFSUBR (Fchar_charset);
2446 DEFSUBR (Fchar_octet);
2447 DEFSUBR (Fsplit_char);
2449 #ifdef ENABLE_COMPOSITE_CHARS
2450 DEFSUBR (Fmake_composite_char);
2451 DEFSUBR (Fcomposite_char_string);
/* Predicate, property-name, conversion and direction symbols. */
2454 defsymbol (&Qcharsetp, "charsetp");
2455 defsymbol (&Qregistry, "registry");
2456 defsymbol (&Qfinal, "final");
2457 defsymbol (&Qgraphic, "graphic");
2458 defsymbol (&Qdirection, "direction");
2459 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2460 defsymbol (&Qshort_name, "short-name");
2461 defsymbol (&Qlong_name, "long-name");
2463 defsymbol (&Qmother, "mother");
2464 defsymbol (&Qmin_code, "min-code");
2465 defsymbol (&Qmax_code, "max-code");
2466 defsymbol (&Qcode_offset, "code-offset");
2467 defsymbol (&Qconversion, "conversion");
2468 defsymbol (&Q94x60, "94x60");
2469 defsymbol (&Q94x94x60, "94x94x60");
2472 defsymbol (&Ql2r, "l2r");
2473 defsymbol (&Qr2l, "r2l");
2475 /* Charsets, compatible with FSF 20.3
2476 Naming convention is Script-Charset[-Edition] */
2477 defsymbol (&Qascii, "ascii");
2478 defsymbol (&Qcontrol_1, "control-1");
2479 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2480 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2481 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2482 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2483 defsymbol (&Qthai_tis620, "thai-tis620");
2484 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2485 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2486 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2487 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2488 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2489 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2490 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2491 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2492 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2493 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2494 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2495 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2496 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2497 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2498 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2499 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* NOTE(review): the names from here to china3-jef/ethiopic-ucs look
   specific to the UTF-2000 build; the conditional guarding them is
   presumably just not visible here -- confirm. */
2501 defsymbol (&Qucs, "ucs");
2502 defsymbol (&Qucs_bmp, "ucs-bmp");
2503 defsymbol (&Qucs_smp, "ucs-smp");
2504 defsymbol (&Qucs_sip, "ucs-sip");
2505 defsymbol (&Qucs_cns, "ucs-cns");
2506 defsymbol (&Qucs_jis, "ucs-jis");
2507 defsymbol (&Qucs_ks, "ucs-ks");
2508 defsymbol (&Qucs_big5, "ucs-big5");
2509 defsymbol (&Qlatin_viscii, "latin-viscii");
2510 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2511 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2512 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2513 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2514 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2515 defsymbol (&Qideograph_gt, "ideograph-gt");
2516 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2517 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2518 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2519 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2520 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2521 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2522 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2523 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2524 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2525 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2526 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2527 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2528 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2529 defsymbol (&Qchinese_big5, "chinese-big5");
2530 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2531 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2532 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2533 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2534 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2535 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2536 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2537 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2538 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2539 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2540 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2541 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2542 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2543 defsymbol (&Qchina3_jef, "china3-jef");
2544 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2545 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2547 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2548 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2550 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization: allocates the zero-filled
   charset_lookup structure and registers it with the dumper, fills the
   lookup tables with Qnil, seeds the leading-byte allocation counters,
   and defines the Lisp variables leading-code-private-11 and
   default-coded-charset-priority-list. */
2554 vars_of_mule_charset (void)
2561 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2562 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2564 /* Table of charsets indexed by leading byte. */
2565 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2566 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): two differently shaped initialization loops for
   charset_by_attributes follow (two and three index levels); presumably
   only one of them is compiled, chosen by a conditional -- confirm. */
2569 /* Table of charsets indexed by type/final-byte. */
2570 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2571 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2572 chlook->charset_by_attributes[i][j] = Qnil;
2574 /* Table of charsets indexed by type/final-byte/direction. */
2575 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2576 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2577 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2578 chlook->charset_by_attributes[i][j][k] = Qnil;
2582 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2584 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2585 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* NOTE(review): the assignment below is repeated verbatim right after
   the DEFVAR_INT; one of the two looks redundant. */
2589 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2590 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2591 Leading-code of private TYPE9N charset of column-width 1.
2593 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2597 Vdefault_coded_charset_priority_list = Qnil;
2598 DEFVAR_LISP ("default-coded-charset-priority-list",
2599 &Vdefault_coded_charset_priority_list /*
2600 Default order of preferred coded-character-sets.
2606 complex_vars_of_mule_charset (void)
2608 staticpro (&Vcharset_hash_table);
2609 Vcharset_hash_table =
2610 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2612 /* Predefined character sets. We store them into variables for
2616 staticpro (&Vcharset_ucs);
2618 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2619 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2620 build_string ("UCS"),
2621 build_string ("UCS"),
2622 build_string ("ISO/IEC 10646"),
2624 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2625 staticpro (&Vcharset_ucs_bmp);
2627 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2628 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2629 build_string ("BMP"),
2630 build_string ("UCS-BMP"),
2631 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2633 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2634 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2635 staticpro (&Vcharset_ucs_smp);
2637 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2638 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2639 build_string ("SMP"),
2640 build_string ("UCS-SMP"),
2641 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2642 build_string ("UCS00-1"),
2643 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2644 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2645 staticpro (&Vcharset_ucs_sip);
2647 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2648 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2649 build_string ("SIP"),
2650 build_string ("UCS-SIP"),
2651 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2652 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2653 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2654 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2655 staticpro (&Vcharset_ucs_cns);
2657 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2658 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2659 build_string ("UCS for CNS"),
2660 build_string ("UCS for CNS 11643"),
2661 build_string ("ISO/IEC 10646 for CNS 11643"),
2664 Qnil, CONVERSION_IDENTICAL);
2665 staticpro (&Vcharset_ucs_jis);
2667 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2668 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2669 build_string ("UCS for JIS"),
2670 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2671 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2673 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2674 staticpro (&Vcharset_ucs_ks);
2676 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2677 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2678 build_string ("UCS for KS"),
2679 build_string ("UCS for CCS defined by KS"),
2680 build_string ("ISO/IEC 10646 for Korean Standards"),
2682 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2683 staticpro (&Vcharset_ucs_big5);
2685 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2686 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2687 build_string ("UCS for Big5"),
2688 build_string ("UCS for Big5"),
2689 build_string ("ISO/IEC 10646 for Big5"),
2691 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2693 # define MIN_CHAR_THAI 0
2694 # define MAX_CHAR_THAI 0
2695 /* # define MIN_CHAR_HEBREW 0 */
2696 /* # define MAX_CHAR_HEBREW 0 */
2697 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2698 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2700 staticpro (&Vcharset_ascii);
2702 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2703 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2704 build_string ("ASCII"),
2705 build_string ("ASCII)"),
2706 build_string ("ASCII (ISO646 IRV)"),
2707 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2708 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2709 staticpro (&Vcharset_control_1);
2710 Vcharset_control_1 =
2711 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2712 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2713 build_string ("C1"),
2714 build_string ("Control characters"),
2715 build_string ("Control characters 128-191"),
2717 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2718 staticpro (&Vcharset_latin_iso8859_1);
2719 Vcharset_latin_iso8859_1 =
2720 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2721 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2722 build_string ("Latin-1"),
2723 build_string ("ISO8859-1 (Latin-1)"),
2724 build_string ("ISO8859-1 (Latin-1)"),
2725 build_string ("iso8859-1"),
2726 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2727 staticpro (&Vcharset_latin_iso8859_2);
2728 Vcharset_latin_iso8859_2 =
2729 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2730 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2731 build_string ("Latin-2"),
2732 build_string ("ISO8859-2 (Latin-2)"),
2733 build_string ("ISO8859-2 (Latin-2)"),
2734 build_string ("iso8859-2"),
2735 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2736 staticpro (&Vcharset_latin_iso8859_3);
2737 Vcharset_latin_iso8859_3 =
2738 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2739 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("Latin-3"),
2741 build_string ("ISO8859-3 (Latin-3)"),
2742 build_string ("ISO8859-3 (Latin-3)"),
2743 build_string ("iso8859-3"),
2744 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2745 staticpro (&Vcharset_latin_iso8859_4);
2746 Vcharset_latin_iso8859_4 =
2747 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2748 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2749 build_string ("Latin-4"),
2750 build_string ("ISO8859-4 (Latin-4)"),
2751 build_string ("ISO8859-4 (Latin-4)"),
2752 build_string ("iso8859-4"),
2753 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2754 staticpro (&Vcharset_thai_tis620);
2755 Vcharset_thai_tis620 =
2756 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2757 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2758 build_string ("TIS620"),
2759 build_string ("TIS620 (Thai)"),
2760 build_string ("TIS620.2529 (Thai)"),
2761 build_string ("tis620"),
2762 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2763 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2764 staticpro (&Vcharset_greek_iso8859_7);
2765 Vcharset_greek_iso8859_7 =
2766 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2767 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2768 build_string ("ISO8859-7"),
2769 build_string ("ISO8859-7 (Greek)"),
2770 build_string ("ISO8859-7 (Greek)"),
2771 build_string ("iso8859-7"),
2772 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2773 staticpro (&Vcharset_arabic_iso8859_6);
2774 Vcharset_arabic_iso8859_6 =
2775 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2776 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2777 build_string ("ISO8859-6"),
2778 build_string ("ISO8859-6 (Arabic)"),
2779 build_string ("ISO8859-6 (Arabic)"),
2780 build_string ("iso8859-6"),
2781 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2782 staticpro (&Vcharset_hebrew_iso8859_8);
2783 Vcharset_hebrew_iso8859_8 =
2784 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2785 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2786 build_string ("ISO8859-8"),
2787 build_string ("ISO8859-8 (Hebrew)"),
2788 build_string ("ISO8859-8 (Hebrew)"),
2789 build_string ("iso8859-8"),
2791 0 /* MIN_CHAR_HEBREW */,
2792 0 /* MAX_CHAR_HEBREW */, 0, 32,
2793 Qnil, CONVERSION_IDENTICAL);
2794 staticpro (&Vcharset_katakana_jisx0201);
2795 Vcharset_katakana_jisx0201 =
2796 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2797 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2798 build_string ("JISX0201 Kana"),
2799 build_string ("JISX0201.1976 (Japanese Kana)"),
2800 build_string ("JISX0201.1976 Japanese Kana"),
2801 build_string ("jisx0201\\.1976"),
2802 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2803 staticpro (&Vcharset_latin_jisx0201);
2804 Vcharset_latin_jisx0201 =
2805 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2806 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2807 build_string ("JISX0201 Roman"),
2808 build_string ("JISX0201.1976 (Japanese Roman)"),
2809 build_string ("JISX0201.1976 Japanese Roman"),
2810 build_string ("jisx0201\\.1976"),
2811 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2812 staticpro (&Vcharset_cyrillic_iso8859_5);
2813 Vcharset_cyrillic_iso8859_5 =
2814 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2815 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2816 build_string ("ISO8859-5"),
2817 build_string ("ISO8859-5 (Cyrillic)"),
2818 build_string ("ISO8859-5 (Cyrillic)"),
2819 build_string ("iso8859-5"),
2820 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2821 staticpro (&Vcharset_latin_iso8859_9);
2822 Vcharset_latin_iso8859_9 =
2823 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2824 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2825 build_string ("Latin-5"),
2826 build_string ("ISO8859-9 (Latin-5)"),
2827 build_string ("ISO8859-9 (Latin-5)"),
2828 build_string ("iso8859-9"),
2829 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2830 staticpro (&Vcharset_japanese_jisx0208_1978);
2831 Vcharset_japanese_jisx0208_1978 =
2832 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2833 Qjapanese_jisx0208_1978, 94, 2,
2834 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2835 build_string ("JIS X0208:1978"),
2836 build_string ("JIS X0208:1978 (Japanese)"),
2838 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2839 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2840 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2841 staticpro (&Vcharset_chinese_gb2312);
2842 Vcharset_chinese_gb2312 =
2843 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2844 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2845 build_string ("GB2312"),
2846 build_string ("GB2312)"),
2847 build_string ("GB2312 Chinese simplified"),
2848 build_string ("gb2312"),
2849 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2850 staticpro (&Vcharset_chinese_gb12345);
2851 Vcharset_chinese_gb12345 =
2852 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2853 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2854 build_string ("G1"),
2855 build_string ("GB 12345)"),
2856 build_string ("GB 12345-1990"),
2857 build_string ("GB12345\\(\\.1990\\)?-0"),
2858 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2859 staticpro (&Vcharset_japanese_jisx0208);
2860 Vcharset_japanese_jisx0208 =
2861 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2862 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2863 build_string ("JISX0208"),
2864 build_string ("JIS X0208:1983 (Japanese)"),
2865 build_string ("JIS X0208:1983 Japanese Kanji"),
2866 build_string ("jisx0208\\.1983"),
2867 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2869 staticpro (&Vcharset_japanese_jisx0208_1990);
2870 Vcharset_japanese_jisx0208_1990 =
2871 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2872 Qjapanese_jisx0208_1990, 94, 2,
2873 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2874 build_string ("JISX0208-1990"),
2875 build_string ("JIS X0208:1990 (Japanese)"),
2876 build_string ("JIS X0208:1990 Japanese Kanji"),
2877 build_string ("jisx0208\\.1990"),
2879 MIN_CHAR_JIS_X0208_1990,
2880 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2881 Qnil, CONVERSION_IDENTICAL);
2883 staticpro (&Vcharset_korean_ksc5601);
2884 Vcharset_korean_ksc5601 =
2885 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2886 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2887 build_string ("KSC5601"),
2888 build_string ("KSC5601 (Korean"),
2889 build_string ("KSC5601 Korean Hangul and Hanja"),
2890 build_string ("ksc5601"),
2891 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2892 staticpro (&Vcharset_japanese_jisx0212);
2893 Vcharset_japanese_jisx0212 =
2894 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2895 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2896 build_string ("JISX0212"),
2897 build_string ("JISX0212 (Japanese)"),
2898 build_string ("JISX0212 Japanese Supplement"),
2899 build_string ("jisx0212"),
2900 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Build the pattern string for one CNS 11643 plane.
   N must itself be a string literal (e.g. "1"): it is joined with the
   fixed prefix and the trailing "$" by adjacent string-literal
   concatenation, so the expansion is a single C string constant.
   The uses that follow hand the result to build_string. */
2902 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2903 staticpro (&Vcharset_chinese_cns11643_1);
2904 Vcharset_chinese_cns11643_1 =
2905 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2906 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2907 build_string ("CNS11643-1"),
2908 build_string ("CNS11643-1 (Chinese traditional)"),
2910 ("CNS 11643 Plane 1 Chinese traditional"),
2911 build_string (CHINESE_CNS_PLANE_RE("1")),
2912 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2913 staticpro (&Vcharset_chinese_cns11643_2);
2914 Vcharset_chinese_cns11643_2 =
2915 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2916 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2917 build_string ("CNS11643-2"),
2918 build_string ("CNS11643-2 (Chinese traditional)"),
2920 ("CNS 11643 Plane 2 Chinese traditional"),
2921 build_string (CHINESE_CNS_PLANE_RE("2")),
2922 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2924 staticpro (&Vcharset_latin_tcvn5712);
2925 Vcharset_latin_tcvn5712 =
2926 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2927 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2928 build_string ("TCVN 5712"),
2929 build_string ("TCVN 5712 (VSCII-2)"),
2930 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2931 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2932 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2933 staticpro (&Vcharset_latin_viscii_lower);
2934 Vcharset_latin_viscii_lower =
2935 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2936 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2937 build_string ("VISCII lower"),
2938 build_string ("VISCII lower (Vietnamese)"),
2939 build_string ("VISCII lower (Vietnamese)"),
2940 build_string ("MULEVISCII-LOWER"),
2941 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2942 staticpro (&Vcharset_latin_viscii_upper);
2943 Vcharset_latin_viscii_upper =
2944 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2945 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2946 build_string ("VISCII upper"),
2947 build_string ("VISCII upper (Vietnamese)"),
2948 build_string ("VISCII upper (Vietnamese)"),
2949 build_string ("MULEVISCII-UPPER"),
2950 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2951 staticpro (&Vcharset_latin_viscii);
2952 Vcharset_latin_viscii =
2953 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2954 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2955 build_string ("VISCII"),
2956 build_string ("VISCII 1.1 (Vietnamese)"),
2957 build_string ("VISCII 1.1 (Vietnamese)"),
2958 build_string ("VISCII1\\.1"),
2959 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2960 staticpro (&Vcharset_chinese_big5);
2961 Vcharset_chinese_big5 =
2962 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2963 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2964 build_string ("Big5"),
2965 build_string ("Big5"),
2966 build_string ("Big5 Chinese traditional"),
2967 build_string ("big5"),
2969 0 /* MIN_CHAR_BIG5_CDP */,
2970 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2971 Qnil, CONVERSION_IDENTICAL);
2973 staticpro (&Vcharset_chinese_big5_cdp);
2974 Vcharset_chinese_big5_cdp =
2975 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2976 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2977 build_string ("Big5-CDP"),
2978 build_string ("Big5 + CDP extension"),
2979 build_string ("Big5 with CDP extension"),
2980 build_string ("big5\\.cdp-0"),
2981 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2982 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* Define the charset object for HANZIKU part N.
   Expands to two statements: Vcharset_ideograph_hanziku_N is registered
   with staticpro, then assigned the result of make_charset.
   N is token-pasted (##) into the LEADING_BYTE_HANZIKU_, Qideograph_hanziku_,
   MIN_CHAR_HANZIKU_ and MAX_CHAR_HANZIKU_ identifiers, and stringified (#)
   into the name strings ("HZK-N", "HANZIKU-N", ...), so it must be a bare
   token for which all of those identifiers exist.
   The expansion already ends in a semicolon.
   NOTE(review): as visible here, the "hanziku-N$" pattern argument is a
   bare parenthesized string literal, not a build_string call like the
   string arguments before it -- confirm against the complete file. */
2984 #define DEF_HANZIKU(n) \
2985 staticpro (&Vcharset_ideograph_hanziku_##n); \
2986 Vcharset_ideograph_hanziku_##n = \
2987 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2988 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2989 build_string ("HZK-"#n), \
2990 build_string ("HANZIKU-"#n), \
2991 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2993 ("hanziku-"#n"$"), \
2994 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2995 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3008 staticpro (&Vcharset_china3_jef);
3009 Vcharset_china3_jef =
3010 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3011 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3012 build_string ("JC3"),
3013 build_string ("JEF + CHINA3"),
3014 build_string ("JEF + CHINA3 private characters"),
3015 build_string ("china3jef-0"),
3016 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3017 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3018 staticpro (&Vcharset_ideograph_cbeta);
3019 Vcharset_ideograph_cbeta =
3020 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3021 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3022 build_string ("CB"),
3023 build_string ("CBETA"),
3024 build_string ("CBETA private characters"),
3025 build_string ("cbeta-0"),
3026 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3027 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3028 staticpro (&Vcharset_ideograph_gt);
3029 Vcharset_ideograph_gt =
3030 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3031 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3032 build_string ("GT"),
3033 build_string ("GT"),
3034 build_string ("GT"),
3036 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3037 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* Define the charset object for GT (pseudo JIS encoding) part N.
   Expands to two statements: Vcharset_ideograph_gt_pj_N is registered
   with staticpro, then assigned the result of make_charset.
   N is token-pasted (##) into the LEADING_BYTE_GT_PJ_ and Qideograph_gt_pj_
   identifiers and stringified (#) into the name strings ("GT-PJ-N", ...)
   and into both alternatives of the trailing pattern string.
   Unlike the HANZIKU definitions, the character-range arguments are the
   constants 0, 0, 0 rather than per-part MIN_CHAR_/MAX_CHAR_ macros.
   The expansion already ends in a semicolon.
   NOTE(review): as visible here, the pattern argument is a bare
   parenthesized string literal, not a build_string call like the string
   arguments before it -- confirm against the complete file. */
3038 #define DEF_GT_PJ(n) \
3039 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3040 Vcharset_ideograph_gt_pj_##n = \
3041 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3042 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3043 build_string ("GT-PJ-"#n), \
3044 build_string ("GT (pseudo JIS encoding) part "#n), \
3045 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3047 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3048 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3061 staticpro (&Vcharset_ideograph_daikanwa_2);
3062 Vcharset_ideograph_daikanwa_2 =
3063 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3064 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3065 build_string ("Daikanwa Rev."),
3066 build_string ("Morohashi's Daikanwa Rev."),
3068 ("Daikanwa dictionary (revised version)"),
3069 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3070 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_ideograph_daikanwa);
3072 Vcharset_ideograph_daikanwa =
3073 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3074 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Daikanwa"),
3076 build_string ("Morohashi's Daikanwa Rev.2"),
3078 ("Daikanwa dictionary (second revised version)"),
3079 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3080 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3081 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3083 staticpro (&Vcharset_ethiopic_ucs);
3084 Vcharset_ethiopic_ucs =
3085 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3086 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3087 build_string ("Ethiopic (UCS)"),
3088 build_string ("Ethiopic (UCS)"),
3089 build_string ("Ethiopic of UCS"),
3090 build_string ("Ethiopic-Unicode"),
3091 Qnil, 0x1200, 0x137F, 0, 0,
3092 Qnil, CONVERSION_IDENTICAL);
3094 staticpro (&Vcharset_chinese_big5_1);
3095 Vcharset_chinese_big5_1 =
3096 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3097 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3098 build_string ("Big5"),
3099 build_string ("Big5 (Level-1)"),
3101 ("Big5 Level-1 Chinese traditional"),
3102 build_string ("big5"),
3103 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3104 staticpro (&Vcharset_chinese_big5_2);
3105 Vcharset_chinese_big5_2 =
3106 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3107 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3108 build_string ("Big5"),
3109 build_string ("Big5 (Level-2)"),
3111 ("Big5 Level-2 Chinese traditional"),
3112 build_string ("big5"),
3113 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3115 #ifdef ENABLE_COMPOSITE_CHARS
3116 /* #### For simplicity, we put composite chars into a 96x96 charset.
3117 This is going to lead to problems because you can run out of
3118 room, esp. as we don't yet recycle numbers. */
3119 staticpro (&Vcharset_composite);
3120 Vcharset_composite =
3121 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3122 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3123 build_string ("Composite"),
3124 build_string ("Composite characters"),
3125 build_string ("Composite characters"),
3128 /* #### not dumped properly */
3129 composite_char_row_next = 32;
3130 composite_char_col_next = 32;
3132 Vcomposite_char_string2char_hash_table =
3133 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3134 Vcomposite_char_char2string_hash_table =
3135 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3136 staticpro (&Vcomposite_char_string2char_hash_table);
3137 staticpro (&Vcomposite_char_char2string_hash_table);
3138 #endif /* ENABLE_COMPOSITE_CHARS */