1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
#ifdef ENABLE_COMPOSITE_CHARS
/* Pre-defined charset object for composite characters; it exists only
   when composite-character support is compiled in. */
Lisp_Object Vcharset_composite;

/* Hash tables for composite chars.  One maps string representing
   composed chars to their equivalent chars; one goes the
   other way. */
Lisp_Object Vcomposite_char_char2string_hash_table;
Lisp_Object Vcomposite_char_string2char_hash_table;

/* NOTE(review): presumably the next free row/column to hand out when a
   new composite character is created -- the code that updates these is
   not visible here, so confirm before relying on this. */
static int composite_char_row_next;
static int composite_char_col_next;

#endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
312 code_point = (code_point << 8) | j;
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
376 int leading_code_private_11;
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
389 Lisp_Object Qmother, Qconversion, Q94x60;
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
   string in STR.  Returns the number of bytes stored.
   Do not call this directly.  Use the macro set_charptr_emchar() instead.
 */
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
580 non_ascii_charptr_emchar (const Bufbyte *str)
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
625 Bufbyte i0 = *str, i1, i2 = 0;
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
/* NOTE: This takes advantage of the fact that
   FIELD2_TO_OFFICIAL_LEADING_BYTE and
   FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
684 return (XCHARSET_CHARS (charset) == 96);
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
791 /************************************************************************/
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset. */
863 /* #### SJT Should generic properties be allowed? */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
/* some charsets do not have final characters.  This includes
   ASCII, Control-1, Composite, and the two faux private
   charsets. */
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
982 /* Number of Big5 characters which have the same code in 1st byte. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
987 decode_builtin_char (Lisp_Object charset, int code_point)
991 if (EQ (charset, Vcharset_chinese_big5))
993 int c1 = code_point >> 8;
994 int c2 = code_point & 0xFF;
997 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
999 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1000 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1002 I = (c1 - 0xA1) * BIG5_SAME_ROW
1003 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1007 charset = Vcharset_chinese_big5_1;
1011 charset = Vcharset_chinese_big5_2;
1012 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1014 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1017 if ((final = XCHARSET_FINAL (charset)) >= '0')
1019 if (XCHARSET_DIMENSION (charset) == 1)
1021 switch (XCHARSET_CHARS (charset))
1025 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1028 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1036 switch (XCHARSET_CHARS (charset))
1039 return MIN_CHAR_94x94
1040 + (final - '0') * 94 * 94
1041 + (((code_point >> 8) & 0x7F) - 33) * 94
1042 + ((code_point & 0x7F) - 33);
1044 return MIN_CHAR_96x96
1045 + (final - '0') * 96 * 96
1046 + (((code_point >> 8) & 0x7F) - 32) * 96
1047 + ((code_point & 0x7F) - 32);
1054 else if (XCHARSET_MAX_CODE (charset))
1057 = (XCHARSET_DIMENSION (charset) == 1
1059 code_point - XCHARSET_BYTE_OFFSET (charset)
1061 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1062 * XCHARSET_CHARS (charset)
1063 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1064 + XCHARSET_CODE_OFFSET (charset);
1065 if ((cid < XCHARSET_MIN_CODE (charset))
1066 || (XCHARSET_MAX_CODE (charset) < cid))
1075 charset_code_point (Lisp_Object charset, Emchar ch)
1077 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1080 if ( CHAR_TABLEP (encoding_table)
1081 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1086 Lisp_Object mother = XCHARSET_MOTHER (charset);
1087 int min = XCHARSET_MIN_CODE (charset);
1088 int max = XCHARSET_MAX_CODE (charset);
1091 if ( CHARSETP (mother) )
1092 code = charset_code_point (mother, ch);
1095 if ( (min <= code) && (code <= max) )
1097 int d = code - XCHARSET_CODE_OFFSET (charset);
1099 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1102 int cell = d % 94 + 33;
1108 return (row << 8) | cell;
1110 else if (XCHARSET_CHARS (charset) == 94)
1112 if (XCHARSET_DIMENSION (charset) == 1)
1114 else if (XCHARSET_DIMENSION (charset) == 2)
1115 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1116 else if (XCHARSET_DIMENSION (charset) == 3)
1118 ( (d / (94 * 94) + 33) << 16)
1119 | ((d / 94 % 94 + 33) << 8)
1121 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1123 ( (d / (94 * 94 * 94) + 33) << 24)
1124 | ((d / (94 * 94) % 94 + 33) << 16)
1125 | ((d / 94 % 94 + 33) << 8)
1128 else if (XCHARSET_CHARS (charset) == 96)
1130 if (XCHARSET_DIMENSION (charset) == 1)
1132 else if (XCHARSET_DIMENSION (charset) == 2)
1133 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1134 else if (XCHARSET_DIMENSION (charset) == 3)
1136 ( (d / (96 * 96) + 32) << 16)
1137 | ((d / 96 % 96 + 32) << 8)
1139 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1141 ( (d / (96 * 96 * 96) + 32) << 24)
1142 | ((d / (96 * 96) % 96 + 32) << 16)
1143 | ((d / 96 % 96 + 32) << 8)
1147 return code - XCHARSET_CODE_OFFSET (charset);
1149 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1150 (XCHARSET_CODE_OFFSET (charset)
1151 == XCHARSET_MIN_CODE (charset)) )
1155 if (XCHARSET_DIMENSION (charset) == 1)
1157 if (XCHARSET_CHARS (charset) == 94)
1159 if (((d = ch - (MIN_CHAR_94
1160 + (XCHARSET_FINAL (charset) - '0') * 94))
1165 else if (XCHARSET_CHARS (charset) == 96)
1167 if (((d = ch - (MIN_CHAR_96
1168 + (XCHARSET_FINAL (charset) - '0') * 96))
1176 else if (XCHARSET_DIMENSION (charset) == 2)
1178 if (XCHARSET_CHARS (charset) == 94)
1180 if (((d = ch - (MIN_CHAR_94x94
1182 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1185 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1187 else if (XCHARSET_CHARS (charset) == 96)
1189 if (((d = ch - (MIN_CHAR_96x96
1191 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1194 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1205 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1207 if (c <= MAX_CHAR_BASIC_LATIN)
1209 *charset = Vcharset_ascii;
1214 *charset = Vcharset_control_1;
1219 *charset = Vcharset_latin_iso8859_1;
1223 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1225 *charset = Vcharset_hebrew_iso8859_8;
1226 return c - MIN_CHAR_HEBREW + 0x20;
1229 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1231 *charset = Vcharset_thai_tis620;
1232 return c - MIN_CHAR_THAI + 0x20;
1235 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1236 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1238 return list2 (Vcharset_katakana_jisx0201,
1239 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1242 else if (c <= MAX_CHAR_BMP)
1244 *charset = Vcharset_ucs_bmp;
1247 else if (c <= MAX_CHAR_SMP)
1249 *charset = Vcharset_ucs_smp;
1250 return c - MIN_CHAR_SMP;
1252 else if (c <= MAX_CHAR_SIP)
1254 *charset = Vcharset_ucs_sip;
1255 return c - MIN_CHAR_SIP;
1257 else if (c < MIN_CHAR_DAIKANWA)
1259 *charset = Vcharset_ucs;
1262 else if (c <= MAX_CHAR_DAIKANWA)
1264 *charset = Vcharset_ideograph_daikanwa;
1265 return c - MIN_CHAR_DAIKANWA;
1267 else if (c < MIN_CHAR_94)
1269 *charset = Vcharset_ucs;
1272 else if (c <= MAX_CHAR_94)
1274 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1275 ((c - MIN_CHAR_94) / 94) + '0',
1276 CHARSET_LEFT_TO_RIGHT);
1277 if (!NILP (*charset))
1278 return ((c - MIN_CHAR_94) % 94) + 33;
1281 *charset = Vcharset_ucs;
1285 else if (c <= MAX_CHAR_96)
1287 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1288 ((c - MIN_CHAR_96) / 96) + '0',
1289 CHARSET_LEFT_TO_RIGHT);
1290 if (!NILP (*charset))
1291 return ((c - MIN_CHAR_96) % 96) + 32;
1294 *charset = Vcharset_ucs;
1298 else if (c <= MAX_CHAR_94x94)
1301 = CHARSET_BY_ATTRIBUTES (94, 2,
1302 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1303 CHARSET_LEFT_TO_RIGHT);
1304 if (!NILP (*charset))
1305 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1306 | (((c - MIN_CHAR_94x94) % 94) + 33);
1309 *charset = Vcharset_ucs;
1313 else if (c <= MAX_CHAR_96x96)
1316 = CHARSET_BY_ATTRIBUTES (96, 2,
1317 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1318 CHARSET_LEFT_TO_RIGHT);
1319 if (!NILP (*charset))
1320 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1321 | (((c - MIN_CHAR_96x96) % 96) + 32);
1324 *charset = Vcharset_ucs;
1330 *charset = Vcharset_ucs;
1335 Lisp_Object Vdefault_coded_charset_priority_list;
1339 /************************************************************************/
1340 /* Basic charset Lisp functions */
1341 /************************************************************************/
1343 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1344 Return non-nil if OBJECT is a charset.
1348 return CHARSETP (object) ? Qt : Qnil;
1351 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1352 Retrieve the charset of the given name.
1353 If CHARSET-OR-NAME is a charset object, it is simply returned.
1354 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1355 nil is returned. Otherwise the associated charset object is returned.
1359 if (CHARSETP (charset_or_name))
1360 return charset_or_name;
1362 CHECK_SYMBOL (charset_or_name);
1363 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1366 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1367 Retrieve the charset of the given name.
1368 Same as `find-charset' except an error is signalled if there is no such
1369 charset instead of returning nil.
1373 Lisp_Object charset = Ffind_charset (name);
1376 signal_simple_error ("No such charset", name);
1380 /* We store the charsets in hash tables with the names as the key and the
1381 actual charset object as the value. Occasionally we need to use them
1382 in a list format. These routines provide us with that. */
1383 struct charset_list_closure
1385 Lisp_Object *charset_list;
1389 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1390 void *charset_list_closure)
1392 /* This function can GC */
1393 struct charset_list_closure *chcl =
1394 (struct charset_list_closure*) charset_list_closure;
1395 Lisp_Object *charset_list = chcl->charset_list;
1397 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1401 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1402 Return a list of the names of all defined charsets.
1406 Lisp_Object charset_list = Qnil;
1407 struct gcpro gcpro1;
1408 struct charset_list_closure charset_list_closure;
1410 GCPRO1 (charset_list);
1411 charset_list_closure.charset_list = &charset_list;
1412 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1413 &charset_list_closure);
1416 return charset_list;
1419 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1420 Return the name of charset CHARSET.
1424 return XCHARSET_NAME (Fget_charset (charset));
1427 /* #### SJT Should generic properties be allowed? */
/* Implementation outline for `make-charset':

   1. NAME must be a symbol and DOC-STRING, when non-nil, a string;
      an error is signalled if Ffind_charset already knows NAME.
   2. PROPS is walked as a property list.  Each recognized keyword is
      validated and copied into a local variable; an invalid value or
      an unrecognized keyword signals an error.
   3. An error is raised when no final byte was supplied, when the
      final byte exceeds 0x5F for a dimension-2 charset, or when a
      charset is already registered for the same CHARS / DIMENSION /
      FINAL combination in either direction.
   4. A leading byte is obtained from get_unallocated_leading_byte,
      defaults are filled in (empty doc string and registry, the
      symbol's name as short name, the doc string as long name), and
      the charset is created with make_charset.  A CCL program, if
      one was given, is stored on the new charset afterwards.

   Besides the properties listed in the doc string, the keyword loop
   also recognizes `mother', `min-code', `max-code', `code-offset'
   and `conversion'.
   NOTE(review): confirm whether those extra properties are compiled
   in unconditionally before documenting them for users. */
1428 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1429 Define a new character set.
1430 This function is for use with Mule support.
1431 NAME is a symbol, the name by which the character set is normally referred.
1432 DOC-STRING is a string describing the character set.
1433 PROPS is a property list, describing the specific nature of the
1434 character set. Recognized properties are:
1436 'short-name Short version of the charset name (ex: Latin-1)
1437 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1438 'registry A regular expression matching the font registry field for
1440 'dimension Number of octets used to index a character in this charset.
1441 Either 1 or 2. Defaults to 1.
1442 'columns Number of columns used to display a character in this charset.
1443 Only used in TTY mode. (Under X, the actual width of a
1444 character can be derived from the font used to display the
1445 characters.) If unspecified, defaults to the dimension
1446 (this is almost always the correct value).
1447 'chars Number of characters in each dimension (94 or 96).
1448 Defaults to 94. Note that if the dimension is 2, the
1449 character set thus described is 94x94 or 96x96.
1450 'final Final byte of ISO 2022 escape sequence. Must be
1451 supplied. Each combination of (DIMENSION, CHARS) defines a
1452 separate namespace for final bytes. Note that ISO
1453 2022 restricts the final byte to the range
1454 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1455 dimension == 2. Note also that final bytes in the range
1456 0x30 - 0x3F are reserved for user-defined (not official)
1458 'graphic 0 (use left half of font on output) or 1 (use right half
1459 of font on output). Defaults to 0. For example, for
1460 a font whose registry is ISO8859-1, the left half
1461 (octets 0x20 - 0x7F) is the `ascii' character set, while
1462 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1463 character set. With 'graphic set to 0, the octets
1464 will have their high bit cleared; with it set to 1,
1465 the octets will have their high bit set.
1466 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1468 'ccl-program A compiled CCL program used to convert a character in
1469 this charset into an index into the font. This is in
1470 addition to the 'graphic property. The CCL program
1471 is passed the octets of the character, with the high
1472 bit cleared and set depending upon whether the value
1473 of the 'graphic property is 0 or 1.
1475 (name, doc_string, props))
1477 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1478 int direction = CHARSET_LEFT_TO_RIGHT;
1479 Lisp_Object registry = Qnil;
1480 Lisp_Object charset;
1481 Lisp_Object ccl_program = Qnil;
1482 Lisp_Object short_name = Qnil, long_name = Qnil;
1483 Lisp_Object mother = Qnil;
1484 int min_code = 0, max_code = 0, code_offset = 0;
1485 int byte_offset = -1;
1488 CHECK_SYMBOL (name);
1489 if (!NILP (doc_string))
1490 CHECK_STRING (doc_string);
1492 charset = Ffind_charset (name);
1493 if (!NILP (charset))
1494 signal_simple_error ("Cannot redefine existing charset", name);
1497 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1499 if (EQ (keyword, Qshort_name))
1501 CHECK_STRING (value);
1505 if (EQ (keyword, Qlong_name))
1507 CHECK_STRING (value);
1511 else if (EQ (keyword, Qdimension))
1514 dimension = XINT (value);
1515 if (dimension < 1 ||
1522 signal_simple_error ("Invalid value for 'dimension", value);
1525 else if (EQ (keyword, Qchars))
1528 chars = XINT (value);
1529 if (chars != 94 && chars != 96
1531 && chars != 128 && chars != 256
1534 signal_simple_error ("Invalid value for 'chars", value);
1537 else if (EQ (keyword, Qcolumns))
1540 columns = XINT (value);
1541 if (columns != 1 && columns != 2)
1542 signal_simple_error ("Invalid value for 'columns", value);
1545 else if (EQ (keyword, Qgraphic))
1548 graphic = XINT (value);
1556 signal_simple_error ("Invalid value for 'graphic", value);
1559 else if (EQ (keyword, Qregistry))
1561 CHECK_STRING (value);
1565 else if (EQ (keyword, Qdirection))
1567 if (EQ (value, Ql2r))
1568 direction = CHARSET_LEFT_TO_RIGHT;
1569 else if (EQ (value, Qr2l))
1570 direction = CHARSET_RIGHT_TO_LEFT;
1572 signal_simple_error ("Invalid value for 'direction", value);
1575 else if (EQ (keyword, Qfinal))
1577 CHECK_CHAR_COERCE_INT (value);
1578 final = XCHAR (value);
1579 if (final < '0' || final > '~')
1580 signal_simple_error ("Invalid value for 'final", value);
1584 else if (EQ (keyword, Qmother))
1586 mother = Fget_charset (value);
1589 else if (EQ (keyword, Qmin_code))
1592 min_code = XUINT (value);
1595 else if (EQ (keyword, Qmax_code))
1598 max_code = XUINT (value);
1601 else if (EQ (keyword, Qcode_offset))
1604 code_offset = XUINT (value);
1607 else if (EQ (keyword, Qconversion))
1609 if (EQ (value, Q94x60))
1610 conversion = CONVERSION_94x60;
1614 else if (EQ (keyword, Qccl_program))
1616 struct ccl_program test_ccl;
1618 if (setup_ccl_program (&test_ccl, value) < 0)
1619 signal_simple_error ("Invalid value for 'ccl-program", value);
1620 ccl_program = value;
1624 signal_simple_error ("Unrecognized property", keyword);
1630 error ("'final must be specified");
1632 if (dimension == 2 && final > 0x5F)
1634 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1637 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1638 CHARSET_LEFT_TO_RIGHT)) ||
1639 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1640 CHARSET_RIGHT_TO_LEFT)))
1642 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1644 id = get_unallocated_leading_byte (dimension);
1646 if (NILP (doc_string))
1647 doc_string = build_string ("");
1649 if (NILP (registry))
1650 registry = build_string ("");
1652 if (NILP (short_name))
1653 XSETSTRING (short_name, XSYMBOL (name)->name);
1655 if (NILP (long_name))
1656 long_name = doc_string;
1659 columns = dimension;
1661 if (byte_offset < 0)
1665 else if (chars == 96)
1671 charset = make_charset (id, name, chars, dimension, columns, graphic,
1672 final, direction, short_name, long_name,
1673 doc_string, registry,
1674 Qnil, min_code, max_code, code_offset, byte_offset,
1675 mother, conversion);
1676 if (!NILP (ccl_program))
1677 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Implementation outline: CHARSET is resolved with Fget_charset and
   rejected if its reverse-direction-charset slot is already filled;
   NEW-NAME must be a symbol that does not name an existing charset.
   The chars, dimension, columns, graphic, final byte, doc string,
   short/long names and registry are copied from CHARSET, a fresh
   leading byte is allocated for the same dimension, and the direction
   is flipped (right-to-left unless CHARSET itself is right-to-left).
   After make_charset builds the new object, the two charsets are
   linked to each other through their reverse-direction-charset
   slots. */
1681 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1683 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1684 NEW-NAME is the name of the new charset. Return the new charset.
1686 (charset, new_name))
1688 Lisp_Object new_charset = Qnil;
1689 int id, chars, dimension, columns, graphic, final;
1691 Lisp_Object registry, doc_string, short_name, long_name;
1694 charset = Fget_charset (charset);
1695 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1696 signal_simple_error ("Charset already has reverse-direction charset",
1699 CHECK_SYMBOL (new_name);
1700 if (!NILP (Ffind_charset (new_name)))
1701 signal_simple_error ("Cannot redefine existing charset", new_name);
1703 cs = XCHARSET (charset);
1705 chars = CHARSET_CHARS (cs);
1706 dimension = CHARSET_DIMENSION (cs);
1707 columns = CHARSET_COLUMNS (cs);
1708 id = get_unallocated_leading_byte (dimension);
1710 graphic = CHARSET_GRAPHIC (cs);
1711 final = CHARSET_FINAL (cs);
1712 direction = CHARSET_RIGHT_TO_LEFT;
1713 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1714 direction = CHARSET_LEFT_TO_RIGHT;
1715 doc_string = CHARSET_DOC_STRING (cs);
1716 short_name = CHARSET_SHORT_NAME (cs);
1717 long_name = CHARSET_LONG_NAME (cs);
1718 registry = CHARSET_REGISTRY (cs);
1720 new_charset = make_charset (id, new_name, chars, dimension, columns,
1721 graphic, final, direction, short_name, long_name,
1722 doc_string, registry,
1724 CHARSET_DECODING_TABLE(cs),
1725 CHARSET_MIN_CODE(cs),
1726 CHARSET_MAX_CODE(cs),
1727 CHARSET_CODE_OFFSET(cs),
1728 CHARSET_BYTE_OFFSET(cs),
1730 CHARSET_CONVERSION (cs)
1732 Qnil, 0, 0, 0, 0, Qnil, 0
1736 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1737 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Implementation note: ALIAS must be a symbol.  CHARSET is resolved
   with Fget_charset and the resulting charset object is stored in
   Vcharset_hash_table under the key ALIAS; the value returned is
   whatever Fputhash returns. */
1742 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1743 Define symbol ALIAS as an alias for CHARSET.
1747 CHECK_SYMBOL (alias);
1748 charset = Fget_charset (charset);
1749 return Fputhash (alias, charset, Vcharset_hash_table);
1752 /* #### Reverse direction charsets not yet implemented. */
/* Implementation note: resolves CHARSET with Fget_charset and returns
   the content of its reverse-direction-charset slot unchanged. */
1754 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1756 Return the reverse-direction charset parallel to CHARSET, if any.
1757 This is the charset with the same properties (in particular, the same
1758 dimension, number of characters per dimension, and final byte) as
1759 CHARSET but whose characters are displayed in the opposite direction.
1763 charset = Fget_charset (charset);
1764 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Implementation outline: the arguments are validated before any
   lookup is attempted -- DIMENSION must be 1 or 2, CHARS must be 94
   or 96, FINAL must lie between '0' and '~' (and must not exceed 0x5F
   when DIMENSION is 2), and DIRECTION must be `l2r', `r2l' or nil.
   The charset is then fetched with CHARSET_BY_ATTRIBUTES, trying
   left-to-right before right-to-left when no direction was given.
   Note that the visible return path hands back the charset's name
   (XCHARSET_NAME), not the charset object itself. */
1768 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1769 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1770 If DIRECTION is omitted, both directions will be checked (left-to-right
1771 will be returned if character sets exist for both directions).
1773 (dimension, chars, final, direction))
1775 int dm, ch, fi, di = -1;
1776 Lisp_Object obj = Qnil;
1778 CHECK_INT (dimension);
1779 dm = XINT (dimension);
1780 if (dm < 1 || dm > 2)
1781 signal_simple_error ("Invalid value for DIMENSION", dimension);
1785 if (ch != 94 && ch != 96)
1786 signal_simple_error ("Invalid value for CHARS", chars);
1788 CHECK_CHAR_COERCE_INT (final);
1790 if (fi < '0' || fi > '~')
1791 signal_simple_error ("Invalid value for FINAL", final);
1793 if (EQ (direction, Ql2r))
1794 di = CHARSET_LEFT_TO_RIGHT;
1795 else if (EQ (direction, Qr2l))
1796 di = CHARSET_RIGHT_TO_LEFT;
1797 else if (!NILP (direction))
1798 signal_simple_error ("Invalid value for DIRECTION", direction);
1800 if (dm == 2 && fi > 0x5F)
1802 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1806 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1808 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1811 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1814 return XCHARSET_NAME (obj);
/* Implementation note: resolves CHARSET with Fget_charset and returns
   its short-name slot (XCHARSET_SHORT_NAME). */
1818 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1819 Return short name of CHARSET.
1823 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Implementation note: resolves CHARSET with Fget_charset and returns
   its long-name slot (XCHARSET_LONG_NAME). */
1826 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1827 Return long name of CHARSET.
1831 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Implementation note: the "description" is the charset's doc string;
   CHARSET is resolved with Fget_charset and XCHARSET_DOC_STRING of
   the result is returned. */
1834 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1835 Return description of CHARSET.
1839 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Implementation note: resolves CHARSET with Fget_charset and returns
   its dimension converted to a Lisp integer with make_int. */
1842 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1843 Return dimension of CHARSET.
1847 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Implementation outline: CHARSET is resolved with Fget_charset, PROP
   must be a symbol, and PROP is then matched with a sequence of EQ
   tests against the known property symbols.  Object-valued slots
   (name, short/long name, doc string, registry, ccl-program, mother)
   are returned as stored; integer slots (dimension, columns, graphic,
   chars, min-code, max-code) are converted with make_int.  Special
   cases: a final byte of 0 is reported as nil rather than as a
   character, the direction is reported as the symbol `l2r' or `r2l',
   and the reverse-direction charset is reported by name when the slot
   holds a charset.  Any other PROP signals an error. */
1850 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1851 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1852 Recognized properties are those listed in `make-charset', as well as
1853 'name and 'doc-string.
1859 charset = Fget_charset (charset);
1860 cs = XCHARSET (charset);
1862 CHECK_SYMBOL (prop);
1863 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1864 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1865 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1866 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1867 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1868 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1869 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1870 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1871 Qnil : make_char (CHARSET_FINAL (cs));
1872 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1873 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1874 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1875 if (EQ (prop, Qdirection))
1876 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1877 if (EQ (prop, Qreverse_direction_charset))
1879 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1880 /* #### Is this translation OK? If so, error checking sufficient? */
1881 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1884 if (EQ (prop, Qmother))
1885 return CHARSET_MOTHER (cs);
1886 if (EQ (prop, Qmin_code))
1887 return make_int (CHARSET_MIN_CODE (cs));
1888 if (EQ (prop, Qmax_code))
1889 return make_int (CHARSET_MAX_CODE (cs));
1891 signal_simple_error ("Unrecognized charset property name", prop);
1892 return Qnil; /* not reached */
/* Implementation note: the identification number is the charset's
   leading byte (XCHARSET_LEADING_BYTE), returned as a Lisp integer. */
1895 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1896 Return charset identification number of CHARSET.
1900 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1903 /* #### We need to figure out which properties we really want to
/* Implementation note: CCL-PROGRAM is validated by loading it into a
   scratch `struct ccl_program' with setup_ccl_program; a negative
   result signals "Invalid ccl-program".  Only after that check passes
   is the value stored in the charset's ccl-program slot. */
1906 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1907 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1909 (charset, ccl_program))
1911 struct ccl_program test_ccl;
1913 charset = Fget_charset (charset);
1914 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1915 signal_simple_error ("Invalid ccl-program", ccl_program);
1916 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1921 invalidate_charset_font_caches (Lisp_Object charset)
1923 /* Invalidate font cache entries for charset on all devices. */
/* For every device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked
   up in that device's charset_font_cache.  Qunbound is the lookup
   default, so a device with no entry for CHARSET is left alone;
   otherwise the hash table found there is emptied with Fclrhash. */
1924 Lisp_Object devcons, concons, hash_table;
1925 DEVICE_LOOP_NO_BREAK (devcons, concons)
1927 struct device *d = XDEVICE (XCAR (devcons));
1928 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1929 if (!UNBOUNDP (hash_table))
1930 Fclrhash (hash_table);
/* Implementation note: after CHARSET is resolved and REGISTRY is
   checked to be a string, the registry slot is overwritten.  The
   per-device font caches for this charset are then cleared with
   invalidate_charset_font_caches, and face_property_was_changed is
   called for the `font' property of the default face with the
   `global' locale. */
1934 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1935 Set the 'registry property of CHARSET to REGISTRY.
1937 (charset, registry))
1939 charset = Fget_charset (charset);
1940 CHECK_STRING (registry);
1941 XCHARSET_REGISTRY (charset) = registry;
1942 invalidate_charset_font_caches (charset);
1943 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Implementation note: the "mapping table" is the charset's decoding
   table; CHARSET is resolved with Fget_charset and
   XCHARSET_DECODING_TABLE of the result is returned. */
1948 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1949 Return mapping-table of CHARSET.
1953 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Implementation outline: TABLE must be nil or a vector; anything
   else signals wrong-type-argument with "vector-or-nil-p".  A vector
   is first checked by decoding_table_check_elements against the
   charset's dimension and byte size, and a failed check is reported
   as "Too big table", "Invalid element is found" or "Something
   wrong".  The entries are then registered one by one with
   Fput_char_attribute under the charset's name.  For a flat table
   the code point is the element index plus the charset's byte
   offset; for a table of vectors the outer index plus byte offset is
   shifted left by 8 to form the high part of the code point.
   NOTE(review): the decoding-table slot is assigned Qnil in the
   vector branch as well as the nil branch -- confirm that this is
   intended. */
1956 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1957 Set mapping-table of CHARSET to TABLE.
1961 struct Lisp_Charset *cs;
1965 charset = Fget_charset (charset);
1966 cs = XCHARSET (charset);
1970 CHARSET_DECODING_TABLE(cs) = Qnil;
1973 else if (VECTORP (table))
1975 int ccs_len = CHARSET_BYTE_SIZE (cs);
1976 int ret = decoding_table_check_elements (table,
1977 CHARSET_DIMENSION (cs),
1982 signal_simple_error ("Too big table", table);
1984 signal_simple_error ("Invalid element is found", table);
1986 signal_simple_error ("Something wrong", table);
1988 CHARSET_DECODING_TABLE(cs) = Qnil;
1991 signal_error (Qwrong_type_argument,
1992 list2 (build_translated_string ("vector-or-nil-p"),
1995 byte_offset = CHARSET_BYTE_OFFSET (cs);
1996 switch (CHARSET_DIMENSION (cs))
1999 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2001 Lisp_Object c = XVECTOR_DATA(table)[i];
2004 Fput_char_attribute (c, XCHARSET_NAME (charset),
2005 make_int (i + byte_offset));
2009 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2011 Lisp_Object v = XVECTOR_DATA(table)[i];
2017 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2019 Lisp_Object c = XVECTOR_DATA(v)[j];
2023 (c, XCHARSET_NAME (charset),
2024 make_int ( ( (i + byte_offset) << 8 )
2030 Fput_char_attribute (v, XCHARSET_NAME (charset),
2031 make_int (i + byte_offset));
2040 /************************************************************************/
2041 /* Lisp primitives for working with characters */
2042 /************************************************************************/
/* Implementation note: CHARSET is resolved with Fget_charset.  The
   code point is decoded with DECODE_CHAR when DEFINED_ONLY is nil and
   with DECODE_DEFINED_CHAR otherwise; a negative decoding result is
   mapped to nil, anything else is returned as a character.
   NOTE(review): a branch on XCHARSET_GRAPHIC (charset) == 1 precedes
   the decoding step -- confirm what it does to the code point. */
2045 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2046 Make a character from CHARSET and code-point CODE.
2047 If DEFINED_ONLY is non-nil, builtin character is not returned.
2048 If corresponding character is not found, nil is returned.
2050 (charset, code, defined_only))
2054 charset = Fget_charset (charset);
2057 if (XCHARSET_GRAPHIC (charset) == 1)
2059 if (NILP (defined_only))
2060 c = DECODE_CHAR (charset, c);
2062 c = DECODE_DEFINED_CHAR (charset, c);
2063 return c >= 0 ? make_char (c) : Qnil;
/* Implementation note: the character is obtained from
   decode_builtin_char; when that yields a negative value the function
   falls back to Fdecode_char with DEFINED_ONLY nil.  The latin-viscii
   charset is special-cased beforehand: the character is decoded once
   with Fdecode_char, its latin-viscii-lower and latin-viscii-upper
   attributes are consulted, and CHARSET may be replaced by one of
   those two charsets.
   NOTE(review): a branch on XCHARSET_GRAPHIC (charset) == 1 precedes
   the builtin decoding -- confirm what it does to the code point. */
2066 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2067 Make a builtin character from CHARSET and code-point CODE.
2073 charset = Fget_charset (charset);
2075 if (EQ (charset, Vcharset_latin_viscii))
2077 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2083 (ret = Fget_char_attribute (chr,
2084 Vcharset_latin_viscii_lower,
2087 charset = Vcharset_latin_viscii_lower;
2091 (ret = Fget_char_attribute (chr,
2092 Vcharset_latin_viscii_upper,
2095 charset = Vcharset_latin_viscii_upper;
2102 if (XCHARSET_GRAPHIC (charset) == 1)
2105 c = decode_builtin_char (charset, c);
2106 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Implementation outline: the permitted octet range depends on the
   charset -- 0..127 for ascii, 0..31 for control-1, 0..255 for
   256-character sets, 33..126 for 94-character sets and 32..127
   otherwise.  ARG1 is range-checked against those limits and
   args_out_of_range_3 is signalled on failure.  For a dimension-1
   charset a second octet is rejected and the character is built with
   MAKE_CHAR (charset, a1, 0); otherwise ARG2 is masked with 0x7f,
   range-checked the same way, and the character is built from both
   octets. */
2110 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2111 Make a character from CHARSET and octets ARG1 and ARG2.
2112 ARG2 is required only for characters from two-dimensional charsets.
2113 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2114 character s with caron.
2116 (charset, arg1, arg2))
2120 int lowlim, highlim;
2122 charset = Fget_charset (charset);
2123 cs = XCHARSET (charset);
2125 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2126 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2128 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2130 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2131 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2134 /* It is useful (and safe, according to Olivier Galibert) to strip
2135 the 8th bit off ARG1 and ARG2 because it allows programmers to
2136 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2137 Latin 2 code of the character. */
2145 if (a1 < lowlim || a1 > highlim)
2146 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2148 if (CHARSET_DIMENSION (cs) == 1)
2152 ("Charset is of dimension one; second octet must be nil", arg2);
2153 return make_char (MAKE_CHAR (charset, a1, 0));
2162 a2 = XINT (arg2) & 0x7f;
2164 if (a2 < lowlim || a2 > highlim)
2165 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2167 return make_char (MAKE_CHAR (charset, a1, a2));
/* Implementation note: CHARACTER is checked (integers are coerced to
   characters by CHECK_CHAR_COERCE_INT), its charset is obtained with
   CHAR_CHARSET, and the value returned is that charset's name rather
   than the charset object. */
2170 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2171 Return the character set of CHARACTER.
2175 CHECK_CHAR_COERCE_INT (character);
2177 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Implementation note: the character is split into its charset and
   two octets with BREAKUP_CHAR.  N of nil or 0 selects the first
   octet, N of 1 the second; any other N signals "Octet number must
   be 0 or 1". */
2180 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2181 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2182 N defaults to 0 if omitted.
2186 Lisp_Object charset;
2189 CHECK_CHAR_COERCE_INT (character);
2191 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2193 if (NILP (n) || EQ (n, Qzero))
2194 return make_int (octet0);
2195 else if (EQ (n, make_int (1)))
2196 return make_int (octet1);
2198 signal_simple_error ("Octet number must be 0 or 1", n);
/* Implementation note: after CHARACTER is checked and CHARSET is
   resolved, the code point is computed with charset_code_point.  A
   non-negative result is returned as a Lisp integer; a negative
   result means no integer is returned on that path. */
2202 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2203 Return code-point of CHARACTER in specified CHARSET.
2205 (character, charset))
2209 CHECK_CHAR_COERCE_INT (character);
2210 charset = Fget_charset (charset);
2211 code_point = charset_code_point (charset, XCHAR (character));
2212 if (code_point >= 0)
2213 return make_int (code_point);
/* Implementation note: two strategies appear in the body.  The first
   obtains a code point and charset from ENCODE_CHAR, conses the low
   byte (code_point & 255) onto the result inside a loop that runs
   while the charset's dimension count is positive, and finally
   prepends the charset name.  The second splits the character with
   BREAKUP_CHAR and builds a three-element list (name, c1, c2) for a
   dimension-2 charset or a two-element list (name, c1) otherwise.
   NOTE(review): presumably these are alternatives selected at compile
   time -- confirm against the preprocessor conditionals.
   CHARSET and RC are GC-protected with GCPRO2 throughout. */
2219 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2220 Return list of charset and one or two position-codes of CHARACTER.
2224 /* This function can GC */
2225 struct gcpro gcpro1, gcpro2;
2226 Lisp_Object charset = Qnil;
2227 Lisp_Object rc = Qnil;
2235 GCPRO2 (charset, rc);
2236 CHECK_CHAR_COERCE_INT (character);
2239 code_point = ENCODE_CHAR (XCHAR (character), charset);
2240 dimension = XCHARSET_DIMENSION (charset);
2241 while (dimension > 0)
2243 rc = Fcons (make_int (code_point & 255), rc);
2247 rc = Fcons (XCHARSET_NAME (charset), rc);
2249 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2251 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2253 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2257 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2266 #ifdef ENABLE_COMPOSITE_CHARS
2267 /************************************************************************/
2268 /* composite character functions */
2269 /************************************************************************/
2272 lookup_composite_char (Bufbyte *str, int len)
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  When a new character has
   to be allocated it is built with MAKE_CHAR in Vcharset_composite at
   position (composite_char_row_next, composite_char_col_next) and
   recorded in both the char-to-string and the string-to-char hash
   tables.  The column counter is then advanced; on reaching 128 it
   wraps to 32 and the row counter is incremented.  A row counter of
   128 or more signals "No more composite chars available". */
2274 Lisp_Object lispstr = make_string (str, len);
2275 Lisp_Object ch = Fgethash (lispstr,
2276 Vcomposite_char_string2char_hash_table,
2282 if (composite_char_row_next >= 128)
2283 signal_simple_error ("No more composite chars available", lispstr);
2284 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2285 composite_char_col_next);
2286 Fputhash (make_char (emch), lispstr,
2287 Vcomposite_char_char2string_hash_table);
2288 Fputhash (lispstr, make_char (emch),
2289 Vcomposite_char_string2char_hash_table);
2290 composite_char_col_next++;
2291 if (composite_char_col_next >= 128)
2293 composite_char_col_next = 32;
2294 composite_char_row_next++;
2303 composite_char_string (Emchar ch)
/* Look CH up in Vcomposite_char_char2string_hash_table, the reverse
   of the table consulted by lookup_composite_char.  The assert below
   requires that an entry exists for CH. */
2305 Lisp_Object str = Fgethash (make_char (ch),
2306 Vcomposite_char_char2string_hash_table,
2308 assert (!UNBOUNDP (str));
/* Implementation note: STRING must be a Lisp string; its data and
   length are passed to lookup_composite_char and the resulting
   character code is returned as a Lisp character. */
2312 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2313 Convert a string into a single composite character.
2314 The character is the result of overstriking all the characters in
2319 CHECK_STRING (string);
2320 return make_char (lookup_composite_char (XSTRING_DATA (string),
2321 XSTRING_LENGTH (string)));
/* Implementation note: the character is rejected with "Must be
   composite char" unless its leading byte is LEADING_BYTE_COMPOSITE;
   otherwise the string comes from composite_char_string. */
2324 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2325 Return a string of the characters comprising a composite character.
2333 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2334 signal_simple_error ("Must be composite char", ch);
2335 return composite_char_string (emch);
2337 #endif /* ENABLE_COMPOSITE_CHARS */
2340 /************************************************************************/
2341 /* initialization */
2342 /************************************************************************/
2345 syms_of_mule_charset (void)
/* Symbol-level initialization for this file.  Three things happen
   here: the `charset' lrecord implementation is initialized, every
   Lisp primitive defined above is registered with DEFSUBR, and the
   symbols this file refers to are created with defsymbol -- first the
   property and direction names, then the names of the predefined
   charsets. */
2347 INIT_LRECORD_IMPLEMENTATION (charset);
2349 DEFSUBR (Fcharsetp);
2350 DEFSUBR (Ffind_charset);
2351 DEFSUBR (Fget_charset);
2352 DEFSUBR (Fcharset_list);
2353 DEFSUBR (Fcharset_name);
2354 DEFSUBR (Fmake_charset);
2355 DEFSUBR (Fmake_reverse_direction_charset);
2356 /* DEFSUBR (Freverse_direction_charset); */
2357 DEFSUBR (Fdefine_charset_alias);
2358 DEFSUBR (Fcharset_from_attributes);
2359 DEFSUBR (Fcharset_short_name);
2360 DEFSUBR (Fcharset_long_name);
2361 DEFSUBR (Fcharset_description);
2362 DEFSUBR (Fcharset_dimension);
2363 DEFSUBR (Fcharset_property);
2364 DEFSUBR (Fcharset_id);
2365 DEFSUBR (Fset_charset_ccl_program);
2366 DEFSUBR (Fset_charset_registry);
2368 DEFSUBR (Fcharset_mapping_table);
2369 DEFSUBR (Fset_charset_mapping_table);
2373 DEFSUBR (Fdecode_char);
2374 DEFSUBR (Fdecode_builtin_char);
2375 DEFSUBR (Fencode_char);
2377 DEFSUBR (Fmake_char);
2378 DEFSUBR (Fchar_charset);
2379 DEFSUBR (Fchar_octet);
2380 DEFSUBR (Fsplit_char);
2382 #ifdef ENABLE_COMPOSITE_CHARS
2383 DEFSUBR (Fmake_composite_char);
2384 DEFSUBR (Fcomposite_char_string);
2387 defsymbol (&Qcharsetp, "charsetp");
2388 defsymbol (&Qregistry, "registry");
2389 defsymbol (&Qfinal, "final");
2390 defsymbol (&Qgraphic, "graphic");
2391 defsymbol (&Qdirection, "direction");
2392 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2393 defsymbol (&Qshort_name, "short-name");
2394 defsymbol (&Qlong_name, "long-name");
2396 defsymbol (&Qmother, "mother");
2397 defsymbol (&Qmin_code, "min-code");
2398 defsymbol (&Qmax_code, "max-code");
2399 defsymbol (&Qcode_offset, "code-offset");
2400 defsymbol (&Qconversion, "conversion");
2401 defsymbol (&Q94x60, "94x60");
2404 defsymbol (&Ql2r, "l2r");
2405 defsymbol (&Qr2l, "r2l");
2407 /* Charsets, compatible with FSF 20.3
2408 Naming convention is Script-Charset[-Edition] */
2409 defsymbol (&Qascii, "ascii");
2410 defsymbol (&Qcontrol_1, "control-1");
2411 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2412 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2413 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2414 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2415 defsymbol (&Qthai_tis620, "thai-tis620");
2416 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2417 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2418 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2419 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2420 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2421 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2422 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2423 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2424 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2425 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2426 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2427 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2428 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2429 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2430 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2431 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2433 defsymbol (&Qucs, "ucs");
2434 defsymbol (&Qucs_bmp, "ucs-bmp");
2435 defsymbol (&Qucs_smp, "ucs-smp");
2436 defsymbol (&Qucs_sip, "ucs-sip");
2437 defsymbol (&Qucs_cns, "ucs-cns");
2438 defsymbol (&Qucs_jis, "ucs-jis");
2439 defsymbol (&Qucs_ks, "ucs-ks");
2440 defsymbol (&Qucs_big5, "ucs-big5");
2441 defsymbol (&Qlatin_viscii, "latin-viscii");
2442 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2443 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2444 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2445 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2446 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2447 defsymbol (&Qideograph_gt, "ideograph-gt");
2448 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2449 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2450 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2451 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2452 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2453 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2454 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2455 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2456 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2457 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2458 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2459 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2460 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2461 defsymbol (&Qchinese_big5, "chinese-big5");
2462 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2463 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2464 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2465 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2466 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2467 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2468 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2469 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2470 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2471 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2472 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2473 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2474 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2475 defsymbol (&Qchina3_jef, "china3-jef");
2476 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2477 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2479 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2480 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2482 defsymbol (&Qcomposite, "composite");
2486 vars_of_mule_charset (void)
/* Variable-level initialization for this file.  The `chlook' lookup
   structure is allocated zero-filled and registered with the dumper
   via dump_add_root_struct_ptr; the entries of its
   charset_by_leading_byte and charset_by_attributes tables are then
   set to Qnil and the next-leading-byte counters are seeded with the
   first private leading-byte values.  Finally the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'
   are given initial values and defined.
   NOTE(review): charset_by_attributes is cleared by both a two-level
   and a three-level loop nest below; presumably these are
   alternatives selected at compile time -- confirm. */
2493 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2494 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2496 /* Table of charsets indexed by leading byte. */
2497 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2498 chlook->charset_by_leading_byte[i] = Qnil;
2501 /* Table of charsets indexed by type/final-byte. */
2502 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2503 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2504 chlook->charset_by_attributes[i][j] = Qnil;
2506 /* Table of charsets indexed by type/final-byte/direction. */
2507 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2508 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2509 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2510 chlook->charset_by_attributes[i][j][k] = Qnil;
2514 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2516 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2517 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2521 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2522 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2523 Leading-code of private TYPE9N charset of column-width 1.
2525 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2529 Vdefault_coded_charset_priority_list = Qnil;
2530 DEFVAR_LISP ("default-coded-charset-priority-list",
2531 &Vdefault_coded_charset_priority_list /*
2532 Default order of preferred coded-character-sets.
2538 complex_vars_of_mule_charset (void)
2538 complex_vars_of_mule_charset (void)
2540 staticpro (&Vcharset_hash_table);
2541 Vcharset_hash_table =
2542 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2544 /* Predefined character sets. We store them into variables for
2548 staticpro (&Vcharset_ucs);
2550 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2551 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2552 build_string ("UCS"),
2553 build_string ("UCS"),
2554 build_string ("ISO/IEC 10646"),
2556 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2557 staticpro (&Vcharset_ucs_bmp);
2559 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2560 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2561 build_string ("BMP"),
2562 build_string ("UCS-BMP"),
2563 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2565 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2566 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2567 staticpro (&Vcharset_ucs_smp);
2569 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2570 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2571 build_string ("SMP"),
2572 build_string ("UCS-SMP"),
2573 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2574 build_string ("UCS00-1"),
2575 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2576 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2577 staticpro (&Vcharset_ucs_sip);
2579 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2580 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2581 build_string ("SIP"),
2582 build_string ("UCS-SIP"),
2583 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2584 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2585 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2586 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2587 staticpro (&Vcharset_ucs_cns);
2589 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2590 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2591 build_string ("UCS for CNS"),
2592 build_string ("UCS for CNS 11643"),
2593 build_string ("ISO/IEC 10646 for CNS 11643"),
2596 Qnil, CONVERSION_IDENTICAL);
2597 staticpro (&Vcharset_ucs_jis);
2599 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2600 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2601 build_string ("UCS for JIS"),
2602 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2603 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2605 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2606 staticpro (&Vcharset_ucs_ks);
2608 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2609 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2610 build_string ("UCS for KS"),
2611 build_string ("UCS for CCS defined by KS"),
2612 build_string ("ISO/IEC 10646 for Korean Standards"),
2614 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2615 staticpro (&Vcharset_ucs_big5);
2617 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2618 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2619 build_string ("UCS for Big5"),
2620 build_string ("UCS for Big5"),
2621 build_string ("ISO/IEC 10646 for Big5"),
2623 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2625 # define MIN_CHAR_THAI 0
2626 # define MAX_CHAR_THAI 0
2627 /* # define MIN_CHAR_HEBREW 0 */
2628 /* # define MAX_CHAR_HEBREW 0 */
2629 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2630 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* ASCII charset: hand Vcharset_ascii to staticpro and build the charset
   with make_charset.
   NOTE(review): the string arguments look like short name, long name,
   doc string and registry regexp, in that order -- confirm against the
   make_charset prototype.  The second string must not carry an unmatched
   ')' (it used to read "ASCII)").  */
2632 staticpro (&Vcharset_ascii);
2634 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2635 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2636 build_string ("ASCII"),
2637 build_string ("ASCII"),
2638 build_string ("ASCII (ISO646 IRV)"),
2639 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2640 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* C1 control charset: hand Vcharset_control_1 to staticpro, then set it
   to the charset built by make_charset.
   The call passes 0x80 and 0x9F (presumably the covered code range --
   TODO confirm), i.e. 128-159, which is the C1 control area; the
   descriptive string states that same range rather than 128-191.  */
2641 staticpro (&Vcharset_control_1);
2642 Vcharset_control_1 =
2643 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2644 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2645 build_string ("C1"),
2646 build_string ("Control characters"),
2647 build_string ("Control characters 128-159"),
2649 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2650 staticpro (&Vcharset_latin_iso8859_1);
2651 Vcharset_latin_iso8859_1 =
2652 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2653 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2654 build_string ("Latin-1"),
2655 build_string ("ISO8859-1 (Latin-1)"),
2656 build_string ("ISO8859-1 (Latin-1)"),
2657 build_string ("iso8859-1"),
2658 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2659 staticpro (&Vcharset_latin_iso8859_2);
2660 Vcharset_latin_iso8859_2 =
2661 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2662 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2663 build_string ("Latin-2"),
2664 build_string ("ISO8859-2 (Latin-2)"),
2665 build_string ("ISO8859-2 (Latin-2)"),
2666 build_string ("iso8859-2"),
2667 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2668 staticpro (&Vcharset_latin_iso8859_3);
2669 Vcharset_latin_iso8859_3 =
2670 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2671 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2672 build_string ("Latin-3"),
2673 build_string ("ISO8859-3 (Latin-3)"),
2674 build_string ("ISO8859-3 (Latin-3)"),
2675 build_string ("iso8859-3"),
2676 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2677 staticpro (&Vcharset_latin_iso8859_4);
2678 Vcharset_latin_iso8859_4 =
2679 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2680 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2681 build_string ("Latin-4"),
2682 build_string ("ISO8859-4 (Latin-4)"),
2683 build_string ("ISO8859-4 (Latin-4)"),
2684 build_string ("iso8859-4"),
2685 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2686 staticpro (&Vcharset_thai_tis620);
2687 Vcharset_thai_tis620 =
2688 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2689 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2690 build_string ("TIS620"),
2691 build_string ("TIS620 (Thai)"),
2692 build_string ("TIS620.2529 (Thai)"),
2693 build_string ("tis620"),
2694 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2695 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2696 staticpro (&Vcharset_greek_iso8859_7);
2697 Vcharset_greek_iso8859_7 =
2698 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2699 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2700 build_string ("ISO8859-7"),
2701 build_string ("ISO8859-7 (Greek)"),
2702 build_string ("ISO8859-7 (Greek)"),
2703 build_string ("iso8859-7"),
2704 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2705 staticpro (&Vcharset_arabic_iso8859_6);
2706 Vcharset_arabic_iso8859_6 =
2707 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2708 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2709 build_string ("ISO8859-6"),
2710 build_string ("ISO8859-6 (Arabic)"),
2711 build_string ("ISO8859-6 (Arabic)"),
2712 build_string ("iso8859-6"),
2713 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2714 staticpro (&Vcharset_hebrew_iso8859_8);
2715 Vcharset_hebrew_iso8859_8 =
2716 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2717 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2718 build_string ("ISO8859-8"),
2719 build_string ("ISO8859-8 (Hebrew)"),
2720 build_string ("ISO8859-8 (Hebrew)"),
2721 build_string ("iso8859-8"),
2723 0 /* MIN_CHAR_HEBREW */,
2724 0 /* MAX_CHAR_HEBREW */, 0, 32,
2725 Qnil, CONVERSION_IDENTICAL);
2726 staticpro (&Vcharset_katakana_jisx0201);
2727 Vcharset_katakana_jisx0201 =
2728 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2729 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2730 build_string ("JISX0201 Kana"),
2731 build_string ("JISX0201.1976 (Japanese Kana)"),
2732 build_string ("JISX0201.1976 Japanese Kana"),
2733 build_string ("jisx0201\\.1976"),
2734 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2735 staticpro (&Vcharset_latin_jisx0201);
2736 Vcharset_latin_jisx0201 =
2737 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2738 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2739 build_string ("JISX0201 Roman"),
2740 build_string ("JISX0201.1976 (Japanese Roman)"),
2741 build_string ("JISX0201.1976 Japanese Roman"),
2742 build_string ("jisx0201\\.1976"),
2743 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2744 staticpro (&Vcharset_cyrillic_iso8859_5);
2745 Vcharset_cyrillic_iso8859_5 =
2746 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2747 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2748 build_string ("ISO8859-5"),
2749 build_string ("ISO8859-5 (Cyrillic)"),
2750 build_string ("ISO8859-5 (Cyrillic)"),
2751 build_string ("iso8859-5"),
2752 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2753 staticpro (&Vcharset_latin_iso8859_9);
2754 Vcharset_latin_iso8859_9 =
2755 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2756 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2757 build_string ("Latin-5"),
2758 build_string ("ISO8859-9 (Latin-5)"),
2759 build_string ("ISO8859-9 (Latin-5)"),
2760 build_string ("iso8859-9"),
2761 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2762 staticpro (&Vcharset_japanese_jisx0208_1978);
2763 Vcharset_japanese_jisx0208_1978 =
2764 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2765 Qjapanese_jisx0208_1978, 94, 2,
2766 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2767 build_string ("JIS X0208:1978"),
2768 build_string ("JIS X0208:1978 (Japanese)"),
2770 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2771 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2772 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* GB2312 charset: hand Vcharset_chinese_gb2312 to staticpro, then set it
   to the charset built by make_charset.
   NOTE(review): the string arguments look like short name, long name,
   doc string and registry regexp, in that order -- confirm against the
   make_charset prototype.  The second string must not carry an unmatched
   ')' (it used to read "GB2312)").  */
2773 staticpro (&Vcharset_chinese_gb2312);
2774 Vcharset_chinese_gb2312 =
2775 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2776 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2777 build_string ("GB2312"),
2778 build_string ("GB2312"),
2779 build_string ("GB2312 Chinese simplified"),
2780 build_string ("gb2312"),
2781 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* GB 12345 charset: hand Vcharset_chinese_gb12345 to staticpro, then set
   it to the charset built by make_charset.
   NOTE(review): the string arguments look like short name, long name,
   doc string and registry regexp, in that order -- confirm against the
   make_charset prototype.  The second string must not carry an unmatched
   ')' (it used to read "GB 12345)").  */
2782 staticpro (&Vcharset_chinese_gb12345);
2783 Vcharset_chinese_gb12345 =
2784 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2785 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2786 build_string ("G1"),
2787 build_string ("GB 12345"),
2788 build_string ("GB 12345-1990"),
2789 build_string ("GB12345\\(\\.1990\\)?-0"),
2790 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2791 staticpro (&Vcharset_japanese_jisx0208);
2792 Vcharset_japanese_jisx0208 =
2793 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2794 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2795 build_string ("JISX0208"),
2796 build_string ("JIS X0208:1983 (Japanese)"),
2797 build_string ("JIS X0208:1983 Japanese Kanji"),
2798 build_string ("jisx0208\\.1983"),
2799 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2801 staticpro (&Vcharset_japanese_jisx0208_1990);
2802 Vcharset_japanese_jisx0208_1990 =
2803 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2804 Qjapanese_jisx0208_1990, 94, 2,
2805 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2806 build_string ("JISX0208-1990"),
2807 build_string ("JIS X0208:1990 (Japanese)"),
2808 build_string ("JIS X0208:1990 Japanese Kanji"),
2809 build_string ("jisx0208\\.1990"),
2811 MIN_CHAR_JIS_X0208_1990,
2812 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2813 Qnil, CONVERSION_IDENTICAL);
/* KSC5601 charset: hand Vcharset_korean_ksc5601 to staticpro, then set it
   to the charset built by make_charset.
   The second string follows the "NAME (Language)" form used by the
   neighbouring charsets, e.g. "JISX0212 (Japanese)", so its parenthesis
   has to be closed (it used to read "KSC5601 (Korean").
   NOTE(review): the string arguments look like short name, long name,
   doc string and registry regexp -- confirm against make_charset.  */
2815 staticpro (&Vcharset_korean_ksc5601);
2816 Vcharset_korean_ksc5601 =
2817 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2818 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2819 build_string ("KSC5601"),
2820 build_string ("KSC5601 (Korean)"),
2821 build_string ("KSC5601 Korean Hangul and Hanja"),
2822 build_string ("ksc5601"),
2823 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2824 staticpro (&Vcharset_japanese_jisx0212);
2825 Vcharset_japanese_jisx0212 =
2826 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2827 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2828 build_string ("JISX0212"),
2829 build_string ("JISX0212 (Japanese)"),
2830 build_string ("JISX0212 Japanese Supplement"),
2831 build_string ("jisx0212"),
2832 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* CHINESE_CNS_PLANE_RE(n): builds one regexp string literal by adjacent
   string-literal concatenation, so N must itself be a string literal
   (the uses below pass "1" and "2").  The resulting pattern is
   "cns11643", then '.' or '-', then an optional run ending in '.' or
   '-', then N anchored at the end with '$'.  */
2834 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2835 staticpro (&Vcharset_chinese_cns11643_1);
2836 Vcharset_chinese_cns11643_1 =
2837 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2838 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2839 build_string ("CNS11643-1"),
2840 build_string ("CNS11643-1 (Chinese traditional)"),
2842 ("CNS 11643 Plane 1 Chinese traditional"),
2843 build_string (CHINESE_CNS_PLANE_RE("1")),
2844 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2845 staticpro (&Vcharset_chinese_cns11643_2);
2846 Vcharset_chinese_cns11643_2 =
2847 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2848 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2849 build_string ("CNS11643-2"),
2850 build_string ("CNS11643-2 (Chinese traditional)"),
2852 ("CNS 11643 Plane 2 Chinese traditional"),
2853 build_string (CHINESE_CNS_PLANE_RE("2")),
2854 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* TCVN 5712 (VSCII-2) charset: hand Vcharset_latin_tcvn5712 to staticpro,
   then set it to the charset built by make_charset.
   The year in the descriptive string is 1993, matching the "\\.1993"
   accepted by the registry regexp on the following line (the string
   used to say 1983).
   NOTE(review): the string arguments look like short name, long name,
   doc string and registry regexp -- confirm against make_charset.  */
2856 staticpro (&Vcharset_latin_tcvn5712);
2857 Vcharset_latin_tcvn5712 =
2858 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2859 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2860 build_string ("TCVN 5712"),
2861 build_string ("TCVN 5712 (VSCII-2)"),
2862 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
2863 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2864 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2865 staticpro (&Vcharset_latin_viscii_lower);
2866 Vcharset_latin_viscii_lower =
2867 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2868 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2869 build_string ("VISCII lower"),
2870 build_string ("VISCII lower (Vietnamese)"),
2871 build_string ("VISCII lower (Vietnamese)"),
2872 build_string ("MULEVISCII-LOWER"),
2873 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2874 staticpro (&Vcharset_latin_viscii_upper);
2875 Vcharset_latin_viscii_upper =
2876 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2877 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2878 build_string ("VISCII upper"),
2879 build_string ("VISCII upper (Vietnamese)"),
2880 build_string ("VISCII upper (Vietnamese)"),
2881 build_string ("MULEVISCII-UPPER"),
2882 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2883 staticpro (&Vcharset_latin_viscii);
2884 Vcharset_latin_viscii =
2885 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2886 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2887 build_string ("VISCII"),
2888 build_string ("VISCII 1.1 (Vietnamese)"),
2889 build_string ("VISCII 1.1 (Vietnamese)"),
2890 build_string ("VISCII1\\.1"),
2891 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2892 staticpro (&Vcharset_chinese_big5);
2893 Vcharset_chinese_big5 =
2894 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2895 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2896 build_string ("Big5"),
2897 build_string ("Big5"),
2898 build_string ("Big5 Chinese traditional"),
2899 build_string ("big5"),
2901 0 /* MIN_CHAR_BIG5_CDP */,
2902 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2903 Qnil, CONVERSION_IDENTICAL);
2904 staticpro (&Vcharset_chinese_big5_cdp);
2905 Vcharset_chinese_big5_cdp =
2906 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2907 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2908 build_string ("Big5-CDP"),
2909 build_string ("Big5 + CDP extension"),
2910 build_string ("Big5 with CDP extension"),
2911 build_string ("big5\\.cdp-0"),
2912 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2913 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): expands to two statements for HANZIKU part N -- a
   staticpro call on Vcharset_ideograph_hanziku_<n>, then an assignment
   of that variable from make_charset using LEADING_BYTE_HANZIKU_<n>,
   Qideograph_hanziku_<n> and the MIN_CHAR_HANZIKU_<n> /
   MAX_CHAR_HANZIKU_<n> bounds.  #n is stringized into the "HZK-",
   "HANZIKU-", "HANZIKU (pseudo BIG5 encoding) part " and "hanziku-"
   strings.
   The expansion already ends in ';' and is not wrapped in
   do { } while (0), so use it only as a standalone statement.
   NOTE(review): the ("hanziku-"#n"$") argument has no function name in
   front of it in this copy, whereas its siblings use build_string --
   confirm against the upstream file.  */
2914 #define DEF_HANZIKU(n) \
2915 staticpro (&Vcharset_ideograph_hanziku_##n); \
2916 Vcharset_ideograph_hanziku_##n = \
2917 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2918 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2919 build_string ("HZK-"#n), \
2920 build_string ("HANZIKU-"#n), \
2921 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2923 ("hanziku-"#n"$"), \
2924 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2925 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2938 staticpro (&Vcharset_china3_jef);
2939 Vcharset_china3_jef =
2940 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2941 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2942 build_string ("JC3"),
2943 build_string ("JEF + CHINA3"),
2944 build_string ("JEF + CHINA3 private characters"),
2945 build_string ("china3jef-0"),
2946 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2947 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2948 staticpro (&Vcharset_ideograph_cbeta);
2949 Vcharset_ideograph_cbeta =
2950 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2951 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2952 build_string ("CB"),
2953 build_string ("CBETA"),
2954 build_string ("CBETA private characters"),
2955 build_string ("cbeta-0"),
2956 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2957 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2958 staticpro (&Vcharset_ideograph_gt);
2959 Vcharset_ideograph_gt =
2960 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2961 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2962 build_string ("GT"),
2963 build_string ("GT"),
2964 build_string ("GT"),
2966 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2967 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): expands to two statements for GT pseudo-JIS part N -- a
   staticpro call on Vcharset_ideograph_gt_pj_<n>, then an assignment of
   that variable from make_charset using LEADING_BYTE_GT_PJ_<n> and
   Qideograph_gt_pj_<n>.  #n is stringized into the "GT-PJ-" and
   "GT ... part " strings and into the regexp alternatives "GTpj-<n>"
   and "jisx0208\.GT-<n>", both anchored with '$'.
   The expansion already ends in ';' and is not wrapped in
   do { } while (0), so use it only as a standalone statement.
   NOTE(review): the regexp argument has no function name in front of it
   in this copy, whereas its siblings use build_string -- confirm
   against the upstream file.  */
2968 #define DEF_GT_PJ(n) \
2969 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2970 Vcharset_ideograph_gt_pj_##n = \
2971 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2972 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2973 build_string ("GT-PJ-"#n), \
2974 build_string ("GT (pseudo JIS encoding) part "#n), \
2975 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2977 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2978 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2991 staticpro (&Vcharset_ideograph_daikanwa_2);
2992 Vcharset_ideograph_daikanwa_2 =
2993 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2994 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2995 build_string ("Daikanwa Rev."),
2996 build_string ("Morohashi's Daikanwa Rev."),
2998 ("Daikanwa dictionary (revised version)"),
2999 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3000 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3001 staticpro (&Vcharset_ideograph_daikanwa);
3002 Vcharset_ideograph_daikanwa =
3003 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3004 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3005 build_string ("Daikanwa"),
3006 build_string ("Morohashi's Daikanwa Rev.2"),
3008 ("Daikanwa dictionary (second revised version)"),
3009 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3010 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3011 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3013 staticpro (&Vcharset_ethiopic_ucs);
3014 Vcharset_ethiopic_ucs =
3015 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3016 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3017 build_string ("Ethiopic (UCS)"),
3018 build_string ("Ethiopic (UCS)"),
3019 build_string ("Ethiopic of UCS"),
3020 build_string ("Ethiopic-Unicode"),
3021 Qnil, 0x1200, 0x137F, 0, 0,
3022 Qnil, CONVERSION_IDENTICAL);
3024 staticpro (&Vcharset_chinese_big5_1);
3025 Vcharset_chinese_big5_1 =
3026 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3027 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3028 build_string ("Big5"),
3029 build_string ("Big5 (Level-1)"),
3031 ("Big5 Level-1 Chinese traditional"),
3032 build_string ("big5"),
3033 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_chinese_big5_2);
3035 Vcharset_chinese_big5_2 =
3036 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3037 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("Big5"),
3039 build_string ("Big5 (Level-2)"),
3041 ("Big5 Level-2 Chinese traditional"),
3042 build_string ("big5"),
3043 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3045 #ifdef ENABLE_COMPOSITE_CHARS
3046 /* #### For simplicity, we put composite chars into a 96x96 charset.
3047 This is going to lead to problems because you can run out of
3048 room, esp. as we don't yet recycle numbers. */
3049 staticpro (&Vcharset_composite);
3050 Vcharset_composite =
3051 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3052 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3053 build_string ("Composite"),
3054 build_string ("Composite characters"),
3055 build_string ("Composite characters"),
3058 /* #### not dumped properly */
3059 composite_char_row_next = 32;
3060 composite_char_col_next = 32;
3062 Vcomposite_char_string2char_hash_table =
3063 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3064 Vcomposite_char_char2string_hash_table =
3065 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3066 staticpro (&Vcomposite_char_string2char_hash_table);
3067 staticpro (&Vcomposite_char_char2string_hash_table);
3068 #endif /* ENABLE_COMPOSITE_CHARS */