1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_ucs_big5;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_ideograph_gt;
95 Lisp_Object Vcharset_ideograph_gt_pj_1;
96 Lisp_Object Vcharset_ideograph_gt_pj_2;
97 Lisp_Object Vcharset_ideograph_gt_pj_3;
98 Lisp_Object Vcharset_ideograph_gt_pj_4;
99 Lisp_Object Vcharset_ideograph_gt_pj_5;
100 Lisp_Object Vcharset_ideograph_gt_pj_6;
101 Lisp_Object Vcharset_ideograph_gt_pj_7;
102 Lisp_Object Vcharset_ideograph_gt_pj_8;
103 Lisp_Object Vcharset_ideograph_gt_pj_9;
104 Lisp_Object Vcharset_ideograph_gt_pj_10;
105 Lisp_Object Vcharset_ideograph_gt_pj_11;
106 Lisp_Object Vcharset_ideograph_daikanwa_2;
107 Lisp_Object Vcharset_ideograph_daikanwa;
108 Lisp_Object Vcharset_ethiopic_ucs;
110 Lisp_Object Vcharset_chinese_big5_1;
111 Lisp_Object Vcharset_chinese_big5_2;
113 #ifdef ENABLE_COMPOSITE_CHARS
114 Lisp_Object Vcharset_composite;
116 /* Hash tables for composite chars. One maps string representing
117 composed chars to their equivalent chars; one goes the
119 Lisp_Object Vcomposite_char_char2string_hash_table;
120 Lisp_Object Vcomposite_char_string2char_hash_table;
122 static int composite_char_row_next;
123 static int composite_char_col_next;
125 #endif /* ENABLE_COMPOSITE_CHARS */
127 struct charset_lookup *chlook;
129 static const struct lrecord_description charset_lookup_description_1[] = {
130 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
132 NUM_LEADING_BYTES+4*128
139 static const struct struct_description charset_lookup_description = {
140 sizeof (struct charset_lookup),
141 charset_lookup_description_1
145 /* Table of number of bytes in the string representation of a character
146 indexed by the first byte of that representation.
148 rep_bytes_by_first_byte(c) is more efficient than the equivalent
149 canonical computation:
151 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
153 const Bytecount rep_bytes_by_first_byte[0xA0] =
154 { /* 0x00 - 0x7f are for straight ASCII */
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 /* 0x80 - 0x8f are for Dimension-1 official charsets */
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 /* 0x90 - 0x9d are for Dimension-2 official charsets */
170 /* 0x9e is for Dimension-1 private charsets */
171 /* 0x9f is for Dimension-2 private charsets */
172 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
178 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
180 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
184 if (XVECTOR_LENGTH (v) > ccs_len)
187 for (i = 0; i < XVECTOR_LENGTH (v); i++)
189 Lisp_Object c = XVECTOR_DATA(v)[i];
191 if (!NILP (c) && !CHARP (c))
195 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
207 put_char_ccs_code_point (Lisp_Object character,
208 Lisp_Object ccs, Lisp_Object value)
210 if (!EQ (XCHARSET_NAME (ccs), Qucs)
212 || (XCHAR (character) != XINT (value)))
214 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
218 { /* obsolete representation: value must be a list of bytes */
219 Lisp_Object ret = Fcar (value);
223 signal_simple_error ("Invalid value for coded-charset", value);
224 code_point = XINT (ret);
225 if (XCHARSET_GRAPHIC (ccs) == 1)
233 signal_simple_error ("Invalid value for coded-charset",
237 signal_simple_error ("Invalid value for coded-charset",
240 if (XCHARSET_GRAPHIC (ccs) == 1)
242 code_point = (code_point << 8) | j;
245 value = make_int (code_point);
247 else if (INTP (value))
249 code_point = XINT (value);
250 if (XCHARSET_GRAPHIC (ccs) == 1)
252 code_point &= 0x7F7F7F7F;
253 value = make_int (code_point);
257 signal_simple_error ("Invalid value for coded-charset", value);
261 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
264 decoding_table_remove_char (ccs, XINT (cpos));
267 decoding_table_put_char (ccs, code_point, character);
273 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
275 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
276 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
278 if (VECTORP (decoding_table))
280 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
284 decoding_table_remove_char (ccs, XINT (cpos));
287 if (CHAR_TABLEP (encoding_table))
289 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
297 int leading_code_private_11;
300 Lisp_Object Qcharsetp;
302 /* Qdoc_string, Qdimension, Qchars defined in general.c */
303 Lisp_Object Qregistry, Qfinal, Qgraphic;
304 Lisp_Object Qdirection;
305 Lisp_Object Qreverse_direction_charset;
306 Lisp_Object Qleading_byte;
307 Lisp_Object Qshort_name, Qlong_name;
309 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
310 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
327 Qjapanese_jisx0208_1978,
331 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
354 /* Qchinese_big5_cdp, */
355 Qideograph_hanziku_1,
356 Qideograph_hanziku_2,
357 Qideograph_hanziku_3,
358 Qideograph_hanziku_4,
359 Qideograph_hanziku_5,
360 Qideograph_hanziku_6,
361 Qideograph_hanziku_7,
362 Qideograph_hanziku_8,
363 Qideograph_hanziku_9,
364 Qideograph_hanziku_10,
365 Qideograph_hanziku_11,
366 Qideograph_hanziku_12,
367 Qideograph_daikanwa_2,
387 Lisp_Object Ql2r, Qr2l;
389 Lisp_Object Vcharset_hash_table;
391 /* Composite characters are characters constructed by overstriking two
392 or more regular characters.
394 1) The old Mule implementation involves storing composite characters
395 in a buffer as a tag followed by all of the actual characters
396 used to make up the composite character. I think this is a bad
397 idea; it greatly complicates code that wants to handle strings
398 one character at a time because it has to deal with the possibility
399 of great big ungainly characters. It's much more reasonable to
400 simply store an index into a table of composite characters.
402 2) The current implementation only allows for 16,384 separate
403 composite characters over the lifetime of the XEmacs process.
404 This could become a potential problem if the user
405 edited lots of different files that use composite characters.
406 Due to FSF bogosity, increasing the number of allowable
407 composite characters under Mule would decrease the number
408 of possible faces that can exist. Mule already has shrunk
409 this to 2048, and further shrinkage would become uncomfortable.
410 No such problems exist in XEmacs.
412 Composite characters could be represented as 0x80 C1 C2 C3,
413 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
414 for slightly under 2^20 (one million) composite characters
415 over the XEmacs process lifetime, and you only need to
416 increase the size of a Mule character from 19 to 21 bits.
417 Or you could use 0x80 C1 C2 C3 C4, allowing for about
418 85 million (slightly over 2^26) composite characters. */
421 /************************************************************************/
422 /* Basic Emchar functions */
423 /************************************************************************/
425 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
426 string in STR. Returns the number of bytes stored.
427 Do not call this directly. Use the macro set_charptr_emchar() instead.
431 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
446 else if ( c <= 0x7ff )
448 *p++ = (c >> 6) | 0xc0;
449 *p++ = (c & 0x3f) | 0x80;
451 else if ( c <= 0xffff )
453 *p++ = (c >> 12) | 0xe0;
454 *p++ = ((c >> 6) & 0x3f) | 0x80;
455 *p++ = (c & 0x3f) | 0x80;
457 else if ( c <= 0x1fffff )
459 *p++ = (c >> 18) | 0xf0;
460 *p++ = ((c >> 12) & 0x3f) | 0x80;
461 *p++ = ((c >> 6) & 0x3f) | 0x80;
462 *p++ = (c & 0x3f) | 0x80;
464 else if ( c <= 0x3ffffff )
466 *p++ = (c >> 24) | 0xf8;
467 *p++ = ((c >> 18) & 0x3f) | 0x80;
468 *p++ = ((c >> 12) & 0x3f) | 0x80;
469 *p++ = ((c >> 6) & 0x3f) | 0x80;
470 *p++ = (c & 0x3f) | 0x80;
474 *p++ = (c >> 30) | 0xfc;
475 *p++ = ((c >> 24) & 0x3f) | 0x80;
476 *p++ = ((c >> 18) & 0x3f) | 0x80;
477 *p++ = ((c >> 12) & 0x3f) | 0x80;
478 *p++ = ((c >> 6) & 0x3f) | 0x80;
479 *p++ = (c & 0x3f) | 0x80;
482 BREAKUP_CHAR (c, charset, c1, c2);
483 lb = CHAR_LEADING_BYTE (c);
484 if (LEADING_BYTE_PRIVATE_P (lb))
485 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
487 if (EQ (charset, Vcharset_control_1))
496 /* Return the first character from a Mule-encoded string in STR,
497 assuming it's non-ASCII. Do not call this directly.
498 Use the macro charptr_emchar() instead. */
501 non_ascii_charptr_emchar (const Bufbyte *str)
514 else if ( b >= 0xf8 )
519 else if ( b >= 0xf0 )
524 else if ( b >= 0xe0 )
529 else if ( b >= 0xc0 )
539 for( ; len > 0; len-- )
542 ch = ( ch << 6 ) | ( b & 0x3f );
546 Bufbyte i0 = *str, i1, i2 = 0;
549 if (i0 == LEADING_BYTE_CONTROL_1)
550 return (Emchar) (*++str - 0x20);
552 if (LEADING_BYTE_PREFIX_P (i0))
557 charset = CHARSET_BY_LEADING_BYTE (i0);
558 if (XCHARSET_DIMENSION (charset) == 2)
561 return MAKE_CHAR (charset, i1, i2);
565 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
566 Do not call this directly. Use the macro valid_char_p() instead. */
570 non_ascii_valid_char_p (Emchar ch)
574 /* Must have only lowest 19 bits set */
578 f1 = CHAR_FIELD1 (ch);
579 f2 = CHAR_FIELD2 (ch);
580 f3 = CHAR_FIELD3 (ch);
586 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
587 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
588 f2 > MAX_CHAR_FIELD2_PRIVATE)
593 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
594 f2 <= MAX_CHAR_FIELD2_PRIVATE))
598 NOTE: This takes advantage of the fact that
599 FIELD2_TO_OFFICIAL_LEADING_BYTE and
600 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
602 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
603 if (EQ (charset, Qnil))
605 return (XCHARSET_CHARS (charset) == 96);
611 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
612 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
613 f1 > MAX_CHAR_FIELD1_PRIVATE)
615 if (f2 < 0x20 || f3 < 0x20)
618 #ifdef ENABLE_COMPOSITE_CHARS
619 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
621 if (UNBOUNDP (Fgethash (make_int (ch),
622 Vcomposite_char_char2string_hash_table,
627 #endif /* ENABLE_COMPOSITE_CHARS */
629 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
630 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
633 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
635 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
640 if (EQ (charset, Qnil))
642 return (XCHARSET_CHARS (charset) == 96);
648 /************************************************************************/
649 /* Basic string functions */
650 /************************************************************************/
652 /* Copy the character pointed to by SRC into DST. Do not call this
653 directly. Use the macro charptr_copy_char() instead.
654 Return the number of bytes copied. */
657 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
659 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
661 for (i = bytes; i; i--, dst++, src++)
667 /************************************************************************/
668 /* streams of Emchars */
669 /************************************************************************/
671 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
672 The functions below are not meant to be called directly; use
673 the macros in insdel.h. */
676 Lstream_get_emchar_1 (Lstream *stream, int ch)
678 Bufbyte str[MAX_EMCHAR_LEN];
679 Bufbyte *strptr = str;
682 str[0] = (Bufbyte) ch;
684 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
686 int c = Lstream_getc (stream);
687 bufpos_checking_assert (c >= 0);
688 *++strptr = (Bufbyte) c;
690 return charptr_emchar (str);
694 Lstream_fput_emchar (Lstream *stream, Emchar ch)
696 Bufbyte str[MAX_EMCHAR_LEN];
697 Bytecount len = set_charptr_emchar (str, ch);
698 return Lstream_write (stream, str, len);
702 Lstream_funget_emchar (Lstream *stream, Emchar ch)
704 Bufbyte str[MAX_EMCHAR_LEN];
705 Bytecount len = set_charptr_emchar (str, ch);
706 Lstream_unread (stream, str, len);
710 /************************************************************************/
712 /************************************************************************/
715 mark_charset (Lisp_Object obj)
717 Lisp_Charset *cs = XCHARSET (obj);
719 mark_object (cs->short_name);
720 mark_object (cs->long_name);
721 mark_object (cs->doc_string);
722 mark_object (cs->registry);
723 mark_object (cs->ccl_program);
725 mark_object (cs->decoding_table);
726 mark_object (cs->mother);
732 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
734 Lisp_Charset *cs = XCHARSET (obj);
738 error ("printing unreadable object #<charset %s 0x%x>",
739 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
742 write_c_string ("#<charset ", printcharfun);
743 print_internal (CHARSET_NAME (cs), printcharfun, 0);
744 write_c_string (" ", printcharfun);
745 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
746 write_c_string (" ", printcharfun);
747 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
748 write_c_string (" ", printcharfun);
749 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
750 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
752 CHARSET_DIMENSION (cs),
753 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
754 CHARSET_COLUMNS (cs),
755 CHARSET_GRAPHIC (cs),
757 write_c_string (buf, printcharfun);
758 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
759 sprintf (buf, " 0x%x>", cs->header.uid);
760 write_c_string (buf, printcharfun);
763 static const struct lrecord_description charset_description[] = {
764 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
778 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
779 mark_charset, print_charset, 0, 0, 0,
783 /* Make a new charset. */
784 /* #### SJT Should generic properties be allowed? */
786 make_charset (Charset_ID id, Lisp_Object name,
787 unsigned short chars, unsigned char dimension,
788 unsigned char columns, unsigned char graphic,
789 Bufbyte final, unsigned char direction, Lisp_Object short_name,
790 Lisp_Object long_name, Lisp_Object doc,
792 Lisp_Object decoding_table,
793 Emchar min_code, Emchar max_code,
794 Emchar code_offset, unsigned char byte_offset,
795 Lisp_Object mother, unsigned char conversion)
798 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
802 XSETCHARSET (obj, cs);
804 CHARSET_ID (cs) = id;
805 CHARSET_NAME (cs) = name;
806 CHARSET_SHORT_NAME (cs) = short_name;
807 CHARSET_LONG_NAME (cs) = long_name;
808 CHARSET_CHARS (cs) = chars;
809 CHARSET_DIMENSION (cs) = dimension;
810 CHARSET_DIRECTION (cs) = direction;
811 CHARSET_COLUMNS (cs) = columns;
812 CHARSET_GRAPHIC (cs) = graphic;
813 CHARSET_FINAL (cs) = final;
814 CHARSET_DOC_STRING (cs) = doc;
815 CHARSET_REGISTRY (cs) = reg;
816 CHARSET_CCL_PROGRAM (cs) = Qnil;
817 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
819 CHARSET_DECODING_TABLE(cs) = Qunbound;
820 CHARSET_MIN_CODE (cs) = min_code;
821 CHARSET_MAX_CODE (cs) = max_code;
822 CHARSET_CODE_OFFSET (cs) = code_offset;
823 CHARSET_BYTE_OFFSET (cs) = byte_offset;
824 CHARSET_MOTHER (cs) = mother;
825 CHARSET_CONVERSION (cs) = conversion;
829 if (id == LEADING_BYTE_ASCII)
830 CHARSET_REP_BYTES (cs) = 1;
832 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
834 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
839 /* some charsets do not have final characters. This includes
840 ASCII, Control-1, Composite, and the two faux private
842 unsigned char iso2022_type
843 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
845 if (code_offset == 0)
847 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
848 chlook->charset_by_attributes[iso2022_type][final] = obj;
852 (chlook->charset_by_attributes[iso2022_type][final][direction]));
853 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
857 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
858 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
860 /* Some charsets are "faux" and don't have names or really exist at
861 all except in the leading-byte table. */
863 Fputhash (name, obj, Vcharset_hash_table);
868 get_unallocated_leading_byte (int dimension)
873 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
876 lb = chlook->next_allocated_leading_byte++;
880 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
883 lb = chlook->next_allocated_1_byte_leading_byte++;
887 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
890 lb = chlook->next_allocated_2_byte_leading_byte++;
896 ("No more character sets free for this dimension",
897 make_int (dimension));
903 /* Number of Big5 characters which have the same code in 1st byte. */
905 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
908 decode_defined_char (Lisp_Object ccs, int code_point)
910 int dim = XCHARSET_DIMENSION (ccs);
911 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
919 = get_ccs_octet_table (decoding_table, ccs,
920 (code_point >> (dim * 8)) & 255);
922 if (CHARP (decoding_table))
923 return XCHAR (decoding_table);
926 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
928 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
930 if ( EQ (mother, Vcharset_ucs) )
931 return DECODE_CHAR (mother, code_point);
933 return decode_defined_char (mother, code_point);
940 decode_builtin_char (Lisp_Object charset, int code_point)
942 Lisp_Object mother = XCHARSET_MOTHER (charset);
945 if ( XCHARSET_MAX_CODE (charset) > 0 )
947 if ( CHARSETP (mother) )
949 int code = code_point;
951 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
953 int row = code_point >> 8;
954 int cell = code_point & 255;
958 else if (row < 16 + 32 + 30)
959 code = (row - (16 + 32)) * 94 + cell - 33;
960 else if (row < 18 + 32 + 30)
962 else if (row < 18 + 32 + 60)
963 code = (row - (18 + 32)) * 94 + cell - 33;
965 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
967 int plane = code_point >> 16;
968 int row = (code_point >> 8) & 255;
969 int cell = code_point & 255;
973 else if (row < 16 + 32 + 30)
975 = (plane - 33) * 94 * 60
976 + (row - (16 + 32)) * 94
978 else if (row < 18 + 32 + 30)
980 else if (row < 18 + 32 + 60)
982 = (plane - 33) * 94 * 60
983 + (row - (18 + 32)) * 94
987 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
992 = (XCHARSET_DIMENSION (charset) == 1
994 code_point - XCHARSET_BYTE_OFFSET (charset)
996 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
997 * XCHARSET_CHARS (charset)
998 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
999 + XCHARSET_CODE_OFFSET (charset);
1000 if ((cid < XCHARSET_MIN_CODE (charset))
1001 || (XCHARSET_MAX_CODE (charset) < cid))
1006 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1008 if (XCHARSET_DIMENSION (charset) == 1)
1010 switch (XCHARSET_CHARS (charset))
1014 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1017 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1025 switch (XCHARSET_CHARS (charset))
1028 return MIN_CHAR_94x94
1029 + (final - '0') * 94 * 94
1030 + (((code_point >> 8) & 0x7F) - 33) * 94
1031 + ((code_point & 0x7F) - 33);
1033 return MIN_CHAR_96x96
1034 + (final - '0') * 96 * 96
1035 + (((code_point >> 8) & 0x7F) - 32) * 96
1036 + ((code_point & 0x7F) - 32);
1048 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1050 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1053 if ( CHAR_TABLEP (encoding_table)
1054 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1059 Lisp_Object mother = XCHARSET_MOTHER (charset);
1060 int min = XCHARSET_MIN_CODE (charset);
1061 int max = XCHARSET_MAX_CODE (charset);
1064 if ( CHARSETP (mother) )
1066 if (XCHARSET_FINAL (charset) >= '0')
1067 code = charset_code_point (mother, ch, 1);
1069 code = charset_code_point (mother, ch, defined_only);
1071 else if (defined_only)
1073 else if ( ((max == 0) && CHARSETP (mother)
1074 && (XCHARSET_FINAL (charset) == 0))
1075 || ((min <= ch) && (ch <= max)) )
1077 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1078 || ((min <= code) && (code <= max)) )
1080 int d = code - XCHARSET_CODE_OFFSET (charset);
1082 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1084 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1086 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1088 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1091 int cell = d % 94 + 33;
1097 return (row << 8) | cell;
1099 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1100 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1101 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1102 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1103 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1105 int plane = d / (94 * 60) + 33;
1106 int row = (d % (94 * 60)) / 94;
1107 int cell = d % 94 + 33;
1113 return (plane << 16) | (row << 8) | cell;
1115 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1117 ( (d / (94 * 94) + 33) << 16)
1118 | ((d / 94 % 94 + 33) << 8)
1120 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1122 ( (d / (96 * 96) + 32) << 16)
1123 | ((d / 96 % 96 + 32) << 8)
1125 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1127 ( (d / (94 * 94 * 94) + 33) << 24)
1128 | ((d / (94 * 94) % 94 + 33) << 16)
1129 | ((d / 94 % 94 + 33) << 8)
1131 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1133 ( (d / (96 * 96 * 96) + 32) << 24)
1134 | ((d / (96 * 96) % 96 + 32) << 16)
1135 | ((d / 96 % 96 + 32) << 8)
1139 printf ("Unknown CCS-conversion %d is specified!",
1140 XCHARSET_CONVERSION (charset));
1144 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1145 ( XCHARSET_MIN_CODE (charset) == 0 )
1147 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1148 (XCHARSET_CODE_OFFSET (charset)
1149 == XCHARSET_MIN_CODE (charset))
1154 if (XCHARSET_DIMENSION (charset) == 1)
1156 if (XCHARSET_CHARS (charset) == 94)
1158 if (((d = ch - (MIN_CHAR_94
1159 + (XCHARSET_FINAL (charset) - '0') * 94))
1164 else if (XCHARSET_CHARS (charset) == 96)
1166 if (((d = ch - (MIN_CHAR_96
1167 + (XCHARSET_FINAL (charset) - '0') * 96))
1175 else if (XCHARSET_DIMENSION (charset) == 2)
1177 if (XCHARSET_CHARS (charset) == 94)
1179 if (((d = ch - (MIN_CHAR_94x94
1181 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1184 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1186 else if (XCHARSET_CHARS (charset) == 96)
1188 if (((d = ch - (MIN_CHAR_96x96
1190 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1193 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1204 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1206 if (c <= MAX_CHAR_BASIC_LATIN)
1208 *charset = Vcharset_ascii;
1213 *charset = Vcharset_control_1;
1218 *charset = Vcharset_latin_iso8859_1;
1222 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1224 *charset = Vcharset_hebrew_iso8859_8;
1225 return c - MIN_CHAR_HEBREW + 0x20;
1228 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1230 *charset = Vcharset_thai_tis620;
1231 return c - MIN_CHAR_THAI + 0x20;
1234 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1235 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1237 return list2 (Vcharset_katakana_jisx0201,
1238 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1241 else if (c <= MAX_CHAR_BMP)
1243 *charset = Vcharset_ucs_bmp;
1246 else if (c <= MAX_CHAR_SMP)
1248 *charset = Vcharset_ucs_smp;
1249 return c - MIN_CHAR_SMP;
1251 else if (c <= MAX_CHAR_SIP)
1253 *charset = Vcharset_ucs_sip;
1254 return c - MIN_CHAR_SIP;
1256 else if (c < MIN_CHAR_DAIKANWA)
1258 *charset = Vcharset_ucs;
1261 else if (c <= MAX_CHAR_DAIKANWA)
1263 *charset = Vcharset_ideograph_daikanwa;
1264 return c - MIN_CHAR_DAIKANWA;
1266 else if (c < MIN_CHAR_94)
1268 *charset = Vcharset_ucs;
1271 else if (c <= MAX_CHAR_94)
1273 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1274 ((c - MIN_CHAR_94) / 94) + '0',
1275 CHARSET_LEFT_TO_RIGHT);
1276 if (!NILP (*charset))
1277 return ((c - MIN_CHAR_94) % 94) + 33;
1280 *charset = Vcharset_ucs;
1284 else if (c <= MAX_CHAR_96)
1286 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1287 ((c - MIN_CHAR_96) / 96) + '0',
1288 CHARSET_LEFT_TO_RIGHT);
1289 if (!NILP (*charset))
1290 return ((c - MIN_CHAR_96) % 96) + 32;
1293 *charset = Vcharset_ucs;
1297 else if (c <= MAX_CHAR_94x94)
1300 = CHARSET_BY_ATTRIBUTES (94, 2,
1301 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1302 CHARSET_LEFT_TO_RIGHT);
1303 if (!NILP (*charset))
1304 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1305 | (((c - MIN_CHAR_94x94) % 94) + 33);
1308 *charset = Vcharset_ucs;
1312 else if (c <= MAX_CHAR_96x96)
1315 = CHARSET_BY_ATTRIBUTES (96, 2,
1316 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1317 CHARSET_LEFT_TO_RIGHT);
1318 if (!NILP (*charset))
1319 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1320 | (((c - MIN_CHAR_96x96) % 96) + 32);
1323 *charset = Vcharset_ucs;
1329 *charset = Vcharset_ucs;
1334 Lisp_Object Vdefault_coded_charset_priority_list;
1338 /************************************************************************/
1339 /* Basic charset Lisp functions */
1340 /************************************************************************/
1342 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1343 Return non-nil if OBJECT is a charset.
1347 return CHARSETP (object) ? Qt : Qnil;
1350 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1351 Retrieve the charset of the given name.
1352 If CHARSET-OR-NAME is a charset object, it is simply returned.
1353 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1354 nil is returned. Otherwise the associated charset object is returned.
1358 if (CHARSETP (charset_or_name))
1359 return charset_or_name;
1361 CHECK_SYMBOL (charset_or_name);
1362 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1365 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1366 Retrieve the charset of the given name.
1367 Same as `find-charset' except an error is signalled if there is no such
1368 charset instead of returning nil.
1372 Lisp_Object charset = Ffind_charset (name);
1375 signal_simple_error ("No such charset", name);
1379 /* We store the charsets in hash tables with the names as the key and the
1380 actual charset object as the value. Occasionally we need to use them
1381 in a list format. These routines provide us with that. */
1382 struct charset_list_closure
1384 Lisp_Object *charset_list;
1388 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1389 void *charset_list_closure)
1391 /* This function can GC */
1392 struct charset_list_closure *chcl =
1393 (struct charset_list_closure*) charset_list_closure;
1394 Lisp_Object *charset_list = chcl->charset_list;
1396 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1400 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1401 Return a list of the names of all defined charsets.
1405 Lisp_Object charset_list = Qnil;
1406 struct gcpro gcpro1;
1407 struct charset_list_closure charset_list_closure;
1409 GCPRO1 (charset_list);
1410 charset_list_closure.charset_list = &charset_list;
1411 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1412 &charset_list_closure);
1415 return charset_list;
1418 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1419 Return the name of charset CHARSET.
1423 return XCHARSET_NAME (Fget_charset (charset));
1426 /* #### SJT Should generic properties be allowed? */
1427 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1428 Define a new character set.
1429 This function is for use with Mule support.
1430 NAME is a symbol, the name by which the character set is normally referred.
1431 DOC-STRING is a string describing the character set.
1432 PROPS is a property list, describing the specific nature of the
1433 character set. Recognized properties are:
1435 'short-name Short version of the charset name (ex: Latin-1)
1436 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1437 'registry A regular expression matching the font registry field for
1439 'dimension Number of octets used to index a character in this charset.
1440 Either 1 or 2. Defaults to 1.
1441 If UTF-2000 feature is enabled, 3 or 4 are also available.
1442 'columns Number of columns used to display a character in this charset.
1443 Only used in TTY mode. (Under X, the actual width of a
1444 character can be derived from the font used to display the
1445 characters.) If unspecified, defaults to the dimension
1446 (this is almost always the correct value).
1447 'chars Number of characters in each dimension (94 or 96).
1448 Defaults to 94. Note that if the dimension is 2, the
1449 character set thus described is 94x94 or 96x96.
1450 If UTF-2000 feature is enabled, 128 or 256 are also available.
1451 'final Final byte of ISO 2022 escape sequence. Must be
1452 supplied. Each combination of (DIMENSION, CHARS) defines a
1453 separate namespace for final bytes. Note that ISO
1454 2022 restricts the final byte to the range
1455 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1456 dimension == 2. Note also that final bytes in the range
1457 0x30 - 0x3F are reserved for user-defined (not official)
1459 'graphic 0 (use left half of font on output) or 1 (use right half
1460 of font on output). Defaults to 0. For example, for
1461 a font whose registry is ISO8859-1, the left half
1462 (octets 0x20 - 0x7F) is the `ascii' character set, while
1463 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1464 character set. With 'graphic set to 0, the octets
1465 will have their high bit cleared; with it set to 1,
1466 the octets will have their high bit set.
1467 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1469 'ccl-program A compiled CCL program used to convert a character in
1470 this charset into an index into the font. This is in
1471 addition to the 'graphic property. The CCL program
1472 is passed the octets of the character, with the high
1473 bit cleared and set depending upon whether the value
1474 of the 'graphic property is 0 or 1.
1475 'mother [UTF-2000 only] Base coded-charset.
1476 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1477 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1478 'code-offset [UTF-2000 only] Offset for a code-point of a base
1480 'conversion [UTF-2000 only] Conversion for a code-point of a base
1481 coded-charset (94x60 or 94x94x60).
1483 (name, doc_string, props))
1485 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1486 int direction = CHARSET_LEFT_TO_RIGHT;
1487 Lisp_Object registry = Qnil;
1488 Lisp_Object charset;
1489 Lisp_Object ccl_program = Qnil;
1490 Lisp_Object short_name = Qnil, long_name = Qnil;
1491 Lisp_Object mother = Qnil;
1492 int min_code = 0, max_code = 0, code_offset = 0;
1493 int byte_offset = -1;
1496 CHECK_SYMBOL (name);
1497 if (!NILP (doc_string))
1498 CHECK_STRING (doc_string);
1500 charset = Ffind_charset (name);
1501 if (!NILP (charset))
1502 signal_simple_error ("Cannot redefine existing charset", name);
1505 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1507 if (EQ (keyword, Qshort_name))
1509 CHECK_STRING (value);
1513 if (EQ (keyword, Qlong_name))
1515 CHECK_STRING (value);
1519 else if (EQ (keyword, Qdimension))
1522 dimension = XINT (value);
1523 if (dimension < 1 ||
1530 signal_simple_error ("Invalid value for 'dimension", value);
1533 else if (EQ (keyword, Qchars))
1536 chars = XINT (value);
1537 if (chars != 94 && chars != 96
1539 && chars != 128 && chars != 256
1542 signal_simple_error ("Invalid value for 'chars", value);
1545 else if (EQ (keyword, Qcolumns))
1548 columns = XINT (value);
1549 if (columns != 1 && columns != 2)
1550 signal_simple_error ("Invalid value for 'columns", value);
1553 else if (EQ (keyword, Qgraphic))
1556 graphic = XINT (value);
1564 signal_simple_error ("Invalid value for 'graphic", value);
1567 else if (EQ (keyword, Qregistry))
1569 CHECK_STRING (value);
1573 else if (EQ (keyword, Qdirection))
1575 if (EQ (value, Ql2r))
1576 direction = CHARSET_LEFT_TO_RIGHT;
1577 else if (EQ (value, Qr2l))
1578 direction = CHARSET_RIGHT_TO_LEFT;
1580 signal_simple_error ("Invalid value for 'direction", value);
1583 else if (EQ (keyword, Qfinal))
1585 CHECK_CHAR_COERCE_INT (value);
1586 final = XCHAR (value);
1587 if (final < '0' || final > '~')
1588 signal_simple_error ("Invalid value for 'final", value);
1592 else if (EQ (keyword, Qmother))
1594 mother = Fget_charset (value);
1597 else if (EQ (keyword, Qmin_code))
1600 min_code = XUINT (value);
1603 else if (EQ (keyword, Qmax_code))
1606 max_code = XUINT (value);
1609 else if (EQ (keyword, Qcode_offset))
1612 code_offset = XUINT (value);
1615 else if (EQ (keyword, Qconversion))
1617 if (EQ (value, Q94x60))
1618 conversion = CONVERSION_94x60;
1619 else if (EQ (value, Q94x94x60))
1620 conversion = CONVERSION_94x94x60;
1622 signal_simple_error ("Unrecognized conversion", value);
1626 else if (EQ (keyword, Qccl_program))
1628 struct ccl_program test_ccl;
1630 if (setup_ccl_program (&test_ccl, value) < 0)
1631 signal_simple_error ("Invalid value for 'ccl-program", value);
1632 ccl_program = value;
1636 signal_simple_error ("Unrecognized property", keyword);
1642 error ("'final must be specified");
1644 if (dimension == 2 && final > 0x5F)
1646 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1649 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1650 CHARSET_LEFT_TO_RIGHT)) ||
1651 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1652 CHARSET_RIGHT_TO_LEFT)))
1654 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1656 id = get_unallocated_leading_byte (dimension);
1658 if (NILP (doc_string))
1659 doc_string = build_string ("");
1661 if (NILP (registry))
1662 registry = build_string ("");
1664 if (NILP (short_name))
1665 XSETSTRING (short_name, XSYMBOL (name)->name);
1667 if (NILP (long_name))
1668 long_name = doc_string;
1671 columns = dimension;
1673 if (byte_offset < 0)
1677 else if (chars == 96)
1683 charset = make_charset (id, name, chars, dimension, columns, graphic,
1684 final, direction, short_name, long_name,
1685 doc_string, registry,
1686 Qnil, min_code, max_code, code_offset, byte_offset,
1687 mother, conversion);
1688 if (!NILP (ccl_program))
1689 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1693 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1695 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1696 NEW-NAME is the name of the new charset. Return the new charset.
1698 (charset, new_name))
1700 Lisp_Object new_charset = Qnil;
1701 int id, chars, dimension, columns, graphic, final;
1703 Lisp_Object registry, doc_string, short_name, long_name;
1706 charset = Fget_charset (charset);
1707 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1708 signal_simple_error ("Charset already has reverse-direction charset",
1711 CHECK_SYMBOL (new_name);
1712 if (!NILP (Ffind_charset (new_name)))
1713 signal_simple_error ("Cannot redefine existing charset", new_name);
1715 cs = XCHARSET (charset);
1717 chars = CHARSET_CHARS (cs);
1718 dimension = CHARSET_DIMENSION (cs);
1719 columns = CHARSET_COLUMNS (cs);
1720 id = get_unallocated_leading_byte (dimension);
1722 graphic = CHARSET_GRAPHIC (cs);
1723 final = CHARSET_FINAL (cs);
1724 direction = CHARSET_RIGHT_TO_LEFT;
1725 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1726 direction = CHARSET_LEFT_TO_RIGHT;
1727 doc_string = CHARSET_DOC_STRING (cs);
1728 short_name = CHARSET_SHORT_NAME (cs);
1729 long_name = CHARSET_LONG_NAME (cs);
1730 registry = CHARSET_REGISTRY (cs);
1732 new_charset = make_charset (id, new_name, chars, dimension, columns,
1733 graphic, final, direction, short_name, long_name,
1734 doc_string, registry,
1736 CHARSET_DECODING_TABLE(cs),
1737 CHARSET_MIN_CODE(cs),
1738 CHARSET_MAX_CODE(cs),
1739 CHARSET_CODE_OFFSET(cs),
1740 CHARSET_BYTE_OFFSET(cs),
1742 CHARSET_CONVERSION (cs)
1744 Qnil, 0, 0, 0, 0, Qnil, 0
1748 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1749 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1754 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1755 Define symbol ALIAS as an alias for CHARSET.
1759 CHECK_SYMBOL (alias);
1760 charset = Fget_charset (charset);
1761 return Fputhash (alias, charset, Vcharset_hash_table);
1764 /* #### Reverse direction charsets not yet implemented. */
1766 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1768 Return the reverse-direction charset parallel to CHARSET, if any.
1769 This is the charset with the same properties (in particular, the same
1770 dimension, number of characters per dimension, and final byte) as
1771 CHARSET but whose characters are displayed in the opposite direction.
1775 charset = Fget_charset (charset);
1776 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1780 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1781 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1782 If DIRECTION is omitted, both directions will be checked (left-to-right
1783 will be returned if character sets exist for both directions).
1785 (dimension, chars, final, direction))
1787 int dm, ch, fi, di = -1;
1788 Lisp_Object obj = Qnil;
1790 CHECK_INT (dimension);
1791 dm = XINT (dimension);
1792 if (dm < 1 || dm > 2)
1793 signal_simple_error ("Invalid value for DIMENSION", dimension);
1797 if (ch != 94 && ch != 96)
1798 signal_simple_error ("Invalid value for CHARS", chars);
1800 CHECK_CHAR_COERCE_INT (final);
1802 if (fi < '0' || fi > '~')
1803 signal_simple_error ("Invalid value for FINAL", final);
1805 if (EQ (direction, Ql2r))
1806 di = CHARSET_LEFT_TO_RIGHT;
1807 else if (EQ (direction, Qr2l))
1808 di = CHARSET_RIGHT_TO_LEFT;
1809 else if (!NILP (direction))
1810 signal_simple_error ("Invalid value for DIRECTION", direction);
1812 if (dm == 2 && fi > 0x5F)
1814 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1818 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1820 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1823 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1826 return XCHARSET_NAME (obj);
1830 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1831 Return short name of CHARSET.
1835 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1838 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1839 Return long name of CHARSET.
1843 return XCHARSET_LONG_NAME (Fget_charset (charset));
1846 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1847 Return description of CHARSET.
1851 return XCHARSET_DOC_STRING (Fget_charset (charset));
1854 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1855 Return dimension of CHARSET.
1859 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1862 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1863 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1864 Recognized properties are those listed in `make-charset', as well as
1865 'name and 'doc-string.
1871 charset = Fget_charset (charset);
1872 cs = XCHARSET (charset);
1874 CHECK_SYMBOL (prop);
1875 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1876 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1877 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1878 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1879 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1880 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1881 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1882 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1883 Qnil : make_char (CHARSET_FINAL (cs));
1884 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1885 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1886 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1887 if (EQ (prop, Qdirection))
1888 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1889 if (EQ (prop, Qreverse_direction_charset))
1891 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1892 /* #### Is this translation OK? If so, error checking sufficient? */
1893 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1896 if (EQ (prop, Qmother))
1897 return CHARSET_MOTHER (cs);
1898 if (EQ (prop, Qmin_code))
1899 return make_int (CHARSET_MIN_CODE (cs));
1900 if (EQ (prop, Qmax_code))
1901 return make_int (CHARSET_MAX_CODE (cs));
1903 signal_simple_error ("Unrecognized charset property name", prop);
1904 return Qnil; /* not reached */
1907 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1908 Return charset identification number of CHARSET.
1912 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1915 /* #### We need to figure out which properties we really want to
1918 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1919 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1921 (charset, ccl_program))
1923 struct ccl_program test_ccl;
1925 charset = Fget_charset (charset);
1926 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1927 signal_simple_error ("Invalid ccl-program", ccl_program);
1928 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1933 invalidate_charset_font_caches (Lisp_Object charset)
1935 /* Invalidate font cache entries for charset on all devices. */
1936 Lisp_Object devcons, concons, hash_table;
1937 DEVICE_LOOP_NO_BREAK (devcons, concons)
1939 struct device *d = XDEVICE (XCAR (devcons));
1940 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1941 if (!UNBOUNDP (hash_table))
1942 Fclrhash (hash_table);
1946 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1947 Set the 'registry property of CHARSET to REGISTRY.
1949 (charset, registry))
1951 charset = Fget_charset (charset);
1952 CHECK_STRING (registry);
1953 XCHARSET_REGISTRY (charset) = registry;
1954 invalidate_charset_font_caches (charset);
1955 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1960 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1961 Return mapping-table of CHARSET.
1965 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1968 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1969 Set mapping-table of CHARSET to TABLE.
1973 struct Lisp_Charset *cs;
1977 charset = Fget_charset (charset);
1978 cs = XCHARSET (charset);
1982 CHARSET_DECODING_TABLE(cs) = Qnil;
1985 else if (VECTORP (table))
1987 int ccs_len = CHARSET_BYTE_SIZE (cs);
1988 int ret = decoding_table_check_elements (table,
1989 CHARSET_DIMENSION (cs),
1994 signal_simple_error ("Too big table", table);
1996 signal_simple_error ("Invalid element is found", table);
1998 signal_simple_error ("Something wrong", table);
2000 CHARSET_DECODING_TABLE(cs) = Qnil;
2003 signal_error (Qwrong_type_argument,
2004 list2 (build_translated_string ("vector-or-nil-p"),
2007 byte_offset = CHARSET_BYTE_OFFSET (cs);
2008 switch (CHARSET_DIMENSION (cs))
2011 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2013 Lisp_Object c = XVECTOR_DATA(table)[i];
2016 Fput_char_attribute (c, XCHARSET_NAME (charset),
2017 make_int (i + byte_offset));
2021 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2023 Lisp_Object v = XVECTOR_DATA(table)[i];
2029 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2031 Lisp_Object c = XVECTOR_DATA(v)[j];
2035 (c, XCHARSET_NAME (charset),
2036 make_int ( ( (i + byte_offset) << 8 )
2042 Fput_char_attribute (v, XCHARSET_NAME (charset),
2043 make_int (i + byte_offset));
2052 /************************************************************************/
2053 /* Lisp primitives for working with characters */
2054 /************************************************************************/
2057 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2058 Make a character from CHARSET and code-point CODE.
2059 If DEFINED_ONLY is non-nil, builtin character is not returned.
2060 If corresponding character is not found, nil is returned.
2062 (charset, code, defined_only))
2066 charset = Fget_charset (charset);
2069 if (XCHARSET_GRAPHIC (charset) == 1)
2071 if (NILP (defined_only))
2072 c = DECODE_CHAR (charset, c);
2074 c = decode_defined_char (charset, c);
2075 return c >= 0 ? make_char (c) : Qnil;
2078 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2079 Make a builtin character from CHARSET and code-point CODE.
2085 charset = Fget_charset (charset);
2087 if (EQ (charset, Vcharset_latin_viscii))
2089 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2095 (ret = Fget_char_attribute (chr,
2096 Vcharset_latin_viscii_lower,
2099 charset = Vcharset_latin_viscii_lower;
2103 (ret = Fget_char_attribute (chr,
2104 Vcharset_latin_viscii_upper,
2107 charset = Vcharset_latin_viscii_upper;
2114 if (XCHARSET_GRAPHIC (charset) == 1)
2117 c = decode_builtin_char (charset, c);
2118 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2122 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2123 Make a character from CHARSET and octets ARG1 and ARG2.
2124 ARG2 is required only for characters from two-dimensional charsets.
2125 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2126 character s with caron.
2128 (charset, arg1, arg2))
2132 int lowlim, highlim;
2134 charset = Fget_charset (charset);
2135 cs = XCHARSET (charset);
2137 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2138 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2140 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2142 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2143 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2146 /* It is useful (and safe, according to Olivier Galibert) to strip
2147 the 8th bit off ARG1 and ARG2 because it allows programmers to
2148 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2149 Latin 2 code of the character. */
2157 if (a1 < lowlim || a1 > highlim)
2158 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2160 if (CHARSET_DIMENSION (cs) == 1)
2164 ("Charset is of dimension one; second octet must be nil", arg2);
2165 return make_char (MAKE_CHAR (charset, a1, 0));
2174 a2 = XINT (arg2) & 0x7f;
2176 if (a2 < lowlim || a2 > highlim)
2177 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2179 return make_char (MAKE_CHAR (charset, a1, a2));
2182 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2183 Return the character set of CHARACTER.
2187 CHECK_CHAR_COERCE_INT (character);
2189 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2192 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2193 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2194 N defaults to 0 if omitted.
2198 Lisp_Object charset;
2201 CHECK_CHAR_COERCE_INT (character);
2203 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2205 if (NILP (n) || EQ (n, Qzero))
2206 return make_int (octet0);
2207 else if (EQ (n, make_int (1)))
2208 return make_int (octet1);
2210 signal_simple_error ("Octet number must be 0 or 1", n);
2214 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2215 Return code-point of CHARACTER in specified CHARSET.
2217 (character, charset, defined_only))
2221 CHECK_CHAR_COERCE_INT (character);
2222 charset = Fget_charset (charset);
2223 code_point = charset_code_point (charset, XCHAR (character),
2224 !NILP (defined_only));
2225 if (code_point >= 0)
2226 return make_int (code_point);
2232 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2233 Return list of charset and one or two position-codes of CHARACTER.
2237 /* This function can GC */
2238 struct gcpro gcpro1, gcpro2;
2239 Lisp_Object charset = Qnil;
2240 Lisp_Object rc = Qnil;
2248 GCPRO2 (charset, rc);
2249 CHECK_CHAR_COERCE_INT (character);
2252 code_point = ENCODE_CHAR (XCHAR (character), charset);
2253 dimension = XCHARSET_DIMENSION (charset);
2254 while (dimension > 0)
2256 rc = Fcons (make_int (code_point & 255), rc);
2260 rc = Fcons (XCHARSET_NAME (charset), rc);
2262 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2264 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2266 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2270 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2279 #ifdef ENABLE_COMPOSITE_CHARS
2280 /************************************************************************/
2281 /* composite character functions */
2282 /************************************************************************/
2285 lookup_composite_char (Bufbyte *str, int len)
2287 Lisp_Object lispstr = make_string (str, len);
2288 Lisp_Object ch = Fgethash (lispstr,
2289 Vcomposite_char_string2char_hash_table,
2295 if (composite_char_row_next >= 128)
2296 signal_simple_error ("No more composite chars available", lispstr);
2297 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2298 composite_char_col_next);
2299 Fputhash (make_char (emch), lispstr,
2300 Vcomposite_char_char2string_hash_table);
2301 Fputhash (lispstr, make_char (emch),
2302 Vcomposite_char_string2char_hash_table);
2303 composite_char_col_next++;
2304 if (composite_char_col_next >= 128)
2306 composite_char_col_next = 32;
2307 composite_char_row_next++;
2316 composite_char_string (Emchar ch)
2318 Lisp_Object str = Fgethash (make_char (ch),
2319 Vcomposite_char_char2string_hash_table,
2321 assert (!UNBOUNDP (str));
2325 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2326 Convert a string into a single composite character.
2327 The character is the result of overstriking all the characters in
2332 CHECK_STRING (string);
2333 return make_char (lookup_composite_char (XSTRING_DATA (string),
2334 XSTRING_LENGTH (string)));
2337 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2338 Return a string of the characters comprising a composite character.
2346 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2347 signal_simple_error ("Must be composite char", ch);
2348 return composite_char_string (emch);
2350 #endif /* ENABLE_COMPOSITE_CHARS */
2353 /************************************************************************/
2354 /* initialization */
2355 /************************************************************************/
2358 syms_of_mule_charset (void)
2360 INIT_LRECORD_IMPLEMENTATION (charset);
2362 DEFSUBR (Fcharsetp);
2363 DEFSUBR (Ffind_charset);
2364 DEFSUBR (Fget_charset);
2365 DEFSUBR (Fcharset_list);
2366 DEFSUBR (Fcharset_name);
2367 DEFSUBR (Fmake_charset);
2368 DEFSUBR (Fmake_reverse_direction_charset);
2369 /* DEFSUBR (Freverse_direction_charset); */
2370 DEFSUBR (Fdefine_charset_alias);
2371 DEFSUBR (Fcharset_from_attributes);
2372 DEFSUBR (Fcharset_short_name);
2373 DEFSUBR (Fcharset_long_name);
2374 DEFSUBR (Fcharset_description);
2375 DEFSUBR (Fcharset_dimension);
2376 DEFSUBR (Fcharset_property);
2377 DEFSUBR (Fcharset_id);
2378 DEFSUBR (Fset_charset_ccl_program);
2379 DEFSUBR (Fset_charset_registry);
2381 DEFSUBR (Fcharset_mapping_table);
2382 DEFSUBR (Fset_charset_mapping_table);
2386 DEFSUBR (Fdecode_char);
2387 DEFSUBR (Fdecode_builtin_char);
2388 DEFSUBR (Fencode_char);
2390 DEFSUBR (Fmake_char);
2391 DEFSUBR (Fchar_charset);
2392 DEFSUBR (Fchar_octet);
2393 DEFSUBR (Fsplit_char);
2395 #ifdef ENABLE_COMPOSITE_CHARS
2396 DEFSUBR (Fmake_composite_char);
2397 DEFSUBR (Fcomposite_char_string);
2400 defsymbol (&Qcharsetp, "charsetp");
2401 defsymbol (&Qregistry, "registry");
2402 defsymbol (&Qfinal, "final");
2403 defsymbol (&Qgraphic, "graphic");
2404 defsymbol (&Qdirection, "direction");
2405 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2406 defsymbol (&Qshort_name, "short-name");
2407 defsymbol (&Qlong_name, "long-name");
2409 defsymbol (&Qmother, "mother");
2410 defsymbol (&Qmin_code, "min-code");
2411 defsymbol (&Qmax_code, "max-code");
2412 defsymbol (&Qcode_offset, "code-offset");
2413 defsymbol (&Qconversion, "conversion");
2414 defsymbol (&Q94x60, "94x60");
2415 defsymbol (&Q94x94x60, "94x94x60");
2418 defsymbol (&Ql2r, "l2r");
2419 defsymbol (&Qr2l, "r2l");
2421 /* Charsets, compatible with FSF 20.3
2422 Naming convention is Script-Charset[-Edition] */
2423 defsymbol (&Qascii, "ascii");
2424 defsymbol (&Qcontrol_1, "control-1");
2425 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2426 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2427 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2428 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2429 defsymbol (&Qthai_tis620, "thai-tis620");
2430 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2431 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2432 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2433 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2434 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2435 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2436 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2437 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2438 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2439 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2440 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2441 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2442 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2443 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2444 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2445 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2447 defsymbol (&Qucs, "ucs");
2448 defsymbol (&Qucs_bmp, "ucs-bmp");
2449 defsymbol (&Qucs_smp, "ucs-smp");
2450 defsymbol (&Qucs_sip, "ucs-sip");
2451 defsymbol (&Qucs_gb, "ucs-gb");
2452 defsymbol (&Qucs_cns, "ucs-cns");
2453 defsymbol (&Qucs_jis, "ucs-jis");
2454 defsymbol (&Qucs_ks, "ucs-ks");
2455 defsymbol (&Qucs_big5, "ucs-big5");
2456 defsymbol (&Qlatin_viscii, "latin-viscii");
2457 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2458 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2459 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2460 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2461 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2462 defsymbol (&Qjis_x0208, "=jis-x0208");
2463 defsymbol (&Qideograph_gt, "ideograph-gt");
2464 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2465 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2466 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2467 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2468 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2469 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2470 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2471 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2472 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2473 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2474 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2475 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2476 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2477 defsymbol (&Qchinese_big5, "chinese-big5");
2478 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2479 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2480 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2481 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2482 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2483 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2484 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2485 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2486 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2487 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2488 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2489 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2490 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2491 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2493 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2494 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2496 defsymbol (&Qcomposite, "composite");
2500 vars_of_mule_charset (void)
2507 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2508 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2510 /* Table of charsets indexed by leading byte. */
2511 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2512 chlook->charset_by_leading_byte[i] = Qnil;
2515 /* Table of charsets indexed by type/final-byte. */
2516 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2517 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2518 chlook->charset_by_attributes[i][j] = Qnil;
2520 /* Table of charsets indexed by type/final-byte/direction. */
2521 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2522 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2523 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2524 chlook->charset_by_attributes[i][j][k] = Qnil;
2528 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2530 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2531 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2535 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2536 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2537 Leading-code of private TYPE9N charset of column-width 1.
2539 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2543 Vdefault_coded_charset_priority_list = Qnil;
2544 DEFVAR_LISP ("default-coded-charset-priority-list",
2545 &Vdefault_coded_charset_priority_list /*
2546 Default order of preferred coded-character-sets.
2552 complex_vars_of_mule_charset (void)
2554 staticpro (&Vcharset_hash_table);
2555 Vcharset_hash_table =
2556 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2558 /* Predefined character sets. We store them into variables for
2562 staticpro (&Vcharset_ucs);
2564 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2565 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2566 build_string ("UCS"),
2567 build_string ("UCS"),
2568 build_string ("ISO/IEC 10646"),
2570 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2571 staticpro (&Vcharset_ucs_bmp);
2573 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2574 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2575 build_string ("BMP"),
2576 build_string ("UCS-BMP"),
2577 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2579 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2580 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2581 staticpro (&Vcharset_ucs_smp);
2583 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2584 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2585 build_string ("SMP"),
2586 build_string ("UCS-SMP"),
2587 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2588 build_string ("UCS00-1"),
2589 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2590 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2591 staticpro (&Vcharset_ucs_sip);
2593 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2594 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2595 build_string ("SIP"),
2596 build_string ("UCS-SIP"),
2597 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2598 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2599 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2600 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2601 staticpro (&Vcharset_ucs_gb);
2603 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2604 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2605 build_string ("UCS for GB"),
2606 build_string ("UCS for GB"),
2607 build_string ("ISO/IEC 10646 for GB"),
2609 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2610 staticpro (&Vcharset_ucs_cns);
2612 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2613 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2614 build_string ("UCS for CNS"),
2615 build_string ("UCS for CNS 11643"),
2616 build_string ("ISO/IEC 10646 for CNS 11643"),
2618 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2619 staticpro (&Vcharset_ucs_jis);
2621 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2622 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2623 build_string ("UCS for JIS"),
2624 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2626 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2628 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2629 staticpro (&Vcharset_ucs_ks);
2631 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2632 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2633 build_string ("UCS for KS"),
2634 build_string ("UCS for CCS defined by KS"),
2635 build_string ("ISO/IEC 10646 for Korean Standards"),
2637 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2638 staticpro (&Vcharset_ucs_big5);
2640 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2641 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2642 build_string ("UCS for Big5"),
2643 build_string ("UCS for Big5"),
2644 build_string ("ISO/IEC 10646 for Big5"),
2646 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2648 # define MIN_CHAR_THAI 0
2649 # define MAX_CHAR_THAI 0
2650 /* # define MIN_CHAR_HEBREW 0 */
2651 /* # define MAX_CHAR_HEBREW 0 */
2652 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2653 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2655 staticpro (&Vcharset_ascii);
2657 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2658 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2659 build_string ("ASCII"),
2660 build_string ("ASCII)"),
2661 build_string ("ASCII (ISO646 IRV)"),
2662 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2663 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2664 staticpro (&Vcharset_control_1);
2665 Vcharset_control_1 =
2666 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2667 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2668 build_string ("C1"),
2669 build_string ("Control characters"),
2670 build_string ("Control characters 128-191"),
2672 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2673 staticpro (&Vcharset_latin_iso8859_1);
2674 Vcharset_latin_iso8859_1 =
2675 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2676 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2677 build_string ("Latin-1"),
2678 build_string ("ISO8859-1 (Latin-1)"),
2679 build_string ("ISO8859-1 (Latin-1)"),
2680 build_string ("iso8859-1"),
2681 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2682 staticpro (&Vcharset_latin_iso8859_2);
2683 Vcharset_latin_iso8859_2 =
2684 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2685 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2686 build_string ("Latin-2"),
2687 build_string ("ISO8859-2 (Latin-2)"),
2688 build_string ("ISO8859-2 (Latin-2)"),
2689 build_string ("iso8859-2"),
2690 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2691 staticpro (&Vcharset_latin_iso8859_3);
2692 Vcharset_latin_iso8859_3 =
2693 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2694 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2695 build_string ("Latin-3"),
2696 build_string ("ISO8859-3 (Latin-3)"),
2697 build_string ("ISO8859-3 (Latin-3)"),
2698 build_string ("iso8859-3"),
2699 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2700 staticpro (&Vcharset_latin_iso8859_4);
2701 Vcharset_latin_iso8859_4 =
2702 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2703 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2704 build_string ("Latin-4"),
2705 build_string ("ISO8859-4 (Latin-4)"),
2706 build_string ("ISO8859-4 (Latin-4)"),
2707 build_string ("iso8859-4"),
2708 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2709 staticpro (&Vcharset_thai_tis620);
2710 Vcharset_thai_tis620 =
2711 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2712 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2713 build_string ("TIS620"),
2714 build_string ("TIS620 (Thai)"),
2715 build_string ("TIS620.2529 (Thai)"),
2716 build_string ("tis620"),
2717 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2718 staticpro (&Vcharset_greek_iso8859_7);
2719 Vcharset_greek_iso8859_7 =
2720 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2721 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2722 build_string ("ISO8859-7"),
2723 build_string ("ISO8859-7 (Greek)"),
2724 build_string ("ISO8859-7 (Greek)"),
2725 build_string ("iso8859-7"),
2726 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2727 staticpro (&Vcharset_arabic_iso8859_6);
2728 Vcharset_arabic_iso8859_6 =
2729 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2730 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2731 build_string ("ISO8859-6"),
2732 build_string ("ISO8859-6 (Arabic)"),
2733 build_string ("ISO8859-6 (Arabic)"),
2734 build_string ("iso8859-6"),
2735 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2736 staticpro (&Vcharset_hebrew_iso8859_8);
2737 Vcharset_hebrew_iso8859_8 =
2738 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2739 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2740 build_string ("ISO8859-8"),
2741 build_string ("ISO8859-8 (Hebrew)"),
2742 build_string ("ISO8859-8 (Hebrew)"),
2743 build_string ("iso8859-8"),
2745 0 /* MIN_CHAR_HEBREW */,
2746 0 /* MAX_CHAR_HEBREW */, 0, 32,
2747 Qnil, CONVERSION_IDENTICAL);
2748 staticpro (&Vcharset_katakana_jisx0201);
2749 Vcharset_katakana_jisx0201 =
2750 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2751 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2752 build_string ("JISX0201 Kana"),
2753 build_string ("JISX0201.1976 (Japanese Kana)"),
2754 build_string ("JISX0201.1976 Japanese Kana"),
2755 build_string ("jisx0201\\.1976"),
2756 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2757 staticpro (&Vcharset_latin_jisx0201);
2758 Vcharset_latin_jisx0201 =
2759 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2760 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2761 build_string ("JISX0201 Roman"),
2762 build_string ("JISX0201.1976 (Japanese Roman)"),
2763 build_string ("JISX0201.1976 Japanese Roman"),
2764 build_string ("jisx0201\\.1976"),
2765 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2766 staticpro (&Vcharset_cyrillic_iso8859_5);
2767 Vcharset_cyrillic_iso8859_5 =
2768 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2769 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2770 build_string ("ISO8859-5"),
2771 build_string ("ISO8859-5 (Cyrillic)"),
2772 build_string ("ISO8859-5 (Cyrillic)"),
2773 build_string ("iso8859-5"),
2774 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2775 staticpro (&Vcharset_latin_iso8859_9);
2776 Vcharset_latin_iso8859_9 =
2777 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2778 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2779 build_string ("Latin-5"),
2780 build_string ("ISO8859-9 (Latin-5)"),
2781 build_string ("ISO8859-9 (Latin-5)"),
2782 build_string ("iso8859-9"),
2783 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2785 staticpro (&Vcharset_jis_x0208);
2786 Vcharset_jis_x0208 =
2787 make_charset (LEADING_BYTE_JIS_X0208,
2789 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2790 build_string ("JIS X0208"),
2791 build_string ("JIS X0208 Common"),
2792 build_string ("JIS X0208 Common part"),
2793 build_string ("jisx0208\\.1990"),
2795 MIN_CHAR_JIS_X0208_1990,
2796 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2797 Qnil, CONVERSION_94x94);
2799 staticpro (&Vcharset_japanese_jisx0208_1978);
2800 Vcharset_japanese_jisx0208_1978 =
2801 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2802 Qjapanese_jisx0208_1978, 94, 2,
2803 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2804 build_string ("JIS X0208:1978"),
2805 build_string ("JIS X0208:1978 (Japanese)"),
2807 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2808 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2815 CONVERSION_IDENTICAL);
2816 staticpro (&Vcharset_chinese_gb2312);
2817 Vcharset_chinese_gb2312 =
2818 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2819 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2820 build_string ("GB2312"),
2821 build_string ("GB2312)"),
2822 build_string ("GB2312 Chinese simplified"),
2823 build_string ("gb2312"),
2824 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2825 staticpro (&Vcharset_chinese_gb12345);
2826 Vcharset_chinese_gb12345 =
2827 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2828 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2829 build_string ("G1"),
2830 build_string ("GB 12345)"),
2831 build_string ("GB 12345-1990"),
2832 build_string ("GB12345\\(\\.1990\\)?-0"),
2833 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2834 staticpro (&Vcharset_japanese_jisx0208);
2835 Vcharset_japanese_jisx0208 =
2836 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2837 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2838 build_string ("JISX0208"),
2839 build_string ("JIS X0208:1983 (Japanese)"),
2840 build_string ("JIS X0208:1983 Japanese Kanji"),
2841 build_string ("jisx0208\\.1983"),
2848 CONVERSION_IDENTICAL);
2850 staticpro (&Vcharset_japanese_jisx0208_1990);
2851 Vcharset_japanese_jisx0208_1990 =
2852 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2853 Qjapanese_jisx0208_1990, 94, 2,
2854 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2855 build_string ("JISX0208-1990"),
2856 build_string ("JIS X0208:1990 (Japanese)"),
2857 build_string ("JIS X0208:1990 Japanese Kanji"),
2858 build_string ("jisx0208\\.1990"),
2860 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2861 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2862 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2863 Vcharset_jis_x0208 /* Qnil */,
2864 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2866 staticpro (&Vcharset_korean_ksc5601);
2867 Vcharset_korean_ksc5601 =
2868 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2869 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2870 build_string ("KSC5601"),
2871 build_string ("KSC5601 (Korean"),
2872 build_string ("KSC5601 Korean Hangul and Hanja"),
2873 build_string ("ksc5601"),
2874 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2875 staticpro (&Vcharset_japanese_jisx0212);
2876 Vcharset_japanese_jisx0212 =
2877 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2878 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2879 build_string ("JISX0212"),
2880 build_string ("JISX0212 (Japanese)"),
2881 build_string ("JISX0212 Japanese Supplement"),
2882 build_string ("jisx0212"),
2883 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2885 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2886 staticpro (&Vcharset_chinese_cns11643_1);
2887 Vcharset_chinese_cns11643_1 =
2888 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2889 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2890 build_string ("CNS11643-1"),
2891 build_string ("CNS11643-1 (Chinese traditional)"),
2893 ("CNS 11643 Plane 1 Chinese traditional"),
2894 build_string (CHINESE_CNS_PLANE_RE("1")),
2895 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2896 staticpro (&Vcharset_chinese_cns11643_2);
2897 Vcharset_chinese_cns11643_2 =
2898 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2899 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2900 build_string ("CNS11643-2"),
2901 build_string ("CNS11643-2 (Chinese traditional)"),
2903 ("CNS 11643 Plane 2 Chinese traditional"),
2904 build_string (CHINESE_CNS_PLANE_RE("2")),
2905 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2907 staticpro (&Vcharset_latin_tcvn5712);
2908 Vcharset_latin_tcvn5712 =
2909 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2910 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2911 build_string ("TCVN 5712"),
2912 build_string ("TCVN 5712 (VSCII-2)"),
2913 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2914 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2915 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2916 staticpro (&Vcharset_latin_viscii_lower);
2917 Vcharset_latin_viscii_lower =
2918 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2919 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2920 build_string ("VISCII lower"),
2921 build_string ("VISCII lower (Vietnamese)"),
2922 build_string ("VISCII lower (Vietnamese)"),
2923 build_string ("MULEVISCII-LOWER"),
2924 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2925 staticpro (&Vcharset_latin_viscii_upper);
2926 Vcharset_latin_viscii_upper =
2927 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2928 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2929 build_string ("VISCII upper"),
2930 build_string ("VISCII upper (Vietnamese)"),
2931 build_string ("VISCII upper (Vietnamese)"),
2932 build_string ("MULEVISCII-UPPER"),
2933 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2934 staticpro (&Vcharset_latin_viscii);
2935 Vcharset_latin_viscii =
2936 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2937 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2938 build_string ("VISCII"),
2939 build_string ("VISCII 1.1 (Vietnamese)"),
2940 build_string ("VISCII 1.1 (Vietnamese)"),
2941 build_string ("VISCII1\\.1"),
2942 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2943 staticpro (&Vcharset_chinese_big5);
2944 Vcharset_chinese_big5 =
2945 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2946 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2947 build_string ("Big5"),
2948 build_string ("Big5"),
2949 build_string ("Big5 Chinese traditional"),
2950 build_string ("big5-0"),
2952 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2953 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2955 staticpro (&Vcharset_chinese_big5_cdp);
2956 Vcharset_chinese_big5_cdp =
2957 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2958 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2959 build_string ("Big5-CDP"),
2960 build_string ("Big5 + CDP extension"),
2961 build_string ("Big5 with CDP extension"),
2962 build_string ("big5\\.cdp-0"),
2963 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2964 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2966 #define DEF_HANZIKU(n) \
2967 staticpro (&Vcharset_ideograph_hanziku_##n); \
2968 Vcharset_ideograph_hanziku_##n = \
2969 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2970 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2971 build_string ("HZK-"#n), \
2972 build_string ("HANZIKU-"#n), \
2973 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2975 ("hanziku-"#n"$"), \
2976 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2977 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2990 staticpro (&Vcharset_ideograph_gt);
2991 Vcharset_ideograph_gt =
2992 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2993 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2994 build_string ("GT"),
2995 build_string ("GT"),
2996 build_string ("GT"),
2998 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2999 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3000 #define DEF_GT_PJ(n) \
3001 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3002 Vcharset_ideograph_gt_pj_##n = \
3003 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3004 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3005 build_string ("GT-PJ-"#n), \
3006 build_string ("GT (pseudo JIS encoding) part "#n), \
3007 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3009 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3010 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3023 staticpro (&Vcharset_ideograph_daikanwa_2);
3024 Vcharset_ideograph_daikanwa_2 =
3025 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3026 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3027 build_string ("Daikanwa Rev."),
3028 build_string ("Morohashi's Daikanwa Rev."),
3030 ("Daikanwa dictionary (revised version)"),
3031 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3032 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3033 staticpro (&Vcharset_ideograph_daikanwa);
3034 Vcharset_ideograph_daikanwa =
3035 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3036 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3037 build_string ("Daikanwa"),
3038 build_string ("Morohashi's Daikanwa Rev.2"),
3040 ("Daikanwa dictionary (second revised version)"),
3041 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3042 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3043 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3045 staticpro (&Vcharset_ethiopic_ucs);
3046 Vcharset_ethiopic_ucs =
3047 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3048 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3049 build_string ("Ethiopic (UCS)"),
3050 build_string ("Ethiopic (UCS)"),
3051 build_string ("Ethiopic of UCS"),
3052 build_string ("Ethiopic-Unicode"),
3053 Qnil, 0x1200, 0x137F, 0, 0,
3054 Qnil, CONVERSION_IDENTICAL);
3056 staticpro (&Vcharset_chinese_big5_1);
3057 Vcharset_chinese_big5_1 =
3058 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3059 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3060 build_string ("Big5"),
3061 build_string ("Big5 (Level-1)"),
3063 ("Big5 Level-1 Chinese traditional"),
3064 build_string ("big5"),
3065 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3066 staticpro (&Vcharset_chinese_big5_2);
3067 Vcharset_chinese_big5_2 =
3068 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3069 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3070 build_string ("Big5"),
3071 build_string ("Big5 (Level-2)"),
3073 ("Big5 Level-2 Chinese traditional"),
3074 build_string ("big5"),
3075 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3077 #ifdef ENABLE_COMPOSITE_CHARS
3078 /* #### For simplicity, we put composite chars into a 96x96 charset.
3079 This is going to lead to problems because you can run out of
3080 room, esp. as we don't yet recycle numbers. */
3081 staticpro (&Vcharset_composite);
3082 Vcharset_composite =
3083 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3084 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3085 build_string ("Composite"),
3086 build_string ("Composite characters"),
3087 build_string ("Composite characters"),
3090 /* #### not dumped properly */
3091 composite_char_row_next = 32;
3092 composite_char_col_next = 32;
3094 Vcomposite_char_string2char_hash_table =
3095 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3096 Vcomposite_char_char2string_hash_table =
3097 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3098 staticpro (&Vcomposite_char_string2char_hash_table);
3099 staticpro (&Vcomposite_char_char2string_hash_table);
3100 #endif /* ENABLE_COMPOSITE_CHARS */