1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_ideograph_gt_pj_1;
94 Lisp_Object Vcharset_ideograph_gt_pj_2;
95 Lisp_Object Vcharset_ideograph_gt_pj_3;
96 Lisp_Object Vcharset_ideograph_gt_pj_4;
97 Lisp_Object Vcharset_ideograph_gt_pj_5;
98 Lisp_Object Vcharset_ideograph_gt_pj_6;
99 Lisp_Object Vcharset_ideograph_gt_pj_7;
100 Lisp_Object Vcharset_ideograph_gt_pj_8;
101 Lisp_Object Vcharset_ideograph_gt_pj_9;
102 Lisp_Object Vcharset_ideograph_gt_pj_10;
103 Lisp_Object Vcharset_ideograph_gt_pj_11;
104 Lisp_Object Vcharset_ideograph_daikanwa_2;
105 Lisp_Object Vcharset_ideograph_daikanwa;
106 Lisp_Object Vcharset_ethiopic_ucs;
108 Lisp_Object Vcharset_chinese_big5_1;
109 Lisp_Object Vcharset_chinese_big5_2;
111 #ifdef ENABLE_COMPOSITE_CHARS
112 Lisp_Object Vcharset_composite;
114 /* Hash tables for composite chars. One maps strings representing
115 composed chars to their equivalent chars; one goes the other way. */
117 Lisp_Object Vcomposite_char_char2string_hash_table;
118 Lisp_Object Vcomposite_char_string2char_hash_table;
120 static int composite_char_row_next;
121 static int composite_char_col_next;
123 #endif /* ENABLE_COMPOSITE_CHARS */
125 struct charset_lookup *chlook;
127 static const struct lrecord_description charset_lookup_description_1[] = {
128 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
130 NUM_LEADING_BYTES+4*128
137 static const struct struct_description charset_lookup_description = {
138 sizeof (struct charset_lookup),
139 charset_lookup_description_1
143 /* Table of number of bytes in the string representation of a character
144 indexed by the first byte of that representation.
146 rep_bytes_by_first_byte(c) is more efficient than the equivalent
147 canonical computation:
149 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
151 const Bytecount rep_bytes_by_first_byte[0xA0] =
152 { /* 0x00 - 0x7f are for straight ASCII */
153 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for this one range; they differ
   only in the last entry (3 versus 2).  With both rows present the
   initializer has 0xB0 entries while the array is declared with 0xA0, so
   presumably only one row is meant to be compiled in, selected by a
   build-time conditional -- confirm. */
163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
167 /* 0x90 - 0x9d are for Dimension-2 official charsets */
168 /* 0x9e is for Dimension-1 private charsets */
169 /* 0x9f is for Dimension-2 private charsets */
170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Walk the vector V, a decoding table of depth DIM, and check its shape.
   CCS_LEN is the largest vector length accepted; it is passed unchanged
   to every recursive call.  Elements that are nil or characters are
   accepted as they are; any other element gets further checking, and the
   recursive call on it uses DIM - 1. */
176 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
178 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* V is longer than the CCS_LEN entries allowed at one level. */
182 if (XVECTOR_LENGTH (v) > ccs_len)
185 for (i = 0; i < XVECTOR_LENGTH (v); i++)
187 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Element is neither nil nor a character. */
189 if (!NILP (c) && !CHARP (c))
/* Check the element as a table that is one level shallower. */
193 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is either an integer or, in the obsolete representation, a list
   of bytes that is folded into one integer.  Any other shape signals
   "Invalid value for coded-charset".  When the charset's graphic
   property is 1 the integer form is masked with 0x7F7F7F7F, i.e. the
   high bit of every byte is cleared.  The resulting code point is
   entered with decoding_table_put_char. */
205 put_char_ccs_code_point (Lisp_Object character,
206 Lisp_Object ccs, Lisp_Object value)
/* The work below is tied to this test: CCS is not named `ucs', or
   CHARACTER's own code differs from VALUE. */
208 if (!EQ (XCHARSET_NAME (ccs), Qucs)
210 || (XCHAR (character) != XINT (value)))
212 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
216 { /* obsolete representation: value must be a list of bytes */
217 Lisp_Object ret = Fcar (value);
221 signal_simple_error ("Invalid value for coded-charset", value);
/* The first list element seeds the code point. */
222 code_point = XINT (ret);
223 if (XCHARSET_GRAPHIC (ccs) == 1)
231 signal_simple_error ("Invalid value for coded-charset",
235 signal_simple_error ("Invalid value for coded-charset",
238 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J below the bytes gathered so far. */
240 code_point = (code_point << 8) | j;
/* From here on VALUE is the normalized integer form. */
243 value = make_int (code_point);
245 else if (INTP (value))
247 code_point = XINT (value);
248 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the high bit of each of the four bytes. */
250 code_point &= 0x7F7F7F7F;
251 value = make_int (code_point);
255 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point currently recorded for CHARACTER in CCS and drop
   that decoding-table entry before the new one is entered. */
259 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
262 decoding_table_remove_char (ccs, XINT (cpos));
265 decoding_table_put_char (ccs, code_point, character);
/* Remove CHARACTER's association with the coded charset CCS.  The
   decoding-table entry is removed when the decoding table is a vector,
   and CHARACTER's slot in the encoding table is set to Qunbound when the
   encoding table is a char table. */
271 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
273 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
274 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
276 if (VECTORP (decoding_table))
/* Find the code point CHARACTER currently has in CCS. */
278 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
282 decoding_table_remove_char (ccs, XINT (cpos));
285 if (CHAR_TABLEP (encoding_table))
287 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
295 int leading_code_private_11;
298 Lisp_Object Qcharsetp;
300 /* Qdoc_string, Qdimension, Qchars defined in general.c */
301 Lisp_Object Qregistry, Qfinal, Qgraphic;
302 Lisp_Object Qdirection;
303 Lisp_Object Qreverse_direction_charset;
304 Lisp_Object Qleading_byte;
305 Lisp_Object Qshort_name, Qlong_name;
307 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
308 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
325 Qjapanese_jisx0208_1978,
329 Qjapanese_jisx0208_1990,
347 Qvietnamese_viscii_lower,
348 Qvietnamese_viscii_upper,
351 /* Qchinese_big5_cdp, */
352 Qideograph_hanziku_1,
353 Qideograph_hanziku_2,
354 Qideograph_hanziku_3,
355 Qideograph_hanziku_4,
356 Qideograph_hanziku_5,
357 Qideograph_hanziku_6,
358 Qideograph_hanziku_7,
359 Qideograph_hanziku_8,
360 Qideograph_hanziku_9,
361 Qideograph_hanziku_10,
362 Qideograph_hanziku_11,
363 Qideograph_hanziku_12,
364 Qideograph_daikanwa_2,
383 Lisp_Object Ql2r, Qr2l;
385 Lisp_Object Vcharset_hash_table;
387 /* Composite characters are characters constructed by overstriking two
388 or more regular characters.
390 1) The old Mule implementation involves storing composite characters
391 in a buffer as a tag followed by all of the actual characters
392 used to make up the composite character. I think this is a bad
393 idea; it greatly complicates code that wants to handle strings
394 one character at a time because it has to deal with the possibility
395 of great big ungainly characters. It's much more reasonable to
396 simply store an index into a table of composite characters.
398 2) The current implementation only allows for 16,384 separate
399 composite characters over the lifetime of the XEmacs process.
400 This could become a potential problem if the user
401 edited lots of different files that use composite characters.
402 Due to FSF bogosity, increasing the number of allowable
403 composite characters under Mule would decrease the number
404 of possible faces that can exist. Mule already has shrunk
405 this to 2048, and further shrinkage would become uncomfortable.
406 No such problems exist in XEmacs.
408 Composite characters could be represented as 0x80 C1 C2 C3,
409 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
410 for slightly under 2^20 (one million) composite characters
411 over the XEmacs process lifetime, and you only need to
412 increase the size of a Mule character from 19 to 21 bits.
413 Or you could use 0x80 C1 C2 C3 C4, allowing for about
414 85 million (slightly over 2^26) composite characters. */
417 /************************************************************************/
418 /* Basic Emchar functions */
419 /************************************************************************/
421 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
422 string in STR. Returns the number of bytes stored.
423 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* The branches below write C as a lead byte followed by continuation
   bytes.  Every continuation byte carries six bits of C OR-ed with 0x80;
   the lead-byte marker grows with the length: 0xc0, 0xe0, 0xf0, 0xf8,
   0xfc. */
427 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: C is at most 0x7ff. */
442 else if ( c <= 0x7ff )
444 *p++ = (c >> 6) | 0xc0;
445 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: C is at most 0xffff. */
447 else if ( c <= 0xffff )
449 *p++ = (c >> 12) | 0xe0;
450 *p++ = ((c >> 6) & 0x3f) | 0x80;
451 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: C is at most 0x1fffff. */
453 else if ( c <= 0x1fffff )
455 *p++ = (c >> 18) | 0xf0;
456 *p++ = ((c >> 12) & 0x3f) | 0x80;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: C is at most 0x3ffffff. */
460 else if ( c <= 0x3ffffff )
462 *p++ = (c >> 24) | 0xf8;
463 *p++ = ((c >> 18) & 0x3f) | 0x80;
464 *p++ = ((c >> 12) & 0x3f) | 0x80;
465 *p++ = ((c >> 6) & 0x3f) | 0x80;
466 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: the remaining, largest values. */
470 *p++ = (c >> 30) | 0xfc;
471 *p++ = ((c >> 24) & 0x3f) | 0x80;
472 *p++ = ((c >> 18) & 0x3f) | 0x80;
473 *p++ = ((c >> 12) & 0x3f) | 0x80;
474 *p++ = ((c >> 6) & 0x3f) | 0x80;
475 *p++ = (c & 0x3f) | 0x80;
/* NOTE(review): the statements from here on split C into a charset and
   position codes and work with a leading byte, unlike the multi-byte
   scheme above.  Presumably the two are alternative build
   configurations -- confirm. */
478 BREAKUP_CHAR (c, charset, c1, c2);
479 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by their prefix byte. */
480 if (LEADING_BYTE_PRIVATE_P (lb))
481 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
483 if (EQ (charset, Vcharset_control_1))
492 /* Return the first character from a Mule-encoded string in STR,
493 assuming it's non-ASCII. Do not call this directly.
494 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.  The first byte B is compared
   against descending thresholds (0xf8, 0xf0, 0xe0, 0xc0 are the ones
   visible here), then the following bytes are folded in six bits at a
   time. */
497 non_ascii_charptr_emchar (const Bufbyte *str)
510 else if ( b >= 0xf8 )
515 else if ( b >= 0xf0 )
520 else if ( b >= 0xe0 )
525 else if ( b >= 0xc0 )
/* LEN counts the bytes still to be consumed. */
535 for( ; len > 0; len-- )
/* Shift the accumulated value up and add the low six bits of B. */
538 ch = ( ch << 6 ) | ( b & 0x3f );
/* NOTE(review): from here on the code decodes by leading byte rather than
   by the bit patterns above; presumably an alternative build
   configuration -- confirm. */
542 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the following byte minus 0x20. */
545 if (i0 == LEADING_BYTE_CONTROL_1)
546 return (Emchar) (*++str - 0x20);
548 if (LEADING_BYTE_PREFIX_P (i0))
553 charset = CHARSET_BY_LEADING_BYTE (i0);
554 if (XCHARSET_DIMENSION (charset) == 2)
557 return MAKE_CHAR (charset, i1, i2);
561 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
562 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for a non-ASCII Emchar CH.  CH is split into three
   fields F1, F2 and F3; the fields are range-checked against the
   official and private field limits, and the owning charset is looked up
   by leading byte.  Both visible final tests return whether that charset
   has 96 characters per dimension. */
566 non_ascii_valid_char_p (Emchar ch)
570 /* Must have only lowest 19 bits set */
574 f1 = CHAR_FIELD1 (ch);
575 f2 = CHAR_FIELD2 (ch);
576 f3 = CHAR_FIELD3 (ch);
/* F2 lies outside both the official and the private range. */
582 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
583 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
584 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* F3 is neither 0x20 nor 0x7F, and F2 is not in the private range. */
589 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
590 f2 <= MAX_CHAR_FIELD2_PRIVATE))
/* NOTE(review): the next three lines read like comment text whose
   delimiters are missing -- confirm against the upstream file. */
594 NOTE: This takes advantage of the fact that
595 FIELD2_TO_OFFICIAL_LEADING_BYTE and
596 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
598 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
599 if (EQ (charset, Qnil))
/* Result on this path: true exactly when the charset has 96 characters. */
601 return (XCHARSET_CHARS (charset) == 96);
/* F1 lies outside both the official and the private range. */
607 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
608 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
609 f1 > MAX_CHAR_FIELD1_PRIVATE)
611 if (f2 < 0x20 || f3 < 0x20)
614 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash table. */
615 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
617 if (UNBOUNDP (Fgethash (make_int (ch),
618 Vcomposite_char_char2string_hash_table,
623 #endif /* ENABLE_COMPOSITE_CHARS */
/* Neither F2 nor F3 is 0x20 or 0x7F, and F1 is not in the private range. */
625 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
626 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private F1 values use different leading-byte offsets. */
629 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
631 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
634 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
636 if (EQ (charset, Qnil))
/* Result on this path: true exactly when the charset has 96 characters. */
638 return (XCHARSET_CHARS (charset) == 96);
644 /************************************************************************/
645 /* Basic string functions */
646 /************************************************************************/
648 /* Copy the character pointed to by SRC into DST. Do not call this
649 directly. Use the macro charptr_copy_char() instead.
650 Return the number of bytes copied. */
653 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The first byte of SRC determines how many bytes the character occupies. */
655 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Step SRC and DST forward together, once per byte of the character. */
657 for (i = bytes; i; i--, dst++, src++)
663 /************************************************************************/
664 /* streams of Emchars */
665 /************************************************************************/
667 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
668 The functions below are not meant to be called directly; use
669 the macros in insdel.h. */
/* Finish reading one character from STREAM when its first byte CH has
   already been read.  The remaining bytes are fetched with Lstream_getc
   into a local buffer and the buffer is decoded with charptr_emchar. */
672 Lstream_get_emchar_1 (Lstream *stream, int ch)
674 Bufbyte str[MAX_EMCHAR_LEN];
675 Bufbyte *strptr = str;
678 str[0] = (Bufbyte) ch;
/* CH decides the total length; one byte of it is already in STR. */
680 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
682 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc is not expected here. */
683 bufpos_checking_assert (c >= 0);
684 *++strptr = (Bufbyte) c;
686 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write those
   bytes to STREAM.  Returns whatever Lstream_write returns. */
690 Lstream_fput_emchar (Lstream *stream, Emchar ch)
692 Bufbyte str[MAX_EMCHAR_LEN];
693 Bytecount len = set_charptr_emchar (str, ch);
694 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand those
   bytes to Lstream_unread on STREAM. */
698 Lstream_funget_emchar (Lstream *stream, Emchar ch)
700 Bufbyte str[MAX_EMCHAR_LEN];
701 Bytecount len = set_charptr_emchar (str, ch);
702 Lstream_unread (stream, str, len);
706 /************************************************************************/
708 /************************************************************************/
/* Mark method for charset objects: marks the Lisp objects held in the
   charset record OBJ. */
711 mark_charset (Lisp_Object obj)
713 Lisp_Charset *cs = XCHARSET (obj);
715 mark_object (cs->short_name);
716 mark_object (cs->long_name);
717 mark_object (cs->doc_string);
718 mark_object (cs->registry);
719 mark_object (cs->ccl_program);
721 mark_object (cs->decoding_table);
722 mark_object (cs->mother);
/* Print method for charset objects.  Writes to PRINTCHARFUN a form that
   begins with "#<charset " and contains the name, short name, long name,
   doc string, a formatted summary (dimension, direction, columns,
   graphic, final byte), the registry and the object's uid in hex. */
728 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
730 Lisp_Charset *cs = XCHARSET (obj);
/* Raised with the message "printing unreadable object ...". */
734 error ("printing unreadable object #<charset %s 0x%x>",
735 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
738 write_c_string ("#<charset ", printcharfun);
739 print_internal (CHARSET_NAME (cs), printcharfun, 0);
740 write_c_string (" ", printcharfun);
741 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
742 write_c_string (" ", printcharfun);
743 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
744 write_c_string (" ", printcharfun);
745 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Numeric and flag attributes are formatted into BUF first. */
746 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
748 CHARSET_DIMENSION (cs),
749 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
750 CHARSET_COLUMNS (cs),
751 CHARSET_GRAPHIC (cs),
753 write_c_string (buf, printcharfun);
754 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the form with the object's uid. */
755 sprintf (buf, " 0x%x>", cs->header.uid);
756 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object member, located by its offset in the structure. */
759 static const struct lrecord_description charset_description[] = {
760 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
761 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
762 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
763 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
764 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
774 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
775 mark_charset, print_charset, 0, 0, 0,
779 /* Make a new charset. */
780 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, fill it in from the arguments, and
   register it: in chlook->charset_by_attributes under its final byte, in
   chlook->charset_by_leading_byte under ID - MIN_LEADING_BYTE, and in
   Vcharset_hash_table under NAME. */
782 make_charset (Charset_ID id, Lisp_Object name,
783 unsigned short chars, unsigned char dimension,
784 unsigned char columns, unsigned char graphic,
785 Bufbyte final, unsigned char direction, Lisp_Object short_name,
786 Lisp_Object long_name, Lisp_Object doc,
788 Lisp_Object decoding_table,
789 Emchar min_code, Emchar max_code,
790 Emchar code_offset, unsigned char byte_offset,
791 Lisp_Object mother, unsigned char conversion)
794 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
798 XSETCHARSET (obj, cs);
/* Copy the caller-supplied attributes into the new record. */
800 CHARSET_ID (cs) = id;
801 CHARSET_NAME (cs) = name;
802 CHARSET_SHORT_NAME (cs) = short_name;
803 CHARSET_LONG_NAME (cs) = long_name;
804 CHARSET_CHARS (cs) = chars;
805 CHARSET_DIMENSION (cs) = dimension;
806 CHARSET_DIRECTION (cs) = direction;
807 CHARSET_COLUMNS (cs) = columns;
808 CHARSET_GRAPHIC (cs) = graphic;
809 CHARSET_FINAL (cs) = final;
810 CHARSET_DOC_STRING (cs) = doc;
811 CHARSET_REGISTRY (cs) = reg;
/* The CCL program and the reverse-direction charset start out as nil. */
812 CHARSET_CCL_PROGRAM (cs) = Qnil;
813 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding table is initialised to Qunbound; the
   DECODING_TABLE argument is not referenced on the lines visible here --
   confirm that this is intended. */
815 CHARSET_DECODING_TABLE(cs) = Qunbound;
816 CHARSET_MIN_CODE (cs) = min_code;
817 CHARSET_MAX_CODE (cs) = max_code;
818 CHARSET_CODE_OFFSET (cs) = code_offset;
819 CHARSET_BYTE_OFFSET (cs) = byte_offset;
820 CHARSET_MOTHER (cs) = mother;
821 CHARSET_CONVERSION (cs) = conversion;
/* Representation length: 1 for ASCII, otherwise the dimension plus one or
   plus two. */
825 if (id == LEADING_BYTE_ASCII)
826 CHARSET_REP_BYTES (cs) = 1;
828 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
830 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
835 /* some charsets do not have final characters. This includes
836 ASCII, Control-1, Composite, and the two faux private
838 unsigned char iso2022_type
839 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
841 if (code_offset == 0)
843 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
844 chlook->charset_by_attributes[iso2022_type][final] = obj;
848 (chlook->charset_by_attributes[iso2022_type][final][direction]));
849 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
853 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
854 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
856 /* Some charsets are "faux" and don't have names or really exist at
857 all except in the leading-byte table. */
859 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION.  Each of the three counters in chlook is compared with its
   upper limit before it is post-incremented into LB.  The error message
   "No more character sets free for this dimension" carries DIMENSION as
   its argument. */
864 get_unallocated_leading_byte (int dimension)
/* Single counter shared by all dimensions. */
869 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
872 lb = chlook->next_allocated_leading_byte++;
/* Separate counter for 1-byte private leading bytes. */
876 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
879 lb = chlook->next_allocated_1_byte_leading_byte++;
/* Separate counter for 2-byte private leading bytes. */
883 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
886 lb = chlook->next_allocated_2_byte_leading_byte++;
892 ("No more character sets free for this dimension",
893 make_int (dimension));
899 /* Number of Big5 characters which have the same code in 1st byte. */
/* The two differences are 0xFF - 0xA1 = 94 and 0x7F - 0x40 = 63, so the
   macro evaluates to 157. */
901 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look CODE_POINT up in the decoding table of CCS.  The table is indexed
   one octet of CODE_POINT at a time via get_ccs_octet_table; when the
   lookup ends in a character, that character is returned.  Otherwise, if
   CCS has a mother charset and uses the identical conversion, the lookup
   is repeated in the mother. */
904 decode_defined_char (Lisp_Object ccs, int code_point)
906 int dim = XCHARSET_DIMENSION (ccs);
907 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Select the sub-table (or entry) for the octet at position DIM. */
915 = get_ccs_octet_table (decoding_table, ccs,
916 (code_point >> (dim * 8)) & 255);
918 if (CHARP (decoding_table))
919 return XCHAR (decoding_table);
922 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
924 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
/* A `ucs' mother is decoded with DECODE_CHAR; any other mother is decoded
   by recursing into this function. */
926 if ( EQ (mother, Vcharset_ucs) )
927 return DECODE_CHAR (mother, code_point);
929 return decode_defined_char (mother, code_point);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   parameters (mother charset, conversion type, code and byte offsets,
   final byte).  No decoding table is consulted on the lines of this
   function. */
936 decode_builtin_char (Lisp_Object charset, int code_point)
938 Lisp_Object mother = XCHARSET_MOTHER (charset);
941 if ( XCHARSET_MAX_CODE (charset) > 0 )
943 if ( CHARSETP (mother) )
945 int code = code_point;
/* 94x60 conversion: CODE_POINT is a row byte and a cell byte. */
947 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
949 int row = code_point >> 8;
950 int cell = code_point & 255;
/* Rows below 78 (16 + 32 + 30) reaching this test are counted from row 48. */
954 else if (row < 16 + 32 + 30)
955 code = (row - (16 + 32)) * 94 + cell - 33;
956 else if (row < 18 + 32 + 30)
/* Rows 80 to 109 are counted from row 50. */
958 else if (row < 18 + 32 + 60)
959 code = (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60 conversion: as above, with an extra plane byte on top. */
961 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
963 int plane = code_point >> 16;
964 int row = (code_point >> 8) & 255;
965 int cell = code_point & 255;
969 else if (row < 16 + 32 + 30)
971 = (plane - 33) * 94 * 60
972 + (row - (16 + 32)) * 94
974 else if (row < 18 + 32 + 30)
976 else if (row < 18 + 32 + 60)
978 = (plane - 33) * 94 * 60
979 + (row - (18 + 32)) * 94
/* Decode the converted code in the mother charset, shifted by this
   charset's code offset. */
983 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* No mother charset: compute a linear index from the one or two bytes of
   CODE_POINT and add the code offset. */
988 = (XCHARSET_DIMENSION (charset) == 1
990 code_point - XCHARSET_BYTE_OFFSET (charset)
992 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
993 * XCHARSET_CHARS (charset)
994 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
995 + XCHARSET_CODE_OFFSET (charset);
/* The result has to fall within [min_code, max_code]. */
996 if ((cid < XCHARSET_MIN_CODE (charset))
997 || (XCHARSET_MAX_CODE (charset) < cid))
/* Charsets with a final byte of '0' or above map into fixed character
   ranges, one slot per final byte. */
1002 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1004 if (XCHARSET_DIMENSION (charset) == 1)
1006 switch (XCHARSET_CHARS (charset))
1010 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1013 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1021 switch (XCHARSET_CHARS (charset))
1024 return MIN_CHAR_94x94
1025 + (final - '0') * 94 * 94
1026 + (((code_point >> 8) & 0x7F) - 33) * 94
1027 + ((code_point & 0x7F) - 33);
1029 return MIN_CHAR_96x96
1030 + (final - '0') * 96 * 96
1031 + (((code_point >> 8) & 0x7F) - 32) * 96
1032 + ((code_point & 0x7F) - 32);
/* Compute the code point of the character CH in CHARSET.  The charset's
   encoding table is tried first.  Otherwise the code is derived from the
   mother charset or from CH itself and converted according to the
   charset's conversion type.  Charsets with a final byte of '0' or above
   can also be mapped arithmetically from the MIN_CHAR_94, MIN_CHAR_96,
   MIN_CHAR_94x94 and MIN_CHAR_96x96 ranges.  DEFINED_ONLY is passed on
   to the mother lookup and also selects a branch of its own below. */
1044 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1046 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* First choice: an integer entry for CH in the encoding table. */
1049 if ( CHAR_TABLEP (encoding_table)
1050 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1055 Lisp_Object mother = XCHARSET_MOTHER (charset);
1056 int min = XCHARSET_MIN_CODE (charset);
1057 int max = XCHARSET_MAX_CODE (charset);
1060 if ( CHARSETP (mother) )
/* With a final byte of '0' or above, the mother is queried with
   defined_only forced to 1; otherwise the caller's flag is passed on. */
1062 if (XCHARSET_FINAL (charset) >= '0')
1063 code = charset_code_point (mother, ch, 1);
1065 code = charset_code_point (mother, ch, defined_only);
1067 else if (defined_only)
1069 else if ( ((max == 0) && CHARSETP (mother)
1070 && (XCHARSET_FINAL (charset) == 0))
1071 || ((min <= ch) && (ch <= max)) )
1073 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1074 || ((min <= code) && (code <= max)) )
/* D is the code relative to the charset's code offset.  Each conversion
   below spreads D over octets that are 94 wide (biased by 33) or 96 wide
   (biased by 32). */
1076 int d = code - XCHARSET_CODE_OFFSET (charset);
1078 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1080 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1082 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1084 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1087 int cell = d % 94 + 33;
1093 return (row << 8) | cell;
1095 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1096 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1097 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1098 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1099 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1101 int plane = d / (94 * 60) + 33;
1102 int row = (d % (94 * 60)) / 94;
1103 int cell = d % 94 + 33;
1109 return (plane << 16) | (row << 8) | cell;
1111 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1113 ( (d / (94 * 94) + 33) << 16)
1114 | ((d / 94 % 94 + 33) << 8)
1116 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1118 ( (d / (96 * 96) + 32) << 16)
1119 | ((d / 96 % 96 + 32) << 8)
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1123 ( (d / (94 * 94 * 94) + 33) << 24)
1124 | ((d / (94 * 94) % 94 + 33) << 16)
1125 | ((d / 94 % 94 + 33) << 8)
1127 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1129 ( (d / (96 * 96 * 96) + 32) << 24)
1130 | ((d / (96 * 96) % 96 + 32) << 16)
1131 | ((d / 96 % 96 + 32) << 8)
/* An unrecognised conversion type is reported with printf. */
1135 printf ("Unknown CCS-conversion %d is specified!",
1136 XCHARSET_CONVERSION (charset));
/* Charsets with a final byte of '0' or above and a min code of 0: CH is
   mapped arithmetically from the range that belongs to the final byte. */
1140 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1141 ( XCHARSET_MIN_CODE (charset) == 0 )
1143 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1144 (XCHARSET_CODE_OFFSET (charset)
1145 == XCHARSET_MIN_CODE (charset))
1150 if (XCHARSET_DIMENSION (charset) == 1)
1152 if (XCHARSET_CHARS (charset) == 94)
1154 if (((d = ch - (MIN_CHAR_94
1155 + (XCHARSET_FINAL (charset) - '0') * 94))
1160 else if (XCHARSET_CHARS (charset) == 96)
1162 if (((d = ch - (MIN_CHAR_96
1163 + (XCHARSET_FINAL (charset) - '0') * 96))
1171 else if (XCHARSET_DIMENSION (charset) == 2)
1173 if (XCHARSET_CHARS (charset) == 94)
1175 if (((d = ch - (MIN_CHAR_94x94
1177 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1180 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1182 else if (XCHARSET_CHARS (charset) == 96)
1184 if (((d = ch - (MIN_CHAR_96x96
1186 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1189 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Find a built-in coded charset for the character C.  The charset is
   stored through CHARSET and the return value is C's code point in it.
   The ranges are tested in sequence.  For the 94, 96, 94x94 and 96x96
   ranges the charset is looked up by its attributes (size, dimension,
   final byte, left-to-right); when no such charset is registered,
   Vcharset_ucs is stored instead.

   NOTE(review): the half-width katakana branch returns list2 (...)
   instead of storing through CHARSET and returning an integer like every
   other branch; it looks like disabled or leftover code -- confirm. */
1200 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1202 if (c <= MAX_CHAR_BASIC_LATIN)
1204 *charset = Vcharset_ascii;
1209 *charset = Vcharset_control_1;
1214 *charset = Vcharset_latin_iso8859_1;
1218 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1220 *charset = Vcharset_hebrew_iso8859_8;
1221 return c - MIN_CHAR_HEBREW + 0x20;
1224 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1226 *charset = Vcharset_thai_tis620;
1227 return c - MIN_CHAR_THAI + 0x20;
1230 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1231 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1233 return list2 (Vcharset_katakana_jisx0201,
1234 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1237 else if (c <= MAX_CHAR_BMP)
1239 *charset = Vcharset_ucs_bmp;
1242 else if (c <= MAX_CHAR_SMP)
1244 *charset = Vcharset_ucs_smp;
1245 return c - MIN_CHAR_SMP;
1247 else if (c <= MAX_CHAR_SIP)
1249 *charset = Vcharset_ucs_sip;
1250 return c - MIN_CHAR_SIP;
1252 else if (c < MIN_CHAR_DAIKANWA)
1254 *charset = Vcharset_ucs;
1257 else if (c <= MAX_CHAR_DAIKANWA)
1259 *charset = Vcharset_ideograph_daikanwa;
1260 return c - MIN_CHAR_DAIKANWA;
1262 else if (c < MIN_CHAR_94)
1264 *charset = Vcharset_ucs;
1267 else if (c <= MAX_CHAR_94)
1269 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1270 ((c - MIN_CHAR_94) / 94) + '0',
1271 CHARSET_LEFT_TO_RIGHT);
1272 if (!NILP (*charset))
1273 return ((c - MIN_CHAR_94) % 94) + 33;
1276 *charset = Vcharset_ucs;
1280 else if (c <= MAX_CHAR_96)
1282 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1283 ((c - MIN_CHAR_96) / 96) + '0',
1284 CHARSET_LEFT_TO_RIGHT);
1285 if (!NILP (*charset))
1286 return ((c - MIN_CHAR_96) % 96) + 32;
1289 *charset = Vcharset_ucs;
1293 else if (c <= MAX_CHAR_94x94)
1296 = CHARSET_BY_ATTRIBUTES (94, 2,
1297 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1298 CHARSET_LEFT_TO_RIGHT);
1299 if (!NILP (*charset))
1300 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1301 | (((c - MIN_CHAR_94x94) % 94) + 33);
1304 *charset = Vcharset_ucs;
1308 else if (c <= MAX_CHAR_96x96)
1311 = CHARSET_BY_ATTRIBUTES (96, 2,
1312 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1313 CHARSET_LEFT_TO_RIGHT);
1314 if (!NILP (*charset))
1315 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1316 | (((c - MIN_CHAR_96x96) % 96) + 32);
1319 *charset = Vcharset_ucs;
1325 *charset = Vcharset_ucs;
1330 Lisp_Object Vdefault_coded_charset_priority_list;
1334 /************************************************************************/
1335 /* Basic charset Lisp functions */
1336 /************************************************************************/
1338 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1339 Return non-nil if OBJECT is a charset.
1343 return CHARSETP (object) ? Qt : Qnil;
1346 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1347 Retrieve the charset of the given name.
1348 If CHARSET-OR-NAME is a charset object, it is simply returned.
1349 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1350 nil is returned. Otherwise the associated charset object is returned.
1354 if (CHARSETP (charset_or_name))
1355 return charset_or_name;
1357 CHECK_SYMBOL (charset_or_name);
1358 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1361 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1362 Retrieve the charset of the given name.
1363 Same as `find-charset' except an error is signalled if there is no such
1364 charset instead of returning nil.
1368 Lisp_Object charset = Ffind_charset (name);
1371 signal_simple_error ("No such charset", name);
1375 /* We store the charsets in hash tables with the names as the key and the
1376 actual charset object as the value. Occasionally we need to use them
1377 in a list format. These routines provide us with that. */
1378 struct charset_list_closure
1380 Lisp_Object *charset_list;
/* Mapping function passed to elisp_maphash by Fcharset_list.  It conses
   KEY onto the list that the closure points at; VALUE is not used on the
   visible lines. */
1384 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1385 void *charset_list_closure)
1387 /* This function can GC */
1388 struct charset_list_closure *chcl =
1389 (struct charset_list_closure*) charset_list_closure;
1390 Lisp_Object *charset_list = chcl->charset_list;
1392 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1396 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1397 Return a list of the names of all defined charsets.
1401 Lisp_Object charset_list = Qnil;
1402 struct gcpro gcpro1;
1403 struct charset_list_closure charset_list_closure;
1405 GCPRO1 (charset_list);
1406 charset_list_closure.charset_list = &charset_list;
1407 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1408 &charset_list_closure);
1411 return charset_list;
1414 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1415 Return the name of charset CHARSET.
1419 return XCHARSET_NAME (Fget_charset (charset));
1422 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.
   Checks that NAME is a symbol and DOC-STRING a string (or nil), refuses
   to redefine a charset that Ffind_charset already knows, and then walks
   PROPS as a property list, validating every recognized keyword and
   signalling "Unrecognized property" for anything else.  After the loop
   it rejects a DIMENSION/CHARS/FINAL combination that
   CHARSET_BY_ATTRIBUTES already holds in either direction, obtains an id
   from get_unallocated_leading_byte, fills in defaults (empty doc string
   and registry, short name taken from the symbol name, long name taken
   from the doc string, columns taken from the dimension) and builds the
   object with make_charset.  A non-nil 'ccl-program is stored on the new
   charset afterwards.

   The 'short-name test has to stay chained to the 'long-name test with
   `else if'.  Written as two independent `if' statements, a 'short-name
   keyword is accepted by the first one and then falls through the second
   chain into its final "Unrecognized property" branch.

   The docstring names the UTF-2000 range properties 'min-code and
   'max-code, matching the symbols the property loop actually tests
   (Qmin_code / Qmax_code, interned as "min-code" / "max-code").  */
1423 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1424 Define a new character set.
1425 This function is for use with Mule support.
1426 NAME is a symbol, the name by which the character set is normally referred.
1427 DOC-STRING is a string describing the character set.
1428 PROPS is a property list, describing the specific nature of the
1429 character set. Recognized properties are:
1431 'short-name Short version of the charset name (ex: Latin-1)
1432 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1433 'registry A regular expression matching the font registry field for
1435 'dimension Number of octets used to index a character in this charset.
1436 Either 1 or 2. Defaults to 1.
1437 If UTF-2000 feature is enabled, 3 or 4 are also available.
1438 'columns Number of columns used to display a character in this charset.
1439 Only used in TTY mode. (Under X, the actual width of a
1440 character can be derived from the font used to display the
1441 characters.) If unspecified, defaults to the dimension
1442 (this is almost always the correct value).
1443 'chars Number of characters in each dimension (94 or 96).
1444 Defaults to 94. Note that if the dimension is 2, the
1445 character set thus described is 94x94 or 96x96.
1446 If UTF-2000 feature is enabled, 128 or 256 are also available.
1447 'final Final byte of ISO 2022 escape sequence. Must be
1448 supplied. Each combination of (DIMENSION, CHARS) defines a
1449 separate namespace for final bytes. Note that ISO
1450 2022 restricts the final byte to the range
1451 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1452 dimension == 2. Note also that final bytes in the range
1453 0x30 - 0x3F are reserved for user-defined (not official)
1455 'graphic 0 (use left half of font on output) or 1 (use right half
1456 of font on output). Defaults to 0. For example, for
1457 a font whose registry is ISO8859-1, the left half
1458 (octets 0x20 - 0x7F) is the `ascii' character set, while
1459 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1460 character set. With 'graphic set to 0, the octets
1461 will have their high bit cleared; with it set to 1,
1462 the octets will have their high bit set.
1463 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1465 'ccl-program A compiled CCL program used to convert a character in
1466 this charset into an index into the font. This is in
1467 addition to the 'graphic property. The CCL program
1468 is passed the octets of the character, with the high
1469 bit cleared and set depending upon whether the value
1470 of the 'graphic property is 0 or 1.
1471 'mother [UTF-2000 only] Base coded-charset.
1472 'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
1473 'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
1474 'code-offset [UTF-2000 only] Offset for a code-point of a base
1476 'conversion [UTF-2000 only] Conversion for a code-point of a base
1477 coded-charset (94x60 or 94x94x60).
1479 (name, doc_string, props))
1481 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1482 int direction = CHARSET_LEFT_TO_RIGHT;
1483 Lisp_Object registry = Qnil;
1484 Lisp_Object charset;
1485 Lisp_Object ccl_program = Qnil;
1486 Lisp_Object short_name = Qnil, long_name = Qnil;
1487 Lisp_Object mother = Qnil;
1488 int min_code = 0, max_code = 0, code_offset = 0;
1489 int byte_offset = -1;
1492 CHECK_SYMBOL (name);
1493 if (!NILP (doc_string))
1494 CHECK_STRING (doc_string);
1496 charset = Ffind_charset (name);
1497 if (!NILP (charset))
1498 signal_simple_error ("Cannot redefine existing charset", name);
1501 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1503 if (EQ (keyword, Qshort_name))
1505 CHECK_STRING (value);
1509 else if (EQ (keyword, Qlong_name))
1511 CHECK_STRING (value);
1515 else if (EQ (keyword, Qdimension))
1518 dimension = XINT (value);
1519 if (dimension < 1 ||
1526 signal_simple_error ("Invalid value for 'dimension", value);
1529 else if (EQ (keyword, Qchars))
1532 chars = XINT (value);
1533 if (chars != 94 && chars != 96
1535 && chars != 128 && chars != 256
1538 signal_simple_error ("Invalid value for 'chars", value);
1541 else if (EQ (keyword, Qcolumns))
1544 columns = XINT (value);
1545 if (columns != 1 && columns != 2)
1546 signal_simple_error ("Invalid value for 'columns", value);
1549 else if (EQ (keyword, Qgraphic))
1552 graphic = XINT (value);
1560 signal_simple_error ("Invalid value for 'graphic", value);
1563 else if (EQ (keyword, Qregistry))
1565 CHECK_STRING (value);
1569 else if (EQ (keyword, Qdirection))
1571 if (EQ (value, Ql2r))
1572 direction = CHARSET_LEFT_TO_RIGHT;
1573 else if (EQ (value, Qr2l))
1574 direction = CHARSET_RIGHT_TO_LEFT;
1576 signal_simple_error ("Invalid value for 'direction", value);
1579 else if (EQ (keyword, Qfinal))
1581 CHECK_CHAR_COERCE_INT (value);
1582 final = XCHAR (value);
1583 if (final < '0' || final > '~')
1584 signal_simple_error ("Invalid value for 'final", value);
1588 else if (EQ (keyword, Qmother))
1590 mother = Fget_charset (value);
1593 else if (EQ (keyword, Qmin_code))
1596 min_code = XUINT (value);
1599 else if (EQ (keyword, Qmax_code))
1602 max_code = XUINT (value);
1605 else if (EQ (keyword, Qcode_offset))
1608 code_offset = XUINT (value);
1611 else if (EQ (keyword, Qconversion))
1613 if (EQ (value, Q94x60))
1614 conversion = CONVERSION_94x60;
1615 else if (EQ (value, Q94x94x60))
1616 conversion = CONVERSION_94x94x60;
1618 signal_simple_error ("Unrecognized conversion", value);
1622 else if (EQ (keyword, Qccl_program))
1624 struct ccl_program test_ccl;
1626 if (setup_ccl_program (&test_ccl, value) < 0)
1627 signal_simple_error ("Invalid value for 'ccl-program", value);
1628 ccl_program = value;
1632 signal_simple_error ("Unrecognized property", keyword);
1638 error ("'final must be specified");
1640 if (dimension == 2 && final > 0x5F)
1642 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1645 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1646 CHARSET_LEFT_TO_RIGHT)) ||
1647 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1648 CHARSET_RIGHT_TO_LEFT)))
1650 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1652 id = get_unallocated_leading_byte (dimension);
1654 if (NILP (doc_string))
1655 doc_string = build_string ("");
1657 if (NILP (registry))
1658 registry = build_string ("");
1660 if (NILP (short_name))
1661 XSETSTRING (short_name, XSYMBOL (name)->name);
1663 if (NILP (long_name))
1664 long_name = doc_string;
1667 columns = dimension;
1669 if (byte_offset < 0)
1673 else if (chars == 96)
1679 charset = make_charset (id, name, chars, dimension, columns, graphic,
1680 final, direction, short_name, long_name,
1681 doc_string, registry,
1682 Qnil, min_code, max_code, code_offset, byte_offset,
1683 mother, conversion);
1684 if (!NILP (ccl_program))
1685 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   Resolves CHARSET, signals an error if it already has a
   reverse-direction partner or if NEW-NAME (which must be a symbol)
   already names a charset, then copies chars, dimension, columns,
   graphic, final, doc string, short/long name and registry from the
   original.  The direction is flipped: right-to-left by default,
   left-to-right when the original is right-to-left.  A fresh id comes
   from get_unallocated_leading_byte.  Finally both charsets are linked
   to each other through their REVERSE_DIRECTION_CHARSET slots.
   NOTE(review): two alternative tails of the make_charset argument list
   are visible (one copying the decoding table, code range, offsets and
   conversion; one passing Qnil/0 placeholders); presumably they are
   selected by a preprocessor conditional not shown here -- confirm.  */
1689 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1691 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1692 NEW-NAME is the name of the new charset. Return the new charset.
1694 (charset, new_name))
1696 Lisp_Object new_charset = Qnil;
1697 int id, chars, dimension, columns, graphic, final;
1699 Lisp_Object registry, doc_string, short_name, long_name;
1702 charset = Fget_charset (charset);
1703 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1704 signal_simple_error ("Charset already has reverse-direction charset",
1707 CHECK_SYMBOL (new_name);
1708 if (!NILP (Ffind_charset (new_name)))
1709 signal_simple_error ("Cannot redefine existing charset", new_name);
1711 cs = XCHARSET (charset);
1713 chars = CHARSET_CHARS (cs);
1714 dimension = CHARSET_DIMENSION (cs);
1715 columns = CHARSET_COLUMNS (cs);
1716 id = get_unallocated_leading_byte (dimension);
1718 graphic = CHARSET_GRAPHIC (cs);
1719 final = CHARSET_FINAL (cs);
1720 direction = CHARSET_RIGHT_TO_LEFT;
1721 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1722 direction = CHARSET_LEFT_TO_RIGHT;
1723 doc_string = CHARSET_DOC_STRING (cs);
1724 short_name = CHARSET_SHORT_NAME (cs);
1725 long_name = CHARSET_LONG_NAME (cs);
1726 registry = CHARSET_REGISTRY (cs);
1728 new_charset = make_charset (id, new_name, chars, dimension, columns,
1729 graphic, final, direction, short_name, long_name,
1730 doc_string, registry,
1732 CHARSET_DECODING_TABLE(cs),
1733 CHARSET_MIN_CODE(cs),
1734 CHARSET_MAX_CODE(cs),
1735 CHARSET_CODE_OFFSET(cs),
1736 CHARSET_BYTE_OFFSET(cs),
1738 CHARSET_CONVERSION (cs)
1740 Qnil, 0, 0, 0, 0, Qnil, 0
1744 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1745 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.
   ALIAS must be a symbol; CHARSET is resolved through Fget_charset.
   The alias is entered into Vcharset_hash_table as an additional key for
   the same charset object, and the value of Fputhash is returned.  */
1750 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1751 Define symbol ALIAS as an alias for CHARSET.
1755 CHECK_SYMBOL (alias);
1756 charset = Fget_charset (charset);
1757 return Fputhash (alias, charset, Vcharset_hash_table);
1760 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET through Fget_charset and returns the content of its
   REVERSE_DIRECTION_CHARSET slot unchanged.  */
1762 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1764 Return the reverse-direction charset parallel to CHARSET, if any.
1765 This is the charset with the same properties (in particular, the same
1766 dimension, number of characters per dimension, and final byte) as
1767 CHARSET but whose characters are displayed in the opposite direction.
1771 charset = Fget_charset (charset);
1772 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Validation performed by the visible code: DIMENSION must be an integer
   equal to 1 or 2, CHARS must be 94 or 96, FINAL must be a character in
   the range '0' .. '~' (and at most 0x5F when DIMENSION is 2), and
   DIRECTION must be 'l2r, 'r2l or nil.  The lookup goes through
   CHARSET_BY_ATTRIBUTES; lookups for left-to-right, right-to-left and
   for the explicitly requested direction are all present.
   NOTE(review): the conditions choosing between those lookups, and the
   guard around the final XCHARSET_NAME, are not visible in this
   listing -- presumably left-to-right is tried first when DIRECTION is
   omitted, as the docstring says.  */
1776 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1777 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1778 If DIRECTION is omitted, both directions will be checked (left-to-right
1779 will be returned if character sets exist for both directions).
1781 (dimension, chars, final, direction))
1783 int dm, ch, fi, di = -1;
1784 Lisp_Object obj = Qnil;
1786 CHECK_INT (dimension);
1787 dm = XINT (dimension);
1788 if (dm < 1 || dm > 2)
1789 signal_simple_error ("Invalid value for DIMENSION", dimension);
1793 if (ch != 94 && ch != 96)
1794 signal_simple_error ("Invalid value for CHARS", chars);
1796 CHECK_CHAR_COERCE_INT (final);
1798 if (fi < '0' || fi > '~')
1799 signal_simple_error ("Invalid value for FINAL", final);
1801 if (EQ (direction, Ql2r))
1802 di = CHARSET_LEFT_TO_RIGHT;
1803 else if (EQ (direction, Qr2l))
1804 di = CHARSET_RIGHT_TO_LEFT;
1805 else if (!NILP (direction))
1806 signal_simple_error ("Invalid value for DIRECTION", direction);
1808 if (dm == 2 && fi > 0x5F)
1810 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1814 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1816 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1819 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1822 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.
   Resolves CHARSET through Fget_charset and returns its
   XCHARSET_SHORT_NAME.  */
1826 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1827 Return short name of CHARSET.
1831 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.
   Resolves CHARSET through Fget_charset and returns its
   XCHARSET_LONG_NAME.  */
1834 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1835 Return long name of CHARSET.
1839 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.
   Resolves CHARSET through Fget_charset and returns its doc string
   (XCHARSET_DOC_STRING).  */
1842 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1843 Return description of CHARSET.
1847 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.
   Resolves CHARSET through Fget_charset and returns its
   XCHARSET_DIMENSION boxed as a Lisp integer.  */
1850 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1851 Return dimension of CHARSET.
1855 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   Resolves CHARSET, checks that PROP is a symbol, and dispatches on it:
   name, short-name, long-name, doc-string, registry, ccl-program and
   mother are returned as stored; dimension, columns, graphic, chars,
   min-code and max-code are boxed with make_int; final is returned as a
   character, or nil when the stored final byte is 0; direction is mapped
   to 'l2r / 'r2l; reverse-direction-charset yields the partner's name
   when the slot holds a charset, otherwise the slot content itself.
   Any other symbol signals "Unrecognized charset property name".
   NOTE(review): `make-charset' also accepts 'code-offset and
   'conversion, but no branch for them is visible here -- confirm whether
   that omission is intended.  */
1858 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1859 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1860 Recognized properties are those listed in `make-charset', as well as
1861 'name and 'doc-string.
1867 charset = Fget_charset (charset);
1868 cs = XCHARSET (charset);
1870 CHECK_SYMBOL (prop);
1871 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1872 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1873 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1874 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1875 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1876 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1877 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1878 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1879 Qnil : make_char (CHARSET_FINAL (cs));
1880 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1881 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1882 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1883 if (EQ (prop, Qdirection))
1884 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1885 if (EQ (prop, Qreverse_direction_charset))
1887 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1888 /* #### Is this translation OK? If so, error checking sufficient? */
1889 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1892 if (EQ (prop, Qmother))
1893 return CHARSET_MOTHER (cs);
1894 if (EQ (prop, Qmin_code))
1895 return make_int (CHARSET_MIN_CODE (cs));
1896 if (EQ (prop, Qmax_code))
1897 return make_int (CHARSET_MAX_CODE (cs));
1899 signal_simple_error ("Unrecognized charset property name", prop);
1900 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.
   The identification number reported is the charset's leading byte
   (XCHARSET_LEADING_BYTE), boxed as a Lisp integer.  */
1903 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1904 Return charset identification number of CHARSET.
1908 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1911 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   Resolves CHARSET, then validates CCL-PROGRAM by running
   setup_ccl_program on a scratch struct ccl_program; a negative result
   signals "Invalid ccl-program".  Only after that check is the program
   stored in the charset's CCL_PROGRAM slot.  */
1914 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1915 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1917 (charset, ccl_program))
1919 struct ccl_program test_ccl;
1921 charset = Fget_charset (charset);
1922 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1923 signal_simple_error ("Invalid ccl-program", ccl_program);
1924 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop cached font lookups for CHARSET.
   Iterates over every device (DEVICE_LOOP_NO_BREAK), looks CHARSET up in
   that device's charset_font_cache, and clears the hash table found
   there; devices with no entry (Qunbound) are left alone.  */
1929 invalidate_charset_font_caches (Lisp_Object charset)
1931 /* Invalidate font cache entries for charset on all devices. */
1932 Lisp_Object devcons, concons, hash_table;
1933 DEVICE_LOOP_NO_BREAK (devcons, concons)
1935 struct device *d = XDEVICE (XCAR (devcons));
1936 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1937 if (!UNBOUNDP (hash_table))
1938 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.
   Resolves CHARSET, requires REGISTRY to be a string, stores it in the
   charset's REGISTRY slot, then invalidates the per-device font caches
   for this charset and reports a change of the default face's font
   property (face_property_was_changed with Qglobal).  */
1942 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1943 Set the 'registry property of CHARSET to REGISTRY.
1945 (charset, registry))
1947 charset = Fget_charset (charset);
1948 CHECK_STRING (registry);
1949 XCHARSET_REGISTRY (charset) = registry;
1950 invalidate_charset_font_caches (charset);
1951 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.
   Resolves CHARSET through Fget_charset and returns the content of its
   DECODING_TABLE slot.  */
1956 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1957 Return mapping-table of CHARSET.
1961 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   TABLE must be nil or a vector; anything else signals
   wrong-type-argument with the predicate name "vector-or-nil-p".  A
   vector is first validated with decoding_table_check_elements, whose
   failures are reported as "Too big table", "Invalid element is found"
   or "Something wrong".  The charset's DECODING_TABLE slot is reset to
   Qnil in both accepted cases.  The vector is then walked according to
   the charset's dimension, and entries are registered through
   Fput_char_attribute under the charset's name, with code points built
   from the index plus CHARSET_BYTE_OFFSET; for nested vectors the outer
   index is shifted left by 8 bits.
   NOTE(review): the tests guarding the individual Fput_char_attribute
   calls and the switch labels are not visible in this listing; the
   description above is limited to what the remaining lines show.  */
1964 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1965 Set mapping-table of CHARSET to TABLE.
1969 struct Lisp_Charset *cs;
1973 charset = Fget_charset (charset);
1974 cs = XCHARSET (charset);
1978 CHARSET_DECODING_TABLE(cs) = Qnil;
1981 else if (VECTORP (table))
1983 int ccs_len = CHARSET_BYTE_SIZE (cs);
1984 int ret = decoding_table_check_elements (table,
1985 CHARSET_DIMENSION (cs),
1990 signal_simple_error ("Too big table", table);
1992 signal_simple_error ("Invalid element is found", table);
1994 signal_simple_error ("Something wrong", table);
1996 CHARSET_DECODING_TABLE(cs) = Qnil;
1999 signal_error (Qwrong_type_argument,
2000 list2 (build_translated_string ("vector-or-nil-p"),
2003 byte_offset = CHARSET_BYTE_OFFSET (cs);
2004 switch (CHARSET_DIMENSION (cs))
2007 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2009 Lisp_Object c = XVECTOR_DATA(table)[i];
2012 Fput_char_attribute (c, XCHARSET_NAME (charset),
2013 make_int (i + byte_offset));
2017 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2019 Lisp_Object v = XVECTOR_DATA(table)[i];
2025 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2027 Lisp_Object c = XVECTOR_DATA(v)[j];
2031 (c, XCHARSET_NAME (charset),
2032 make_int ( ( (i + byte_offset) << 8 )
2038 Fput_char_attribute (v, XCHARSET_NAME (charset),
2039 make_int (i + byte_offset));
2048 /************************************************************************/
2049 /* Lisp primitives for working with characters */
2050 /************************************************************************/
/* Lisp primitive `decode-char'.
   Resolves CHARSET, then decodes the code point with DECODE_CHAR when
   DEFINED_ONLY is nil, or with decode_defined_char otherwise.  A
   non-negative result is returned as a character; a negative result
   yields nil.
   NOTE(review): the statement executed when XCHARSET_GRAPHIC (charset)
   is 1 is not visible in this listing.  */
2053 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2054 Make a character from CHARSET and code-point CODE.
2055 If DEFINED_ONLY is non-nil, builtin character is not returned.
2056 If corresponding character is not found, nil is returned.
2058 (charset, code, defined_only))
2062 charset = Fget_charset (charset);
2065 if (XCHARSET_GRAPHIC (charset) == 1)
2067 if (NILP (defined_only))
2068 c = DECODE_CHAR (charset, c);
2070 c = decode_defined_char (charset, c);
2071 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   Resolves CHARSET and decodes the code point with decode_builtin_char;
   when that yields a negative value the function falls back to
   Fdecode_char (charset, code, Qnil).
   Vcharset_latin_viscii gets special treatment: the character is first
   obtained through Fdecode_char, its attributes for
   Vcharset_latin_viscii_lower and Vcharset_latin_viscii_upper are
   queried, and CHARSET is switched to the lower or upper variant.
   NOTE(review): the conditions around those two attribute lookups and
   the statement under the XCHARSET_GRAPHIC test are not visible here.  */
2074 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2075 Make a builtin character from CHARSET and code-point CODE.
2081 charset = Fget_charset (charset);
2083 if (EQ (charset, Vcharset_latin_viscii))
2085 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2091 (ret = Fget_char_attribute (chr,
2092 Vcharset_latin_viscii_lower,
2095 charset = Vcharset_latin_viscii_lower;
2099 (ret = Fget_char_attribute (chr,
2100 Vcharset_latin_viscii_upper,
2103 charset = Vcharset_latin_viscii_upper;
2110 if (XCHARSET_GRAPHIC (charset) == 1)
2113 c = decode_builtin_char (charset, c);
2114 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.
   Resolves CHARSET and derives the permitted octet range from it:
   0..127 for ascii, 0..31 for control-1, 0..255 for a 256-character
   set, 33..126 for a 94-character set and 32..127 otherwise.  An octet
   outside that range signals args-out-of-range.  ARG2 is masked with
   0x7f before being range-checked.  For a one-dimensional charset the
   character is built from the first octet alone, and the error
   "Charset is of dimension one; second octet must be nil" exists for a
   superfluous ARG2; otherwise both octets are passed to MAKE_CHAR.
   NOTE(review): the extraction and masking of ARG1 is not visible in
   this listing; only the handling of ARG2 can be seen.  */
2118 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2119 Make a character from CHARSET and octets ARG1 and ARG2.
2120 ARG2 is required only for characters from two-dimensional charsets.
2121 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2122 character s with caron.
2124 (charset, arg1, arg2))
2128 int lowlim, highlim;
2130 charset = Fget_charset (charset);
2131 cs = XCHARSET (charset);
2133 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2134 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2136 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2138 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2139 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2142 /* It is useful (and safe, according to Olivier Galibert) to strip
2143 the 8th bit off ARG1 and ARG2 because it allows programmers to
2144 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2145 Latin 2 code of the character. */
2153 if (a1 < lowlim || a1 > highlim)
2154 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2156 if (CHARSET_DIMENSION (cs) == 1)
2160 ("Charset is of dimension one; second octet must be nil", arg2);
2161 return make_char (MAKE_CHAR (charset, a1, 0));
2170 a2 = XINT (arg2) & 0x7f;
2172 if (a2 < lowlim || a2 > highlim)
2173 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2175 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   Accepts a character (an integer is coerced by CHECK_CHAR_COERCE_INT)
   and returns the name of the charset that CHAR_CHARSET assigns to
   it.  */
2178 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2179 Return the character set of CHARACTER.
2183 CHECK_CHAR_COERCE_INT (character);
2185 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.
   Splits CHARACTER with BREAKUP_CHAR into its charset and two octets.
   N equal to nil or 0 selects the first octet and N equal to 1 the
   second; any other N signals "Octet number must be 0 or 1".  */
2188 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2189 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2190 N defaults to 0 if omitted.
2194 Lisp_Object charset;
2197 CHECK_CHAR_COERCE_INT (character);
2199 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2201 if (NILP (n) || EQ (n, Qzero))
2202 return make_int (octet0);
2203 else if (EQ (n, make_int (1)))
2204 return make_int (octet1);
2206 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.
   Coerces CHARACTER, resolves CHARSET, and asks charset_code_point for
   the code point, passing a flag that is true when DEFINED_ONLY is
   non-nil.  A non-negative code point is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is not
   visible in this listing.  */
2210 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2211 Return code-point of CHARACTER in specified CHARSET.
2213 (character, charset, defined_only))
2217 CHECK_CHAR_COERCE_INT (character);
2218 charset = Fget_charset (charset);
2219 code_point = charset_code_point (charset, XCHAR (character),
2220 !NILP (defined_only));
2221 if (code_point >= 0)
2222 return make_int (code_point);
/* Lisp primitive `split-char'.
   Returns a list whose head is the charset name, followed by the
   position codes of CHARACTER.  Two implementations are visible:
   - one obtains a code point with ENCODE_CHAR and conses its low byte
     (code_point & 255) onto the result once per charset dimension
     before prepending the charset name;
   - the other uses BREAKUP_CHAR and builds a three-element list for a
     two-dimensional charset, or a two-element list otherwise.
   CHARSET and RC are GC-protected because list construction can
   allocate.
   NOTE(review): presumably the two variants are selected by a
   preprocessor conditional that is not visible here, and the loop body
   that shifts code_point and decrements dimension is likewise not
   shown -- confirm against the full file.  */
2228 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2229 Return list of charset and one or two position-codes of CHARACTER.
2233 /* This function can GC */
2234 struct gcpro gcpro1, gcpro2;
2235 Lisp_Object charset = Qnil;
2236 Lisp_Object rc = Qnil;
2244 GCPRO2 (charset, rc);
2245 CHECK_CHAR_COERCE_INT (character);
2248 code_point = ENCODE_CHAR (XCHAR (character), charset);
2249 dimension = XCHARSET_DIMENSION (charset);
2250 while (dimension > 0)
2252 rc = Fcons (make_int (code_point & 255), rc);
2256 rc = Fcons (XCHARSET_NAME (charset), rc);
2258 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2260 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2262 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2266 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2275 #ifdef ENABLE_COMPOSITE_CHARS
2276 /************************************************************************/
2277 /* composite character functions */
2278 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   builds a new character in Vcharset_composite from the next free
   row/column pair, records it in both the char->string and the
   string->char tables, and advances the column counter; when the column
   reaches 128 it wraps back to 32 and the row counter is incremented.
   "No more composite chars available" is signalled once the row counter
   has reached 128.
   NOTE(review): the test that distinguishes a cache hit from a miss and
   the return statement are not visible in this listing.  */
2281 lookup_composite_char (Bufbyte *str, int len)
2283 Lisp_Object lispstr = make_string (str, len);
2284 Lisp_Object ch = Fgethash (lispstr,
2285 Vcomposite_char_string2char_hash_table,
2291 if (composite_char_row_next >= 128)
2292 signal_simple_error ("No more composite chars available", lispstr);
2293 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2294 composite_char_col_next);
2295 Fputhash (make_char (emch), lispstr,
2296 Vcomposite_char_char2string_hash_table);
2297 Fputhash (lispstr, make_char (emch),
2298 Vcomposite_char_string2char_hash_table);
2299 composite_char_col_next++;
2300 if (composite_char_col_next >= 128)
2302 composite_char_col_next = 32;
2303 composite_char_row_next++;
/* Reverse lookup for composite characters: fetch the entry recorded for
   CH in Vcomposite_char_char2string_hash_table.  The assertion requires
   that an entry exists, i.e. the lookup did not yield Qunbound.  */
2312 composite_char_string (Emchar ch)
2314 Lisp_Object str = Fgethash (make_char (ch),
2315 Vcomposite_char_char2string_hash_table,
2317 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char: checks that STRING is
   a string and returns the composite character for its data and length.
   NOTE(review): the definition uses `xxDEFUN' rather than `DEFUN';
   what that spelling implies is not established by this listing --
   verify before relying on the primitive being exported.  */
2321 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2322 Convert a string into a single composite character.
2323 The character is the result of overstriking all the characters in
2328 CHECK_STRING (string);
2329 return make_char (lookup_composite_char (XSTRING_DATA (string),
2330 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string.  A character whose
   leading byte is not LEADING_BYTE_COMPOSITE is rejected with
   "Must be composite char"; otherwise the recorded component string is
   returned.  */
2333 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2334 Return a string of the characters comprising a composite character.
2342 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2343 signal_simple_error ("Must be composite char", ch);
2344 return composite_char_string (emch);
2346 #endif /* ENABLE_COMPOSITE_CHARS */
2349 /************************************************************************/
2350 /* initialization */
2351 /************************************************************************/
/* Symbol-level initialization for this file.
   Initializes the charset lrecord implementation, registers every Lisp
   primitive defined in this file with DEFSUBR (the composite-character
   primitives only when ENABLE_COMPOSITE_CHARS is defined), and interns
   the symbols used as property keywords, direction values, conversion
   names and names of the predefined charsets.  */
2354 syms_of_mule_charset (void)
2356 INIT_LRECORD_IMPLEMENTATION (charset);
2358 DEFSUBR (Fcharsetp);
2359 DEFSUBR (Ffind_charset);
2360 DEFSUBR (Fget_charset);
2361 DEFSUBR (Fcharset_list);
2362 DEFSUBR (Fcharset_name);
2363 DEFSUBR (Fmake_charset);
2364 DEFSUBR (Fmake_reverse_direction_charset);
2365 /* DEFSUBR (Freverse_direction_charset); */
2366 DEFSUBR (Fdefine_charset_alias);
2367 DEFSUBR (Fcharset_from_attributes);
2368 DEFSUBR (Fcharset_short_name);
2369 DEFSUBR (Fcharset_long_name);
2370 DEFSUBR (Fcharset_description);
2371 DEFSUBR (Fcharset_dimension);
2372 DEFSUBR (Fcharset_property);
2373 DEFSUBR (Fcharset_id);
2374 DEFSUBR (Fset_charset_ccl_program);
2375 DEFSUBR (Fset_charset_registry);
2377 DEFSUBR (Fcharset_mapping_table);
2378 DEFSUBR (Fset_charset_mapping_table);
2382 DEFSUBR (Fdecode_char);
2383 DEFSUBR (Fdecode_builtin_char);
2384 DEFSUBR (Fencode_char);
2386 DEFSUBR (Fmake_char);
2387 DEFSUBR (Fchar_charset);
2388 DEFSUBR (Fchar_octet);
2389 DEFSUBR (Fsplit_char);
2391 #ifdef ENABLE_COMPOSITE_CHARS
2392 DEFSUBR (Fmake_composite_char);
2393 DEFSUBR (Fcomposite_char_string);
2396 defsymbol (&Qcharsetp, "charsetp");
2397 defsymbol (&Qregistry, "registry");
2398 defsymbol (&Qfinal, "final");
2399 defsymbol (&Qgraphic, "graphic");
2400 defsymbol (&Qdirection, "direction");
2401 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2402 defsymbol (&Qshort_name, "short-name");
2403 defsymbol (&Qlong_name, "long-name");
2405 defsymbol (&Qmother, "mother");
2406 defsymbol (&Qmin_code, "min-code");
2407 defsymbol (&Qmax_code, "max-code");
2408 defsymbol (&Qcode_offset, "code-offset");
2409 defsymbol (&Qconversion, "conversion");
2410 defsymbol (&Q94x60, "94x60");
2411 defsymbol (&Q94x94x60, "94x94x60");
2414 defsymbol (&Ql2r, "l2r");
2415 defsymbol (&Qr2l, "r2l");
2417 /* Charsets, compatible with FSF 20.3
2418 Naming convention is Script-Charset[-Edition] */
2419 defsymbol (&Qascii, "ascii");
2420 defsymbol (&Qcontrol_1, "control-1");
2421 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2422 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2423 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2424 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2425 defsymbol (&Qthai_tis620, "thai-tis620");
2426 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2427 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2428 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2429 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2430 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2431 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2432 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2433 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2434 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2435 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2436 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2437 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2438 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2439 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2440 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2441 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2443 defsymbol (&Qucs, "ucs");
2444 defsymbol (&Qucs_bmp, "ucs-bmp");
2445 defsymbol (&Qucs_smp, "ucs-smp");
2446 defsymbol (&Qucs_sip, "ucs-sip");
2447 defsymbol (&Qucs_gb, "ucs-gb");
2448 defsymbol (&Qucs_cns, "ucs-cns");
2449 defsymbol (&Qucs_jis, "ucs-jis");
2450 defsymbol (&Qucs_ks, "ucs-ks");
2451 defsymbol (&Qlatin_viscii, "latin-viscii");
2452 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2453 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2454 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2455 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2456 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2457 defsymbol (&Qjis_x0208, "=jis-x0208");
2458 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2459 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2460 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2461 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2462 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2463 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2464 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2465 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2466 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2467 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2468 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2469 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2470 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2471 defsymbol (&Qchinese_big5, "chinese-big5");
2472 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2473 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2474 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2475 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2476 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2477 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2478 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2479 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2480 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2481 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2482 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2483 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2484 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2485 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2487 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2488 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2490 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.
   Allocates the zero-filled charset_lookup structure `chlook' and
   registers it as a dump root, fills the leading-byte and attribute
   lookup tables with Qnil, seeds the counters for the next leading
   bytes to hand out, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.
   NOTE(review): two differently nested loops over charset_by_attributes
   and two sets of next_allocated_* counters are visible; presumably
   they belong to alternative preprocessor branches not shown here.
   leading_code_private_11 is assigned the same value both before and
   after its DEFVAR_INT -- confirm whether the second assignment is
   needed.  */
2494 vars_of_mule_charset (void)
2501 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2502 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2504 /* Table of charsets indexed by leading byte. */
2505 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2506 chlook->charset_by_leading_byte[i] = Qnil;
2509 /* Table of charsets indexed by type/final-byte. */
2510 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2511 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2512 chlook->charset_by_attributes[i][j] = Qnil;
2514 /* Table of charsets indexed by type/final-byte/direction. */
2515 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2516 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2517 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2518 chlook->charset_by_attributes[i][j][k] = Qnil;
2522 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2524 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2525 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2529 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2530 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2531 Leading-code of private TYPE9N charset of column-width 1.
2533 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2537 Vdefault_coded_charset_priority_list = Qnil;
2538 DEFVAR_LISP ("default-coded-charset-priority-list",
2539 &Vdefault_coded_charset_priority_list /*
2540 Default order of preferred coded-character-sets.
2546 complex_vars_of_mule_charset (void)
2548 staticpro (&Vcharset_hash_table);
2549 Vcharset_hash_table =
2550 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2552 /* Predefined character sets. We store them into variables for
2556 staticpro (&Vcharset_ucs);
2558 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2559 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2560 build_string ("UCS"),
2561 build_string ("UCS"),
2562 build_string ("ISO/IEC 10646"),
2564 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2565 staticpro (&Vcharset_ucs_bmp);
2567 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2568 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("BMP"),
2570 build_string ("UCS-BMP"),
2571 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2573 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2574 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2575 staticpro (&Vcharset_ucs_smp);
2577 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2578 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2579 build_string ("SMP"),
2580 build_string ("UCS-SMP"),
2581 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2582 build_string ("UCS00-1"),
2583 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2584 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2585 staticpro (&Vcharset_ucs_sip);
2587 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2588 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2589 build_string ("SIP"),
2590 build_string ("UCS-SIP"),
2591 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2592 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2593 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2594 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2595 staticpro (&Vcharset_ucs_gb);
2597 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2598 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2599 build_string ("UCS for GB"),
2600 build_string ("UCS for GB"),
2601 build_string ("ISO/IEC 10646 for GB"),
2603 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_ucs_cns);
2606 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2607 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2608 build_string ("UCS for CNS"),
2609 build_string ("UCS for CNS 11643"),
2610 build_string ("ISO/IEC 10646 for CNS 11643"),
2612 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2613 staticpro (&Vcharset_ucs_jis);
2615 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2616 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2617 build_string ("UCS for JIS"),
2618 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2620 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2622 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2623 staticpro (&Vcharset_ucs_ks);
2625 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2626 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2627 build_string ("UCS for KS"),
2628 build_string ("UCS for CCS defined by KS"),
2629 build_string ("ISO/IEC 10646 for Korean Standards"),
2631 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
/* Range limits for the Thai and half-width katakana character ranges,
   all defined as 0 here.  The Hebrew pair is left commented out; the
   ISO8859-8 charset setup further down passes literal 0 values tagged
   with those two names instead.
   NOTE(review): these look like the fallback arm of a conditional
   whose directives are not visible in this part of the file -- confirm.  */
2633 # define MIN_CHAR_THAI 0
2634 # define MAX_CHAR_THAI 0
2635 /* # define MIN_CHAR_HEBREW 0 */
2636 /* # define MAX_CHAR_HEBREW 0 */
2637 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2638 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2640 staticpro (&Vcharset_ascii);
2642 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2643 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2644 build_string ("ASCII"),
2645 build_string ("ASCII)"),
2646 build_string ("ASCII (ISO646 IRV)"),
2647 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2648 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2649 staticpro (&Vcharset_control_1);
2650 Vcharset_control_1 =
2651 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2652 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2653 build_string ("C1"),
2654 build_string ("Control characters"),
2655 build_string ("Control characters 128-191"),
2657 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_latin_iso8859_1);
2659 Vcharset_latin_iso8859_1 =
2660 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2661 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("Latin-1"),
2663 build_string ("ISO8859-1 (Latin-1)"),
2664 build_string ("ISO8859-1 (Latin-1)"),
2665 build_string ("iso8859-1"),
2666 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_latin_iso8859_2);
2668 Vcharset_latin_iso8859_2 =
2669 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2670 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2671 build_string ("Latin-2"),
2672 build_string ("ISO8859-2 (Latin-2)"),
2673 build_string ("ISO8859-2 (Latin-2)"),
2674 build_string ("iso8859-2"),
2675 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_3);
2677 Vcharset_latin_iso8859_3 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2679 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-3"),
2681 build_string ("ISO8859-3 (Latin-3)"),
2682 build_string ("ISO8859-3 (Latin-3)"),
2683 build_string ("iso8859-3"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2685 staticpro (&Vcharset_latin_iso8859_4);
2686 Vcharset_latin_iso8859_4 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2688 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2689 build_string ("Latin-4"),
2690 build_string ("ISO8859-4 (Latin-4)"),
2691 build_string ("ISO8859-4 (Latin-4)"),
2692 build_string ("iso8859-4"),
2693 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_thai_tis620);
2695 Vcharset_thai_tis620 =
2696 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2697 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("TIS620"),
2699 build_string ("TIS620 (Thai)"),
2700 build_string ("TIS620.2529 (Thai)"),
2701 build_string ("tis620"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_greek_iso8859_7);
2704 Vcharset_greek_iso8859_7 =
2705 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2706 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("ISO8859-7"),
2708 build_string ("ISO8859-7 (Greek)"),
2709 build_string ("ISO8859-7 (Greek)"),
2710 build_string ("iso8859-7"),
2711 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_arabic_iso8859_6);
2713 Vcharset_arabic_iso8859_6 =
2714 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2715 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2716 build_string ("ISO8859-6"),
2717 build_string ("ISO8859-6 (Arabic)"),
2718 build_string ("ISO8859-6 (Arabic)"),
2719 build_string ("iso8859-6"),
2720 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2721 staticpro (&Vcharset_hebrew_iso8859_8);
2722 Vcharset_hebrew_iso8859_8 =
2723 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2724 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2725 build_string ("ISO8859-8"),
2726 build_string ("ISO8859-8 (Hebrew)"),
2727 build_string ("ISO8859-8 (Hebrew)"),
2728 build_string ("iso8859-8"),
2730 0 /* MIN_CHAR_HEBREW */,
2731 0 /* MAX_CHAR_HEBREW */, 0, 32,
2732 Qnil, CONVERSION_IDENTICAL);
2733 staticpro (&Vcharset_katakana_jisx0201);
2734 Vcharset_katakana_jisx0201 =
2735 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2736 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2737 build_string ("JISX0201 Kana"),
2738 build_string ("JISX0201.1976 (Japanese Kana)"),
2739 build_string ("JISX0201.1976 Japanese Kana"),
2740 build_string ("jisx0201\\.1976"),
2741 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2742 staticpro (&Vcharset_latin_jisx0201);
2743 Vcharset_latin_jisx0201 =
2744 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2745 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2746 build_string ("JISX0201 Roman"),
2747 build_string ("JISX0201.1976 (Japanese Roman)"),
2748 build_string ("JISX0201.1976 Japanese Roman"),
2749 build_string ("jisx0201\\.1976"),
2750 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2751 staticpro (&Vcharset_cyrillic_iso8859_5);
2752 Vcharset_cyrillic_iso8859_5 =
2753 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2754 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2755 build_string ("ISO8859-5"),
2756 build_string ("ISO8859-5 (Cyrillic)"),
2757 build_string ("ISO8859-5 (Cyrillic)"),
2758 build_string ("iso8859-5"),
2759 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2760 staticpro (&Vcharset_latin_iso8859_9);
2761 Vcharset_latin_iso8859_9 =
2762 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2763 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2764 build_string ("Latin-5"),
2765 build_string ("ISO8859-9 (Latin-5)"),
2766 build_string ("ISO8859-9 (Latin-5)"),
2767 build_string ("iso8859-9"),
2768 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_jis_x0208);
2771 Vcharset_jis_x0208 =
2772 make_charset (LEADING_BYTE_JIS_X0208,
2774 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2775 build_string ("JIS X0208"),
2776 build_string ("JIS X0208 Common"),
2777 build_string ("JIS X0208 Common part"),
2778 build_string ("jisx0208\\.1990"),
2780 MIN_CHAR_JIS_X0208_1990,
2781 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2782 Qnil, CONVERSION_94x94);
2784 staticpro (&Vcharset_japanese_jisx0208_1978);
2785 Vcharset_japanese_jisx0208_1978 =
2786 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2787 Qjapanese_jisx0208_1978, 94, 2,
2788 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2789 build_string ("JIS X0208:1978"),
2790 build_string ("JIS X0208:1978 (Japanese)"),
2792 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2793 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2800 CONVERSION_IDENTICAL);
2801 staticpro (&Vcharset_chinese_gb2312);
2802 Vcharset_chinese_gb2312 =
2803 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2804 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("GB2312"),
2806 build_string ("GB2312)"),
2807 build_string ("GB2312 Chinese simplified"),
2808 build_string ("gb2312"),
2809 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2810 staticpro (&Vcharset_chinese_gb12345);
2811 Vcharset_chinese_gb12345 =
2812 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2813 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2814 build_string ("G1"),
2815 build_string ("GB 12345)"),
2816 build_string ("GB 12345-1990"),
2817 build_string ("GB12345\\(\\.1990\\)?-0"),
2818 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2819 staticpro (&Vcharset_japanese_jisx0208);
2820 Vcharset_japanese_jisx0208 =
2821 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2822 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2823 build_string ("JISX0208"),
2824 build_string ("JIS X0208:1983 (Japanese)"),
2825 build_string ("JIS X0208:1983 Japanese Kanji"),
2826 build_string ("jisx0208\\.1983"),
2833 CONVERSION_IDENTICAL);
2835 staticpro (&Vcharset_japanese_jisx0208_1990);
2836 Vcharset_japanese_jisx0208_1990 =
2837 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2838 Qjapanese_jisx0208_1990, 94, 2,
2839 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2840 build_string ("JISX0208-1990"),
2841 build_string ("JIS X0208:1990 (Japanese)"),
2842 build_string ("JIS X0208:1990 Japanese Kanji"),
2843 build_string ("jisx0208\\.1990"),
2845 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2846 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2847 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2848 Vcharset_jis_x0208 /* Qnil */,
2849 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2851 staticpro (&Vcharset_korean_ksc5601);
2852 Vcharset_korean_ksc5601 =
2853 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2854 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2855 build_string ("KSC5601"),
2856 build_string ("KSC5601 (Korean"),
2857 build_string ("KSC5601 Korean Hangul and Hanja"),
2858 build_string ("ksc5601"),
2859 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2860 staticpro (&Vcharset_japanese_jisx0212);
2861 Vcharset_japanese_jisx0212 =
2862 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2863 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2864 build_string ("JISX0212"),
2865 build_string ("JISX0212 (Japanese)"),
2866 build_string ("JISX0212 Japanese Supplement"),
2867 build_string ("jisx0212"),
2868 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Build the pattern string for CNS 11643 plane N.  N must itself be a
   string literal (e.g. "1"), because the expansion relies on adjacent
   string literals being concatenated into one.  Read as an Emacs-style
   regexp (presumably how the consumer interprets it -- confirm), the
   result is "cns11643", one of '.' or '-', an optional group of
   arbitrary text ending in '.' or '-', then N anchored by "$".  */
2870 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2871 staticpro (&Vcharset_chinese_cns11643_1);
2872 Vcharset_chinese_cns11643_1 =
2873 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2874 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2875 build_string ("CNS11643-1"),
2876 build_string ("CNS11643-1 (Chinese traditional)"),
2878 ("CNS 11643 Plane 1 Chinese traditional"),
2879 build_string (CHINESE_CNS_PLANE_RE("1")),
2880 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_chinese_cns11643_2);
2882 Vcharset_chinese_cns11643_2 =
2883 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2884 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("CNS11643-2"),
2886 build_string ("CNS11643-2 (Chinese traditional)"),
2888 ("CNS 11643 Plane 2 Chinese traditional"),
2889 build_string (CHINESE_CNS_PLANE_RE("2")),
2890 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2892 staticpro (&Vcharset_latin_tcvn5712);
2893 Vcharset_latin_tcvn5712 =
2894 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2895 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2896 build_string ("TCVN 5712"),
2897 build_string ("TCVN 5712 (VSCII-2)"),
2898 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2899 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2900 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2901 staticpro (&Vcharset_latin_viscii_lower);
2902 Vcharset_latin_viscii_lower =
2903 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2904 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2905 build_string ("VISCII lower"),
2906 build_string ("VISCII lower (Vietnamese)"),
2907 build_string ("VISCII lower (Vietnamese)"),
2908 build_string ("MULEVISCII-LOWER"),
2909 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2910 staticpro (&Vcharset_latin_viscii_upper);
2911 Vcharset_latin_viscii_upper =
2912 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2913 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2914 build_string ("VISCII upper"),
2915 build_string ("VISCII upper (Vietnamese)"),
2916 build_string ("VISCII upper (Vietnamese)"),
2917 build_string ("MULEVISCII-UPPER"),
2918 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2919 staticpro (&Vcharset_latin_viscii);
2920 Vcharset_latin_viscii =
2921 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2922 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2923 build_string ("VISCII"),
2924 build_string ("VISCII 1.1 (Vietnamese)"),
2925 build_string ("VISCII 1.1 (Vietnamese)"),
2926 build_string ("VISCII1\\.1"),
2927 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2928 staticpro (&Vcharset_chinese_big5);
2929 Vcharset_chinese_big5 =
2930 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2931 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2932 build_string ("Big5"),
2933 build_string ("Big5"),
2934 build_string ("Big5 Chinese traditional"),
2935 build_string ("big5-0"),
2937 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2938 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2940 staticpro (&Vcharset_chinese_big5_cdp);
2941 Vcharset_chinese_big5_cdp =
2942 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2943 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2944 build_string ("Big5-CDP"),
2945 build_string ("Big5 + CDP extension"),
2946 build_string ("Big5 with CDP extension"),
2947 build_string ("big5\\.cdp-0"),
2948 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2949 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* Statement macro that sets up the HANZIKU charset numbered N.
   `##n' pastes N onto the variable, leading-byte, symbol and
   MIN_CHAR_/MAX_CHAR_ names; `#n' stringizes N into the "HZK-",
   "HANZIKU-", description and "hanziku-...$" strings.  The expansion
   calls staticpro on Vcharset_ideograph_hanziku_N, then assigns it the
   result of make_charset.  It already ends in a semicolon.
   NOTE(review): the "hanziku-" pattern argument appears here without a
   preceding call, unlike the other string arguments; a line may be
   missing just before it -- confirm against the original file.  */
2951 #define DEF_HANZIKU(n) \
2952 staticpro (&Vcharset_ideograph_hanziku_##n); \
2953 Vcharset_ideograph_hanziku_##n = \
2954 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2955 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2956 build_string ("HZK-"#n), \
2957 build_string ("HANZIKU-"#n), \
2958 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2960 ("hanziku-"#n"$"), \
2961 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2962 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
/* Statement macro that sets up the GT "pseudo JIS encoding" charset
   numbered N.  `##n' pastes N onto the variable, leading-byte and
   symbol names; `#n' stringizes N into the "GT-PJ-", description and
   pattern strings.  The expansion calls staticpro on
   Vcharset_ideograph_gt_pj_N, then assigns it the result of
   make_charset.  It already ends in a semicolon.  The pattern string
   offers two alternatives, "GTpj-N" and "jisx0208.GT-N", each followed
   by "$".
   NOTE(review): the pattern argument appears here without a preceding
   call, unlike the other string arguments; a line may be missing just
   before it -- confirm against the original file.  */
2975 #define DEF_GT_PJ(n) \
2976 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2977 Vcharset_ideograph_gt_pj_##n = \
2978 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2979 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2980 build_string ("GT-PJ-"#n), \
2981 build_string ("GT (pseudo JIS encoding) part "#n), \
2982 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2984 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2985 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2998 staticpro (&Vcharset_ideograph_daikanwa_2);
2999 Vcharset_ideograph_daikanwa_2 =
3000 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3001 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3002 build_string ("Daikanwa Rev."),
3003 build_string ("Morohashi's Daikanwa Rev."),
3005 ("Daikanwa dictionary (revised version)"),
3006 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3007 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3008 staticpro (&Vcharset_ideograph_daikanwa);
3009 Vcharset_ideograph_daikanwa =
3010 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3011 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3012 build_string ("Daikanwa"),
3013 build_string ("Morohashi's Daikanwa Rev.2"),
3015 ("Daikanwa dictionary (second revised version)"),
3016 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3017 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3018 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3020 staticpro (&Vcharset_ethiopic_ucs);
3021 Vcharset_ethiopic_ucs =
3022 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3023 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3024 build_string ("Ethiopic (UCS)"),
3025 build_string ("Ethiopic (UCS)"),
3026 build_string ("Ethiopic of UCS"),
3027 build_string ("Ethiopic-Unicode"),
3028 Qnil, 0x1200, 0x137F, 0, 0,
3029 Qnil, CONVERSION_IDENTICAL);
3031 staticpro (&Vcharset_chinese_big5_1);
3032 Vcharset_chinese_big5_1 =
3033 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3034 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3035 build_string ("Big5"),
3036 build_string ("Big5 (Level-1)"),
3038 ("Big5 Level-1 Chinese traditional"),
3039 build_string ("big5"),
3040 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3041 staticpro (&Vcharset_chinese_big5_2);
3042 Vcharset_chinese_big5_2 =
3043 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3044 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("Big5"),
3046 build_string ("Big5 (Level-2)"),
3048 ("Big5 Level-2 Chinese traditional"),
3049 build_string ("big5"),
3050 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3052 #ifdef ENABLE_COMPOSITE_CHARS
3053 /* #### For simplicity, we put composite chars into a 96x96 charset.
3054 This is going to lead to problems because you can run out of
3055 room, esp. as we don't yet recycle numbers. */
3056 staticpro (&Vcharset_composite);
3057 Vcharset_composite =
3058 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3059 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3060 build_string ("Composite"),
3061 build_string ("Composite characters"),
3062 build_string ("Composite characters"),
3065 /* #### not dumped properly */
3066 composite_char_row_next = 32;
3067 composite_char_col_next = 32;
3069 Vcomposite_char_string2char_hash_table =
3070 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3071 Vcomposite_char_char2string_hash_table =
3072 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3073 staticpro (&Vcomposite_char_string2char_hash_table);
3074 staticpro (&Vcomposite_char_char2string_hash_table);
3075 #endif /* ENABLE_COMPOSITE_CHARS */