1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_ideograph_daikanwa_2;
94 Lisp_Object Vcharset_ideograph_daikanwa;
95 Lisp_Object Vcharset_ethiopic_ucs;
97 Lisp_Object Vcharset_chinese_big5_1;
98 Lisp_Object Vcharset_chinese_big5_2;
100 #ifdef ENABLE_COMPOSITE_CHARS
101 Lisp_Object Vcharset_composite;
103 /* Hash tables for composite chars. One maps string representing
104 composed chars to their equivalent chars; one goes the
106 Lisp_Object Vcomposite_char_char2string_hash_table;
107 Lisp_Object Vcomposite_char_string2char_hash_table;
109 static int composite_char_row_next;
110 static int composite_char_col_next;
112 #endif /* ENABLE_COMPOSITE_CHARS */
114 struct charset_lookup *chlook;
116 static const struct lrecord_description charset_lookup_description_1[] = {
117 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
119 NUM_LEADING_BYTES+4*128
126 static const struct struct_description charset_lookup_description = {
127 sizeof (struct charset_lookup),
128 charset_lookup_description_1
132 /* Table of number of bytes in the string representation of a character
133 indexed by the first byte of that representation.
135 rep_bytes_by_first_byte(c) is more efficient than the equivalent
136 canonical computation:
138 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
140 const Bytecount rep_bytes_by_first_byte[0xA0] =
141 { /* 0x00 - 0x7f are for straight ASCII */
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 /* 0x80 - 0x8f are for Dimension-1 official charsets */
152 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
156 /* 0x90 - 0x9d are for Dimension-2 official charsets */
157 /* 0x9e is for Dimension-1 private charsets */
158 /* 0x9f is for Dimension-2 private charsets */
159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
165 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
167 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
171 if (XVECTOR_LENGTH (v) > ccs_len)
174 for (i = 0; i < XVECTOR_LENGTH (v); i++)
176 Lisp_Object c = XVECTOR_DATA(v)[i];
178 if (!NILP (c) && !CHARP (c))
182 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
194 put_char_ccs_code_point (Lisp_Object character,
195 Lisp_Object ccs, Lisp_Object value)
197 if (!EQ (XCHARSET_NAME (ccs), Qucs)
199 || (XCHAR (character) != XINT (value)))
201 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
205 { /* obsolete representation: value must be a list of bytes */
206 Lisp_Object ret = Fcar (value);
210 signal_simple_error ("Invalid value for coded-charset", value);
211 code_point = XINT (ret);
212 if (XCHARSET_GRAPHIC (ccs) == 1)
220 signal_simple_error ("Invalid value for coded-charset",
224 signal_simple_error ("Invalid value for coded-charset",
227 if (XCHARSET_GRAPHIC (ccs) == 1)
229 code_point = (code_point << 8) | j;
232 value = make_int (code_point);
234 else if (INTP (value))
236 code_point = XINT (value);
237 if (XCHARSET_GRAPHIC (ccs) == 1)
239 code_point &= 0x7F7F7F7F;
240 value = make_int (code_point);
244 signal_simple_error ("Invalid value for coded-charset", value);
248 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
251 decoding_table_remove_char (ccs, XINT (cpos));
254 decoding_table_put_char (ccs, code_point, character);
260 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
262 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
263 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
265 if (VECTORP (decoding_table))
267 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
271 decoding_table_remove_char (ccs, XINT (cpos));
274 if (CHAR_TABLEP (encoding_table))
276 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
284 int leading_code_private_11;
287 Lisp_Object Qcharsetp;
289 /* Qdoc_string, Qdimension, Qchars defined in general.c */
290 Lisp_Object Qregistry, Qfinal, Qgraphic;
291 Lisp_Object Qdirection;
292 Lisp_Object Qreverse_direction_charset;
293 Lisp_Object Qleading_byte;
294 Lisp_Object Qshort_name, Qlong_name;
296 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
297 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
314 Qjapanese_jisx0208_1978,
318 Qjapanese_jisx0208_1990,
336 Qvietnamese_viscii_lower,
337 Qvietnamese_viscii_upper,
340 /* Qchinese_big5_cdp, */
341 Qideograph_hanziku_1,
342 Qideograph_hanziku_2,
343 Qideograph_hanziku_3,
344 Qideograph_hanziku_4,
345 Qideograph_hanziku_5,
346 Qideograph_hanziku_6,
347 Qideograph_hanziku_7,
348 Qideograph_hanziku_8,
349 Qideograph_hanziku_9,
350 Qideograph_hanziku_10,
351 Qideograph_hanziku_11,
352 Qideograph_hanziku_12,
353 Qideograph_daikanwa_2,
361 Lisp_Object Ql2r, Qr2l;
363 Lisp_Object Vcharset_hash_table;
365 /* Composite characters are characters constructed by overstriking two
366 or more regular characters.
368 1) The old Mule implementation involves storing composite characters
369 in a buffer as a tag followed by all of the actual characters
370 used to make up the composite character. I think this is a bad
371 idea; it greatly complicates code that wants to handle strings
372 one character at a time because it has to deal with the possibility
373 of great big ungainly characters. It's much more reasonable to
374 simply store an index into a table of composite characters.
376 2) The current implementation only allows for 16,384 separate
377 composite characters over the lifetime of the XEmacs process.
378 This could become a potential problem if the user
379 edited lots of different files that use composite characters.
380 Due to FSF bogosity, increasing the number of allowable
381 composite characters under Mule would decrease the number
382 of possible faces that can exist. Mule already has shrunk
383 this to 2048, and further shrinkage would become uncomfortable.
384 No such problems exist in XEmacs.
386 Composite characters could be represented as 0x80 C1 C2 C3,
387 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
388 for slightly under 2^20 (one million) composite characters
389 over the XEmacs process lifetime, and you only need to
390 increase the size of a Mule character from 19 to 21 bits.
391 Or you could use 0x80 C1 C2 C3 C4, allowing for about
392 85 million (slightly over 2^26) composite characters. */
395 /************************************************************************/
396 /* Basic Emchar functions */
397 /************************************************************************/
399 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
400 string in STR. Returns the number of bytes stored.
401 Do not call this directly. Use the macro set_charptr_emchar() instead.
405 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
420 else if ( c <= 0x7ff )
422 *p++ = (c >> 6) | 0xc0;
423 *p++ = (c & 0x3f) | 0x80;
425 else if ( c <= 0xffff )
427 *p++ = (c >> 12) | 0xe0;
428 *p++ = ((c >> 6) & 0x3f) | 0x80;
429 *p++ = (c & 0x3f) | 0x80;
431 else if ( c <= 0x1fffff )
433 *p++ = (c >> 18) | 0xf0;
434 *p++ = ((c >> 12) & 0x3f) | 0x80;
435 *p++ = ((c >> 6) & 0x3f) | 0x80;
436 *p++ = (c & 0x3f) | 0x80;
438 else if ( c <= 0x3ffffff )
440 *p++ = (c >> 24) | 0xf8;
441 *p++ = ((c >> 18) & 0x3f) | 0x80;
442 *p++ = ((c >> 12) & 0x3f) | 0x80;
443 *p++ = ((c >> 6) & 0x3f) | 0x80;
444 *p++ = (c & 0x3f) | 0x80;
448 *p++ = (c >> 30) | 0xfc;
449 *p++ = ((c >> 24) & 0x3f) | 0x80;
450 *p++ = ((c >> 18) & 0x3f) | 0x80;
451 *p++ = ((c >> 12) & 0x3f) | 0x80;
452 *p++ = ((c >> 6) & 0x3f) | 0x80;
453 *p++ = (c & 0x3f) | 0x80;
456 BREAKUP_CHAR (c, charset, c1, c2);
457 lb = CHAR_LEADING_BYTE (c);
458 if (LEADING_BYTE_PRIVATE_P (lb))
459 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
461 if (EQ (charset, Vcharset_control_1))
470 /* Return the first character from a Mule-encoded string in STR,
471 assuming it's non-ASCII. Do not call this directly.
472 Use the macro charptr_emchar() instead. */
475 non_ascii_charptr_emchar (const Bufbyte *str)
488 else if ( b >= 0xf8 )
493 else if ( b >= 0xf0 )
498 else if ( b >= 0xe0 )
503 else if ( b >= 0xc0 )
513 for( ; len > 0; len-- )
516 ch = ( ch << 6 ) | ( b & 0x3f );
520 Bufbyte i0 = *str, i1, i2 = 0;
523 if (i0 == LEADING_BYTE_CONTROL_1)
524 return (Emchar) (*++str - 0x20);
526 if (LEADING_BYTE_PREFIX_P (i0))
531 charset = CHARSET_BY_LEADING_BYTE (i0);
532 if (XCHARSET_DIMENSION (charset) == 2)
535 return MAKE_CHAR (charset, i1, i2);
539 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
540 Do not call this directly. Use the macro valid_char_p() instead. */
544 non_ascii_valid_char_p (Emchar ch)
548 /* Must have only lowest 19 bits set */
552 f1 = CHAR_FIELD1 (ch);
553 f2 = CHAR_FIELD2 (ch);
554 f3 = CHAR_FIELD3 (ch);
560 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
561 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
562 f2 > MAX_CHAR_FIELD2_PRIVATE)
567 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
568 f2 <= MAX_CHAR_FIELD2_PRIVATE))
572 NOTE: This takes advantage of the fact that
573 FIELD2_TO_OFFICIAL_LEADING_BYTE and
574 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
576 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
577 if (EQ (charset, Qnil))
579 return (XCHARSET_CHARS (charset) == 96);
585 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
586 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
587 f1 > MAX_CHAR_FIELD1_PRIVATE)
589 if (f2 < 0x20 || f3 < 0x20)
592 #ifdef ENABLE_COMPOSITE_CHARS
593 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
595 if (UNBOUNDP (Fgethash (make_int (ch),
596 Vcomposite_char_char2string_hash_table,
601 #endif /* ENABLE_COMPOSITE_CHARS */
603 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
604 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
607 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
609 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
612 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
614 if (EQ (charset, Qnil))
616 return (XCHARSET_CHARS (charset) == 96);
622 /************************************************************************/
623 /* Basic string functions */
624 /************************************************************************/
626 /* Copy the character pointed to by SRC into DST. Do not call this
627 directly. Use the macro charptr_copy_char() instead.
628 Return the number of bytes copied. */
631 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
633 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
635 for (i = bytes; i; i--, dst++, src++)
641 /************************************************************************/
642 /* streams of Emchars */
643 /************************************************************************/
645 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
646 The functions below are not meant to be called directly; use
647 the macros in insdel.h. */
650 Lstream_get_emchar_1 (Lstream *stream, int ch)
652 Bufbyte str[MAX_EMCHAR_LEN];
653 Bufbyte *strptr = str;
656 str[0] = (Bufbyte) ch;
658 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
660 int c = Lstream_getc (stream);
661 bufpos_checking_assert (c >= 0);
662 *++strptr = (Bufbyte) c;
664 return charptr_emchar (str);
668 Lstream_fput_emchar (Lstream *stream, Emchar ch)
670 Bufbyte str[MAX_EMCHAR_LEN];
671 Bytecount len = set_charptr_emchar (str, ch);
672 return Lstream_write (stream, str, len);
676 Lstream_funget_emchar (Lstream *stream, Emchar ch)
678 Bufbyte str[MAX_EMCHAR_LEN];
679 Bytecount len = set_charptr_emchar (str, ch);
680 Lstream_unread (stream, str, len);
684 /************************************************************************/
686 /************************************************************************/
689 mark_charset (Lisp_Object obj)
691 Lisp_Charset *cs = XCHARSET (obj);
693 mark_object (cs->short_name);
694 mark_object (cs->long_name);
695 mark_object (cs->doc_string);
696 mark_object (cs->registry);
697 mark_object (cs->ccl_program);
699 mark_object (cs->decoding_table);
700 mark_object (cs->mother);
706 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
708 Lisp_Charset *cs = XCHARSET (obj);
712 error ("printing unreadable object #<charset %s 0x%x>",
713 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
716 write_c_string ("#<charset ", printcharfun);
717 print_internal (CHARSET_NAME (cs), printcharfun, 0);
718 write_c_string (" ", printcharfun);
719 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
720 write_c_string (" ", printcharfun);
721 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
722 write_c_string (" ", printcharfun);
723 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
724 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
726 CHARSET_DIMENSION (cs),
727 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
728 CHARSET_COLUMNS (cs),
729 CHARSET_GRAPHIC (cs),
731 write_c_string (buf, printcharfun);
732 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
733 sprintf (buf, " 0x%x>", cs->header.uid);
734 write_c_string (buf, printcharfun);
737 static const struct lrecord_description charset_description[] = {
738 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
739 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
740 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
741 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
742 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
743 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
744 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
746 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
747 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
752 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
753 mark_charset, print_charset, 0, 0, 0,
757 /* Make a new charset. */
758 /* #### SJT Should generic properties be allowed? */
760 make_charset (Charset_ID id, Lisp_Object name,
761 unsigned short chars, unsigned char dimension,
762 unsigned char columns, unsigned char graphic,
763 Bufbyte final, unsigned char direction, Lisp_Object short_name,
764 Lisp_Object long_name, Lisp_Object doc,
766 Lisp_Object decoding_table,
767 Emchar min_code, Emchar max_code,
768 Emchar code_offset, unsigned char byte_offset,
769 Lisp_Object mother, unsigned char conversion)
772 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
776 XSETCHARSET (obj, cs);
778 CHARSET_ID (cs) = id;
779 CHARSET_NAME (cs) = name;
780 CHARSET_SHORT_NAME (cs) = short_name;
781 CHARSET_LONG_NAME (cs) = long_name;
782 CHARSET_CHARS (cs) = chars;
783 CHARSET_DIMENSION (cs) = dimension;
784 CHARSET_DIRECTION (cs) = direction;
785 CHARSET_COLUMNS (cs) = columns;
786 CHARSET_GRAPHIC (cs) = graphic;
787 CHARSET_FINAL (cs) = final;
788 CHARSET_DOC_STRING (cs) = doc;
789 CHARSET_REGISTRY (cs) = reg;
790 CHARSET_CCL_PROGRAM (cs) = Qnil;
791 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
793 CHARSET_DECODING_TABLE(cs) = Qunbound;
794 CHARSET_MIN_CODE (cs) = min_code;
795 CHARSET_MAX_CODE (cs) = max_code;
796 CHARSET_CODE_OFFSET (cs) = code_offset;
797 CHARSET_BYTE_OFFSET (cs) = byte_offset;
798 CHARSET_MOTHER (cs) = mother;
799 CHARSET_CONVERSION (cs) = conversion;
803 if (id == LEADING_BYTE_ASCII)
804 CHARSET_REP_BYTES (cs) = 1;
806 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
808 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
813 /* some charsets do not have final characters. This includes
814 ASCII, Control-1, Composite, and the two faux private
816 unsigned char iso2022_type
817 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
819 if (code_offset == 0)
821 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
822 chlook->charset_by_attributes[iso2022_type][final] = obj;
826 (chlook->charset_by_attributes[iso2022_type][final][direction]));
827 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
831 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
832 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
834 /* Some charsets are "faux" and don't have names or really exist at
835 all except in the leading-byte table. */
837 Fputhash (name, obj, Vcharset_hash_table);
842 get_unallocated_leading_byte (int dimension)
847 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
850 lb = chlook->next_allocated_leading_byte++;
854 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
857 lb = chlook->next_allocated_1_byte_leading_byte++;
861 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
864 lb = chlook->next_allocated_2_byte_leading_byte++;
870 ("No more character sets free for this dimension",
871 make_int (dimension));
877 /* Number of Big5 characters which have the same code in 1st byte. */
879 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
882 decode_defined_char (Lisp_Object ccs, int code_point)
884 int dim = XCHARSET_DIMENSION (ccs);
885 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
893 = get_ccs_octet_table (decoding_table, ccs,
894 (code_point >> (dim * 8)) & 255);
896 if (CHARP (decoding_table))
897 return XCHAR (decoding_table);
900 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
902 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
904 if ( EQ (mother, Vcharset_ucs) )
905 return DECODE_CHAR (mother, code_point);
907 return decode_defined_char (mother, code_point);
914 decode_builtin_char (Lisp_Object charset, int code_point)
916 Lisp_Object mother = XCHARSET_MOTHER (charset);
919 if ( XCHARSET_MAX_CODE (charset) > 0 )
921 if ( CHARSETP (mother) )
923 int code = code_point;
925 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
927 int row = code_point >> 8;
928 int cell = code_point & 255;
932 else if (row < 16 + 32 + 30)
933 code = (row - (16 + 32)) * 94 + cell - 33;
934 else if (row < 18 + 32 + 30)
936 else if (row < 18 + 32 + 60)
937 code = (row - (18 + 32)) * 94 + cell - 33;
939 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
941 int plane = code_point >> 16;
942 int row = (code_point >> 8) & 255;
943 int cell = code_point & 255;
947 else if (row < 16 + 32 + 30)
949 = (plane - 33) * 94 * 60
950 + (row - (16 + 32)) * 94
952 else if (row < 18 + 32 + 30)
954 else if (row < 18 + 32 + 60)
956 = (plane - 33) * 94 * 60
957 + (row - (18 + 32)) * 94
961 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
966 = (XCHARSET_DIMENSION (charset) == 1
968 code_point - XCHARSET_BYTE_OFFSET (charset)
970 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
971 * XCHARSET_CHARS (charset)
972 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
973 + XCHARSET_CODE_OFFSET (charset);
974 if ((cid < XCHARSET_MIN_CODE (charset))
975 || (XCHARSET_MAX_CODE (charset) < cid))
980 else if ((final = XCHARSET_FINAL (charset)) >= '0')
982 if (XCHARSET_DIMENSION (charset) == 1)
984 switch (XCHARSET_CHARS (charset))
988 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
991 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
999 switch (XCHARSET_CHARS (charset))
1002 return MIN_CHAR_94x94
1003 + (final - '0') * 94 * 94
1004 + (((code_point >> 8) & 0x7F) - 33) * 94
1005 + ((code_point & 0x7F) - 33);
1007 return MIN_CHAR_96x96
1008 + (final - '0') * 96 * 96
1009 + (((code_point >> 8) & 0x7F) - 32) * 96
1010 + ((code_point & 0x7F) - 32);
1022 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1024 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1027 if ( CHAR_TABLEP (encoding_table)
1028 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1033 Lisp_Object mother = XCHARSET_MOTHER (charset);
1034 int min = XCHARSET_MIN_CODE (charset);
1035 int max = XCHARSET_MAX_CODE (charset);
1038 if ( CHARSETP (mother) )
1040 if (XCHARSET_FINAL (charset) >= '0')
1041 code = charset_code_point (mother, ch, 1);
1043 code = charset_code_point (mother, ch, defined_only);
1045 else if (defined_only)
1047 else if ( ((max == 0) && CHARSETP (mother)
1048 && (XCHARSET_FINAL (charset) == 0))
1049 || ((min <= ch) && (ch <= max)) )
1051 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1052 || ((min <= code) && (code <= max)) )
1054 int d = code - XCHARSET_CODE_OFFSET (charset);
1056 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1058 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1060 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1062 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1065 int cell = d % 94 + 33;
1071 return (row << 8) | cell;
1073 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1074 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1075 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1076 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1079 int plane = d / (94 * 60) + 33;
1080 int row = (d % (94 * 60)) / 94;
1081 int cell = d % 94 + 33;
1087 return (plane << 16) | (row << 8) | cell;
1089 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1091 ( (d / (94 * 94) + 33) << 16)
1092 | ((d / 94 % 94 + 33) << 8)
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1096 ( (d / (96 * 96) + 32) << 16)
1097 | ((d / 96 % 96 + 32) << 8)
1099 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1101 ( (d / (94 * 94 * 94) + 33) << 24)
1102 | ((d / (94 * 94) % 94 + 33) << 16)
1103 | ((d / 94 % 94 + 33) << 8)
1105 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1107 ( (d / (96 * 96 * 96) + 32) << 24)
1108 | ((d / (96 * 96) % 96 + 32) << 16)
1109 | ((d / 96 % 96 + 32) << 8)
1113 printf ("Unknown CCS-conversion %d is specified!",
1114 XCHARSET_CONVERSION (charset));
1118 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1119 ( XCHARSET_MIN_CODE (charset) == 0 )
1121 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1122 (XCHARSET_CODE_OFFSET (charset)
1123 == XCHARSET_MIN_CODE (charset))
1128 if (XCHARSET_DIMENSION (charset) == 1)
1130 if (XCHARSET_CHARS (charset) == 94)
1132 if (((d = ch - (MIN_CHAR_94
1133 + (XCHARSET_FINAL (charset) - '0') * 94))
1138 else if (XCHARSET_CHARS (charset) == 96)
1140 if (((d = ch - (MIN_CHAR_96
1141 + (XCHARSET_FINAL (charset) - '0') * 96))
1149 else if (XCHARSET_DIMENSION (charset) == 2)
1151 if (XCHARSET_CHARS (charset) == 94)
1153 if (((d = ch - (MIN_CHAR_94x94
1155 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1158 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1160 else if (XCHARSET_CHARS (charset) == 96)
1162 if (((d = ch - (MIN_CHAR_96x96
1164 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1167 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1178 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1180 if (c <= MAX_CHAR_BASIC_LATIN)
1182 *charset = Vcharset_ascii;
1187 *charset = Vcharset_control_1;
1192 *charset = Vcharset_latin_iso8859_1;
1196 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1198 *charset = Vcharset_hebrew_iso8859_8;
1199 return c - MIN_CHAR_HEBREW + 0x20;
1202 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1204 *charset = Vcharset_thai_tis620;
1205 return c - MIN_CHAR_THAI + 0x20;
1208 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1209 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1211 return list2 (Vcharset_katakana_jisx0201,
1212 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1215 else if (c <= MAX_CHAR_BMP)
1217 *charset = Vcharset_ucs_bmp;
1220 else if (c <= MAX_CHAR_SMP)
1222 *charset = Vcharset_ucs_smp;
1223 return c - MIN_CHAR_SMP;
1225 else if (c <= MAX_CHAR_SIP)
1227 *charset = Vcharset_ucs_sip;
1228 return c - MIN_CHAR_SIP;
1230 else if (c < MIN_CHAR_DAIKANWA)
1232 *charset = Vcharset_ucs;
1235 else if (c <= MAX_CHAR_DAIKANWA)
1237 *charset = Vcharset_ideograph_daikanwa;
1238 return c - MIN_CHAR_DAIKANWA;
1240 else if (c < MIN_CHAR_94)
1242 *charset = Vcharset_ucs;
1245 else if (c <= MAX_CHAR_94)
1247 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1248 ((c - MIN_CHAR_94) / 94) + '0',
1249 CHARSET_LEFT_TO_RIGHT);
1250 if (!NILP (*charset))
1251 return ((c - MIN_CHAR_94) % 94) + 33;
1254 *charset = Vcharset_ucs;
1258 else if (c <= MAX_CHAR_96)
1260 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1261 ((c - MIN_CHAR_96) / 96) + '0',
1262 CHARSET_LEFT_TO_RIGHT);
1263 if (!NILP (*charset))
1264 return ((c - MIN_CHAR_96) % 96) + 32;
1267 *charset = Vcharset_ucs;
1271 else if (c <= MAX_CHAR_94x94)
1274 = CHARSET_BY_ATTRIBUTES (94, 2,
1275 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1276 CHARSET_LEFT_TO_RIGHT);
1277 if (!NILP (*charset))
1278 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1279 | (((c - MIN_CHAR_94x94) % 94) + 33);
1282 *charset = Vcharset_ucs;
1286 else if (c <= MAX_CHAR_96x96)
1289 = CHARSET_BY_ATTRIBUTES (96, 2,
1290 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1291 CHARSET_LEFT_TO_RIGHT);
1292 if (!NILP (*charset))
1293 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1294 | (((c - MIN_CHAR_96x96) % 96) + 32);
1297 *charset = Vcharset_ucs;
1303 *charset = Vcharset_ucs;
1308 Lisp_Object Vdefault_coded_charset_priority_list;
1312 /************************************************************************/
1313 /* Basic charset Lisp functions */
1314 /************************************************************************/
1316 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1317 Return non-nil if OBJECT is a charset.
1321 return CHARSETP (object) ? Qt : Qnil;
1324 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1325 Retrieve the charset of the given name.
1326 If CHARSET-OR-NAME is a charset object, it is simply returned.
1327 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1328 nil is returned. Otherwise the associated charset object is returned.
1332 if (CHARSETP (charset_or_name))
1333 return charset_or_name;
1335 CHECK_SYMBOL (charset_or_name);
1336 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1339 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1340 Retrieve the charset of the given name.
1341 Same as `find-charset' except an error is signalled if there is no such
1342 charset instead of returning nil.
1346 Lisp_Object charset = Ffind_charset (name);
1349 signal_simple_error ("No such charset", name);
1353 /* We store the charsets in hash tables with the names as the key and the
1354 actual charset object as the value. Occasionally we need to use them
1355 in a list format. These routines provide us with that. */
1356 struct charset_list_closure
1358 Lisp_Object *charset_list;
1362 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1363 void *charset_list_closure)
1365 /* This function can GC */
1366 struct charset_list_closure *chcl =
1367 (struct charset_list_closure*) charset_list_closure;
1368 Lisp_Object *charset_list = chcl->charset_list;
1370 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1374 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1375 Return a list of the names of all defined charsets.
1379 Lisp_Object charset_list = Qnil;
1380 struct gcpro gcpro1;
1381 struct charset_list_closure charset_list_closure;
1383 GCPRO1 (charset_list);
1384 charset_list_closure.charset_list = &charset_list;
1385 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1386 &charset_list_closure);
1389 return charset_list;
1392 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1393 Return the name of charset CHARSET.
1397 return XCHARSET_NAME (Fget_charset (charset));
1400 /* #### SJT Should generic properties be allowed? */
1401 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1402 Define a new character set.
1403 This function is for use with Mule support.
1404 NAME is a symbol, the name by which the character set is normally referred.
1405 DOC-STRING is a string describing the character set.
1406 PROPS is a property list, describing the specific nature of the
1407 character set. Recognized properties are:
1409 'short-name Short version of the charset name (ex: Latin-1)
1410 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1411 'registry A regular expression matching the font registry field for
1413 'dimension Number of octets used to index a character in this charset.
1414 Either 1 or 2. Defaults to 1.
1415 If UTF-2000 feature is enabled, 3 or 4 are also available.
1416 'columns Number of columns used to display a character in this charset.
1417 Only used in TTY mode. (Under X, the actual width of a
1418 character can be derived from the font used to display the
1419 characters.) If unspecified, defaults to the dimension
1420 (this is almost always the correct value).
1421 'chars Number of characters in each dimension (94 or 96).
1422 Defaults to 94. Note that if the dimension is 2, the
1423 character set thus described is 94x94 or 96x96.
1424 If UTF-2000 feature is enabled, 128 or 256 are also available.
1425 'final Final byte of ISO 2022 escape sequence. Must be
1426 supplied. Each combination of (DIMENSION, CHARS) defines a
1427 separate namespace for final bytes. Note that ISO
1428 2022 restricts the final byte to the range
1429 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1430 dimension == 2. Note also that final bytes in the range
1431 0x30 - 0x3F are reserved for user-defined (not official)
1433 'graphic 0 (use left half of font on output) or 1 (use right half
1434 of font on output). Defaults to 0. For example, for
1435 a font whose registry is ISO8859-1, the left half
1436 (octets 0x20 - 0x7F) is the `ascii' character set, while
1437 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1438 character set. With 'graphic set to 0, the octets
1439 will have their high bit cleared; with it set to 1,
1440 the octets will have their high bit set.
1441 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1443 'ccl-program A compiled CCL program used to convert a character in
1444 this charset into an index into the font. This is in
1445 addition to the 'graphic property. The CCL program
1446 is passed the octets of the character, with the high
1447 bit cleared and set depending upon whether the value
1448 of the 'graphic property is 0 or 1.
1449 'mother [UTF-2000 only] Base coded-charset.
1450 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1451 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1452 'code-offset [UTF-2000 only] Offset for a code-point of a base
1454 'conversion [UTF-2000 only] Conversion for a code-point of a base
1455 coded-charset (94x60 or 94x94x60).
1457 (name, doc_string, props))
1459 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1460 int direction = CHARSET_LEFT_TO_RIGHT;
1461 Lisp_Object registry = Qnil;
1462 Lisp_Object charset;
1463 Lisp_Object ccl_program = Qnil;
1464 Lisp_Object short_name = Qnil, long_name = Qnil;
1465 Lisp_Object mother = Qnil;
1466 int min_code = 0, max_code = 0, code_offset = 0;
1467 int byte_offset = -1;
1470 CHECK_SYMBOL (name);
1471 if (!NILP (doc_string))
1472 CHECK_STRING (doc_string);
1474 charset = Ffind_charset (name);
1475 if (!NILP (charset))
1476 signal_simple_error ("Cannot redefine existing charset", name);
1479 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1481 if (EQ (keyword, Qshort_name))
1483 CHECK_STRING (value);
1487 if (EQ (keyword, Qlong_name))
1489 CHECK_STRING (value);
1493 else if (EQ (keyword, Qdimension))
1496 dimension = XINT (value);
1497 if (dimension < 1 ||
1504 signal_simple_error ("Invalid value for 'dimension", value);
1507 else if (EQ (keyword, Qchars))
1510 chars = XINT (value);
1511 if (chars != 94 && chars != 96
1513 && chars != 128 && chars != 256
1516 signal_simple_error ("Invalid value for 'chars", value);
1519 else if (EQ (keyword, Qcolumns))
1522 columns = XINT (value);
1523 if (columns != 1 && columns != 2)
1524 signal_simple_error ("Invalid value for 'columns", value);
1527 else if (EQ (keyword, Qgraphic))
1530 graphic = XINT (value);
1538 signal_simple_error ("Invalid value for 'graphic", value);
1541 else if (EQ (keyword, Qregistry))
1543 CHECK_STRING (value);
1547 else if (EQ (keyword, Qdirection))
1549 if (EQ (value, Ql2r))
1550 direction = CHARSET_LEFT_TO_RIGHT;
1551 else if (EQ (value, Qr2l))
1552 direction = CHARSET_RIGHT_TO_LEFT;
1554 signal_simple_error ("Invalid value for 'direction", value);
1557 else if (EQ (keyword, Qfinal))
1559 CHECK_CHAR_COERCE_INT (value);
1560 final = XCHAR (value);
1561 if (final < '0' || final > '~')
1562 signal_simple_error ("Invalid value for 'final", value);
1566 else if (EQ (keyword, Qmother))
1568 mother = Fget_charset (value);
1571 else if (EQ (keyword, Qmin_code))
1574 min_code = XUINT (value);
1577 else if (EQ (keyword, Qmax_code))
1580 max_code = XUINT (value);
1583 else if (EQ (keyword, Qcode_offset))
1586 code_offset = XUINT (value);
1589 else if (EQ (keyword, Qconversion))
1591 if (EQ (value, Q94x60))
1592 conversion = CONVERSION_94x60;
1593 else if (EQ (value, Q94x94x60))
1594 conversion = CONVERSION_94x94x60;
1596 signal_simple_error ("Unrecognized conversion", value);
1600 else if (EQ (keyword, Qccl_program))
1602 struct ccl_program test_ccl;
1604 if (setup_ccl_program (&test_ccl, value) < 0)
1605 signal_simple_error ("Invalid value for 'ccl-program", value);
1606 ccl_program = value;
1610 signal_simple_error ("Unrecognized property", keyword);
1616 error ("'final must be specified");
1618 if (dimension == 2 && final > 0x5F)
1620 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1623 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1624 CHARSET_LEFT_TO_RIGHT)) ||
1625 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1626 CHARSET_RIGHT_TO_LEFT)))
1628 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1630 id = get_unallocated_leading_byte (dimension);
1632 if (NILP (doc_string))
1633 doc_string = build_string ("");
1635 if (NILP (registry))
1636 registry = build_string ("");
1638 if (NILP (short_name))
1639 XSETSTRING (short_name, XSYMBOL (name)->name);
1641 if (NILP (long_name))
1642 long_name = doc_string;
1645 columns = dimension;
1647 if (byte_offset < 0)
1651 else if (chars == 96)
1657 charset = make_charset (id, name, chars, dimension, columns, graphic,
1658 final, direction, short_name, long_name,
1659 doc_string, registry,
1660 Qnil, min_code, max_code, code_offset, byte_offset,
1661 mother, conversion);
1662 if (!NILP (ccl_program))
1663 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1667 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1669 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1670 NEW-NAME is the name of the new charset. Return the new charset.
1672 (charset, new_name))
1674 Lisp_Object new_charset = Qnil;
1675 int id, chars, dimension, columns, graphic, final;
1677 Lisp_Object registry, doc_string, short_name, long_name;
1680 charset = Fget_charset (charset);
1681 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1682 signal_simple_error ("Charset already has reverse-direction charset",
1685 CHECK_SYMBOL (new_name);
1686 if (!NILP (Ffind_charset (new_name)))
1687 signal_simple_error ("Cannot redefine existing charset", new_name);
1689 cs = XCHARSET (charset);
1691 chars = CHARSET_CHARS (cs);
1692 dimension = CHARSET_DIMENSION (cs);
1693 columns = CHARSET_COLUMNS (cs);
1694 id = get_unallocated_leading_byte (dimension);
1696 graphic = CHARSET_GRAPHIC (cs);
1697 final = CHARSET_FINAL (cs);
1698 direction = CHARSET_RIGHT_TO_LEFT;
1699 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1700 direction = CHARSET_LEFT_TO_RIGHT;
1701 doc_string = CHARSET_DOC_STRING (cs);
1702 short_name = CHARSET_SHORT_NAME (cs);
1703 long_name = CHARSET_LONG_NAME (cs);
1704 registry = CHARSET_REGISTRY (cs);
1706 new_charset = make_charset (id, new_name, chars, dimension, columns,
1707 graphic, final, direction, short_name, long_name,
1708 doc_string, registry,
1710 CHARSET_DECODING_TABLE(cs),
1711 CHARSET_MIN_CODE(cs),
1712 CHARSET_MAX_CODE(cs),
1713 CHARSET_CODE_OFFSET(cs),
1714 CHARSET_BYTE_OFFSET(cs),
1716 CHARSET_CONVERSION (cs)
1718 Qnil, 0, 0, 0, 0, Qnil, 0
1722 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1723 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1728 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1729 Define symbol ALIAS as an alias for CHARSET.
1733 CHECK_SYMBOL (alias);
1734 charset = Fget_charset (charset);
1735 return Fputhash (alias, charset, Vcharset_hash_table);
1738 /* #### Reverse direction charsets not yet implemented. */
1740 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1742 Return the reverse-direction charset parallel to CHARSET, if any.
1743 This is the charset with the same properties (in particular, the same
1744 dimension, number of characters per dimension, and final byte) as
1745 CHARSET but whose characters are displayed in the opposite direction.
1749 charset = Fget_charset (charset);
1750 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1754 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1755 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1756 If DIRECTION is omitted, both directions will be checked (left-to-right
1757 will be returned if character sets exist for both directions).
1759 (dimension, chars, final, direction))
1761 int dm, ch, fi, di = -1;
1762 Lisp_Object obj = Qnil;
1764 CHECK_INT (dimension);
1765 dm = XINT (dimension);
1766 if (dm < 1 || dm > 2)
1767 signal_simple_error ("Invalid value for DIMENSION", dimension);
1771 if (ch != 94 && ch != 96)
1772 signal_simple_error ("Invalid value for CHARS", chars);
1774 CHECK_CHAR_COERCE_INT (final);
1776 if (fi < '0' || fi > '~')
1777 signal_simple_error ("Invalid value for FINAL", final);
1779 if (EQ (direction, Ql2r))
1780 di = CHARSET_LEFT_TO_RIGHT;
1781 else if (EQ (direction, Qr2l))
1782 di = CHARSET_RIGHT_TO_LEFT;
1783 else if (!NILP (direction))
1784 signal_simple_error ("Invalid value for DIRECTION", direction);
1786 if (dm == 2 && fi > 0x5F)
1788 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1792 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1794 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1797 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1800 return XCHARSET_NAME (obj);
1804 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1805 Return short name of CHARSET.
1809 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1812 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1813 Return long name of CHARSET.
1817 return XCHARSET_LONG_NAME (Fget_charset (charset));
1820 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1821 Return description of CHARSET.
1825 return XCHARSET_DOC_STRING (Fget_charset (charset));
1828 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1829 Return dimension of CHARSET.
1833 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1836 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1837 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1838 Recognized properties are those listed in `make-charset', as well as
1839 'name and 'doc-string.
1845 charset = Fget_charset (charset);
1846 cs = XCHARSET (charset);
1848 CHECK_SYMBOL (prop);
1849 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1850 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1851 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1852 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1853 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1854 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1855 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1856 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1857 Qnil : make_char (CHARSET_FINAL (cs));
1858 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1859 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1860 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1861 if (EQ (prop, Qdirection))
1862 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1863 if (EQ (prop, Qreverse_direction_charset))
1865 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1866 /* #### Is this translation OK? If so, error checking sufficient? */
1867 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1870 if (EQ (prop, Qmother))
1871 return CHARSET_MOTHER (cs);
1872 if (EQ (prop, Qmin_code))
1873 return make_int (CHARSET_MIN_CODE (cs));
1874 if (EQ (prop, Qmax_code))
1875 return make_int (CHARSET_MAX_CODE (cs));
1877 signal_simple_error ("Unrecognized charset property name", prop);
1878 return Qnil; /* not reached */
1881 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1882 Return charset identification number of CHARSET.
1886 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1889 /* #### We need to figure out which properties we really want to
1892 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1893 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1895 (charset, ccl_program))
1897 struct ccl_program test_ccl;
1899 charset = Fget_charset (charset);
1900 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1901 signal_simple_error ("Invalid ccl-program", ccl_program);
1902 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1907 invalidate_charset_font_caches (Lisp_Object charset)
1909 /* Invalidate font cache entries for charset on all devices. */
1910 Lisp_Object devcons, concons, hash_table;
1911 DEVICE_LOOP_NO_BREAK (devcons, concons)
1913 struct device *d = XDEVICE (XCAR (devcons));
1914 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1915 if (!UNBOUNDP (hash_table))
1916 Fclrhash (hash_table);
1920 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1921 Set the 'registry property of CHARSET to REGISTRY.
1923 (charset, registry))
1925 charset = Fget_charset (charset);
1926 CHECK_STRING (registry);
1927 XCHARSET_REGISTRY (charset) = registry;
1928 invalidate_charset_font_caches (charset);
1929 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1934 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1935 Return mapping-table of CHARSET.
1939 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1942 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1943 Set mapping-table of CHARSET to TABLE.
1947 struct Lisp_Charset *cs;
1951 charset = Fget_charset (charset);
1952 cs = XCHARSET (charset);
1956 CHARSET_DECODING_TABLE(cs) = Qnil;
1959 else if (VECTORP (table))
1961 int ccs_len = CHARSET_BYTE_SIZE (cs);
1962 int ret = decoding_table_check_elements (table,
1963 CHARSET_DIMENSION (cs),
1968 signal_simple_error ("Too big table", table);
1970 signal_simple_error ("Invalid element is found", table);
1972 signal_simple_error ("Something wrong", table);
1974 CHARSET_DECODING_TABLE(cs) = Qnil;
1977 signal_error (Qwrong_type_argument,
1978 list2 (build_translated_string ("vector-or-nil-p"),
1981 byte_offset = CHARSET_BYTE_OFFSET (cs);
1982 switch (CHARSET_DIMENSION (cs))
1985 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1987 Lisp_Object c = XVECTOR_DATA(table)[i];
1990 Fput_char_attribute (c, XCHARSET_NAME (charset),
1991 make_int (i + byte_offset));
1995 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1997 Lisp_Object v = XVECTOR_DATA(table)[i];
2003 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2005 Lisp_Object c = XVECTOR_DATA(v)[j];
2009 (c, XCHARSET_NAME (charset),
2010 make_int ( ( (i + byte_offset) << 8 )
2016 Fput_char_attribute (v, XCHARSET_NAME (charset),
2017 make_int (i + byte_offset));
2026 /************************************************************************/
2027 /* Lisp primitives for working with characters */
2028 /************************************************************************/
2031 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2032 Make a character from CHARSET and code-point CODE.
2033 If DEFINED_ONLY is non-nil, builtin character is not returned.
2034 If corresponding character is not found, nil is returned.
2036 (charset, code, defined_only))
2040 charset = Fget_charset (charset);
2043 if (XCHARSET_GRAPHIC (charset) == 1)
2045 if (NILP (defined_only))
2046 c = DECODE_CHAR (charset, c);
2048 c = decode_defined_char (charset, c);
2049 return c >= 0 ? make_char (c) : Qnil;
2052 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2053 Make a builtin character from CHARSET and code-point CODE.
2059 charset = Fget_charset (charset);
2061 if (EQ (charset, Vcharset_latin_viscii))
2063 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2069 (ret = Fget_char_attribute (chr,
2070 Vcharset_latin_viscii_lower,
2073 charset = Vcharset_latin_viscii_lower;
2077 (ret = Fget_char_attribute (chr,
2078 Vcharset_latin_viscii_upper,
2081 charset = Vcharset_latin_viscii_upper;
2088 if (XCHARSET_GRAPHIC (charset) == 1)
2091 c = decode_builtin_char (charset, c);
2092 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2096 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2097 Make a character from CHARSET and octets ARG1 and ARG2.
2098 ARG2 is required only for characters from two-dimensional charsets.
2099 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2100 character s with caron.
2102 (charset, arg1, arg2))
2106 int lowlim, highlim;
2108 charset = Fget_charset (charset);
2109 cs = XCHARSET (charset);
2111 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2112 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2114 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2116 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2117 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2120 /* It is useful (and safe, according to Olivier Galibert) to strip
2121 the 8th bit off ARG1 and ARG2 because it allows programmers to
2122 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2123 Latin 2 code of the character. */
2131 if (a1 < lowlim || a1 > highlim)
2132 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2134 if (CHARSET_DIMENSION (cs) == 1)
2138 ("Charset is of dimension one; second octet must be nil", arg2);
2139 return make_char (MAKE_CHAR (charset, a1, 0));
2148 a2 = XINT (arg2) & 0x7f;
2150 if (a2 < lowlim || a2 > highlim)
2151 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2153 return make_char (MAKE_CHAR (charset, a1, a2));
2156 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2157 Return the character set of CHARACTER.
2161 CHECK_CHAR_COERCE_INT (character);
2163 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2166 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2167 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2168 N defaults to 0 if omitted.
2172 Lisp_Object charset;
2175 CHECK_CHAR_COERCE_INT (character);
2177 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2179 if (NILP (n) || EQ (n, Qzero))
2180 return make_int (octet0);
2181 else if (EQ (n, make_int (1)))
2182 return make_int (octet1);
2184 signal_simple_error ("Octet number must be 0 or 1", n);
2188 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2189 Return code-point of CHARACTER in specified CHARSET.
2191 (character, charset, defined_only))
2195 CHECK_CHAR_COERCE_INT (character);
2196 charset = Fget_charset (charset);
2197 code_point = charset_code_point (charset, XCHAR (character),
2198 !NILP (defined_only));
2199 if (code_point >= 0)
2200 return make_int (code_point);
2206 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2207 Return list of charset and one or two position-codes of CHARACTER.
2211 /* This function can GC */
2212 struct gcpro gcpro1, gcpro2;
2213 Lisp_Object charset = Qnil;
2214 Lisp_Object rc = Qnil;
2222 GCPRO2 (charset, rc);
2223 CHECK_CHAR_COERCE_INT (character);
2226 code_point = ENCODE_CHAR (XCHAR (character), charset);
2227 dimension = XCHARSET_DIMENSION (charset);
2228 while (dimension > 0)
2230 rc = Fcons (make_int (code_point & 255), rc);
2234 rc = Fcons (XCHARSET_NAME (charset), rc);
2236 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2238 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2240 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2244 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2253 #ifdef ENABLE_COMPOSITE_CHARS
2254 /************************************************************************/
2255 /* composite character functions */
2256 /************************************************************************/
2259 lookup_composite_char (Bufbyte *str, int len)
2261 Lisp_Object lispstr = make_string (str, len);
2262 Lisp_Object ch = Fgethash (lispstr,
2263 Vcomposite_char_string2char_hash_table,
2269 if (composite_char_row_next >= 128)
2270 signal_simple_error ("No more composite chars available", lispstr);
2271 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2272 composite_char_col_next);
2273 Fputhash (make_char (emch), lispstr,
2274 Vcomposite_char_char2string_hash_table);
2275 Fputhash (lispstr, make_char (emch),
2276 Vcomposite_char_string2char_hash_table);
2277 composite_char_col_next++;
2278 if (composite_char_col_next >= 128)
2280 composite_char_col_next = 32;
2281 composite_char_row_next++;
2290 composite_char_string (Emchar ch)
2292 Lisp_Object str = Fgethash (make_char (ch),
2293 Vcomposite_char_char2string_hash_table,
2295 assert (!UNBOUNDP (str));
2299 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2300 Convert a string into a single composite character.
2301 The character is the result of overstriking all the characters in
2306 CHECK_STRING (string);
2307 return make_char (lookup_composite_char (XSTRING_DATA (string),
2308 XSTRING_LENGTH (string)));
2311 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2312 Return a string of the characters comprising a composite character.
2320 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2321 signal_simple_error ("Must be composite char", ch);
2322 return composite_char_string (emch);
2324 #endif /* ENABLE_COMPOSITE_CHARS */
2327 /************************************************************************/
2328 /* initialization */
2329 /************************************************************************/
2332 syms_of_mule_charset (void)
2334 INIT_LRECORD_IMPLEMENTATION (charset);
2336 DEFSUBR (Fcharsetp);
2337 DEFSUBR (Ffind_charset);
2338 DEFSUBR (Fget_charset);
2339 DEFSUBR (Fcharset_list);
2340 DEFSUBR (Fcharset_name);
2341 DEFSUBR (Fmake_charset);
2342 DEFSUBR (Fmake_reverse_direction_charset);
2343 /* DEFSUBR (Freverse_direction_charset); */
2344 DEFSUBR (Fdefine_charset_alias);
2345 DEFSUBR (Fcharset_from_attributes);
2346 DEFSUBR (Fcharset_short_name);
2347 DEFSUBR (Fcharset_long_name);
2348 DEFSUBR (Fcharset_description);
2349 DEFSUBR (Fcharset_dimension);
2350 DEFSUBR (Fcharset_property);
2351 DEFSUBR (Fcharset_id);
2352 DEFSUBR (Fset_charset_ccl_program);
2353 DEFSUBR (Fset_charset_registry);
2355 DEFSUBR (Fcharset_mapping_table);
2356 DEFSUBR (Fset_charset_mapping_table);
2360 DEFSUBR (Fdecode_char);
2361 DEFSUBR (Fdecode_builtin_char);
2362 DEFSUBR (Fencode_char);
2364 DEFSUBR (Fmake_char);
2365 DEFSUBR (Fchar_charset);
2366 DEFSUBR (Fchar_octet);
2367 DEFSUBR (Fsplit_char);
2369 #ifdef ENABLE_COMPOSITE_CHARS
2370 DEFSUBR (Fmake_composite_char);
2371 DEFSUBR (Fcomposite_char_string);
2374 defsymbol (&Qcharsetp, "charsetp");
2375 defsymbol (&Qregistry, "registry");
2376 defsymbol (&Qfinal, "final");
2377 defsymbol (&Qgraphic, "graphic");
2378 defsymbol (&Qdirection, "direction");
2379 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2380 defsymbol (&Qshort_name, "short-name");
2381 defsymbol (&Qlong_name, "long-name");
2383 defsymbol (&Qmother, "mother");
2384 defsymbol (&Qmin_code, "min-code");
2385 defsymbol (&Qmax_code, "max-code");
2386 defsymbol (&Qcode_offset, "code-offset");
2387 defsymbol (&Qconversion, "conversion");
2388 defsymbol (&Q94x60, "94x60");
2389 defsymbol (&Q94x94x60, "94x94x60");
2392 defsymbol (&Ql2r, "l2r");
2393 defsymbol (&Qr2l, "r2l");
2395 /* Charsets, compatible with FSF 20.3
2396 Naming convention is Script-Charset[-Edition] */
2397 defsymbol (&Qascii, "ascii");
2398 defsymbol (&Qcontrol_1, "control-1");
2399 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2400 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2401 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2402 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2403 defsymbol (&Qthai_tis620, "thai-tis620");
2404 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2405 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2406 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2407 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2408 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2409 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2410 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2411 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2412 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2413 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2414 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2415 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2416 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2417 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2418 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2419 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2421 defsymbol (&Qucs, "ucs");
2422 defsymbol (&Qucs_bmp, "ucs-bmp");
2423 defsymbol (&Qucs_smp, "ucs-smp");
2424 defsymbol (&Qucs_sip, "ucs-sip");
2425 defsymbol (&Qucs_gb, "ucs-gb");
2426 defsymbol (&Qucs_cns, "ucs-cns");
2427 defsymbol (&Qucs_jis, "ucs-jis");
2428 defsymbol (&Qucs_ks, "ucs-ks");
2429 defsymbol (&Qlatin_viscii, "latin-viscii");
2430 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2431 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2432 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2433 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2434 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2435 defsymbol (&Qjis_x0208, "=jis-x0208");
2436 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2437 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2438 defsymbol (&Qchinese_big5, "chinese-big5");
2439 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2440 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2441 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2442 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2443 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2444 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2445 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2446 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2447 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2448 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2449 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2450 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2451 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2452 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2454 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2455 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2457 defsymbol (&Qcomposite, "composite");
2461 vars_of_mule_charset (void)
2468 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2469 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2471 /* Table of charsets indexed by leading byte. */
2472 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2473 chlook->charset_by_leading_byte[i] = Qnil;
2476 /* Table of charsets indexed by type/final-byte. */
2477 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2478 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2479 chlook->charset_by_attributes[i][j] = Qnil;
2481 /* Table of charsets indexed by type/final-byte/direction. */
2482 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2483 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2484 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2485 chlook->charset_by_attributes[i][j][k] = Qnil;
2489 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2491 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2492 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2496 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2497 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2498 Leading-code of private TYPE9N charset of column-width 1.
2500 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2504 Vdefault_coded_charset_priority_list = Qnil;
2505 DEFVAR_LISP ("default-coded-charset-priority-list",
2506 &Vdefault_coded_charset_priority_list /*
2507 Default order of preferred coded-character-sets.
2513 complex_vars_of_mule_charset (void)
2515 staticpro (&Vcharset_hash_table);
2516 Vcharset_hash_table =
2517 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2519 /* Predefined character sets. We store them into variables for
2523 staticpro (&Vcharset_ucs);
2525 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2526 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2527 build_string ("UCS"),
2528 build_string ("UCS"),
2529 build_string ("ISO/IEC 10646"),
2531 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2532 staticpro (&Vcharset_ucs_bmp);
2534 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2535 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2536 build_string ("BMP"),
2537 build_string ("UCS-BMP"),
2538 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2540 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2541 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2542 staticpro (&Vcharset_ucs_smp);
2544 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2545 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2546 build_string ("SMP"),
2547 build_string ("UCS-SMP"),
2548 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2549 build_string ("UCS00-1"),
2550 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2551 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2552 staticpro (&Vcharset_ucs_sip);
2554 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2555 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2556 build_string ("SIP"),
2557 build_string ("UCS-SIP"),
2558 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2559 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2560 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2561 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2562 staticpro (&Vcharset_ucs_gb);
2564 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2565 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2566 build_string ("UCS for GB"),
2567 build_string ("UCS for GB"),
2568 build_string ("ISO/IEC 10646 for GB"),
2570 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2571 staticpro (&Vcharset_ucs_cns);
2573 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2574 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2575 build_string ("UCS for CNS"),
2576 build_string ("UCS for CNS 11643"),
2577 build_string ("ISO/IEC 10646 for CNS 11643"),
2579 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2580 staticpro (&Vcharset_ucs_jis);
2582 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2583 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2584 build_string ("UCS for JIS"),
2585 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2587 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2589 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2590 staticpro (&Vcharset_ucs_ks);
2592 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2593 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2594 build_string ("UCS for KS"),
2595 build_string ("UCS for CCS defined by KS"),
2596 build_string ("ISO/IEC 10646 for Korean Standards"),
2598 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2600 # define MIN_CHAR_THAI 0
2601 # define MAX_CHAR_THAI 0
2602 /* # define MIN_CHAR_HEBREW 0 */
2603 /* # define MAX_CHAR_HEBREW 0 */
2604 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2605 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2607 staticpro (&Vcharset_ascii);
2609 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2610 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2611 build_string ("ASCII"),
2612 build_string ("ASCII)"),
2613 build_string ("ASCII (ISO646 IRV)"),
2614 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2615 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2616 staticpro (&Vcharset_control_1);
2617 Vcharset_control_1 =
2618 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2619 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2620 build_string ("C1"),
2621 build_string ("Control characters"),
2622 build_string ("Control characters 128-191"),
2624 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2625 staticpro (&Vcharset_latin_iso8859_1);
2626 Vcharset_latin_iso8859_1 =
2627 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2628 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2629 build_string ("Latin-1"),
2630 build_string ("ISO8859-1 (Latin-1)"),
2631 build_string ("ISO8859-1 (Latin-1)"),
2632 build_string ("iso8859-1"),
2633 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2634 staticpro (&Vcharset_latin_iso8859_2);
2635 Vcharset_latin_iso8859_2 =
2636 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2637 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2638 build_string ("Latin-2"),
2639 build_string ("ISO8859-2 (Latin-2)"),
2640 build_string ("ISO8859-2 (Latin-2)"),
2641 build_string ("iso8859-2"),
2642 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2643 staticpro (&Vcharset_latin_iso8859_3);
2644 Vcharset_latin_iso8859_3 =
2645 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2646 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2647 build_string ("Latin-3"),
2648 build_string ("ISO8859-3 (Latin-3)"),
2649 build_string ("ISO8859-3 (Latin-3)"),
2650 build_string ("iso8859-3"),
2651 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2652 staticpro (&Vcharset_latin_iso8859_4);
2653 Vcharset_latin_iso8859_4 =
2654 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2655 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2656 build_string ("Latin-4"),
2657 build_string ("ISO8859-4 (Latin-4)"),
2658 build_string ("ISO8859-4 (Latin-4)"),
2659 build_string ("iso8859-4"),
2660 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2661 staticpro (&Vcharset_thai_tis620);
2662 Vcharset_thai_tis620 =
2663 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2664 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2665 build_string ("TIS620"),
2666 build_string ("TIS620 (Thai)"),
2667 build_string ("TIS620.2529 (Thai)"),
2668 build_string ("tis620"),
2669 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2670 staticpro (&Vcharset_greek_iso8859_7);
2671 Vcharset_greek_iso8859_7 =
2672 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2673 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2674 build_string ("ISO8859-7"),
2675 build_string ("ISO8859-7 (Greek)"),
2676 build_string ("ISO8859-7 (Greek)"),
2677 build_string ("iso8859-7"),
2678 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2679 staticpro (&Vcharset_arabic_iso8859_6);
2680 Vcharset_arabic_iso8859_6 =
2681 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2682 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2683 build_string ("ISO8859-6"),
2684 build_string ("ISO8859-6 (Arabic)"),
2685 build_string ("ISO8859-6 (Arabic)"),
2686 build_string ("iso8859-6"),
2687 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2688 staticpro (&Vcharset_hebrew_iso8859_8);
2689 Vcharset_hebrew_iso8859_8 =
2690 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2691 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2692 build_string ("ISO8859-8"),
2693 build_string ("ISO8859-8 (Hebrew)"),
2694 build_string ("ISO8859-8 (Hebrew)"),
2695 build_string ("iso8859-8"),
2697 0 /* MIN_CHAR_HEBREW */,
2698 0 /* MAX_CHAR_HEBREW */, 0, 32,
2699 Qnil, CONVERSION_IDENTICAL);
2700 staticpro (&Vcharset_katakana_jisx0201);
2701 Vcharset_katakana_jisx0201 =
2702 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2703 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2704 build_string ("JISX0201 Kana"),
2705 build_string ("JISX0201.1976 (Japanese Kana)"),
2706 build_string ("JISX0201.1976 Japanese Kana"),
2707 build_string ("jisx0201\\.1976"),
2708 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2709 staticpro (&Vcharset_latin_jisx0201);
2710 Vcharset_latin_jisx0201 =
2711 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2712 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2713 build_string ("JISX0201 Roman"),
2714 build_string ("JISX0201.1976 (Japanese Roman)"),
2715 build_string ("JISX0201.1976 Japanese Roman"),
2716 build_string ("jisx0201\\.1976"),
2717 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2718 staticpro (&Vcharset_cyrillic_iso8859_5);
2719 Vcharset_cyrillic_iso8859_5 =
2720 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2721 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2722 build_string ("ISO8859-5"),
2723 build_string ("ISO8859-5 (Cyrillic)"),
2724 build_string ("ISO8859-5 (Cyrillic)"),
2725 build_string ("iso8859-5"),
2726 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2727 staticpro (&Vcharset_latin_iso8859_9);
2728 Vcharset_latin_iso8859_9 =
2729 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2730 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2731 build_string ("Latin-5"),
2732 build_string ("ISO8859-9 (Latin-5)"),
2733 build_string ("ISO8859-9 (Latin-5)"),
2734 build_string ("iso8859-9"),
2735 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2737 staticpro (&Vcharset_jis_x0208);
2738 Vcharset_jis_x0208 =
2739 make_charset (LEADING_BYTE_JIS_X0208,
2741 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2742 build_string ("JIS X0208"),
2743 build_string ("JIS X0208 Common"),
2744 build_string ("JIS X0208 Common part"),
2745 build_string ("jisx0208\\.1990"),
2747 MIN_CHAR_JIS_X0208_1990,
2748 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2749 Qnil, CONVERSION_94x94);
2751 staticpro (&Vcharset_japanese_jisx0208_1978);
2752 Vcharset_japanese_jisx0208_1978 =
2753 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2754 Qjapanese_jisx0208_1978, 94, 2,
2755 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JIS X0208:1978"),
2757 build_string ("JIS X0208:1978 (Japanese)"),
2759 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2760 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2767 CONVERSION_IDENTICAL);
2768 staticpro (&Vcharset_chinese_gb2312);
2769 Vcharset_chinese_gb2312 =
2770 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2771 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2772 build_string ("GB2312"),
2773 build_string ("GB2312)"),
2774 build_string ("GB2312 Chinese simplified"),
2775 build_string ("gb2312"),
2776 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2777 staticpro (&Vcharset_chinese_gb12345);
2778 Vcharset_chinese_gb12345 =
2779 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2780 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2781 build_string ("G1"),
2782 build_string ("GB 12345)"),
2783 build_string ("GB 12345-1990"),
2784 build_string ("GB12345\\(\\.1990\\)?-0"),
2785 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2786 staticpro (&Vcharset_japanese_jisx0208);
2787 Vcharset_japanese_jisx0208 =
2788 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2789 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2790 build_string ("JISX0208"),
2791 build_string ("JIS X0208:1983 (Japanese)"),
2792 build_string ("JIS X0208:1983 Japanese Kanji"),
2793 build_string ("jisx0208\\.1983"),
2800 CONVERSION_IDENTICAL);
2802 staticpro (&Vcharset_japanese_jisx0208_1990);
2803 Vcharset_japanese_jisx0208_1990 =
2804 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2805 Qjapanese_jisx0208_1990, 94, 2,
2806 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2807 build_string ("JISX0208-1990"),
2808 build_string ("JIS X0208:1990 (Japanese)"),
2809 build_string ("JIS X0208:1990 Japanese Kanji"),
2810 build_string ("jisx0208\\.1990"),
2812 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2813 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2814 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2815 Vcharset_jis_x0208 /* Qnil */,
2816 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2818 staticpro (&Vcharset_korean_ksc5601);
2819 Vcharset_korean_ksc5601 =
2820 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2821 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("KSC5601"),
2823 build_string ("KSC5601 (Korean"),
2824 build_string ("KSC5601 Korean Hangul and Hanja"),
2825 build_string ("ksc5601"),
2826 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_japanese_jisx0212);
2828 Vcharset_japanese_jisx0212 =
2829 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2830 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2831 build_string ("JISX0212"),
2832 build_string ("JISX0212 (Japanese)"),
2833 build_string ("JISX0212 Japanese Supplement"),
2834 build_string ("jisx0212"),
2835 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2837 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2838 staticpro (&Vcharset_chinese_cns11643_1);
2839 Vcharset_chinese_cns11643_1 =
2840 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2841 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2842 build_string ("CNS11643-1"),
2843 build_string ("CNS11643-1 (Chinese traditional)"),
2845 ("CNS 11643 Plane 1 Chinese traditional"),
2846 build_string (CHINESE_CNS_PLANE_RE("1")),
2847 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2848 staticpro (&Vcharset_chinese_cns11643_2);
2849 Vcharset_chinese_cns11643_2 =
2850 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2851 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2852 build_string ("CNS11643-2"),
2853 build_string ("CNS11643-2 (Chinese traditional)"),
2855 ("CNS 11643 Plane 2 Chinese traditional"),
2856 build_string (CHINESE_CNS_PLANE_RE("2")),
2857 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2859 staticpro (&Vcharset_latin_tcvn5712);
2860 Vcharset_latin_tcvn5712 =
2861 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2862 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2863 build_string ("TCVN 5712"),
2864 build_string ("TCVN 5712 (VSCII-2)"),
2865 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2866 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2867 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2868 staticpro (&Vcharset_latin_viscii_lower);
2869 Vcharset_latin_viscii_lower =
2870 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2871 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2872 build_string ("VISCII lower"),
2873 build_string ("VISCII lower (Vietnamese)"),
2874 build_string ("VISCII lower (Vietnamese)"),
2875 build_string ("MULEVISCII-LOWER"),
2876 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2877 staticpro (&Vcharset_latin_viscii_upper);
2878 Vcharset_latin_viscii_upper =
2879 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2880 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2881 build_string ("VISCII upper"),
2882 build_string ("VISCII upper (Vietnamese)"),
2883 build_string ("VISCII upper (Vietnamese)"),
2884 build_string ("MULEVISCII-UPPER"),
2885 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2886 staticpro (&Vcharset_latin_viscii);
2887 Vcharset_latin_viscii =
2888 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2889 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2890 build_string ("VISCII"),
2891 build_string ("VISCII 1.1 (Vietnamese)"),
2892 build_string ("VISCII 1.1 (Vietnamese)"),
2893 build_string ("VISCII1\\.1"),
2894 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2895 staticpro (&Vcharset_chinese_big5);
2896 Vcharset_chinese_big5 =
2897 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2898 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2899 build_string ("Big5"),
2900 build_string ("Big5"),
2901 build_string ("Big5 Chinese traditional"),
2902 build_string ("big5-0"),
2904 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2905 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2907 staticpro (&Vcharset_chinese_big5_cdp);
2908 Vcharset_chinese_big5_cdp =
2909 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2910 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2911 build_string ("Big5-CDP"),
2912 build_string ("Big5 + CDP extension"),
2913 build_string ("Big5 with CDP extension"),
2914 build_string ("big5\\.cdp-0"),
2915 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2916 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2918 #define DEF_HANZIKU(n) \
2919 staticpro (&Vcharset_ideograph_hanziku_##n); \
2920 Vcharset_ideograph_hanziku_##n = \
2921 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2922 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2923 build_string ("HZK-"#n), \
2924 build_string ("HANZIKU-"#n), \
2925 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2927 ("hanziku-"#n"$"), \
2928 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2929 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2943 staticpro (&Vcharset_ideograph_daikanwa_2);
2944 Vcharset_ideograph_daikanwa_2 =
2945 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2946 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2947 build_string ("Daikanwa Rev."),
2948 build_string ("Morohashi's Daikanwa Rev."),
2950 ("Daikanwa dictionary (revised version)"),
2951 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
2952 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2953 staticpro (&Vcharset_ideograph_daikanwa);
2954 Vcharset_ideograph_daikanwa =
2955 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
2956 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2957 build_string ("Daikanwa"),
2958 build_string ("Morohashi's Daikanwa Rev.2"),
2960 ("Daikanwa dictionary (second revised version)"),
2961 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
2962 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
2963 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
2965 staticpro (&Vcharset_ethiopic_ucs);
2966 Vcharset_ethiopic_ucs =
2967 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2968 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2969 build_string ("Ethiopic (UCS)"),
2970 build_string ("Ethiopic (UCS)"),
2971 build_string ("Ethiopic of UCS"),
2972 build_string ("Ethiopic-Unicode"),
2973 Qnil, 0x1200, 0x137F, 0, 0,
2974 Qnil, CONVERSION_IDENTICAL);
2976 staticpro (&Vcharset_chinese_big5_1);
2977 Vcharset_chinese_big5_1 =
2978 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2979 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2980 build_string ("Big5"),
2981 build_string ("Big5 (Level-1)"),
2983 ("Big5 Level-1 Chinese traditional"),
2984 build_string ("big5"),
2985 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2986 staticpro (&Vcharset_chinese_big5_2);
2987 Vcharset_chinese_big5_2 =
2988 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2989 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2990 build_string ("Big5"),
2991 build_string ("Big5 (Level-2)"),
2993 ("Big5 Level-2 Chinese traditional"),
2994 build_string ("big5"),
2995 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2997 #ifdef ENABLE_COMPOSITE_CHARS
2998 /* #### For simplicity, we put composite chars into a 96x96 charset.
2999 This is going to lead to problems because you can run out of
3000 room, esp. as we don't yet recycle numbers. */
3001 staticpro (&Vcharset_composite);
3002 Vcharset_composite =
3003 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3004 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3005 build_string ("Composite"),
3006 build_string ("Composite characters"),
3007 build_string ("Composite characters"),
3010 /* #### not dumped properly */
3011 composite_char_row_next = 32;
3012 composite_char_col_next = 32;
3014 Vcomposite_char_string2char_hash_table =
3015 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3016 Vcomposite_char_char2string_hash_table =
3017 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3018 staticpro (&Vcomposite_char_string2char_hash_table);
3019 staticpro (&Vcomposite_char_char2string_hash_table);
3020 #endif /* ENABLE_COMPOSITE_CHARS */