1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 /* Lisp_Object Vcharset_chinese_big5_cdp; */
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_china3_jef;
95 Lisp_Object Vcharset_ideograph_cbeta;
96 Lisp_Object Vcharset_ideograph_gt;
97 Lisp_Object Vcharset_ideograph_gt_pj_1;
98 Lisp_Object Vcharset_ideograph_gt_pj_2;
99 Lisp_Object Vcharset_ideograph_gt_pj_3;
100 Lisp_Object Vcharset_ideograph_gt_pj_4;
101 Lisp_Object Vcharset_ideograph_gt_pj_5;
102 Lisp_Object Vcharset_ideograph_gt_pj_6;
103 Lisp_Object Vcharset_ideograph_gt_pj_7;
104 Lisp_Object Vcharset_ideograph_gt_pj_8;
105 Lisp_Object Vcharset_ideograph_gt_pj_9;
106 Lisp_Object Vcharset_ideograph_gt_pj_10;
107 Lisp_Object Vcharset_ideograph_gt_pj_11;
108 Lisp_Object Vcharset_ideograph_daikanwa_2;
109 Lisp_Object Vcharset_ideograph_daikanwa;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps strings representing
119 composed chars to their equivalent chars; one goes the other way. */
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Forward declaration, followed by the definition itself.
   Inspects the elements of the decoding-table vector V; DIM is presumably
   the number of dimensions still to descend and CCS_LEN the permitted
   vector length per level.
   NOTE(review): the return-value convention and the statements executed
   when a check fails are not visible in this excerpt -- confirm against
   the full source. */
180 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
182 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector holding more than CCS_LEN elements is singled out first. */
186 if (XVECTOR_LENGTH (v) > ccs_len)
/* Walk every slot of V. */
189 for (i = 0; i < XVECTOR_LENGTH (v); i++)
191 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Slots that are neither nil nor a character get further checking ... */
193 if (!NILP (c) && !CHARP (c))
/* ... which includes a recursive check of C with one dimension fewer. */
197 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Associate CHARACTER with the code point VALUE in the coded charset CCS.
   VALUE is accepted either as an integer or, in the obsolete form, as a
   list of bytes; the visible error paths signal
   "Invalid value for coded-charset".
   NOTE(review): several lines of this function (braces, loop header,
   return statement) are not visible in this excerpt. */
209 put_char_ccs_code_point (Lisp_Object character,
210 Lisp_Object ccs, Lisp_Object value)
/* The update is performed when CCS is not named Qucs or when CHARACTER
   differs from VALUE (a middle clause of this condition is not visible
   here). */
212 if (!EQ (XCHARSET_NAME (ccs), Qucs)
214 || (XCHAR (character) != XINT (value)))
216 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
220 { /* obsolete representation: value must be a list of bytes */
/* The first list element seeds code_point. */
221 Lisp_Object ret = Fcar (value);
225 signal_simple_error ("Invalid value for coded-charset", value);
226 code_point = XINT (ret);
227 if (XCHARSET_GRAPHIC (ccs) == 1)
235 signal_simple_error ("Invalid value for coded-charset",
239 signal_simple_error ("Invalid value for coded-charset",
242 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Each further byte J is appended as the new low-order octet. */
244 code_point = (code_point << 8) | j;
/* Replace the list form by the equivalent integer. */
247 value = make_int (code_point);
249 else if (INTP (value))
251 code_point = XINT (value);
252 if (XCHARSET_GRAPHIC (ccs) == 1)
/* The mask clears bit 7 of every octet (presumably only when the charset
   is a graphic == 1 set, per the test just above). */
254 code_point &= 0x7F7F7F7F;
255 value = make_int (code_point);
259 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point CHARACTER currently has in CCS and remove that
   decoding-table entry before installing the new one. */
263 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
266 decoding_table_remove_char (ccs, XINT (cpos));
269 decoding_table_put_char (ccs, code_point, character);
/* Remove CHARACTER from the coded charset CCS: its code point is taken out
   of the decoding table and its slot in the encoding char-table is set to
   Qunbound.
   NOTE(review): the guard on CPOS and the return value are not visible in
   this excerpt. */
275 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
277 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
278 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
/* Decoding side: only handled when the table is a vector. */
280 if (VECTORP (decoding_table))
282 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
286 decoding_table_remove_char (ccs, XINT (cpos));
/* Encoding side: only handled when the table is a char-table. */
289 if (CHAR_TABLEP (encoding_table))
291 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
299 int leading_code_private_11;
302 Lisp_Object Qcharsetp;
304 /* Qdoc_string, Qdimension, Qchars defined in general.c */
305 Lisp_Object Qregistry, Qfinal, Qgraphic;
306 Lisp_Object Qdirection;
307 Lisp_Object Qreverse_direction_charset;
308 Lisp_Object Qleading_byte;
309 Lisp_Object Qshort_name, Qlong_name;
311 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
312 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
329 Qjapanese_jisx0208_1978,
333 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
370 Qideograph_daikanwa_2,
390 Lisp_Object Ql2r, Qr2l;
392 Lisp_Object Vcharset_hash_table;
394 /* Composite characters are characters constructed by overstriking two
395 or more regular characters.
397 1) The old Mule implementation involves storing composite characters
398 in a buffer as a tag followed by all of the actual characters
399 used to make up the composite character. I think this is a bad
400 idea; it greatly complicates code that wants to handle strings
401 one character at a time because it has to deal with the possibility
402 of great big ungainly characters. It's much more reasonable to
403 simply store an index into a table of composite characters.
405 2) The current implementation only allows for 16,384 separate
406 composite characters over the lifetime of the XEmacs process.
407 This could become a potential problem if the user
408 edited lots of different files that use composite characters.
409 Due to FSF bogosity, increasing the number of allowable
410 composite characters under Mule would decrease the number
411 of possible faces that can exist. Mule already has shrunk
412 this to 2048, and further shrinkage would become uncomfortable.
413 No such problems exist in XEmacs.
415 Composite characters could be represented as 0x80 C1 C2 C3,
416 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
417 for slightly under 2^20 (one million) composite characters
418 over the XEmacs process lifetime, and you only need to
419 increase the size of a Mule character from 19 to 21 bits.
420 Or you could use 0x80 C1 C2 C3 C4, allowing for about
421 85 million (slightly over 2^26) composite characters. */
424 /************************************************************************/
425 /* Basic Emchar functions */
426 /************************************************************************/
428 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
429 string in STR. Returns the number of bytes stored.
430 Do not call this directly. Use the macro set_charptr_emchar() instead. */
434 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* The branches below write a UTF-8-style sequence through P: a lead byte
   whose high bits grow with the length (0xc0, 0xe0, 0xf0, 0xf8, 0xfc),
   followed by continuation bytes of the form 0x80 | (six bits of C), most
   significant group first.
   NOTE(review): the first branch of this chain and the preprocessor lines
   around it are not visible in this excerpt. */
/* C <= 0x7ff: lead byte plus one continuation byte. */
449 else if ( c <= 0x7ff )
451 *p++ = (c >> 6) | 0xc0;
452 *p++ = (c & 0x3f) | 0x80;
/* C <= 0xffff: lead byte plus two continuation bytes. */
454 else if ( c <= 0xffff )
456 *p++ = (c >> 12) | 0xe0;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
/* C <= 0x1fffff: lead byte plus three continuation bytes. */
460 else if ( c <= 0x1fffff )
462 *p++ = (c >> 18) | 0xf0;
463 *p++ = ((c >> 12) & 0x3f) | 0x80;
464 *p++ = ((c >> 6) & 0x3f) | 0x80;
465 *p++ = (c & 0x3f) | 0x80;
/* C <= 0x3ffffff: lead byte plus four continuation bytes. */
467 else if ( c <= 0x3ffffff )
469 *p++ = (c >> 24) | 0xf8;
470 *p++ = ((c >> 18) & 0x3f) | 0x80;
471 *p++ = ((c >> 12) & 0x3f) | 0x80;
472 *p++ = ((c >> 6) & 0x3f) | 0x80;
473 *p++ = (c & 0x3f) | 0x80;
/* Lead byte 0xfc plus five continuation bytes (presumably the final
   `else' of the chain; its header line is not visible). */
477 *p++ = (c >> 30) | 0xfc;
478 *p++ = ((c >> 24) & 0x3f) | 0x80;
479 *p++ = ((c >> 18) & 0x3f) | 0x80;
480 *p++ = ((c >> 12) & 0x3f) | 0x80;
481 *p++ = ((c >> 6) & 0x3f) | 0x80;
482 *p++ = (c & 0x3f) | 0x80;
/* NOTE(review): from here on the code splits C into a charset and position
   codes and works with leading bytes instead; this looks like the
   alternative (leading-byte) encoding selected by a preprocessor
   conditional that is not visible here -- confirm. */
485 BREAKUP_CHAR (c, charset, c1, c2);
486 lb = CHAR_LEADING_BYTE (c);
/* A private leading byte is preceded by its prefix byte. */
487 if (LEADING_BYTE_PRIVATE_P (lb))
488 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* Characters of the control-1 charset get special treatment (body not
   visible). */
490 if (EQ (charset, Vcharset_control_1))
499 /* Return the first character from a Mule-encoded string in STR,
500 assuming it's non-ASCII. Do not call this directly.
501 Use the macro charptr_emchar() instead. */
504 non_ascii_charptr_emchar (const Bufbyte *str)
/* The first byte B is classified by descending thresholds (0xf8, 0xf0,
   0xe0, 0xc0).  NOTE(review): the first test of this chain and the bodies
   that set CH and LEN are not visible in this excerpt. */
517 else if ( b >= 0xf8 )
522 else if ( b >= 0xf0 )
527 else if ( b >= 0xe0 )
532 else if ( b >= 0xc0 )
/* Fold LEN further bytes into CH, six low bits from each. */
542 for( ; len > 0; len-- )
545 ch = ( ch << 6 ) | ( b & 0x3f );
/* NOTE(review): the lines below decode via leading bytes instead;
   presumably the alternative branch of a preprocessor conditional that is
   not visible here. */
549 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the following byte minus 0x20 is the character. */
552 if (i0 == LEADING_BYTE_CONTROL_1)
553 return (Emchar) (*++str - 0x20);
/* A prefix byte is recognised here (handling not visible). */
555 if (LEADING_BYTE_PREFIX_P (i0))
/* Map the leading byte to its charset; dimension-2 charsets are tested for
   explicitly before the character is assembled from I1 and I2. */
560 charset = CHARSET_BY_LEADING_BYTE (i0);
561 if (XCHARSET_DIMENSION (charset) == 2)
564 return MAKE_CHAR (charset, i1, i2);
568 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
569 Do not call this directly. Use the macro valid_char_p() instead. */
/* NOTE(review): the return statements, braces and the test separating the
   two halves of this function are not visible in this excerpt; the notes
   below describe only the visible conditions. */
573 non_ascii_valid_char_p (Emchar ch)
577 /* Must have only lowest 19 bits set */
/* Split CH into its three fields. */
581 f1 = CHAR_FIELD1 (ch);
582 f2 = CHAR_FIELD2 (ch);
583 f3 = CHAR_FIELD3 (ch);
/* First half: F2 identifies the charset.  F2 must fall in either the
   official or the private FIELD2 range. */
589 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
590 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
591 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Positions other than 0x20 and 0x7F in a non-private charset are tested
   for here. */
596 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
597 f2 <= MAX_CHAR_FIELD2_PRIVATE))
601 NOTE: This takes advantage of the fact that
602 FIELD2_TO_OFFICIAL_LEADING_BYTE and
603 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
/* Otherwise look the charset up; the answer is whether it has 96
   characters. */
605 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
606 if (EQ (charset, Qnil))
608 return (XCHARSET_CHARS (charset) == 96);
/* Second half: F1 identifies the charset and F2/F3 are the position
   codes. */
614 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
615 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
616 f1 > MAX_CHAR_FIELD1_PRIVATE)
618 if (f2 < 0x20 || f3 < 0x20)
621 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the char->string hash table. */
622 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
624 if (UNBOUNDP (Fgethash (make_int (ch),
625 Vcomposite_char_char2string_hash_table,
630 #endif /* ENABLE_COMPOSITE_CHARS */
632 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
633 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private F1 values use different leading-byte offsets. */
636 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
641 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
643 if (EQ (charset, Qnil))
645 return (XCHARSET_CHARS (charset) == 96);
651 /************************************************************************/
652 /* Basic string functions */
653 /************************************************************************/
655 /* Copy the character pointed to by SRC into DST. Do not call this
656 directly. Use the macro charptr_copy_char() instead.
657 Return the number of bytes copied. */
660 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The character's length comes from its first byte. */
662 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Step SRC and DST together BYTES times (loop body and return statement
   are not visible in this excerpt). */
664 for (i = bytes; i; i--, dst++, src++)
670 /************************************************************************/
671 /* streams of Emchars */
672 /************************************************************************/
674 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
675 The functions below are not meant to be called directly; use
676 the macros in insdel.h. */
/* Finish reading one character from STREAM, given that its first byte CH
   has already been read, and return it as an Emchar. */
679 Lstream_get_emchar_1 (Lstream *stream, int ch)
681 Bufbyte str[MAX_EMCHAR_LEN];
682 Bufbyte *strptr = str;
685 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes belong to the character. */
687 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
689 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc is not expected here. */
690 bufpos_checking_assert (c >= 0);
691 *++strptr = (Bufbyte) c;
/* Decode the assembled byte sequence. */
693 return charptr_emchar (str);
/* Encode CH into a local buffer and write the resulting bytes to STREAM;
   the result of Lstream_write is returned. */
697 Lstream_fput_emchar (Lstream *stream, Emchar ch)
699 Bufbyte str[MAX_EMCHAR_LEN];
700 Bytecount len = set_charptr_emchar (str, ch);
701 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer and push the resulting bytes back onto
   STREAM with Lstream_unread. */
705 Lstream_funget_emchar (Lstream *stream, Emchar ch)
707 Bufbyte str[MAX_EMCHAR_LEN];
708 Bytecount len = set_charptr_emchar (str, ch);
709 Lstream_unread (stream, str, len);
713 /************************************************************************/
715 /************************************************************************/
/* Marker for charset objects: marks the Lisp_Object fields of the charset
   OBJ listed below.
   NOTE(review): the function's return statement and any further marked
   fields are not visible in this excerpt. */
718 mark_charset (Lisp_Object obj)
720 Lisp_Charset *cs = XCHARSET (obj);
722 mark_object (cs->short_name);
723 mark_object (cs->long_name);
724 mark_object (cs->doc_string);
725 mark_object (cs->registry);
726 mark_object (cs->ccl_program);
728 mark_object (cs->decoding_table);
729 mark_object (cs->mother);
/* Printer for charset objects.  Writes "#<charset NAME SHORT-NAME LONG-NAME
   DOC-STRING <geometry> reg=REGISTRY 0xUID>" to PRINTCHARFUN.
   NOTE(review): BUF's declaration and the condition guarding the error
   call are not visible in this excerpt.  The sprintf calls are unbounded,
   so confirm that BUF is large enough for the formatted text. */
735 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
737 Lisp_Charset *cs = XCHARSET (obj);
/* Error raised when the object may not be printed in unreadable form
   (guarding condition not visible). */
741 error ("printing unreadable object #<charset %s 0x%x>",
742 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
745 write_c_string ("#<charset ", printcharfun);
746 print_internal (CHARSET_NAME (cs), printcharfun, 0);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Geometry summary: chars^dimension, direction, columns, graphic, final. */
753 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
755 CHARSET_DIMENSION (cs),
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the printed form with the object's uid. */
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
/* Record description for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object field, located by its offset in the structure.
   NOTE(review): the terminating entry and any further fields are not
   visible in this excerpt. */
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
776 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
781 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
782 mark_charset, print_charset, 0, 0, 0,
786 /* Make a new charset. */
787 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset, fill its fields from the arguments, and
   register it in the lookup tables (by ISO 2022 attributes, by leading
   byte, and by NAME in Vcharset_hash_table).
   NOTE(review): one parameter line (the one supplying REG), the
   preprocessor conditionals and the return statement are not visible in
   this excerpt. */
789 make_charset (Charset_ID id, Lisp_Object name,
790 unsigned short chars, unsigned char dimension,
791 unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar min_code, Emchar max_code,
797 Emchar code_offset, unsigned char byte_offset,
798 Lisp_Object mother, unsigned char conversion)
801 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
805 XSETCHARSET (obj, cs);
/* Straight copies of the arguments into the new object. */
807 CHARSET_ID (cs) = id;
808 CHARSET_NAME (cs) = name;
809 CHARSET_SHORT_NAME (cs) = short_name;
810 CHARSET_LONG_NAME (cs) = long_name;
811 CHARSET_CHARS (cs) = chars;
812 CHARSET_DIMENSION (cs) = dimension;
813 CHARSET_DIRECTION (cs) = direction;
814 CHARSET_COLUMNS (cs) = columns;
815 CHARSET_GRAPHIC (cs) = graphic;
816 CHARSET_FINAL (cs) = final;
817 CHARSET_DOC_STRING (cs) = doc;
818 CHARSET_REGISTRY (cs) = reg;
/* These two start out empty. */
819 CHARSET_CCL_PROGRAM (cs) = Qnil;
820 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the DECODING_TABLE argument is not used on any visible
   line; the field is initialised to Qunbound regardless -- confirm. */
822 CHARSET_DECODING_TABLE(cs) = Qunbound;
823 CHARSET_MIN_CODE (cs) = min_code;
824 CHARSET_MAX_CODE (cs) = max_code;
825 CHARSET_CODE_OFFSET (cs) = code_offset;
826 CHARSET_BYTE_OFFSET (cs) = byte_offset;
827 CHARSET_MOTHER (cs) = mother;
828 CHARSET_CONVERSION (cs) = conversion;
/* Representation length: 1 for ASCII, otherwise the dimension plus 1 or
   plus 2 (the conditions choosing between the latter two are not
   visible). */
832 if (id == LEADING_BYTE_ASCII)
833 CHARSET_REP_BYTES (cs) = 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
837 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
842 /* some charsets do not have final characters. This includes
843 ASCII, Control-1, Composite, and the two faux private
845 unsigned char iso2022_type
846 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
848 if (code_offset == 0)
850 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
851 chlook->charset_by_attributes[iso2022_type][final] = obj;
855 (chlook->charset_by_attributes[iso2022_type][final][direction]));
856 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
860 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
861 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
863 /* Some charsets are "faux" and don't have names or really exist at
864 all except in the leading-byte table. */
866 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the given
   DIMENSION, taken from counters kept in CHLOOK.  When none is left, the
   error "No more character sets free for this dimension" is signalled with
   DIMENSION as its datum.
   NOTE(review): three counter variants appear below; the preprocessor and
   dimension tests selecting between them, and the handling of an exhausted
   counter, are not visible in this excerpt. */
871 get_unallocated_leading_byte (int dimension)
/* Single shared counter. */
876 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
879 lb = chlook->next_allocated_leading_byte++;
/* Counter for 1-byte private leading bytes. */
883 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
886 lb = chlook->next_allocated_1_byte_leading_byte++;
/* Counter for 2-byte private leading bytes. */
890 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
893 lb = chlook->next_allocated_2_byte_leading_byte++;
899 ("No more character sets free for this dimension",
900 make_int (dimension));
906 /* Number of Big5 characters which have the same code in 1st byte. */
/* The expression adds the sizes of two second-byte ranges:
   0xFF - 0xA1 = 94 values (0xA1..0xFE) and 0x7F - 0x40 = 63 values
   (0x40..0x7E), giving 157 characters per first byte. */
908 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Decode CODE_POINT of the coded charset CCS using only explicitly defined
   mappings: first CCS's own decoding table, then (for charsets whose
   conversion is CONVERSION_IDENTICAL) the mother charset.
   NOTE(review): the loop around the table walk and the fall-through return
   value are not visible in this excerpt. */
911 decode_defined_char (Lisp_Object ccs, int code_point)
913 int dim = XCHARSET_DIMENSION (ccs);
914 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Descend one table level, indexed by the octet of CODE_POINT selected by
   DIM (presumably repeated once per dimension). */
922 = get_ccs_octet_table (decoding_table, ccs,
923 (code_point >> (dim * 8)) & 255);
/* The walk ended on a character: that is the answer. */
925 if (CHARP (decoding_table))
926 return XCHAR (decoding_table);
/* Otherwise try the mother charset, if there is one. */
929 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
931 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
/* The `ucs' mother is decoded with DECODE_CHAR; any other mother is
   searched recursively. */
933 if ( EQ (mother, Vcharset_ucs) )
934 return DECODE_CHAR (mother, code_point);
936 return decode_defined_char (mother, code_point);
/* Decode CODE_POINT of CHARSET arithmetically; no decoding table is
   consulted on the visible lines.  Three cases are visible:
   a charset with a mother, a charset with a nonzero max code, and a
   charset identified by its final byte.
   NOTE(review): many lines (braces, some assignments and returns, case
   labels) are not visible in this excerpt. */
943 decode_builtin_char (Lisp_Object charset, int code_point)
945 Lisp_Object mother = XCHARSET_MOTHER (charset);
/* Case 1: translate CODE_POINT into the mother's code space and recurse. */
948 if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
950 int code = code_point;
952 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
/* High byte is the row, low byte the cell. */
954 int row = code_point >> 8;
955 int cell = code_point & 255;
/* Rows below 16+32+30 are numbered from 16+32; rows below 18+32+60 are
   numbered from 18+32; the band in between has no visible mapping. */
959 else if (row < 16 + 32 + 30)
960 code = (row - (16 + 32)) * 94 + cell - 33;
961 else if (row < 18 + 32 + 30)
963 else if (row < 18 + 32 + 60)
964 code = (row - (18 + 32)) * 94 + cell - 33;
966 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
/* Same row bands as above, with an extra plane byte weighted by 94 * 60. */
968 int plane = code_point >> 16;
969 int row = (code_point >> 8) & 255;
970 int cell = code_point & 255;
974 else if (row < 16 + 32 + 30)
976 = (plane - 33) * 94 * 60
977 + (row - (16 + 32)) * 94
979 else if (row < 18 + 32 + 30)
981 else if (row < 18 + 32 + 60)
983 = (plane - 33) * 94 * 60
984 + (row - (18 + 32)) * 94
/* Shift by this charset's code offset and let the mother decode it. */
988 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* Case 2: linear mapping from byte offset, chars per dimension and code
   offset, then a range check against [min code, max code]. */
990 if (XCHARSET_MAX_CODE (charset))
993 = (XCHARSET_DIMENSION (charset) == 1
995 code_point - XCHARSET_BYTE_OFFSET (charset)
997 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
998 * XCHARSET_CHARS (charset)
999 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1000 + XCHARSET_CODE_OFFSET (charset);
1001 if ((cid < XCHARSET_MIN_CODE (charset))
1002 || (XCHARSET_MAX_CODE (charset) < cid))
/* Case 3: charsets with a final byte of '0' or above; the slot is chosen
   by (final - '0') and the position bytes have bit 7 masked off. */
1006 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1008 if (XCHARSET_DIMENSION (charset) == 1)
1010 switch (XCHARSET_CHARS (charset))
/* 94-character sets count positions from 33, 96-character sets from 32. */
1014 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1017 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1025 switch (XCHARSET_CHARS (charset))
1028 return MIN_CHAR_94x94
1029 + (final - '0') * 94 * 94
1030 + (((code_point >> 8) & 0x7F) - 33) * 94
1031 + ((code_point & 0x7F) - 33);
1033 return MIN_CHAR_96x96
1034 + (final - '0') * 96 * 96
1035 + (((code_point >> 8) & 0x7F) - 32) * 96
1036 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.  The encoding table
   is consulted first; otherwise the code point is derived arithmetically,
   either through the mother charset and CHARSET's conversion type or from
   the builtin ranges selected by the final byte.
   NOTE(review): the failure return value, the role of DEFINED_ONLY beyond
   the visible test, and many interior lines are not visible in this
   excerpt. */
1048 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1050 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* An integer entry in the encoding char-table is looked for first. */
1053 if ( CHAR_TABLEP (encoding_table)
1054 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1059 Lisp_Object mother = XCHARSET_MOTHER (charset);
1060 int min = XCHARSET_MIN_CODE (charset);
1061 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, start from CH's code point there. */
1064 if ( CHARSETP (mother) )
1065 code = charset_code_point (mother, ch, defined_only);
1066 else if (defined_only)
/* Proceed when CODE lies in [min, max], or when no max is set and a
   mother exists. */
1070 if ( ((max == 0) && CHARSETP (mother)) ||
1071 ((min <= code) && (code <= max)) )
/* D is the zero-based index within this charset; each conversion type
   below spreads D over the position bytes (base 33 for 94-sets, base 32
   for 96-sets). */
1073 int d = code - XCHARSET_CODE_OFFSET (charset);
1075 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1084 int cell = d % 94 + 33;
1090 return (row << 8) | cell;
1092 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1093 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1095 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1096 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1098 int plane = d / (94 * 60) + 33;
1099 int row = (d % (94 * 60)) / 94;
1100 int cell = d % 94 + 33;
1106 return (plane << 16) | (row << 8) | cell;
1108 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1110 ( (d / (94 * 94) + 33) << 16)
1111 | ((d / 94 % 94 + 33) << 8)
1113 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1115 ( (d / (96 * 96) + 32) << 16)
1116 | ((d / 96 % 96 + 32) << 8)
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1120 ( (d / (94 * 94 * 94) + 33) << 24)
1121 | ((d / (94 * 94) % 94 + 33) << 16)
1122 | ((d / 94 % 94 + 33) << 8)
1124 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1126 ( (d / (96 * 96 * 96) + 32) << 24)
1127 | ((d / (96 * 96) % 96 + 32) << 16)
1128 | ((d / 96 % 96 + 32) << 8)
/* Unrecognised conversion types are reported on stdout. */
1132 printf ("Unknown CCS-conversion %d is specified!",
1133 XCHARSET_CONVERSION (charset));
/* Builtin ranges: the charset's final byte selects a slot inside
   MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96, and D is
   CH's index within that slot. */
1137 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1138 ( XCHARSET_MIN_CODE (charset) == 0 )
1140 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1141 (XCHARSET_CODE_OFFSET (charset)
1142 == XCHARSET_MIN_CODE (charset))
1147 if (XCHARSET_DIMENSION (charset) == 1)
1149 if (XCHARSET_CHARS (charset) == 94)
1151 if (((d = ch - (MIN_CHAR_94
1152 + (XCHARSET_FINAL (charset) - '0') * 94))
1157 else if (XCHARSET_CHARS (charset) == 96)
1159 if (((d = ch - (MIN_CHAR_96
1160 + (XCHARSET_FINAL (charset) - '0') * 96))
1168 else if (XCHARSET_DIMENSION (charset) == 2)
1170 if (XCHARSET_CHARS (charset) == 94)
1172 if (((d = ch - (MIN_CHAR_94x94
1174 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1177 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1179 else if (XCHARSET_CHARS (charset) == 96)
1181 if (((d = ch - (MIN_CHAR_96x96
1183 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1186 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a builtin charset for the character C purely from its numeric
   range, store that charset in *CHARSET, and return C's code point within
   it.  Ranges are tested in ascending order; anything that matches no
   registered charset falls back to Vcharset_ucs.
   NOTE(review): several range tests and return statements are not visible
   in this excerpt. */
1197 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1199 if (c <= MAX_CHAR_BASIC_LATIN)
1201 *charset = Vcharset_ascii;
1206 *charset = Vcharset_control_1;
1211 *charset = Vcharset_latin_iso8859_1;
1215 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1217 *charset = Vcharset_hebrew_iso8859_8;
1218 return c - MIN_CHAR_HEBREW + 0x20;
1221 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1223 *charset = Vcharset_thai_tis620;
1224 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns a list2 (...) object while every other
   visible branch returns an integer; the surrounding lines, not visible
   here, may disable it -- confirm against the full source. */
1227 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1228 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1230 return list2 (Vcharset_katakana_jisx0201,
1231 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
/* UCS planes: BMP, then SMP and SIP relative to their plane start. */
1234 else if (c <= MAX_CHAR_BMP)
1236 *charset = Vcharset_ucs_bmp;
1239 else if (c <= MAX_CHAR_SMP)
1241 *charset = Vcharset_ucs_smp;
1242 return c - MIN_CHAR_SMP;
1244 else if (c <= MAX_CHAR_SIP)
1246 *charset = Vcharset_ucs_sip;
1247 return c - MIN_CHAR_SIP;
/* Gaps between the dedicated ranges are attributed to Vcharset_ucs. */
1249 else if (c < MIN_CHAR_DAIKANWA)
1251 *charset = Vcharset_ucs;
1254 else if (c <= MAX_CHAR_DAIKANWA)
1256 *charset = Vcharset_ideograph_daikanwa;
1257 return c - MIN_CHAR_DAIKANWA;
1259 else if (c < MIN_CHAR_94)
1261 *charset = Vcharset_ucs;
/* 94-character sets: the slot index gives the final byte ('0' + index);
   if a left-to-right charset with those attributes exists, the position is
   returned counting from 33. */
1264 else if (c <= MAX_CHAR_94)
1266 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1267 ((c - MIN_CHAR_94) / 94) + '0',
1268 CHARSET_LEFT_TO_RIGHT);
1269 if (!NILP (*charset))
1270 return ((c - MIN_CHAR_94) % 94) + 33;
1273 *charset = Vcharset_ucs;
/* 96-character sets: same scheme, positions counted from 32. */
1277 else if (c <= MAX_CHAR_96)
1279 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1280 ((c - MIN_CHAR_96) / 96) + '0',
1281 CHARSET_LEFT_TO_RIGHT);
1282 if (!NILP (*charset))
1283 return ((c - MIN_CHAR_96) % 96) + 32;
1286 *charset = Vcharset_ucs;
/* 94x94 sets: row in the high byte, cell in the low byte. */
1290 else if (c <= MAX_CHAR_94x94)
1293 = CHARSET_BY_ATTRIBUTES (94, 2,
1294 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1295 CHARSET_LEFT_TO_RIGHT);
1296 if (!NILP (*charset))
1297 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1298 | (((c - MIN_CHAR_94x94) % 94) + 33);
1301 *charset = Vcharset_ucs;
/* 96x96 sets: as above with base 32 (`<<' binds tighter than `|', so the
   row is shifted before the cell is or-ed in). */
1305 else if (c <= MAX_CHAR_96x96)
1308 = CHARSET_BY_ATTRIBUTES (96, 2,
1309 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1310 CHARSET_LEFT_TO_RIGHT);
1311 if (!NILP (*charset))
1312 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1313 | (((c - MIN_CHAR_96x96) % 96) + 32);
1316 *charset = Vcharset_ucs;
/* Everything else. */
1322 *charset = Vcharset_ucs;
1327 Lisp_Object Vdefault_coded_charset_priority_list;
1331 /************************************************************************/
1332 /* Basic charset Lisp functions */
1333 /************************************************************************/
1335 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1336 Return non-nil if OBJECT is a charset.
1340 return CHARSETP (object) ? Qt : Qnil;
1343 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1344 Retrieve the charset of the given name.
1345 If CHARSET-OR-NAME is a charset object, it is simply returned.
1346 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1347 nil is returned. Otherwise the associated charset object is returned.
1351 if (CHARSETP (charset_or_name))
1352 return charset_or_name;
1354 CHECK_SYMBOL (charset_or_name);
1355 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1358 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1359 Retrieve the charset of the given name.
1360 Same as `find-charset' except an error is signalled if there is no such
1361 charset instead of returning nil.
1365 Lisp_Object charset = Ffind_charset (name);
1368 signal_simple_error ("No such charset", name);
1372 /* We store the charsets in hash tables with the names as the key and the
1373 actual charset object as the value. Occasionally we need to use them
1374 in a list format. These routines provide us with that. */
/* Closure passed to the hash-table mapper below: it carries a pointer to
   the list being built. */
1375 struct charset_list_closure
1377 Lisp_Object *charset_list;
/* Mapper called with each KEY/VALUE pair of a hash table: conses KEY onto
   the front of the list the closure points to.  VALUE is not used on the
   visible lines.
   NOTE(review): the return type and return statement are not visible in
   this excerpt. */
1381 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1382 void *charset_list_closure)
1384 /* This function can GC */
1385 struct charset_list_closure *chcl =
1386 (struct charset_list_closure*) charset_list_closure;
1387 Lisp_Object *charset_list = chcl->charset_list;
1389 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1393 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1394 Return a list of the names of all defined charsets.
1398 Lisp_Object charset_list = Qnil;
1399 struct gcpro gcpro1;
1400 struct charset_list_closure charset_list_closure;
1402 GCPRO1 (charset_list);
1403 charset_list_closure.charset_list = &charset_list;
1404 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1405 &charset_list_closure);
1408 return charset_list;
1411 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1412 Return the name of charset CHARSET.
1416 return XCHARSET_NAME (Fget_charset (charset));
1419 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.
   Outline of the visible code:
     - NAME must be a symbol and DOC-STRING a string or nil; an existing
       charset called NAME signals "Cannot redefine existing charset".
     - PROPS is walked as a property list.  Each recognized keyword is
       validated and stored in a local; the chain ends with an
       "Unrecognized property" error.
     - 'final is mandatory and must not exceed 0x5F when the dimension
       is 2.  The CHARS/DIMENSION/FINAL combination must not already be
       registered for either direction.
     - A leading byte is taken from get_unallocated_leading_byte.  A nil
       doc string or registry becomes "", a missing short name becomes
       the symbol's name, and a missing long name becomes the doc string.
     - The charset is built with make_charset; a supplied CCL program is
       stored afterwards through XCHARSET_CCL_PROGRAM.
   NOTE(review): the 'long-name test is a plain `if', not `else if', so it
   starts a new chain.  If that chain ends in the "Unrecognized property"
   error, as the visible lines suggest, a 'short-name entry would be
   accepted by the first test and then rejected by the second chain --
   confirm against the complete source and consider making it `else if'.
   NOTE(review): the doc string lists 'code-min and 'code-max, but the
   keywords tested here are Qmin_code and Qmax_code, which
   syms_of_mule_charset binds to `min-code' and `max-code'. */
1420 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1421 Define a new character set.
1422 This function is for use with Mule support.
1423 NAME is a symbol, the name by which the character set is normally referred.
1424 DOC-STRING is a string describing the character set.
1425 PROPS is a property list, describing the specific nature of the
1426 character set. Recognized properties are:
1428 'short-name Short version of the charset name (ex: Latin-1)
1429 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1430 'registry A regular expression matching the font registry field for
1432 'dimension Number of octets used to index a character in this charset.
1433 Either 1 or 2. Defaults to 1.
1434 If UTF-2000 feature is enabled, 3 or 4 are also available.
1435 'columns Number of columns used to display a character in this charset.
1436 Only used in TTY mode. (Under X, the actual width of a
1437 character can be derived from the font used to display the
1438 characters.) If unspecified, defaults to the dimension
1439 (this is almost always the correct value).
1440 'chars Number of characters in each dimension (94 or 96).
1441 Defaults to 94. Note that if the dimension is 2, the
1442 character set thus described is 94x94 or 96x96.
1443 If UTF-2000 feature is enabled, 128 or 256 are also available.
1444 'final Final byte of ISO 2022 escape sequence. Must be
1445 supplied. Each combination of (DIMENSION, CHARS) defines a
1446 separate namespace for final bytes. Note that ISO
1447 2022 restricts the final byte to the range
1448 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1449 dimension == 2. Note also that final bytes in the range
1450 0x30 - 0x3F are reserved for user-defined (not official)
1452 'graphic 0 (use left half of font on output) or 1 (use right half
1453 of font on output). Defaults to 0. For example, for
1454 a font whose registry is ISO8859-1, the left half
1455 (octets 0x20 - 0x7F) is the `ascii' character set, while
1456 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1457 character set. With 'graphic set to 0, the octets
1458 will have their high bit cleared; with it set to 1,
1459 the octets will have their high bit set.
1460 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1462 'ccl-program A compiled CCL program used to convert a character in
1463 this charset into an index into the font. This is in
1464 addition to the 'graphic property. The CCL program
1465 is passed the octets of the character, with the high
1466 bit cleared and set depending upon whether the value
1467 of the 'graphic property is 0 or 1.
1468 'mother [UTF-2000 only] Base coded-charset.
1469 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1470 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1471 'code-offset [UTF-2000 only] Offset for a code-point of a base
1473 'conversion [UTF-2000 only] Conversion for a code-point of a base
1474 coded-charset (94x60 or 94x94x60).
1476 (name, doc_string, props))
1478 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1479 int direction = CHARSET_LEFT_TO_RIGHT;
1480 Lisp_Object registry = Qnil;
1481 Lisp_Object charset;
1482 Lisp_Object ccl_program = Qnil;
1483 Lisp_Object short_name = Qnil, long_name = Qnil;
1484 Lisp_Object mother = Qnil;
1485 int min_code = 0, max_code = 0, code_offset = 0;
1486 int byte_offset = -1;
1489 CHECK_SYMBOL (name);
1490 if (!NILP (doc_string))
1491 CHECK_STRING (doc_string);
1493 charset = Ffind_charset (name);
1494 if (!NILP (charset))
1495 signal_simple_error ("Cannot redefine existing charset", name);
1498 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1500 if (EQ (keyword, Qshort_name))
1502 CHECK_STRING (value);
1506 if (EQ (keyword, Qlong_name))
1508 CHECK_STRING (value);
1512 else if (EQ (keyword, Qdimension))
1515 dimension = XINT (value);
1516 if (dimension < 1 ||
1523 signal_simple_error ("Invalid value for 'dimension", value);
1526 else if (EQ (keyword, Qchars))
1529 chars = XINT (value);
1530 if (chars != 94 && chars != 96
1532 && chars != 128 && chars != 256
1535 signal_simple_error ("Invalid value for 'chars", value);
1538 else if (EQ (keyword, Qcolumns))
1541 columns = XINT (value);
1542 if (columns != 1 && columns != 2)
1543 signal_simple_error ("Invalid value for 'columns", value);
1546 else if (EQ (keyword, Qgraphic))
1549 graphic = XINT (value);
1557 signal_simple_error ("Invalid value for 'graphic", value);
1560 else if (EQ (keyword, Qregistry))
1562 CHECK_STRING (value);
1566 else if (EQ (keyword, Qdirection))
1568 if (EQ (value, Ql2r))
1569 direction = CHARSET_LEFT_TO_RIGHT;
1570 else if (EQ (value, Qr2l))
1571 direction = CHARSET_RIGHT_TO_LEFT;
1573 signal_simple_error ("Invalid value for 'direction", value);
1576 else if (EQ (keyword, Qfinal))
1578 CHECK_CHAR_COERCE_INT (value);
1579 final = XCHAR (value);
1580 if (final < '0' || final > '~')
1581 signal_simple_error ("Invalid value for 'final", value);
1585 else if (EQ (keyword, Qmother))
1587 mother = Fget_charset (value);
1590 else if (EQ (keyword, Qmin_code))
1593 min_code = XUINT (value);
1596 else if (EQ (keyword, Qmax_code))
1599 max_code = XUINT (value);
1602 else if (EQ (keyword, Qcode_offset))
1605 code_offset = XUINT (value);
1608 else if (EQ (keyword, Qconversion))
1610 if (EQ (value, Q94x60))
1611 conversion = CONVERSION_94x60;
1612 else if (EQ (value, Q94x94x60))
1613 conversion = CONVERSION_94x94x60;
1615 signal_simple_error ("Unrecognized conversion", value);
1619 else if (EQ (keyword, Qccl_program))
1621 struct ccl_program test_ccl;
1623 if (setup_ccl_program (&test_ccl, value) < 0)
1624 signal_simple_error ("Invalid value for 'ccl-program", value);
1625 ccl_program = value;
1629 signal_simple_error ("Unrecognized property", keyword);
1635 error ("'final must be specified");
1637 if (dimension == 2 && final > 0x5F)
1639 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1642 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1643 CHARSET_LEFT_TO_RIGHT)) ||
1644 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1645 CHARSET_RIGHT_TO_LEFT)))
1647 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1649 id = get_unallocated_leading_byte (dimension);
1651 if (NILP (doc_string))
1652 doc_string = build_string ("");
1654 if (NILP (registry))
1655 registry = build_string ("");
1657 if (NILP (short_name))
1658 XSETSTRING (short_name, XSYMBOL (name)->name);
1660 if (NILP (long_name))
1661 long_name = doc_string;
1664 columns = dimension;
1666 if (byte_offset < 0)
1670 else if (chars == 96)
1676 charset = make_charset (id, name, chars, dimension, columns, graphic,
1677 final, direction, short_name, long_name,
1678 doc_string, registry,
1679 Qnil, min_code, max_code, code_offset, byte_offset,
1680 mother, conversion);
1681 if (!NILP (ccl_program))
1682 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   Resolves CHARSET and signals an error if it already has a
   reverse-direction charset.  NEW-NAME must be a symbol that does not
   name an existing charset.  The chars, dimension, columns, graphic,
   final byte, doc string, short/long name and registry are copied from
   CHARSET; the direction is flipped; a fresh leading byte comes from
   get_unallocated_leading_byte.  After make_charset, the two charsets
   are linked to each other through their REVERSE_DIRECTION_CHARSET
   slots.
   Two alternative tails for the make_charset argument list are visible
   (values copied from CHARSET versus `Qnil, 0, 0, 0, 0, Qnil, 0');
   presumably a preprocessor conditional not shown in this excerpt
   selects between them -- TODO confirm. */
1686 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1688 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1689 NEW-NAME is the name of the new charset. Return the new charset.
1691 (charset, new_name))
1693 Lisp_Object new_charset = Qnil;
1694 int id, chars, dimension, columns, graphic, final;
1696 Lisp_Object registry, doc_string, short_name, long_name;
1699 charset = Fget_charset (charset);
1700 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1701 signal_simple_error ("Charset already has reverse-direction charset",
1704 CHECK_SYMBOL (new_name);
1705 if (!NILP (Ffind_charset (new_name)))
1706 signal_simple_error ("Cannot redefine existing charset", new_name);
1708 cs = XCHARSET (charset);
1710 chars = CHARSET_CHARS (cs);
1711 dimension = CHARSET_DIMENSION (cs);
1712 columns = CHARSET_COLUMNS (cs);
1713 id = get_unallocated_leading_byte (dimension);
1715 graphic = CHARSET_GRAPHIC (cs);
1716 final = CHARSET_FINAL (cs);
1717 direction = CHARSET_RIGHT_TO_LEFT;
1718 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1719 direction = CHARSET_LEFT_TO_RIGHT;
1720 doc_string = CHARSET_DOC_STRING (cs);
1721 short_name = CHARSET_SHORT_NAME (cs);
1722 long_name = CHARSET_LONG_NAME (cs);
1723 registry = CHARSET_REGISTRY (cs);
1725 new_charset = make_charset (id, new_name, chars, dimension, columns,
1726 graphic, final, direction, short_name, long_name,
1727 doc_string, registry,
1729 CHARSET_DECODING_TABLE(cs),
1730 CHARSET_MIN_CODE(cs),
1731 CHARSET_MAX_CODE(cs),
1732 CHARSET_CODE_OFFSET(cs),
1733 CHARSET_BYTE_OFFSET(cs),
1735 CHARSET_CONVERSION (cs)
1737 Qnil, 0, 0, 0, 0, Qnil, 0
1741 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1742 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.
   ALIAS must be a symbol.  CHARSET is resolved with Fget_charset, and
   the alias is entered into Vcharset_hash_table with that charset object
   as its value; the result of Fputhash is returned. */
1747 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1748 Define symbol ALIAS as an alias for CHARSET.
1752 CHECK_SYMBOL (alias);
1753 charset = Fget_charset (charset);
1754 return Fputhash (alias, charset, Vcharset_hash_table);
1757 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and returns the content of its
   reverse-direction slot unchanged. */
1759 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1761 Return the reverse-direction charset parallel to CHARSET, if any.
1762 This is the charset with the same properties (in particular, the same
1763 dimension, number of characters per dimension, and final byte) as
1764 CHARSET but whose characters are displayed in the opposite direction.
1768 charset = Fget_charset (charset);
1769 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Validation visible here: DIMENSION must be an integer 1 or 2, CHARS
   must be 94 or 96, FINAL must lie in '0' .. '~' (and be at most 0x5F
   when the dimension is 2), and DIRECTION must be l2r, r2l or nil.  Any
   other value signals an error.  The charset is then looked up with
   CHARSET_BY_ATTRIBUTES, either for the requested direction or, as the
   visible calls suggest, for left-to-right and then right-to-left; the
   conditions choosing between those calls are not part of this excerpt.
   NOTE(review): the return statement shown yields XCHARSET_NAME (obj),
   i.e. the charset's name, while the doc string says "a charset" --
   confirm which is intended. */
1773 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1774 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1775 If DIRECTION is omitted, both directions will be checked (left-to-right
1776 will be returned if character sets exist for both directions).
1778 (dimension, chars, final, direction))
1780 int dm, ch, fi, di = -1;
1781 Lisp_Object obj = Qnil;
1783 CHECK_INT (dimension);
1784 dm = XINT (dimension);
1785 if (dm < 1 || dm > 2)
1786 signal_simple_error ("Invalid value for DIMENSION", dimension);
1790 if (ch != 94 && ch != 96)
1791 signal_simple_error ("Invalid value for CHARS", chars);
1793 CHECK_CHAR_COERCE_INT (final);
1795 if (fi < '0' || fi > '~')
1796 signal_simple_error ("Invalid value for FINAL", final);
1798 if (EQ (direction, Ql2r))
1799 di = CHARSET_LEFT_TO_RIGHT;
1800 else if (EQ (direction, Qr2l))
1801 di = CHARSET_RIGHT_TO_LEFT;
1802 else if (!NILP (direction))
1803 signal_simple_error ("Invalid value for DIRECTION", direction);
1805 if (dm == 2 && fi > 0x5F)
1807 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1811 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1813 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1816 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1819 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.
   Resolves CHARSET with Fget_charset and returns its short-name slot. */
1823 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1824 Return short name of CHARSET.
1828 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.
   Resolves CHARSET with Fget_charset and returns its long-name slot. */
1831 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1832 Return long name of CHARSET.
1836 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.
   Resolves CHARSET with Fget_charset and returns its doc-string slot. */
1839 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1840 Return description of CHARSET.
1844 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.
   Resolves CHARSET with Fget_charset and returns its dimension as a
   Lisp integer. */
1847 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1848 Return dimension of CHARSET.
1852 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   Resolves CHARSET, checks that PROP is a symbol, and returns the field
   selected by PROP:
     - integer fields (dimension, columns, graphic, chars, min-code,
       max-code) are boxed with make_int;
     - 'final yields nil when the stored final byte is 0, otherwise the
       byte as a character;
     - 'direction yields l2r or r2l;
     - 'reverse-direction-charset yields the name of the reverse charset
       when the slot holds a charset, otherwise the slot value itself;
     - the remaining recognized properties return the stored Lisp object.
   Any other PROP signals "Unrecognized charset property name". */
1855 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1856 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1857 Recognized properties are those listed in `make-charset', as well as
1858 'name and 'doc-string.
1864 charset = Fget_charset (charset);
1865 cs = XCHARSET (charset);
1867 CHECK_SYMBOL (prop);
1868 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1869 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1870 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1871 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1872 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1873 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1874 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1875 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1876 Qnil : make_char (CHARSET_FINAL (cs));
1877 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1878 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1879 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1880 if (EQ (prop, Qdirection))
1881 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1882 if (EQ (prop, Qreverse_direction_charset))
1884 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1885 /* #### Is this translation OK? If so, error checking sufficient? */
1886 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1889 if (EQ (prop, Qmother))
1890 return CHARSET_MOTHER (cs);
1891 if (EQ (prop, Qmin_code))
1892 return make_int (CHARSET_MIN_CODE (cs));
1893 if (EQ (prop, Qmax_code))
1894 return make_int (CHARSET_MAX_CODE (cs));
1896 signal_simple_error ("Unrecognized charset property name", prop);
1897 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.
   Resolves CHARSET with Fget_charset and returns its leading byte
   (XCHARSET_LEADING_BYTE) as a Lisp integer. */
1900 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1901 Return charset identification number of CHARSET.
1905 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1908 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   Resolves CHARSET, then test-compiles CCL-PROGRAM into a scratch
   struct ccl_program with setup_ccl_program; a negative result signals
   "Invalid ccl-program".  On success the program is stored in the
   charset's ccl-program slot. */
1911 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1912 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1914 (charset, ccl_program))
1916 struct ccl_program test_ccl;
1918 charset = Fget_charset (charset);
1919 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1920 signal_simple_error ("Invalid ccl-program", ccl_program);
1921 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Clear the cached font data kept for CHARSET on every device.
   For each device in DEVICE_LOOP_NO_BREAK, CHARSET is looked up in that
   device's charset_font_cache; when an entry exists (the lookup does not
   return Qunbound), the hash table found there is emptied with Fclrhash.
   The entry itself stays in the per-device cache. */
1926 invalidate_charset_font_caches (Lisp_Object charset)
1928 /* Invalidate font cache entries for charset on all devices. */
1929 Lisp_Object devcons, concons, hash_table;
1930 DEVICE_LOOP_NO_BREAK (devcons, concons)
1932 struct device *d = XDEVICE (XCAR (devcons));
1933 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1934 if (!UNBOUNDP (hash_table))
1935 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.
   Resolves CHARSET, requires REGISTRY to be a string, and stores it in
   the charset's registry slot.  The per-device font caches for the
   charset are then invalidated and face_property_was_changed is called
   for the font of Vdefault_face with the global locale. */
1939 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1940 Set the 'registry property of CHARSET to REGISTRY.
1942 (charset, registry))
1944 charset = Fget_charset (charset);
1945 CHECK_STRING (registry);
1946 XCHARSET_REGISTRY (charset) = registry;
1947 invalidate_charset_font_caches (charset);
1948 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.
   Resolves CHARSET with Fget_charset and returns its decoding-table
   slot. */
1953 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1954 Return mapping-table of CHARSET.
1958 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   Resolves CHARSET.  The visible branches either reset the decoding
   table to nil or, for a vector TABLE, run
   decoding_table_check_elements and signal "Too big table", "Invalid
   element is found" or "Something wrong" depending on its result; a
   TABLE of any other type signals wrong-type-argument.
   Afterwards the entries of TABLE are registered one by one with
   Fput_char_attribute under the charset's name, switching on the
   charset's dimension.  The code point passed is the index plus the
   charset's byte offset; in the nested-vector case the outer index plus
   offset is shifted left by 8 before being combined with the inner
   position.
   NOTE(review): the conditions guarding each branch and the rest of the
   two-level code-point expression are not part of this excerpt. */
1961 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1962 Set mapping-table of CHARSET to TABLE.
1966 struct Lisp_Charset *cs;
1970 charset = Fget_charset (charset);
1971 cs = XCHARSET (charset);
1975 CHARSET_DECODING_TABLE(cs) = Qnil;
1978 else if (VECTORP (table))
1980 int ccs_len = CHARSET_BYTE_SIZE (cs);
1981 int ret = decoding_table_check_elements (table,
1982 CHARSET_DIMENSION (cs),
1987 signal_simple_error ("Too big table", table);
1989 signal_simple_error ("Invalid element is found", table);
1991 signal_simple_error ("Something wrong", table);
1993 CHARSET_DECODING_TABLE(cs) = Qnil;
1996 signal_error (Qwrong_type_argument,
1997 list2 (build_translated_string ("vector-or-nil-p"),
2000 byte_offset = CHARSET_BYTE_OFFSET (cs);
2001 switch (CHARSET_DIMENSION (cs))
2004 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2006 Lisp_Object c = XVECTOR_DATA(table)[i];
2009 Fput_char_attribute (c, XCHARSET_NAME (charset),
2010 make_int (i + byte_offset));
2014 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2016 Lisp_Object v = XVECTOR_DATA(table)[i];
2022 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2024 Lisp_Object c = XVECTOR_DATA(v)[j];
2028 (c, XCHARSET_NAME (charset),
2029 make_int ( ( (i + byte_offset) << 8 )
2035 Fput_char_attribute (v, XCHARSET_NAME (charset),
2036 make_int (i + byte_offset));
2045 /************************************************************************/
2046 /* Lisp primitives for working with characters */
2047 /************************************************************************/
/* Lisp primitive `decode-char'.
   Resolves CHARSET.  The code point is decoded with DECODE_CHAR when
   DEFINED_ONLY is nil and with decode_defined_char otherwise.  A
   negative result is returned as nil, any other result as a Lisp
   character.
   NOTE(review): some adjustment is applied when the charset's graphic
   value is 1, but the adjusting statement and the extraction of the
   integer from CODE are not part of this excerpt. */
2050 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2051 Make a character from CHARSET and code-point CODE.
2052 If DEFINED_ONLY is non-nil, builtin character is not returned.
2053 If corresponding character is not found, nil is returned.
2055 (charset, code, defined_only))
2059 charset = Fget_charset (charset);
2062 if (XCHARSET_GRAPHIC (charset) == 1)
2064 if (NILP (defined_only))
2065 c = DECODE_CHAR (charset, c);
2067 c = decode_defined_char (charset, c);
2068 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   Resolves CHARSET.  For latin-viscii the character is first obtained
   through Fdecode_char and its attributes for latin-viscii-lower and
   latin-viscii-upper are queried; CHARSET is switched to one of those
   two charsets in the branches shown (the exact conditions are not part
   of this excerpt).  The code point is then decoded with
   decode_builtin_char, and when that yields a negative value the
   function falls back to Fdecode_char with DEFINED_ONLY nil. */
2071 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2072 Make a builtin character from CHARSET and code-point CODE.
2078 charset = Fget_charset (charset);
2080 if (EQ (charset, Vcharset_latin_viscii))
2082 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2088 (ret = Fget_char_attribute (chr,
2089 Vcharset_latin_viscii_lower,
2092 charset = Vcharset_latin_viscii_lower;
2096 (ret = Fget_char_attribute (chr,
2097 Vcharset_latin_viscii_upper,
2100 charset = Vcharset_latin_viscii_upper;
2107 if (XCHARSET_GRAPHIC (charset) == 1)
2110 c = decode_builtin_char (charset, c);
2111 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.
   Resolves CHARSET and selects the permitted octet range: 0..127 for
   ascii, 0..31 for control-1, 0..255 for 256-character sets, 33..126
   for 94-character sets and 32..127 otherwise.  The first octet is
   range-checked with args_out_of_range_3 on failure.  For a
   one-dimensional charset the result is MAKE_CHAR (charset, a1, 0), and
   the error text shown requires the second octet to be nil.  Otherwise
   ARG2 is masked to its low seven bits, range-checked the same way, and
   both octets are combined with MAKE_CHAR.
   NOTE(review): the statements that derive a1 from ARG1 are not part of
   this excerpt. */
2115 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2116 Make a character from CHARSET and octets ARG1 and ARG2.
2117 ARG2 is required only for characters from two-dimensional charsets.
2118 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2119 character s with caron.
2121 (charset, arg1, arg2))
2125 int lowlim, highlim;
2127 charset = Fget_charset (charset);
2128 cs = XCHARSET (charset);
2130 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2131 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2133 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2135 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2136 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2139 /* It is useful (and safe, according to Olivier Galibert) to strip
2140 the 8th bit off ARG1 and ARG2 because it allows programmers to
2141 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2142 Latin 2 code of the character. */
2150 if (a1 < lowlim || a1 > highlim)
2151 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2153 if (CHARSET_DIMENSION (cs) == 1)
2157 ("Charset is of dimension one; second octet must be nil", arg2);
2158 return make_char (MAKE_CHAR (charset, a1, 0));
2167 a2 = XINT (arg2) & 0x7f;
2169 if (a2 < lowlim || a2 > highlim)
2170 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2172 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   Coerces CHARACTER to a character (CHECK_CHAR_COERCE_INT) and returns
   the name of the charset that CHAR_CHARSET reports for it. */
2175 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2176 Return the character set of CHARACTER.
2180 CHECK_CHAR_COERCE_INT (character);
2182 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.
   Coerces CHARACTER to a character and splits it with BREAKUP_CHAR into
   a charset and two octets.  N nil or 0 returns the first octet, N equal
   to 1 returns the second, and any other N signals "Octet number must be
   0 or 1". */
2185 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2186 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2187 N defaults to 0 if omitted.
2191 Lisp_Object charset;
2194 CHECK_CHAR_COERCE_INT (character);
2196 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2198 if (NILP (n) || EQ (n, Qzero))
2199 return make_int (octet0);
2200 else if (EQ (n, make_int (1)))
2201 return make_int (octet1);
2203 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.
   Coerces CHARACTER to a character, resolves CHARSET, and asks
   charset_code_point for the code point, passing a true flag when
   DEFINED_ONLY is non-nil.  A non-negative code point is returned as a
   Lisp integer.
   NOTE(review): the return for a negative code point is not part of
   this excerpt. */
2207 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2208 Return code-point of CHARACTER in specified CHARSET.
2210 (character, charset, defined_only))
2214 CHECK_CHAR_COERCE_INT (character);
2215 charset = Fget_charset (charset);
2216 code_point = charset_code_point (charset, XCHAR (character),
2217 !NILP (defined_only));
2218 if (code_point >= 0)
2219 return make_int (code_point);
/* Lisp primitive `split-char'.
   Coerces CHARACTER to a character while CHARSET and the result list
   are GCPRO'd.  Two ways of building the result are visible:
     - ENCODE_CHAR yields a code point whose low byte is consed onto the
       list once per iteration of a loop over the charset's dimension,
       and the charset name is then consed on the front;
     - BREAKUP_CHAR yields two position codes and the result is built
       with list3 for dimension 2 or list2 otherwise.
   NOTE(review): presumably a preprocessor conditional not part of this
   excerpt selects one of the two variants -- TODO confirm. */
2225 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2226 Return list of charset and one or two position-codes of CHARACTER.
2230 /* This function can GC */
2231 struct gcpro gcpro1, gcpro2;
2232 Lisp_Object charset = Qnil;
2233 Lisp_Object rc = Qnil;
2241 GCPRO2 (charset, rc);
2242 CHECK_CHAR_COERCE_INT (character);
2245 code_point = ENCODE_CHAR (XCHAR (character), charset);
2246 dimension = XCHARSET_DIMENSION (charset);
2247 while (dimension > 0)
2249 rc = Fcons (make_int (code_point & 255), rc);
2253 rc = Fcons (XCHARSET_NAME (charset), rc);
2255 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2257 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2259 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2263 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2272 #ifdef ENABLE_COMPOSITE_CHARS
2273 /************************************************************************/
2274 /* composite character functions */
2275 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  On the allocation path shown,
   a new character is built in Vcharset_composite from the
   composite_char_row_next / composite_char_col_next counters and
   recorded in both the char->string and string->char tables.  The
   column counter is then advanced; once it reaches 128 it restarts at
   32 and the row counter is advanced.  A row counter of 128 or more
   signals "No more composite chars available".
   NOTE(review): the test deciding between reuse and allocation, and the
   return statement, are not part of this excerpt. */
2278 lookup_composite_char (Bufbyte *str, int len)
2280 Lisp_Object lispstr = make_string (str, len);
2281 Lisp_Object ch = Fgethash (lispstr,
2282 Vcomposite_char_string2char_hash_table,
2288 if (composite_char_row_next >= 128)
2289 signal_simple_error ("No more composite chars available", lispstr);
2290 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2291 composite_char_col_next);
2292 Fputhash (make_char (emch), lispstr,
2293 Vcomposite_char_char2string_hash_table);
2294 Fputhash (lispstr, make_char (emch),
2295 Vcomposite_char_string2char_hash_table);
2296 composite_char_col_next++;
2297 if (composite_char_col_next >= 128)
2299 composite_char_col_next = 32;
2300 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that an
   entry exists, i.e. that the lookup did not come back unbound. */
2309 composite_char_string (Emchar ch)
2311 Lisp_Object str = Fgethash (make_char (ch),
2312 Vcomposite_char_char2string_hash_table,
2314 assert (!UNBOUNDP (str));
/* `make-composite-char', declared with xxDEFUN rather than DEFUN.
   STRING must be a Lisp string; its data and length are handed to
   lookup_composite_char and the resulting character is returned as a
   Lisp character. */
2318 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2319 Convert a string into a single composite character.
2320 The character is the result of overstriking all the characters in
2325 CHECK_STRING (string);
2326 return make_char (lookup_composite_char (XSTRING_DATA (string),
2327 XSTRING_LENGTH (string)));
/* `composite-char-string', declared with xxDEFUN rather than DEFUN.
   Signals "Must be composite char" unless the character's leading byte
   is LEADING_BYTE_COMPOSITE; otherwise returns the string that
   composite_char_string finds for it. */
2330 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2331 Return a string of the characters comprising a composite character.
2339 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2340 signal_simple_error ("Must be composite char", ch);
2341 return composite_char_string (emch);
2343 #endif /* ENABLE_COMPOSITE_CHARS */
2346 /************************************************************************/
2347 /* initialization */
2348 /************************************************************************/
/* Symbol and subr registration for this file.
   In order: initializes the lrecord implementation for charsets,
   registers every Lisp primitive of this file with DEFSUBR (the two
   composite-character primitives only under ENABLE_COMPOSITE_CHARS),
   and interns with defsymbol the property keywords, the direction
   symbols l2r / r2l, and the names of the predefined charsets.
   NOTE(review): several groups below look like they belong to
   conditional sections (e.g. UTF-2000 only), but the preprocessor lines
   are not part of this excerpt. */
2351 syms_of_mule_charset (void)
2353 INIT_LRECORD_IMPLEMENTATION (charset);
2355 DEFSUBR (Fcharsetp);
2356 DEFSUBR (Ffind_charset);
2357 DEFSUBR (Fget_charset);
2358 DEFSUBR (Fcharset_list);
2359 DEFSUBR (Fcharset_name);
2360 DEFSUBR (Fmake_charset);
2361 DEFSUBR (Fmake_reverse_direction_charset);
2362 /* DEFSUBR (Freverse_direction_charset); */
2363 DEFSUBR (Fdefine_charset_alias);
2364 DEFSUBR (Fcharset_from_attributes);
2365 DEFSUBR (Fcharset_short_name);
2366 DEFSUBR (Fcharset_long_name);
2367 DEFSUBR (Fcharset_description);
2368 DEFSUBR (Fcharset_dimension);
2369 DEFSUBR (Fcharset_property);
2370 DEFSUBR (Fcharset_id);
2371 DEFSUBR (Fset_charset_ccl_program);
2372 DEFSUBR (Fset_charset_registry);
2374 DEFSUBR (Fcharset_mapping_table);
2375 DEFSUBR (Fset_charset_mapping_table);
2379 DEFSUBR (Fdecode_char);
2380 DEFSUBR (Fdecode_builtin_char);
2381 DEFSUBR (Fencode_char);
2383 DEFSUBR (Fmake_char);
2384 DEFSUBR (Fchar_charset);
2385 DEFSUBR (Fchar_octet);
2386 DEFSUBR (Fsplit_char);
2388 #ifdef ENABLE_COMPOSITE_CHARS
2389 DEFSUBR (Fmake_composite_char);
2390 DEFSUBR (Fcomposite_char_string);
2393 defsymbol (&Qcharsetp, "charsetp");
2394 defsymbol (&Qregistry, "registry");
2395 defsymbol (&Qfinal, "final");
2396 defsymbol (&Qgraphic, "graphic");
2397 defsymbol (&Qdirection, "direction");
2398 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2399 defsymbol (&Qshort_name, "short-name");
2400 defsymbol (&Qlong_name, "long-name");
2402 defsymbol (&Qmother, "mother");
2403 defsymbol (&Qmin_code, "min-code");
2404 defsymbol (&Qmax_code, "max-code");
2405 defsymbol (&Qcode_offset, "code-offset");
2406 defsymbol (&Qconversion, "conversion");
2407 defsymbol (&Q94x60, "94x60");
2408 defsymbol (&Q94x94x60, "94x94x60");
2411 defsymbol (&Ql2r, "l2r");
2412 defsymbol (&Qr2l, "r2l");
2414 /* Charsets, compatible with FSF 20.3
2415 Naming convention is Script-Charset[-Edition] */
2416 defsymbol (&Qascii, "ascii");
2417 defsymbol (&Qcontrol_1, "control-1");
2418 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2419 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2420 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2421 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2422 defsymbol (&Qthai_tis620, "thai-tis620");
2423 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2424 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2425 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2426 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2427 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2428 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2429 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2430 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2431 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2432 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2433 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2434 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2435 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2436 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2437 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2438 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2440 defsymbol (&Qucs, "ucs");
2441 defsymbol (&Qucs_bmp, "ucs-bmp");
2442 defsymbol (&Qucs_smp, "ucs-smp");
2443 defsymbol (&Qucs_sip, "ucs-sip");
2444 defsymbol (&Qucs_cns, "ucs-cns");
2445 defsymbol (&Qucs_jis, "ucs-jis");
2446 defsymbol (&Qucs_ks, "ucs-ks");
2447 defsymbol (&Qucs_big5, "ucs-big5");
2448 defsymbol (&Qlatin_viscii, "latin-viscii");
2449 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2450 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2451 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2452 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2453 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2454 defsymbol (&Qjis_x0208, "=jis-x0208");
2455 defsymbol (&Qideograph_gt, "ideograph-gt");
2456 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2457 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2458 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2459 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2460 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2461 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2462 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2463 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2464 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2465 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2466 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2467 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2468 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2469 defsymbol (&Qchinese_big5, "chinese-big5");
2470 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2471 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2472 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2473 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2474 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2475 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2476 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2477 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2478 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2479 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2480 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2481 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2482 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2483 defsymbol (&Qchina3_jef, "china3-jef");
2484 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2485 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2487 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2488 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2490 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file.
   Allocates a zero-filled struct charset_lookup in chlook and registers
   it as a dump root.  Every slot of charset_by_leading_byte and of
   charset_by_attributes is set to Qnil, and the next-allocated
   leading-byte counters are seeded with their MIN_LEADING_BYTE_PRIVATE*
   starting values.  Finally the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'
   are defined and given initial values.
   NOTE(review): both a two-level and a three-level initialization of
   charset_by_attributes are visible, as are two sets of leading-byte
   counters; presumably preprocessor conditionals not part of this
   excerpt select one of each -- TODO confirm. */
2494 vars_of_mule_charset (void)
2501 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2502 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2504 /* Table of charsets indexed by leading byte. */
2505 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2506 chlook->charset_by_leading_byte[i] = Qnil;
2509 /* Table of charsets indexed by type/final-byte. */
2510 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2511 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2512 chlook->charset_by_attributes[i][j] = Qnil;
2514 /* Table of charsets indexed by type/final-byte/direction. */
2515 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2516 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2517 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2518 chlook->charset_by_attributes[i][j][k] = Qnil;
2522 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2524 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2525 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2529 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2530 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2531 Leading-code of private TYPE9N charset of column-width 1.
2533 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2537 Vdefault_coded_charset_priority_list = Qnil;
2538 DEFVAR_LISP ("default-coded-charset-priority-list",
2539 &Vdefault_coded_charset_priority_list /*
2540 Default order of preferred coded-character-sets.
2546 complex_vars_of_mule_charset (void)
2548 staticpro (&Vcharset_hash_table);
2549 Vcharset_hash_table =
2550 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2552 /* Predefined character sets. We store them into variables for
2556 staticpro (&Vcharset_ucs);
2558 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2559 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2560 build_string ("UCS"),
2561 build_string ("UCS"),
2562 build_string ("ISO/IEC 10646"),
2564 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2565 staticpro (&Vcharset_ucs_bmp);
2567 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2568 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("BMP"),
2570 build_string ("UCS-BMP"),
2571 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2573 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2574 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2575 staticpro (&Vcharset_ucs_smp);
2577 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2578 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2579 build_string ("SMP"),
2580 build_string ("UCS-SMP"),
2581 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2582 build_string ("UCS00-1"),
2583 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2584 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2585 staticpro (&Vcharset_ucs_sip);
2587 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2588 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2589 build_string ("SIP"),
2590 build_string ("UCS-SIP"),
2591 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2592 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2593 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2594 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2595 staticpro (&Vcharset_ucs_cns);
2597 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2598 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2599 build_string ("UCS for CNS"),
2600 build_string ("UCS for CNS 11643"),
2601 build_string ("ISO/IEC 10646 for CNS 11643"),
2603 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_ucs_jis);
2606 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2607 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2608 build_string ("UCS for JIS"),
2609 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2611 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2613 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2614 staticpro (&Vcharset_ucs_ks);
2616 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2617 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2618 build_string ("UCS for KS"),
2619 build_string ("UCS for CCS defined by KS"),
2620 build_string ("ISO/IEC 10646 for Korean Standards"),
2622 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2623 staticpro (&Vcharset_ucs_big5);
2625 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2626 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2627 build_string ("UCS for Big5"),
2628 build_string ("UCS for Big5"),
2629 build_string ("ISO/IEC 10646 for Big5"),
2631 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2633 # define MIN_CHAR_THAI 0
2634 # define MAX_CHAR_THAI 0
2635 /* # define MIN_CHAR_HEBREW 0 */
2636 /* # define MAX_CHAR_HEBREW 0 */
2637 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2638 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* ASCII charset: protect Vcharset_ascii with staticpro() and create the
   charset with make_charset().  The three plain build_string() values
   are presumably short name, long name and doc string, followed by the
   registry pattern -- confirm against make_charset's prototype.  The
   second string carries no parenthetical, so it must not end in ')'.
   NOTE(review): this listing has no `Vcharset_ascii =' line in front of
   the make_charset() call; confirm against the pristine source.  */
2640 staticpro (&Vcharset_ascii);
2642 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2643 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2644 build_string ("ASCII"),
2645 build_string ("ASCII"),
2646 build_string ("ASCII (ISO646 IRV)"),
2647 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2648 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* C1 control charset: protect Vcharset_control_1 with staticpro() and
   assign it the charset built by make_charset().  The call passes 0x80
   and 0x9F in the positions that sibling calls annotate as
   MIN_CHAR_* / MAX_CHAR_*, i.e. decimal 128 through 159, so the
   descriptive text states that same range.  */
2649 staticpro (&Vcharset_control_1);
2650 Vcharset_control_1 =
2651 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2652 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2653 build_string ("C1"),
2654 build_string ("Control characters"),
2655 build_string ("Control characters 128-159"),
2657 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_latin_iso8859_1);
2659 Vcharset_latin_iso8859_1 =
2660 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2661 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("Latin-1"),
2663 build_string ("ISO8859-1 (Latin-1)"),
2664 build_string ("ISO8859-1 (Latin-1)"),
2665 build_string ("iso8859-1"),
2666 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_latin_iso8859_2);
2668 Vcharset_latin_iso8859_2 =
2669 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2670 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2671 build_string ("Latin-2"),
2672 build_string ("ISO8859-2 (Latin-2)"),
2673 build_string ("ISO8859-2 (Latin-2)"),
2674 build_string ("iso8859-2"),
2675 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_3);
2677 Vcharset_latin_iso8859_3 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2679 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-3"),
2681 build_string ("ISO8859-3 (Latin-3)"),
2682 build_string ("ISO8859-3 (Latin-3)"),
2683 build_string ("iso8859-3"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2685 staticpro (&Vcharset_latin_iso8859_4);
2686 Vcharset_latin_iso8859_4 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2688 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2689 build_string ("Latin-4"),
2690 build_string ("ISO8859-4 (Latin-4)"),
2691 build_string ("ISO8859-4 (Latin-4)"),
2692 build_string ("iso8859-4"),
2693 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_thai_tis620);
2695 Vcharset_thai_tis620 =
2696 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2697 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("TIS620"),
2699 build_string ("TIS620 (Thai)"),
2700 build_string ("TIS620.2529 (Thai)"),
2701 build_string ("tis620"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_greek_iso8859_7);
2704 Vcharset_greek_iso8859_7 =
2705 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2706 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("ISO8859-7"),
2708 build_string ("ISO8859-7 (Greek)"),
2709 build_string ("ISO8859-7 (Greek)"),
2710 build_string ("iso8859-7"),
2711 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_arabic_iso8859_6);
2713 Vcharset_arabic_iso8859_6 =
2714 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2715 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2716 build_string ("ISO8859-6"),
2717 build_string ("ISO8859-6 (Arabic)"),
2718 build_string ("ISO8859-6 (Arabic)"),
2719 build_string ("iso8859-6"),
2720 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2721 staticpro (&Vcharset_hebrew_iso8859_8);
2722 Vcharset_hebrew_iso8859_8 =
2723 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2724 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2725 build_string ("ISO8859-8"),
2726 build_string ("ISO8859-8 (Hebrew)"),
2727 build_string ("ISO8859-8 (Hebrew)"),
2728 build_string ("iso8859-8"),
2730 0 /* MIN_CHAR_HEBREW */,
2731 0 /* MAX_CHAR_HEBREW */, 0, 32,
2732 Qnil, CONVERSION_IDENTICAL);
2733 staticpro (&Vcharset_katakana_jisx0201);
2734 Vcharset_katakana_jisx0201 =
2735 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2736 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2737 build_string ("JISX0201 Kana"),
2738 build_string ("JISX0201.1976 (Japanese Kana)"),
2739 build_string ("JISX0201.1976 Japanese Kana"),
2740 build_string ("jisx0201\\.1976"),
2741 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2742 staticpro (&Vcharset_latin_jisx0201);
2743 Vcharset_latin_jisx0201 =
2744 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2745 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2746 build_string ("JISX0201 Roman"),
2747 build_string ("JISX0201.1976 (Japanese Roman)"),
2748 build_string ("JISX0201.1976 Japanese Roman"),
2749 build_string ("jisx0201\\.1976"),
2750 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2751 staticpro (&Vcharset_cyrillic_iso8859_5);
2752 Vcharset_cyrillic_iso8859_5 =
2753 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2754 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2755 build_string ("ISO8859-5"),
2756 build_string ("ISO8859-5 (Cyrillic)"),
2757 build_string ("ISO8859-5 (Cyrillic)"),
2758 build_string ("iso8859-5"),
2759 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2760 staticpro (&Vcharset_latin_iso8859_9);
2761 Vcharset_latin_iso8859_9 =
2762 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2763 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2764 build_string ("Latin-5"),
2765 build_string ("ISO8859-9 (Latin-5)"),
2766 build_string ("ISO8859-9 (Latin-5)"),
2767 build_string ("iso8859-9"),
2768 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_jis_x0208);
2771 Vcharset_jis_x0208 =
2772 make_charset (LEADING_BYTE_JIS_X0208,
2774 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2775 build_string ("JIS X0208"),
2776 build_string ("JIS X0208 Common"),
2777 build_string ("JIS X0208 Common part"),
2778 build_string ("jisx0208\\.1990"),
2780 MIN_CHAR_JIS_X0208_1990,
2781 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2782 Qnil, CONVERSION_94x94);
2784 staticpro (&Vcharset_japanese_jisx0208_1978);
2785 Vcharset_japanese_jisx0208_1978 =
2786 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2787 Qjapanese_jisx0208_1978, 94, 2,
2788 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2789 build_string ("JIS X0208:1978"),
2790 build_string ("JIS X0208:1978 (Japanese)"),
2792 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2793 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2800 CONVERSION_IDENTICAL);
/* GB2312 charset: protect Vcharset_chinese_gb2312 with staticpro() and
   assign it the charset built by make_charset().  The strings are
   presumably short name, long name, doc string and registry pattern --
   confirm against make_charset's prototype.  The second string has no
   opening parenthesis, so it must not end in ')'.  */
2801 staticpro (&Vcharset_chinese_gb2312);
2802 Vcharset_chinese_gb2312 =
2803 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2804 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("GB2312"),
2806 build_string ("GB2312"),
2807 build_string ("GB2312 Chinese simplified"),
2808 build_string ("gb2312"),
2809 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* GB 12345 charset: protect Vcharset_chinese_gb12345 with staticpro()
   and assign it the charset built by make_charset().  The strings are
   presumably short name, long name, doc string and registry pattern --
   confirm against make_charset's prototype.  The second string has no
   opening parenthesis, so it must not end in ')'.  */
2810 staticpro (&Vcharset_chinese_gb12345);
2811 Vcharset_chinese_gb12345 =
2812 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2813 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2814 build_string ("G1"),
2815 build_string ("GB 12345"),
2816 build_string ("GB 12345-1990"),
2817 build_string ("GB12345\\(\\.1990\\)?-0"),
2818 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2819 staticpro (&Vcharset_japanese_jisx0208);
2820 Vcharset_japanese_jisx0208 =
2821 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2822 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2823 build_string ("JISX0208"),
2824 build_string ("JIS X0208:1983 (Japanese)"),
2825 build_string ("JIS X0208:1983 Japanese Kanji"),
2826 build_string ("jisx0208\\.1983"),
2833 CONVERSION_IDENTICAL);
2835 staticpro (&Vcharset_japanese_jisx0208_1990);
2836 Vcharset_japanese_jisx0208_1990 =
2837 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2838 Qjapanese_jisx0208_1990, 94, 2,
2839 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2840 build_string ("JISX0208-1990"),
2841 build_string ("JIS X0208:1990 (Japanese)"),
2842 build_string ("JIS X0208:1990 Japanese Kanji"),
2843 build_string ("jisx0208\\.1990"),
2845 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2846 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2847 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2848 Vcharset_jis_x0208 /* Qnil */,
2849 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* KS C 5601 charset: protect Vcharset_korean_ksc5601 with staticpro()
   and assign it the charset built by make_charset().  The strings are
   presumably short name, long name, doc string and registry pattern --
   confirm against make_charset's prototype.  The second string follows
   the "NAME (Language)" shape used by the neighbouring Japanese
   entries, so its parenthesis is closed.  */
2851 staticpro (&Vcharset_korean_ksc5601);
2852 Vcharset_korean_ksc5601 =
2853 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2854 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2855 build_string ("KSC5601"),
2856 build_string ("KSC5601 (Korean)"),
2857 build_string ("KSC5601 Korean Hangul and Hanja"),
2858 build_string ("ksc5601"),
2859 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2860 staticpro (&Vcharset_japanese_jisx0212);
2861 Vcharset_japanese_jisx0212 =
2862 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2863 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2864 build_string ("JISX0212"),
2865 build_string ("JISX0212 (Japanese)"),
2866 build_string ("JISX0212 Japanese Supplement"),
2867 build_string ("jisx0212"),
2868 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Pattern string for one CNS 11643 plane.  N must itself be a string
   literal (the callers below pass "1" and "2"): it is joined to the two
   fixed fragments by adjacent-string-literal concatenation, giving the
   fixed "cns11643" prefix pattern, then N, then a trailing "$".
   The result is handed to build_string() as the fourth string argument
   of make_charset(); presumably that slot is the font-registry regexp
   -- confirm against make_charset's prototype.  */
2870 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2871 staticpro (&Vcharset_chinese_cns11643_1);
2872 Vcharset_chinese_cns11643_1 =
2873 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2874 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2875 build_string ("CNS11643-1"),
2876 build_string ("CNS11643-1 (Chinese traditional)"),
2878 ("CNS 11643 Plane 1 Chinese traditional"),
2879 build_string (CHINESE_CNS_PLANE_RE("1")),
2880 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_chinese_cns11643_2);
2882 Vcharset_chinese_cns11643_2 =
2883 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2884 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("CNS11643-2"),
2886 build_string ("CNS11643-2 (Chinese traditional)"),
2888 ("CNS 11643 Plane 2 Chinese traditional"),
2889 build_string (CHINESE_CNS_PLANE_RE("2")),
2890 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* TCVN 5712 (VSCII-2) charset: protect Vcharset_latin_tcvn5712 with
   staticpro() and assign it the charset built by make_charset().  The
   strings are presumably short name, long name, doc string and registry
   pattern -- confirm against make_charset's prototype.  The year in the
   descriptive text agrees with the ".1993" that the pattern on the
   following line matches.  */
2892 staticpro (&Vcharset_latin_tcvn5712);
2893 Vcharset_latin_tcvn5712 =
2894 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2895 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2896 build_string ("TCVN 5712"),
2897 build_string ("TCVN 5712 (VSCII-2)"),
2898 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
2899 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2900 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2901 staticpro (&Vcharset_latin_viscii_lower);
2902 Vcharset_latin_viscii_lower =
2903 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2904 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2905 build_string ("VISCII lower"),
2906 build_string ("VISCII lower (Vietnamese)"),
2907 build_string ("VISCII lower (Vietnamese)"),
2908 build_string ("MULEVISCII-LOWER"),
2909 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2910 staticpro (&Vcharset_latin_viscii_upper);
2911 Vcharset_latin_viscii_upper =
2912 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2913 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2914 build_string ("VISCII upper"),
2915 build_string ("VISCII upper (Vietnamese)"),
2916 build_string ("VISCII upper (Vietnamese)"),
2917 build_string ("MULEVISCII-UPPER"),
2918 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2919 staticpro (&Vcharset_latin_viscii);
2920 Vcharset_latin_viscii =
2921 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2922 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2923 build_string ("VISCII"),
2924 build_string ("VISCII 1.1 (Vietnamese)"),
2925 build_string ("VISCII 1.1 (Vietnamese)"),
2926 build_string ("VISCII1\\.1"),
2927 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2928 staticpro (&Vcharset_chinese_big5);
2929 Vcharset_chinese_big5 =
2930 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2931 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2932 build_string ("Big5"),
2933 build_string ("Big5"),
2934 build_string ("Big5 Chinese traditional"),
2935 build_string ("big5-0"),
2937 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2938 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2940 staticpro (&Vcharset_chinese_big5_cdp);
2941 Vcharset_chinese_big5_cdp =
2942 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2943 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2944 build_string ("Big5-CDP"),
2945 build_string ("Big5 + CDP extension"),
2946 build_string ("Big5 with CDP extension"),
2947 build_string ("big5\\.cdp-0"),
2948 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2949 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): statement-generating macro for the n-th HANZIKU
   charset.  It calls staticpro() on Vcharset_ideograph_hanziku_<n> and
   assigns that variable the result of make_charset(), passing
   LEADING_BYTE_HANZIKU_<n>, Qideograph_hanziku_<n> and the
   MIN_CHAR_HANZIKU_<n> / MAX_CHAR_HANZIKU_<n> constants.  `n' is
   token-pasted into those identifiers and stringized into the "HZK-n",
   "HANZIKU-n", "... part n" and "hanziku-n$" strings.  The expansion
   already ends in `;'.
   NOTE(review): in this listing the ("hanziku-"#n"$") argument is not
   preceded by a build_string token, unlike the three strings before it
   -- confirm against the pristine source.  */
2951 #define DEF_HANZIKU(n) \
2952 staticpro (&Vcharset_ideograph_hanziku_##n); \
2953 Vcharset_ideograph_hanziku_##n = \
2954 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2955 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2956 build_string ("HZK-"#n), \
2957 build_string ("HANZIKU-"#n), \
2958 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2960 ("hanziku-"#n"$"), \
2961 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2962 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2975 staticpro (&Vcharset_china3_jef);
2976 Vcharset_china3_jef =
2977 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2979 build_string ("JC3"),
2980 build_string ("JEF + CHINA3"),
2981 build_string ("JEF + CHINA3 private characters"),
2982 build_string ("china3jef-0"),
2983 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2984 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2985 staticpro (&Vcharset_ideograph_cbeta);
2986 Vcharset_ideograph_cbeta =
2987 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2988 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2989 build_string ("CB"),
2990 build_string ("CBETA"),
2991 build_string ("CBETA private characters"),
2992 build_string ("cbeta-0"),
2993 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2994 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2995 staticpro (&Vcharset_ideograph_gt);
2996 Vcharset_ideograph_gt =
2997 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2998 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2999 build_string ("GT"),
3000 build_string ("GT"),
3001 build_string ("GT"),
3003 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3004 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): statement-generating macro for the n-th "GT pseudo JIS"
   charset.  It calls staticpro() on Vcharset_ideograph_gt_pj_<n> and
   assigns that variable the result of make_charset(), passing
   LEADING_BYTE_GT_PJ_<n> and Qideograph_gt_pj_<n>.  `n' is token-pasted
   into those identifiers and stringized into the "GT-PJ-n" name, the
   two "... part n" descriptions and the pattern that matches either
   "GTpj-n" or "jisx0208.GT-n" at end of string.  The expansion already
   ends in `;'.
   NOTE(review): in this listing the pattern argument is not preceded by
   a build_string token, unlike the three strings before it -- confirm
   against the pristine source.  */
3005 #define DEF_GT_PJ(n) \
3006 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3007 Vcharset_ideograph_gt_pj_##n = \
3008 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3009 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3010 build_string ("GT-PJ-"#n), \
3011 build_string ("GT (pseudo JIS encoding) part "#n), \
3012 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3014 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3015 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3028 staticpro (&Vcharset_ideograph_daikanwa_2);
3029 Vcharset_ideograph_daikanwa_2 =
3030 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3031 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3032 build_string ("Daikanwa Rev."),
3033 build_string ("Morohashi's Daikanwa Rev."),
3035 ("Daikanwa dictionary (revised version)"),
3036 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3037 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3038 staticpro (&Vcharset_ideograph_daikanwa);
3039 Vcharset_ideograph_daikanwa =
3040 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3041 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3042 build_string ("Daikanwa"),
3043 build_string ("Morohashi's Daikanwa Rev.2"),
3045 ("Daikanwa dictionary (second revised version)"),
3046 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3047 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3048 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3050 staticpro (&Vcharset_ethiopic_ucs);
3051 Vcharset_ethiopic_ucs =
3052 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3053 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3054 build_string ("Ethiopic (UCS)"),
3055 build_string ("Ethiopic (UCS)"),
3056 build_string ("Ethiopic of UCS"),
3057 build_string ("Ethiopic-Unicode"),
3058 Qnil, 0x1200, 0x137F, 0, 0,
3059 Qnil, CONVERSION_IDENTICAL);
3061 staticpro (&Vcharset_chinese_big5_1);
3062 Vcharset_chinese_big5_1 =
3063 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3064 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("Big5"),
3066 build_string ("Big5 (Level-1)"),
3068 ("Big5 Level-1 Chinese traditional"),
3069 build_string ("big5"),
3070 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_chinese_big5_2);
3072 Vcharset_chinese_big5_2 =
3073 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3074 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Big5"),
3076 build_string ("Big5 (Level-2)"),
3078 ("Big5 Level-2 Chinese traditional"),
3079 build_string ("big5"),
3080 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3082 #ifdef ENABLE_COMPOSITE_CHARS
3083 /* #### For simplicity, we put composite chars into a 96x96 charset.
3084 This is going to lead to problems because you can run out of
3085 room, esp. as we don't yet recycle numbers. */
3086 staticpro (&Vcharset_composite);
3087 Vcharset_composite =
3088 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3089 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3090 build_string ("Composite"),
3091 build_string ("Composite characters"),
3092 build_string ("Composite characters"),
3095 /* #### not dumped properly */
3096 composite_char_row_next = 32;
3097 composite_char_col_next = 32;
3099 Vcomposite_char_string2char_hash_table =
3100 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3101 Vcomposite_char_char2string_hash_table =
3102 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3103 staticpro (&Vcomposite_char_string2char_hash_table);
3104 staticpro (&Vcomposite_char_char2string_hash_table);
3105 #endif /* ENABLE_COMPOSITE_CHARS */