1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps strings representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Layout of the 0xA0 entries: 0x80 entries of 1 for ASCII first bytes,
   followed by one entry per leading byte in the range 0x80 - 0x9f. */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for the single 0x80 - 0x8f
   range; they differ only in the last entry (3 versus 2).  The array has
   room for just one of them (0x80 + 16 + 16 = 0xA0), so they are
   presumably build-time alternatives -- confirm which one is active. */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
/* Fifteen entries of 3 (0x90 - 0x9e) followed by 4 for 0x9f. */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of vector V, used as one level of a decoding table.
   V:       the vector to inspect.
   DIM:     remaining nesting depth; the recursive call passes DIM - 1.
   CCS_LEN: maximum number of elements a vector may have at any level.
   NOTE(review): the meaning of the return value is not evident from the
   statements shown here -- confirm against the callers. */
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is treated specially. */
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Elements that are neither nil nor a character need a closer look;
   presumably they are nested vectors, which are checked recursively. */
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is either an integer code point or, in the obsolete form, a list
   of bytes that is folded into one integer; anything else signals
   "Invalid value for coded-charset".  The normalized code point is then
   stored in the decoding table of CCS. */
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
/* The work below is skipped only when CCS is the `ucs' charset and VALUE
   equals the character code itself. */
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J as the new least significant octet. */
243 code_point = (code_point << 8) | j;
/* Replace the list by its integer equivalent. */
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the high bit of each of the four octets. */
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point currently recorded for CHARACTER in CCS;
   presumably it is removed from the decoding table only when one
   exists -- confirm. */
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
/* Store the new mapping from code point to CHARACTER. */
268 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS
   from both tables of CCS: the decoding table (when it is a vector) and
   the encoding table (when it is a char table). */
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
/* Find the code point recorded for CHARACTER so that the matching
   decoding-table entry can be removed. */
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
/* Storing Qunbound marks CHARACTER as having no entry. */
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
353 /* Qchinese_big5_cdp, */
354 Qideograph_hanziku_1,
355 Qideograph_hanziku_2,
356 Qideograph_hanziku_3,
357 Qideograph_hanziku_4,
358 Qideograph_hanziku_5,
359 Qideograph_hanziku_6,
360 Qideograph_hanziku_7,
361 Qideograph_hanziku_8,
362 Qideograph_hanziku_9,
363 Qideograph_hanziku_10,
364 Qideograph_hanziku_11,
365 Qideograph_hanziku_12,
368 Qideograph_daikanwa_2,
388 Lisp_Object Ql2r, Qr2l;
390 Lisp_Object Vcharset_hash_table;
392 /* Composite characters are characters constructed by overstriking two
393 or more regular characters.
395 1) The old Mule implementation involves storing composite characters
396 in a buffer as a tag followed by all of the actual characters
397 used to make up the composite character. I think this is a bad
398 idea; it greatly complicates code that wants to handle strings
399 one character at a time because it has to deal with the possibility
400 of great big ungainly characters. It's much more reasonable to
401 simply store an index into a table of composite characters.
403 2) The current implementation only allows for 16,384 separate
404 composite characters over the lifetime of the XEmacs process.
405 This could become a potential problem if the user
406 edited lots of different files that use composite characters.
407 Due to FSF bogosity, increasing the number of allowable
408 composite characters under Mule would decrease the number
409 of possible faces that can exist. Mule already has shrunk
410 this to 2048, and further shrinkage would become uncomfortable.
411 No such problems exist in XEmacs.
413 Composite characters could be represented as 0x80 C1 C2 C3,
414 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
415 for slightly under 2^20 (one million) composite characters
416 over the XEmacs process lifetime, and you only need to
417 increase the size of a Mule character from 19 to 21 bits.
418 Or you could use 0x80 C1 C2 C3 C4, allowing for about
419 85 million (slightly over 2^26) composite characters. */
422 /************************************************************************/
423 /* Basic Emchar functions */
424 /************************************************************************/
426 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
427 string in STR. Returns the number of bytes stored.
428 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* STR: destination buffer; C: the character to encode.  The branches
   below pick the sequence length from the magnitude of C and emit a
   UTF-8-style multi-byte sequence: a tagged lead byte followed by
   continuation bytes that each carry six payload bits tagged with 0x80. */
432 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 0x7ff: two bytes, lead byte tagged 0xc0. */
447 else if ( c <= 0x7ff )
449 *p++ = (c >> 6) | 0xc0;
450 *p++ = (c & 0x3f) | 0x80;
/* Up to 0xffff: three bytes, lead byte tagged 0xe0. */
452 else if ( c <= 0xffff )
454 *p++ = (c >> 12) | 0xe0;
455 *p++ = ((c >> 6) & 0x3f) | 0x80;
456 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x1fffff: four bytes, lead byte tagged 0xf0. */
458 else if ( c <= 0x1fffff )
460 *p++ = (c >> 18) | 0xf0;
461 *p++ = ((c >> 12) & 0x3f) | 0x80;
462 *p++ = ((c >> 6) & 0x3f) | 0x80;
463 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x3ffffff: five bytes, lead byte tagged 0xf8. */
465 else if ( c <= 0x3ffffff )
467 *p++ = (c >> 24) | 0xf8;
468 *p++ = ((c >> 18) & 0x3f) | 0x80;
469 *p++ = ((c >> 12) & 0x3f) | 0x80;
470 *p++ = ((c >> 6) & 0x3f) | 0x80;
471 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: six bytes, lead byte tagged 0xfc. */
475 *p++ = (c >> 30) | 0xfc;
476 *p++ = ((c >> 24) & 0x3f) | 0x80;
477 *p++ = ((c >> 18) & 0x3f) | 0x80;
478 *p++ = ((c >> 12) & 0x3f) | 0x80;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
/* NOTE(review): the statements from here on encode via charset leading
   bytes instead of the bit-packed form above; presumably they belong to
   a different build configuration -- confirm. */
/* Split C into its charset and position codes. */
483 BREAKUP_CHAR (c, charset, c1, c2);
484 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by an extra prefix byte. */
485 if (LEADING_BYTE_PRIVATE_P (lb))
486 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
488 if (EQ (charset, Vcharset_control_1))
497 /* Return the first character from a Mule-encoded string in STR,
498 assuming it's non-ASCII. Do not call this directly.
499 Use the macro charptr_emchar() instead. */
/* STR points at the first byte of the encoded character.  The first set
   of statements classifies the lead byte by thresholds 0xf8, 0xf0, 0xe0
   and 0xc0 and then folds in six bits per following byte. */
502 non_ascii_charptr_emchar (const Bufbyte *str)
515 else if ( b >= 0xf8 )
520 else if ( b >= 0xf0 )
525 else if ( b >= 0xe0 )
530 else if ( b >= 0xc0 )
/* LEN counts the bytes still to be consumed. */
540 for( ; len > 0; len-- )
/* Shift in the low six bits of the current byte B. */
543 ch = ( ch << 6 ) | ( b & 0x3f );
/* NOTE(review): the statements from here on decode via charset leading
   bytes; presumably they belong to a different build configuration
   than the loop above -- confirm. */
547 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the following byte minus 0x20 is the result. */
550 if (i0 == LEADING_BYTE_CONTROL_1)
551 return (Emchar) (*++str - 0x20);
553 if (LEADING_BYTE_PREFIX_P (i0))
558 charset = CHARSET_BY_LEADING_BYTE (i0);
/* Two-dimensional charsets need a second position code. */
559 if (XCHARSET_DIMENSION (charset) == 2)
562 return MAKE_CHAR (charset, i1, i2);
566 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
567 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields f1, f2, f3.  Both groups of checks below
   end by looking up the charset for the relevant field and accepting CH
   only if that charset has 96 characters per dimension. */
571 non_ascii_valid_char_p (Emchar ch)
575 /* Must have only lowest 19 bits set */
579 f1 = CHAR_FIELD1 (ch);
580 f2 = CHAR_FIELD2 (ch);
581 f3 = CHAR_FIELD3 (ch);
/* First group: f2 must lie in the official or in the private range. */
587 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
588 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
589 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* True when f3 is neither 0x20 nor 0x7F and f2 is not private;
   presumably such characters need no charset lookup -- confirm. */
594 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
595 f2 <= MAX_CHAR_FIELD2_PRIVATE))
599 NOTE: This takes advantage of the fact that
600 FIELD2_TO_OFFICIAL_LEADING_BYTE and
601 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
603 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
604 if (EQ (charset, Qnil))
606 return (XCHARSET_CHARS (charset) == 96);
/* Second group: the same range test applied to f1. */
612 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
613 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
614 f1 > MAX_CHAR_FIELD1_PRIVATE)
616 if (f2 < 0x20 || f3 < 0x20)
619 #ifdef ENABLE_COMPOSITE_CHARS
620 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
622 if (UNBOUNDP (Fgethash (make_int (ch),
623 Vcomposite_char_char2string_hash_table,
628 #endif /* ENABLE_COMPOSITE_CHARS */
/* True when neither f2 nor f3 is 0x20 or 0x7F and f1 is not private. */
630 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
631 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private values of f1 use different leading-byte offsets. */
634 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
636 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
639 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
641 if (EQ (charset, Qnil))
643 return (XCHARSET_CHARS (charset) == 96);
649 /************************************************************************/
650 /* Basic string functions */
651 /************************************************************************/
653 /* Copy the character pointed to by SRC into DST. Do not call this
654 directly. Use the macro charptr_copy_char() instead.
655 Return the number of bytes copied. */
/* SRC: first byte of the character to copy; DST: destination buffer. */
658 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The first byte of the character determines its total length. */
660 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Count down from BYTES, advancing both pointers on each step. */
662 for (i = bytes; i; i--, dst++, src++)
668 /************************************************************************/
669 /* streams of Emchars */
670 /************************************************************************/
672 /* Treat a stream as a stream of Emchars rather than a stream of bytes.
673 The functions below are not meant to be called directly; use
674 the macros in insdel.h. */
/* Finish reading one character from STREAM.  CH is the first byte, which
   the caller has already obtained; the remaining bytes are read from
   STREAM and the assembled byte sequence is decoded with
   charptr_emchar(). */
677 Lstream_get_emchar_1 (Lstream *stream, int ch)
679 Bufbyte str[MAX_EMCHAR_LEN];
680 Bufbyte *strptr = str;
683 str[0] = (Bufbyte) ch;
/* The first byte gives the total length; one byte is already in hand. */
685 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
687 int c = Lstream_getc (stream);
/* A negative value here would mean the stream ended mid-character. */
688 bufpos_checking_assert (c >= 0);
689 *++strptr = (Bufbyte) c;
691 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write the
   resulting bytes to STREAM.  Returns whatever Lstream_write() returns. */
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and hand the
   resulting bytes to Lstream_unread() for STREAM. */
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
/* Mark method for charset objects (named as such in the "charset"
   DEFINE_LRECORD_IMPLEMENTATION).  Calls mark_object() on the
   Lisp_Object slots of the charset OBJ listed below, presumably so that
   the garbage collector keeps them alive. */
716 mark_charset (Lisp_Object obj)
718 Lisp_Charset *cs = XCHARSET (obj);
720 mark_object (cs->short_name);
721 mark_object (cs->long_name);
722 mark_object (cs->doc_string);
723 mark_object (cs->registry);
724 mark_object (cs->ccl_program);
726 mark_object (cs->decoding_table);
727 mark_object (cs->mother);
/* Print method for charset objects (named as such in the "charset"
   DEFINE_LRECORD_IMPLEMENTATION).  Writes a "#<charset ...>" description
   of OBJ to PRINTCHARFUN: name, short name, long name, doc string, a
   formatted summary of the numeric properties, the registry and the
   object uid. */
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 Lisp_Charset *cs = XCHARSET (obj);
/* Charsets have no readable syntax, so this error path exists;
   presumably it is taken only when readable output was requested. */
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): unbounded sprintf into buf -- confirm that buf is large
   enough for the widest possible expansion of this format. */
751 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
753 CHARSET_DIMENSION (cs),
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
/* Lists, by offset within Lisp_Charset, the members that hold a
   Lisp_Object (each entry is tagged XD_LISP_OBJECT). */
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
779 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
780 mark_charset, print_charset, 0, 0, 0,
784 /* Make a new charset. */
785 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, copy the arguments into its slots,
   register the new object in the lookup tables of `chlook' (by ISO 2022
   attributes and by leading byte) and enter it under NAME in
   Vcharset_hash_table. */
787 make_charset (Charset_ID id, Lisp_Object name,
788 unsigned short chars, unsigned char dimension,
789 unsigned char columns, unsigned char graphic,
790 Bufbyte final, unsigned char direction, Lisp_Object short_name,
791 Lisp_Object long_name, Lisp_Object doc,
793 Lisp_Object decoding_table,
794 Emchar min_code, Emchar max_code,
795 Emchar code_offset, unsigned char byte_offset,
796 Lisp_Object mother, unsigned char conversion)
799 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
/* OBJ is the Lisp_Object wrapper for the freshly allocated record. */
803 XSETCHARSET (obj, cs);
805 CHARSET_ID (cs) = id;
806 CHARSET_NAME (cs) = name;
807 CHARSET_SHORT_NAME (cs) = short_name;
808 CHARSET_LONG_NAME (cs) = long_name;
809 CHARSET_CHARS (cs) = chars;
810 CHARSET_DIMENSION (cs) = dimension;
811 CHARSET_DIRECTION (cs) = direction;
812 CHARSET_COLUMNS (cs) = columns;
813 CHARSET_GRAPHIC (cs) = graphic;
814 CHARSET_FINAL (cs) = final;
815 CHARSET_DOC_STRING (cs) = doc;
816 CHARSET_REGISTRY (cs) = reg;
/* These two slots always start out as nil. */
817 CHARSET_CCL_PROGRAM (cs) = Qnil;
818 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the slot is initialized to Qunbound rather than from the
   decoding_table argument -- confirm that this is intentional. */
820 CHARSET_DECODING_TABLE(cs) = Qunbound;
821 CHARSET_MIN_CODE (cs) = min_code;
822 CHARSET_MAX_CODE (cs) = max_code;
823 CHARSET_CODE_OFFSET (cs) = code_offset;
824 CHARSET_BYTE_OFFSET (cs) = byte_offset;
825 CHARSET_MOTHER (cs) = mother;
826 CHARSET_CONVERSION (cs) = conversion;
/* Bytes per character in the internal representation: 1 for ASCII,
   otherwise the dimension plus 1 or plus 2; presumably the larger value
   is for charsets whose leading byte needs a prefix -- confirm. */
830 if (id == LEADING_BYTE_ASCII)
831 CHARSET_REP_BYTES (cs) = 1;
833 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
840 /* some charsets do not have final characters. This includes
841 ASCII, Control-1, Composite, and the two faux private
843 unsigned char iso2022_type
844 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
846 if (code_offset == 0)
848 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
849 chlook->charset_by_attributes[iso2022_type][final] = obj;
853 (chlook->charset_by_attributes[iso2022_type][final][direction]));
854 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
858 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
859 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
861 /* Some charsets are "faux" and don't have names or really exist at
862 all except in the leading-byte table. */
864 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Each counter kept in `chlook' is compared with its
   upper limit before it is used and then post-incremented.  An error
   "No more character sets free for this dimension" is reported with
   DIMENSION as its datum; presumably this happens when the relevant
   counter is exhausted -- confirm. */
869 get_unallocated_leading_byte (int dimension)
/* Single shared counter; presumably used in builds that do not split
   the private range by dimension -- confirm. */
874 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
877 lb = chlook->next_allocated_leading_byte++;
/* Counter for the 1-byte private leading bytes. */
881 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
884 lb = chlook->next_allocated_1_byte_leading_byte++;
/* Counter for the 2-byte private leading bytes. */
888 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
891 lb = chlook->next_allocated_2_byte_leading_byte++;
897 ("No more character sets free for this dimension",
898 make_int (dimension));
904 /* Number of Big5 characters which have the same code in 1st byte. */
/* That is, the number of second-byte values per first byte:
   0xFF - 0xA1 = 94 values (0xA1 - 0xFE) plus 0x7F - 0x40 = 63 values
   (0x40 - 0x7E), 157 in total. */
906 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up CODE_POINT in the decoding table of the coded charset CCS and
   return the character found there.  When that lookup does not apply,
   the mother charset of CCS is consulted, provided the conversion
   between the two is CONVERSION_IDENTICAL. */
909 decode_defined_char (Lisp_Object ccs, int code_point)
911 int dim = XCHARSET_DIMENSION (ccs);
912 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Descend one table level, indexed by the octet of CODE_POINT selected
   by the current value of DIM. */
920 = get_ccs_octet_table (decoding_table, ccs,
921 (code_point >> (dim * 8)) & 255);
/* A character at this point is the result of the lookup. */
923 if (CHARP (decoding_table))
924 return XCHAR (decoding_table);
927 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
929 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
/* The `ucs' mother charset is decoded through DECODE_CHAR; any other
   mother through a recursive call. */
931 if ( EQ (mother, Vcharset_ucs) )
932 return DECODE_CHAR (mother, code_point);
934 return decode_defined_char (mother, code_point);
/* Compute the character for CODE_POINT in CHARSET arithmetically, from
   the properties of the charset (mother, conversion, final byte, code
   and byte offsets) rather than from a table. */
941 decode_builtin_char (Lisp_Object charset, int code_point)
943 Lisp_Object mother = XCHARSET_MOTHER (charset);
/* Charsets with a mother and a non-zero max code: convert CODE_POINT to
   a linear code and decode that in the mother charset. */
946 if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
948 int code = code_point;
950 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
/* Two octets: row and cell.  The row thresholds below select which
   block of rows the code falls into. */
952 int row = code_point >> 8;
953 int cell = code_point & 255;
957 else if (row < 16 + 32 + 30)
958 code = (row - (16 + 32)) * 94 + cell - 33;
959 else if (row < 18 + 32 + 30)
961 else if (row < 18 + 32 + 60)
962 code = (row - (18 + 32)) * 94 + cell - 33;
964 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
/* Three octets: plane, row and cell. */
966 int plane = code_point >> 16;
967 int row = (code_point >> 8) & 255;
968 int cell = code_point & 255;
972 else if (row < 16 + 32 + 30)
974 = (plane - 33) * 94 * 60
975 + (row - (16 + 32)) * 94
977 else if (row < 18 + 32 + 30)
979 else if (row < 18 + 32 + 60)
981 = (plane - 33) * 94 * 60
982 + (row - (18 + 32)) * 94
/* Continue in the mother charset with the charset's code offset added. */
986 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* Big5: map the two-byte code onto one of the two 94x94 charsets
   chinese-big5-1 and chinese-big5-2. */
989 else if (EQ (charset, Vcharset_chinese_big5))
991 int c1 = code_point >> 8;
992 int c2 = code_point & 0xFF;
/* First byte in 0xA1 - 0xFE, second byte in 0x40 - 0x7E or 0xA1 - 0xFE. */
995 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
997 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
998 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Linear index I: subtracting 0x62 instead of 0x40 makes the upper
   second-byte range (from 0xA1) follow directly after the 63 values of
   the lower range. */
1000 I = (c1 - 0xA1) * BIG5_SAME_ROW
1001 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1005 charset = Vcharset_chinese_big5_1;
1009 charset = Vcharset_chinese_big5_2;
/* For chinese-big5-2 the index is rebased to first byte 0xC9. */
1010 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express I as a row/cell pair in a 94x94 grid starting at 33. */
1012 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Charsets with a final byte of '0' or above: the character is placed
   in a block selected by the final byte. */
1016 if ((final = XCHARSET_FINAL (charset)) >= '0')
1018 if (XCHARSET_DIMENSION (charset) == 1)
1020 switch (XCHARSET_CHARS (charset))
1024 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1027 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1035 switch (XCHARSET_CHARS (charset))
1038 return MIN_CHAR_94x94
1039 + (final - '0') * 94 * 94
1040 + (((code_point >> 8) & 0x7F) - 33) * 94
1041 + ((code_point & 0x7F) - 33);
1043 return MIN_CHAR_96x96
1044 + (final - '0') * 96 * 96
1045 + (((code_point >> 8) & 0x7F) - 32) * 96
1046 + ((code_point & 0x7F) - 32);
/* Otherwise, with a non-zero max code: linearize CODE_POINT using the
   byte offset and chars-per-dimension, add the code offset, and test the
   result against the [min code, max code] range of the charset. */
1053 else if (XCHARSET_MAX_CODE (charset))
1056 = (XCHARSET_DIMENSION (charset) == 1
1058 code_point - XCHARSET_BYTE_OFFSET (charset)
1060 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1061 * XCHARSET_CHARS (charset)
1062 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1063 + XCHARSET_CODE_OFFSET (charset);
1064 if ((cid < XCHARSET_MIN_CODE (charset))
1065 || (XCHARSET_MAX_CODE (charset) < cid))
/* Return the code point of character CH in CHARSET.  The encoding table
   of the charset is consulted first; otherwise the code point is
   computed from the charset properties (mother, code range, conversion,
   chars per dimension, dimension, final byte). */
1074 charset_code_point (Lisp_Object charset, Emchar ch)
1076 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* Table lookup: only an integer entry counts as a hit. */
1079 if ( CHAR_TABLEP (encoding_table)
1080 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1085 Lisp_Object mother = XCHARSET_MOTHER (charset);
1086 int min = XCHARSET_MIN_CODE (charset);
1087 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, start from the code point CH has there. */
1090 if ( CHARSETP (mother) )
1091 code = charset_code_point (mother, ch);
/* Proceed when the code lies in [min, max], or when the charset has a
   mother and no max code. */
1094 if ( ((max == 0) && CHARSETP (mother)) ||
1095 ((min <= code) && (code <= max)) )
/* D is the offset of the code within this charset. */
1097 int d = code - XCHARSET_CODE_OFFSET (charset);
1099 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1102 int cell = d % 94 + 33;
1108 return (row << 8) | cell;
1110 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1112 int plane = d / (94 * 60) + 33;
1113 int row = (d % (94 * 60)) / 94;
1114 int cell = d % 94 + 33;
1120 return (plane << 16) | (row << 8) | cell;
/* 94-character sets: one base-94 digit per dimension, each biased by
   33 and packed one digit per octet. */
1122 else if (XCHARSET_CHARS (charset) == 94)
1124 if (XCHARSET_DIMENSION (charset) == 1)
1126 else if (XCHARSET_DIMENSION (charset) == 2)
1127 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1128 else if (XCHARSET_DIMENSION (charset) == 3)
1130 ( (d / (94 * 94) + 33) << 16)
1131 | ((d / 94 % 94 + 33) << 8)
1133 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1135 ( (d / (94 * 94 * 94) + 33) << 24)
1136 | ((d / (94 * 94) % 94 + 33) << 16)
1137 | ((d / 94 % 94 + 33) << 8)
/* 96-character sets: the same scheme with base 96 and bias 32. */
1140 else if (XCHARSET_CHARS (charset) == 96)
1142 if (XCHARSET_DIMENSION (charset) == 1)
1144 else if (XCHARSET_DIMENSION (charset) == 2)
1145 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1146 else if (XCHARSET_DIMENSION (charset) == 3)
1148 ( (d / (96 * 96) + 32) << 16)
1149 | ((d / 96 % 96 + 32) << 8)
1151 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1153 ( (d / (96 * 96 * 96) + 32) << 24)
1154 | ((d / (96 * 96) % 96 + 32) << 16)
1155 | ((d / 96 % 96 + 32) << 8)
/* Any other chars-per-dimension value: the plain offset is the result. */
1159 return code - XCHARSET_CODE_OFFSET (charset);
/* Charsets whose code offset is 0 or equal to their min code: locate CH
   inside the block that the final byte of the charset selects. */
1161 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1162 (XCHARSET_CODE_OFFSET (charset)
1163 == XCHARSET_MIN_CODE (charset)) )
1167 if (XCHARSET_DIMENSION (charset) == 1)
1169 if (XCHARSET_CHARS (charset) == 94)
1171 if (((d = ch - (MIN_CHAR_94
1172 + (XCHARSET_FINAL (charset) - '0') * 94))
1177 else if (XCHARSET_CHARS (charset) == 96)
1179 if (((d = ch - (MIN_CHAR_96
1180 + (XCHARSET_FINAL (charset) - '0') * 96))
1188 else if (XCHARSET_DIMENSION (charset) == 2)
1190 if (XCHARSET_CHARS (charset) == 94)
1192 if (((d = ch - (MIN_CHAR_94x94
1194 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1197 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1199 else if (XCHARSET_CHARS (charset) == 96)
1201 if (((d = ch - (MIN_CHAR_96x96
1203 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1206 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Classify character C by its numeric range.  The matching built-in
   charset is stored through CHARSET and the code point of C within that
   charset is returned.  Ranges without a more specific charset fall back
   to Vcharset_ucs. */
1217 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1219 if (c <= MAX_CHAR_BASIC_LATIN)
1221 *charset = Vcharset_ascii;
1226 *charset = Vcharset_control_1;
1231 *charset = Vcharset_latin_iso8859_1;
1235 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1237 *charset = Vcharset_hebrew_iso8859_8;
1238 return c - MIN_CHAR_HEBREW + 0x20;
1241 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1243 *charset = Vcharset_thai_tis620;
1244 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns the result of list2 () and never
   stores into *charset, whereas every other branch stores the charset
   and returns an integer code point.  Presumably the branch is compiled
   out in practice -- confirm, and fix it if it can ever be reached. */
1247 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1248 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1250 return list2 (Vcharset_katakana_jisx0201,
1251 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1254 else if (c <= MAX_CHAR_BMP)
1256 *charset = Vcharset_ucs_bmp;
1259 else if (c <= MAX_CHAR_SMP)
1261 *charset = Vcharset_ucs_smp;
1262 return c - MIN_CHAR_SMP;
1264 else if (c <= MAX_CHAR_SIP)
1266 *charset = Vcharset_ucs_sip;
1267 return c - MIN_CHAR_SIP;
1269 else if (c < MIN_CHAR_DAIKANWA)
1271 *charset = Vcharset_ucs;
1274 else if (c <= MAX_CHAR_DAIKANWA)
1276 *charset = Vcharset_ideograph_daikanwa;
1277 return c - MIN_CHAR_DAIKANWA;
1279 else if (c < MIN_CHAR_94)
1281 *charset = Vcharset_ucs;
/* The four ranges below each hold consecutive blocks, one per final
   byte counted from '0'.  The charset is looked up by its attributes;
   when no such charset is registered the result falls back to
   Vcharset_ucs. */
1284 else if (c <= MAX_CHAR_94)
1286 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1287 ((c - MIN_CHAR_94) / 94) + '0',
1288 CHARSET_LEFT_TO_RIGHT);
1289 if (!NILP (*charset))
1290 return ((c - MIN_CHAR_94) % 94) + 33;
1293 *charset = Vcharset_ucs;
1297 else if (c <= MAX_CHAR_96)
1299 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1300 ((c - MIN_CHAR_96) / 96) + '0',
1301 CHARSET_LEFT_TO_RIGHT);
1302 if (!NILP (*charset))
1303 return ((c - MIN_CHAR_96) % 96) + 32;
1306 *charset = Vcharset_ucs;
1310 else if (c <= MAX_CHAR_94x94)
1313 = CHARSET_BY_ATTRIBUTES (94, 2,
1314 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1315 CHARSET_LEFT_TO_RIGHT);
1316 if (!NILP (*charset))
1317 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1318 | (((c - MIN_CHAR_94x94) % 94) + 33);
1321 *charset = Vcharset_ucs;
1325 else if (c <= MAX_CHAR_96x96)
1328 = CHARSET_BY_ATTRIBUTES (96, 2,
1329 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1330 CHARSET_LEFT_TO_RIGHT);
1331 if (!NILP (*charset))
/* The shift binds tighter than the bitwise OR, so this is
   (row << 8) | cell, just like the 94x94 case. */
1332 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1333 | (((c - MIN_CHAR_96x96) % 96) + 32);
1336 *charset = Vcharset_ucs;
1342 *charset = Vcharset_ucs;
1347 Lisp_Object Vdefault_coded_charset_priority_list;
1351 /************************************************************************/
1352 /* Basic charset Lisp functions */
1353 /************************************************************************/
1355 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1356 Return non-nil if OBJECT is a charset.
1360 return CHARSETP (object) ? Qt : Qnil;
1363 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 If CHARSET-OR-NAME is a charset object, it is simply returned.
1366 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1367 nil is returned. Otherwise the associated charset object is returned.
1371 if (CHARSETP (charset_or_name))
1372 return charset_or_name;
1374 CHECK_SYMBOL (charset_or_name);
1375 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1378 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1379 Retrieve the charset of the given name.
1380 Same as `find-charset' except an error is signalled if there is no such
1381 charset instead of returning nil.
1385 Lisp_Object charset = Ffind_charset (name);
1388 signal_simple_error ("No such charset", name);
1392 /* We store the charsets in hash tables with the names as the key and the
1393 actual charset object as the value. Occasionally we need to use them
1394 in a list format. These routines provide us with that. */
1395 struct charset_list_closure
1397 Lisp_Object *charset_list;
/* Mapping callback handed to elisp_maphash() by Fcharset_list.  KEY and
   VALUE are one entry of the table being mapped; CHARSET_LIST_CLOSURE
   points at a struct charset_list_closure holding the address of the
   list under construction.  KEY is consed onto the front of that list. */
1401 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1402 void *charset_list_closure)
1404 /* This function can GC */
1405 struct charset_list_closure *chcl =
1406 (struct charset_list_closure*) charset_list_closure;
1407 Lisp_Object *charset_list = chcl->charset_list;
1409 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1413 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1414 Return a list of the names of all defined charsets.
1418 Lisp_Object charset_list = Qnil;
1419 struct gcpro gcpro1;
1420 struct charset_list_closure charset_list_closure;
1422 GCPRO1 (charset_list);
1423 charset_list_closure.charset_list = &charset_list;
1424 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1425 &charset_list_closure);
1428 return charset_list;
1431 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1432 Return the name of charset CHARSET.
1436 return XCHARSET_NAME (Fget_charset (charset));
1439 /* #### SJT Should generic properties be allowed? */
/* Implementation outline (some lines, including preprocessor
   conditionals, are not visible in this excerpt):
   NAME must be a symbol that does not already name a charset, and
   DOC-STRING must be a string or nil.  PROPS is then walked pair by
   pair, each recognized keyword being validated and stored in a local.
   After the walk, 'final is required and, for dimension 2, limited to
   0x5F; the (CHARS, DIMENSION, FINAL) combination must be unused in both
   directions.  A leading byte is allocated, missing strings are
   defaulted (doc string and registry to "", short name to the symbol's
   name, long name to the doc string), and the charset object is built
   with make_charset; a 'ccl-program, if given, is stored afterwards.

   NOTE(review): the 'long-name test is a plain `if', not `else if', so
   it starts a new chain.  A 'short-name pair handled by the first `if'
   therefore seems to run on into that chain's final "Unrecognized
   property" branch -- confirm against the complete source.

   NOTE(review): the docstring names the properties 'code-min and
   'code-max, but the code tests Qmin_code and Qmax_code, which
   syms_of_mule_charset defines as "min-code" and "max-code". */
1440 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1441 Define a new character set.
1442 This function is for use with Mule support.
1443 NAME is a symbol, the name by which the character set is normally referred.
1444 DOC-STRING is a string describing the character set.
1445 PROPS is a property list, describing the specific nature of the
1446 character set. Recognized properties are:
1448 'short-name Short version of the charset name (ex: Latin-1)
1449 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1450 'registry A regular expression matching the font registry field for
1452 'dimension Number of octets used to index a character in this charset.
1453 Either 1 or 2. Defaults to 1.
1454 If UTF-2000 feature is enabled, 3 or 4 are also available.
1455 'columns Number of columns used to display a character in this charset.
1456 Only used in TTY mode. (Under X, the actual width of a
1457 character can be derived from the font used to display the
1458 characters.) If unspecified, defaults to the dimension
1459 (this is almost always the correct value).
1460 'chars Number of characters in each dimension (94 or 96).
1461 Defaults to 94. Note that if the dimension is 2, the
1462 character set thus described is 94x94 or 96x96.
1463 If UTF-2000 feature is enabled, 128 or 256 are also available.
1464 'final Final byte of ISO 2022 escape sequence. Must be
1465 supplied. Each combination of (DIMENSION, CHARS) defines a
1466 separate namespace for final bytes. Note that ISO
1467 2022 restricts the final byte to the range
1468 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1469 dimension == 2. Note also that final bytes in the range
1470 0x30 - 0x3F are reserved for user-defined (not official)
1472 'graphic 0 (use left half of font on output) or 1 (use right half
1473 of font on output). Defaults to 0. For example, for
1474 a font whose registry is ISO8859-1, the left half
1475 (octets 0x20 - 0x7F) is the `ascii' character set, while
1476 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1477 character set. With 'graphic set to 0, the octets
1478 will have their high bit cleared; with it set to 1,
1479 the octets will have their high bit set.
1480 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1482 'ccl-program A compiled CCL program used to convert a character in
1483 this charset into an index into the font. This is in
1484 addition to the 'graphic property. The CCL program
1485 is passed the octets of the character, with the high
1486 bit cleared and set depending upon whether the value
1487 of the 'graphic property is 0 or 1.
1488 'mother [UTF-2000 only] Base coded-charset.
1489 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1490 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1491 'code-offset [UTF-2000 only] Offset for a code-point of a base
1493 'conversion [UTF-2000 only] Conversion for a code-point of a base
1494 coded-charset (94x60 or 94x94x60).
1496 (name, doc_string, props))
1498 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1499 int direction = CHARSET_LEFT_TO_RIGHT;
1500 Lisp_Object registry = Qnil;
1501 Lisp_Object charset;
1502 Lisp_Object ccl_program = Qnil;
1503 Lisp_Object short_name = Qnil, long_name = Qnil;
1504 Lisp_Object mother = Qnil;
1505 int min_code = 0, max_code = 0, code_offset = 0;
1506 int byte_offset = -1;
1509 CHECK_SYMBOL (name);
1510 if (!NILP (doc_string))
1511 CHECK_STRING (doc_string);
1513 charset = Ffind_charset (name);
1514 if (!NILP (charset))
1515 signal_simple_error ("Cannot redefine existing charset", name);
1518 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1520 if (EQ (keyword, Qshort_name))
1522 CHECK_STRING (value);
1526 if (EQ (keyword, Qlong_name))
1528 CHECK_STRING (value);
1532 else if (EQ (keyword, Qdimension))
1535 dimension = XINT (value);
1536 if (dimension < 1 ||
1543 signal_simple_error ("Invalid value for 'dimension", value);
1546 else if (EQ (keyword, Qchars))
1549 chars = XINT (value);
1550 if (chars != 94 && chars != 96
1552 && chars != 128 && chars != 256
1555 signal_simple_error ("Invalid value for 'chars", value);
1558 else if (EQ (keyword, Qcolumns))
1561 columns = XINT (value);
1562 if (columns != 1 && columns != 2)
1563 signal_simple_error ("Invalid value for 'columns", value);
1566 else if (EQ (keyword, Qgraphic))
1569 graphic = XINT (value);
1577 signal_simple_error ("Invalid value for 'graphic", value);
1580 else if (EQ (keyword, Qregistry))
1582 CHECK_STRING (value);
1586 else if (EQ (keyword, Qdirection))
1588 if (EQ (value, Ql2r))
1589 direction = CHARSET_LEFT_TO_RIGHT;
1590 else if (EQ (value, Qr2l))
1591 direction = CHARSET_RIGHT_TO_LEFT;
1593 signal_simple_error ("Invalid value for 'direction", value);
1596 else if (EQ (keyword, Qfinal))
1598 CHECK_CHAR_COERCE_INT (value);
1599 final = XCHAR (value);
1600 if (final < '0' || final > '~')
1601 signal_simple_error ("Invalid value for 'final", value);
1605 else if (EQ (keyword, Qmother))
1607 mother = Fget_charset (value);
1610 else if (EQ (keyword, Qmin_code))
1613 min_code = XUINT (value);
1616 else if (EQ (keyword, Qmax_code))
1619 max_code = XUINT (value);
1622 else if (EQ (keyword, Qcode_offset))
1625 code_offset = XUINT (value);
1628 else if (EQ (keyword, Qconversion))
1630 if (EQ (value, Q94x60))
1631 conversion = CONVERSION_94x60;
1632 else if (EQ (value, Q94x94x60))
1633 conversion = CONVERSION_94x94x60;
1635 signal_simple_error ("Unrecognized conversion", value);
1639 else if (EQ (keyword, Qccl_program))
1641 struct ccl_program test_ccl;
1643 if (setup_ccl_program (&test_ccl, value) < 0)
1644 signal_simple_error ("Invalid value for 'ccl-program", value);
1645 ccl_program = value;
1649 signal_simple_error ("Unrecognized property", keyword);
1655 error ("'final must be specified");
1657 if (dimension == 2 && final > 0x5F)
1659 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1662 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1663 CHARSET_LEFT_TO_RIGHT)) ||
1664 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1665 CHARSET_RIGHT_TO_LEFT)))
1667 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1669 id = get_unallocated_leading_byte (dimension);
1671 if (NILP (doc_string))
1672 doc_string = build_string ("");
1674 if (NILP (registry))
1675 registry = build_string ("");
1677 if (NILP (short_name))
1678 XSETSTRING (short_name, XSYMBOL (name)->name);
1680 if (NILP (long_name))
1681 long_name = doc_string;
1684 columns = dimension;
1686 if (byte_offset < 0)
1690 else if (chars == 96)
1696 charset = make_charset (id, name, chars, dimension, columns, graphic,
1697 final, direction, short_name, long_name,
1698 doc_string, registry,
1699 Qnil, min_code, max_code, code_offset, byte_offset,
1700 mother, conversion);
1701 if (!NILP (ccl_program))
1702 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Implementation outline: CHARSET must not already have a
   reverse-direction partner and NEW-NAME must be an unused symbol.  The
   new charset copies chars, dimension, columns, graphic, final, doc
   string, short/long names and registry from CHARSET, gets a freshly
   allocated leading byte and the opposite direction, and the two
   charsets are then linked through their reverse-direction slots.
   NOTE(review): two alternative tails for the make_charset argument list
   are visible (values copied from CHARSET versus `Qnil, 0, 0, 0, 0, Qnil,
   0'); presumably a preprocessor conditional, not visible in this
   excerpt, selects one of them. */
1706 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1708 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1709 NEW-NAME is the name of the new charset. Return the new charset.
1711 (charset, new_name))
1713 Lisp_Object new_charset = Qnil;
1714 int id, chars, dimension, columns, graphic, final;
1716 Lisp_Object registry, doc_string, short_name, long_name;
1719 charset = Fget_charset (charset);
1720 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1721 signal_simple_error ("Charset already has reverse-direction charset",
1724 CHECK_SYMBOL (new_name);
1725 if (!NILP (Ffind_charset (new_name)))
1726 signal_simple_error ("Cannot redefine existing charset", new_name);
1728 cs = XCHARSET (charset);
1730 chars = CHARSET_CHARS (cs);
1731 dimension = CHARSET_DIMENSION (cs);
1732 columns = CHARSET_COLUMNS (cs);
1733 id = get_unallocated_leading_byte (dimension);
1735 graphic = CHARSET_GRAPHIC (cs);
1736 final = CHARSET_FINAL (cs);
1737 direction = CHARSET_RIGHT_TO_LEFT;
1738 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1739 direction = CHARSET_LEFT_TO_RIGHT;
1740 doc_string = CHARSET_DOC_STRING (cs);
1741 short_name = CHARSET_SHORT_NAME (cs);
1742 long_name = CHARSET_LONG_NAME (cs);
1743 registry = CHARSET_REGISTRY (cs);
1745 new_charset = make_charset (id, new_name, chars, dimension, columns,
1746 graphic, final, direction, short_name, long_name,
1747 doc_string, registry,
1749 CHARSET_DECODING_TABLE(cs),
1750 CHARSET_MIN_CODE(cs),
1751 CHARSET_MAX_CODE(cs),
1752 CHARSET_CODE_OFFSET(cs),
1753 CHARSET_BYTE_OFFSET(cs),
1755 CHARSET_CONVERSION (cs)
1757 Qnil, 0, 0, 0, 0, Qnil, 0
1761 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1762 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* ALIAS must be a symbol and CHARSET is resolved with Fget_charset.  The
   alias is entered into Vcharset_hash_table as a further key for the
   same charset object, and the value of Fputhash is returned.
   NOTE(review): no check is visible that ALIAS is not already a key of
   the table (for instance the name of a real charset) -- confirm that
   replacing an existing entry is acceptable. */
1767 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1768 Define symbol ALIAS as an alias for CHARSET.
1772 CHECK_SYMBOL (alias);
1773 charset = Fget_charset (charset);
1774 return Fputhash (alias, charset, Vcharset_hash_table);
1777 /* #### Reverse direction charsets not yet implemented. */
/* Resolves CHARSET with Fget_charset and returns the content of its
   reverse-direction slot unchanged.
   NOTE(review): no DEFSUBR (Fcharset_reverse_direction_charset) is visible
   in syms_of_mule_charset (only a commented-out DEFSUBR for
   Freverse_direction_charset) -- confirm whether this primitive is
   actually registered. */
1779 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1781 Return the reverse-direction charset parallel to CHARSET, if any.
1782 This is the charset with the same properties (in particular, the same
1783 dimension, number of characters per dimension, and final byte) as
1784 CHARSET but whose characters are displayed in the opposite direction.
1788 charset = Fget_charset (charset);
1789 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Implementation notes: DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL
   within '0'..'~' (and at most 0x5F when DIMENSION is 2), and DIRECTION
   one of l2r, r2l or nil.  The lookup goes through CHARSET_BY_ATTRIBUTES.
   NOTE(review): the docstring says a charset is returned, but the visible
   return statement yields XCHARSET_NAME (obj), i.e. the charset's name;
   the handling of a failed lookup is not visible in this excerpt.
   NOTE(review): the visible check in make-charset also admits CHARS
   values of 128 and 256 (possibly only under a conditional), which this
   function rejects -- confirm that this is intended. */
1793 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1794 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1795 If DIRECTION is omitted, both directions will be checked (left-to-right
1796 will be returned if character sets exist for both directions).
1798 (dimension, chars, final, direction))
1800 int dm, ch, fi, di = -1;
1801 Lisp_Object obj = Qnil;
1803 CHECK_INT (dimension);
1804 dm = XINT (dimension);
1805 if (dm < 1 || dm > 2)
1806 signal_simple_error ("Invalid value for DIMENSION", dimension);
1810 if (ch != 94 && ch != 96)
1811 signal_simple_error ("Invalid value for CHARS", chars);
1813 CHECK_CHAR_COERCE_INT (final);
1815 if (fi < '0' || fi > '~')
1816 signal_simple_error ("Invalid value for FINAL", final);
1818 if (EQ (direction, Ql2r))
1819 di = CHARSET_LEFT_TO_RIGHT;
1820 else if (EQ (direction, Qr2l))
1821 di = CHARSET_RIGHT_TO_LEFT;
1822 else if (!NILP (direction))
1823 signal_simple_error ("Invalid value for DIRECTION", direction);
1825 if (dm == 2 && fi > 0x5F)
1827 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1831 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1833 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1836 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1839 return XCHARSET_NAME (obj);
/* Resolves CHARSET with Fget_charset and returns its short-name slot. */
1843 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1844 Return short name of CHARSET.
1848 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Resolves CHARSET with Fget_charset and returns its long-name slot. */
1851 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1852 Return long name of CHARSET.
1856 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Resolves CHARSET with Fget_charset and returns its doc-string slot;
   the "description" of a charset is that doc string. */
1859 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1860 Return description of CHARSET.
1864 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolves CHARSET with Fget_charset and returns its dimension wrapped
   as a Lisp integer. */
1867 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1868 Return dimension of CHARSET.
1872 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Implementation notes: CHARSET is resolved first, then PROP is checked
   to be a symbol and compared with EQ against each known property
   symbol.  Integer slots are wrapped with make_int; 'final yields nil
   when the stored final byte is 0; 'reverse-direction-charset is
   reported as a charset name when the slot holds a charset, otherwise
   the slot value itself is returned.  An unknown PROP signals an error.
   NOTE(review): the docstring promises every property accepted by
   `make-charset', but no branch for 'code-offset or 'conversion is
   visible in this excerpt -- confirm against the complete source. */
1875 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1876 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1877 Recognized properties are those listed in `make-charset', as well as
1878 'name and 'doc-string.
1884 charset = Fget_charset (charset);
1885 cs = XCHARSET (charset);
1887 CHECK_SYMBOL (prop);
1888 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1889 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1890 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1891 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1892 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1893 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1894 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1895 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1896 Qnil : make_char (CHARSET_FINAL (cs));
1897 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1898 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1899 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1900 if (EQ (prop, Qdirection))
1901 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1902 if (EQ (prop, Qreverse_direction_charset))
1904 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1905 /* #### Is this translation OK? If so, error checking sufficient? */
1906 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1909 if (EQ (prop, Qmother))
1910 return CHARSET_MOTHER (cs);
1911 if (EQ (prop, Qmin_code))
1912 return make_int (CHARSET_MIN_CODE (cs));
1913 if (EQ (prop, Qmax_code))
1914 return make_int (CHARSET_MAX_CODE (cs));
1916 signal_simple_error ("Unrecognized charset property name", prop);
1917 return Qnil; /* not reached */
/* The identification number reported here is the charset's leading byte
   (XCHARSET_LEADING_BYTE), wrapped as a Lisp integer. */
1920 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1921 Return charset identification number of CHARSET.
1925 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1928 /* #### We need to figure out which properties we really want to
/* CCL-PROGRAM is validated by running setup_ccl_program on a scratch
   struct ccl_program; a negative result signals an error.  What gets
   stored in the charset is the original Lisp value, not the scratch
   struct.  The same validation is applied to the 'ccl-program property
   in Fmake_charset. */
1931 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1932 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1934 (charset, ccl_program))
1936 struct ccl_program test_ccl;
1938 charset = Fget_charset (charset);
1939 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1940 signal_simple_error ("Invalid ccl-program", ccl_program);
1941 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1946 invalidate_charset_font_caches (Lisp_Object charset)
/* Called by Fset_charset_registry after the registry of CHARSET has been
   replaced. */
1948 /* Invalidate font cache entries for charset on all devices. */
1949 Lisp_Object devcons, concons, hash_table;
1950 DEVICE_LOOP_NO_BREAK (devcons, concons)
1952 struct device *d = XDEVICE (XCAR (devcons));
/* Each device keeps a table keyed by charset; Qunbound is the default
   so that a missing entry can be told apart from a stored value. */
1953 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1954 if (!UNBOUNDP (hash_table))
1955 Fclrhash (hash_table);
/* REGISTRY must be a string.  After storing it, the cached fonts for the
   charset are flushed on every device (invalidate_charset_font_caches)
   and a global change of the default face's font property is reported
   through face_property_was_changed. */
1959 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1960 Set the 'registry property of CHARSET to REGISTRY.
1962 (charset, registry))
1964 charset = Fget_charset (charset);
1965 CHECK_STRING (registry);
1966 XCHARSET_REGISTRY (charset) = registry;
1967 invalidate_charset_font_caches (charset);
1968 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* The mapping table reported here is the charset's decoding-table slot
   (XCHARSET_DECODING_TABLE). */
1973 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1974 Return mapping-table of CHARSET.
1978 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Implementation notes (several lines are not visible in this excerpt):
   TABLE must be a vector or, judging by the "vector-or-nil-p" error,
   nil; anything else signals wrong-type-argument.  A vector is checked
   with decoding_table_check_elements, whose failures are reported as
   "Too big table", "Invalid element is found" or "Something wrong".
   The decoding-table slot is set to Qnil on the vector path as well as
   on the first (presumably nil) path; the entries of TABLE are then
   registered one at a time with Fput_char_attribute, keyed by the
   charset's name, using a code point built from the vector index plus
   CHARSET_BYTE_OFFSET.  For nested vectors the outer index is shifted
   left by 8 before being combined with the inner one.
   NOTE(review): which result of the check maps to which error, and
   which elements are skipped, cannot be determined from the visible
   lines -- confirm against the complete source. */
1981 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1982 Set mapping-table of CHARSET to TABLE.
1986 struct Lisp_Charset *cs;
1990 charset = Fget_charset (charset);
1991 cs = XCHARSET (charset);
1995 CHARSET_DECODING_TABLE(cs) = Qnil;
1998 else if (VECTORP (table))
2000 int ccs_len = CHARSET_BYTE_SIZE (cs);
2001 int ret = decoding_table_check_elements (table,
2002 CHARSET_DIMENSION (cs),
2007 signal_simple_error ("Too big table", table);
2009 signal_simple_error ("Invalid element is found", table);
2011 signal_simple_error ("Something wrong", table);
2013 CHARSET_DECODING_TABLE(cs) = Qnil;
2016 signal_error (Qwrong_type_argument,
2017 list2 (build_translated_string ("vector-or-nil-p"),
2020 byte_offset = CHARSET_BYTE_OFFSET (cs);
2021 switch (CHARSET_DIMENSION (cs))
2024 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2026 Lisp_Object c = XVECTOR_DATA(table)[i];
2029 Fput_char_attribute (c, XCHARSET_NAME (charset),
2030 make_int (i + byte_offset));
2034 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2036 Lisp_Object v = XVECTOR_DATA(table)[i];
2042 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2044 Lisp_Object c = XVECTOR_DATA(v)[j];
2048 (c, XCHARSET_NAME (charset),
2049 make_int ( ( (i + byte_offset) << 8 )
2055 Fput_char_attribute (v, XCHARSET_NAME (charset),
2056 make_int (i + byte_offset));
2065 /************************************************************************/
2066 /* Lisp primitives for working with characters */
2067 /************************************************************************/
/* Implementation notes: with DEFINED_ONLY nil the code point is decoded
   with DECODE_CHAR, otherwise with decode_defined_char; a negative
   result is reported to Lisp as nil.  The test on XCHARSET_GRAPHIC
   guards an adjustment of the code point that is not visible in this
   excerpt. */
2070 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2071 Make a character from CHARSET and code-point CODE.
2072 If DEFINED_ONLY is non-nil, builtin character is not returned.
2073 If corresponding character is not found, nil is returned.
2075 (charset, code, defined_only))
2079 charset = Fget_charset (charset);
2082 if (XCHARSET_GRAPHIC (charset) == 1)
2084 if (NILP (defined_only))
2085 c = DECODE_CHAR (charset, c);
2087 c = decode_defined_char (charset, c);
2088 return c >= 0 ? make_char (c) : Qnil;
/* Implementation notes: the latin-viscii charset is special-cased.  The
   character is first obtained through Fdecode_char and its attributes
   for latin-viscii-lower and latin-viscii-upper are consulted; when one
   of the (not fully visible) tests succeeds, CHARSET is replaced by the
   corresponding half before decoding.  If decode_builtin_char yields a
   negative value, the function falls back to Fdecode_char with
   DEFINED-ONLY nil. */
2091 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2092 Make a builtin character from CHARSET and code-point CODE.
2098 charset = Fget_charset (charset);
2100 if (EQ (charset, Vcharset_latin_viscii))
2102 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2108 (ret = Fget_char_attribute (chr,
2109 Vcharset_latin_viscii_lower,
2112 charset = Vcharset_latin_viscii_lower;
2116 (ret = Fget_char_attribute (chr,
2117 Vcharset_latin_viscii_upper,
2120 charset = Vcharset_latin_viscii_upper;
2127 if (XCHARSET_GRAPHIC (charset) == 1)
2130 c = decode_builtin_char (charset, c);
2131 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Implementation notes: the permitted octet range depends on the
   charset: 0..127 for ascii, 0..31 for control-1, 0..255 for 256-char
   sets, 33..126 for 94-char sets and 32..127 otherwise.  An octet outside
   that range signals args-out-of-range.  For a one-dimensional charset a
   non-nil ARG2 is an error; otherwise ARG2 is masked with 0x7f and
   range-checked like ARG1.  The corresponding preparation of ARG1 is not
   visible in this excerpt. */
2135 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2136 Make a character from CHARSET and octets ARG1 and ARG2.
2137 ARG2 is required only for characters from two-dimensional charsets.
2138 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2139 character s with caron.
2141 (charset, arg1, arg2))
2145 int lowlim, highlim;
2147 charset = Fget_charset (charset);
2148 cs = XCHARSET (charset);
2150 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2151 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2153 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2155 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2156 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2159 /* It is useful (and safe, according to Olivier Galibert) to strip
2160 the 8th bit off ARG1 and ARG2 because it allows programmers to
2161 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2162 Latin 2 code of the character. */
2170 if (a1 < lowlim || a1 > highlim)
2171 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2173 if (CHARSET_DIMENSION (cs) == 1)
2177 ("Charset is of dimension one; second octet must be nil", arg2);
2178 return make_char (MAKE_CHAR (charset, a1, 0));
2187 a2 = XINT (arg2) & 0x7f;
2189 if (a2 < lowlim || a2 > highlim)
2190 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2192 return make_char (MAKE_CHAR (charset, a1, a2));
/* CHARACTER is checked with CHECK_CHAR_COERCE_INT; the value returned is
   the name of the charset found by CHAR_CHARSET, not the charset object
   itself. */
2195 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2196 Return the character set of CHARACTER.
2200 CHECK_CHAR_COERCE_INT (character);
2202 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* The character is split with BREAKUP_CHAR into a charset and two
   octets.  N of nil or 0 selects the first octet, N of 1 the second; any
   other value signals an error. */
2205 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2206 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2207 N defaults to 0 if omitted.
2211 Lisp_Object charset;
2214 CHECK_CHAR_COERCE_INT (character);
2216 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2218 if (NILP (n) || EQ (n, Qzero))
2219 return make_int (octet0);
2220 else if (EQ (n, make_int (1)))
2221 return make_int (octet1);
2223 signal_simple_error ("Octet number must be 0 or 1", n);
/* The code point is computed by charset_code_point and returned as a
   Lisp integer when it is non-negative.  The value returned for a
   negative code point is not visible in this excerpt. */
2227 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2228 Return code-point of CHARACTER in specified CHARSET.
2230 (character, charset))
2234 CHECK_CHAR_COERCE_INT (character);
2235 charset = Fget_charset (charset);
2236 code_point = charset_code_point (charset, XCHAR (character));
2237 if (code_point >= 0)
2238 return make_int (code_point);
/* Implementation notes: two ways of building the result are visible.
   One obtains a code point with ENCODE_CHAR and conses its low bytes
   (code_point & 255) onto the result once per dimension before adding
   the charset name in front; the other splits the character with
   BREAKUP_CHAR and builds a two- or three-element list depending on the
   charset's dimension.
   NOTE(review): presumably a preprocessor conditional, not visible in
   this excerpt, selects one of the two variants. */
2244 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2245 Return list of charset and one or two position-codes of CHARACTER.
2249 /* This function can GC */
2250 struct gcpro gcpro1, gcpro2;
2251 Lisp_Object charset = Qnil;
2252 Lisp_Object rc = Qnil;
2260 GCPRO2 (charset, rc);
2261 CHECK_CHAR_COERCE_INT (character);
2264 code_point = ENCODE_CHAR (XCHAR (character), charset);
2265 dimension = XCHARSET_DIMENSION (charset);
2266 while (dimension > 0)
2268 rc = Fcons (make_int (code_point & 255), rc);
2272 rc = Fcons (XCHARSET_NAME (charset), rc);
2274 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2276 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2278 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2282 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2291 #ifdef ENABLE_COMPOSITE_CHARS
2292 /************************************************************************/
2293 /* composite character functions */
2294 /************************************************************************/
2297 lookup_composite_char (Bufbyte *str, int len)
/* Look up the composite character for the LEN bytes at STR in
   Vcomposite_char_string2char_hash_table.  The visible code after the
   lookup allocates a new character in Vcharset_composite from the
   row/column counters and records it in both hash tables; the test that
   separates the "found" case from the "allocate" case is not visible in
   this excerpt. */
2299 Lisp_Object lispstr = make_string (str, len);
2300 Lisp_Object ch = Fgethash (lispstr,
2301 Vcomposite_char_string2char_hash_table,
/* All rows used up: no further composite characters can be created. */
2307 if (composite_char_row_next >= 128)
2308 signal_simple_error ("No more composite chars available", lispstr);
2309 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2310 composite_char_col_next);
/* Record the new mapping in both directions. */
2311 Fputhash (make_char (emch), lispstr,
2312 Vcomposite_char_char2string_hash_table);
2313 Fputhash (lispstr, make_char (emch),
2314 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell; when the column reaches 128 it wraps
   back to 32 and the row is advanced. */
2315 composite_char_col_next++;
2316 if (composite_char_col_next >= 128)
2318 composite_char_col_next = 32;
2319 composite_char_row_next++;
2328 composite_char_string (Emchar ch)
/* Reverse lookup of lookup_composite_char: fetch the string recorded for
   CH in Vcomposite_char_char2string_hash_table.  The assert requires
   that an entry for CH exists. */
2330 Lisp_Object str = Fgethash (make_char (ch),
2331 Vcomposite_char_char2string_hash_table,
2333 assert (!UNBOUNDP (str));
/* STRING must be a Lisp string; its data and length are passed to
   lookup_composite_char and the resulting character is returned. */
2337 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2338 Convert a string into a single composite character.
2339 The character is the result of overstriking all the characters in
2344 CHECK_STRING (string);
2345 return make_char (lookup_composite_char (XSTRING_DATA (string),
2346 XSTRING_LENGTH (string)));
/* A character whose leading byte is not LEADING_BYTE_COMPOSITE signals
   an error; otherwise the string is fetched with composite_char_string.
   The conversion of the argument into emch is not visible in this
   excerpt. */
2349 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2350 Return a string of the characters comprising a composite character.
2358 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2359 signal_simple_error ("Must be composite char", ch);
2360 return composite_char_string (emch);
2362 #endif /* ENABLE_COMPOSITE_CHARS */
2365 /************************************************************************/
2366 /* initialization */
2367 /************************************************************************/
2370 syms_of_mule_charset (void)
/* Registers the charset lrecord implementation, the Lisp primitives
   defined in this file, and the symbols used for property names,
   property values and predefined charset names.
   NOTE(review): no DEFSUBR for Fcharset_reverse_direction_charset is
   visible below, although that DEFUN exists in this file -- confirm
   whether it is registered. */
2372 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives. */
2374 DEFSUBR (Fcharsetp);
2375 DEFSUBR (Ffind_charset);
2376 DEFSUBR (Fget_charset);
2377 DEFSUBR (Fcharset_list);
2378 DEFSUBR (Fcharset_name);
2379 DEFSUBR (Fmake_charset);
2380 DEFSUBR (Fmake_reverse_direction_charset);
2381 /* DEFSUBR (Freverse_direction_charset); */
2382 DEFSUBR (Fdefine_charset_alias);
2383 DEFSUBR (Fcharset_from_attributes);
2384 DEFSUBR (Fcharset_short_name);
2385 DEFSUBR (Fcharset_long_name);
2386 DEFSUBR (Fcharset_description);
2387 DEFSUBR (Fcharset_dimension);
2388 DEFSUBR (Fcharset_property);
2389 DEFSUBR (Fcharset_id);
2390 DEFSUBR (Fset_charset_ccl_program);
2391 DEFSUBR (Fset_charset_registry);
2393 DEFSUBR (Fcharset_mapping_table);
2394 DEFSUBR (Fset_charset_mapping_table);
2398 DEFSUBR (Fdecode_char);
2399 DEFSUBR (Fdecode_builtin_char);
2400 DEFSUBR (Fencode_char);
2402 DEFSUBR (Fmake_char);
2403 DEFSUBR (Fchar_charset);
2404 DEFSUBR (Fchar_octet);
2405 DEFSUBR (Fsplit_char);
2407 #ifdef ENABLE_COMPOSITE_CHARS
2408 DEFSUBR (Fmake_composite_char);
2409 DEFSUBR (Fcomposite_char_string);
/* Symbols for the predicate and for charset property names and values. */
2412 defsymbol (&Qcharsetp, "charsetp");
2413 defsymbol (&Qregistry, "registry");
2414 defsymbol (&Qfinal, "final");
2415 defsymbol (&Qgraphic, "graphic");
2416 defsymbol (&Qdirection, "direction");
2417 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2418 defsymbol (&Qshort_name, "short-name");
2419 defsymbol (&Qlong_name, "long-name");
2421 defsymbol (&Qmother, "mother");
2422 defsymbol (&Qmin_code, "min-code");
2423 defsymbol (&Qmax_code, "max-code");
2424 defsymbol (&Qcode_offset, "code-offset");
2425 defsymbol (&Qconversion, "conversion");
2426 defsymbol (&Q94x60, "94x60");
2427 defsymbol (&Q94x94x60, "94x94x60");
2430 defsymbol (&Ql2r, "l2r");
2431 defsymbol (&Qr2l, "r2l");
2433 /* Charsets, compatible with FSF 20.3
2434 Naming convention is Script-Charset[-Edition] */
2435 defsymbol (&Qascii, "ascii");
2436 defsymbol (&Qcontrol_1, "control-1");
2437 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2438 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2439 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2440 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2441 defsymbol (&Qthai_tis620, "thai-tis620");
2442 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2443 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2444 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2445 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2446 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2447 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2448 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2449 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2450 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2451 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2452 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2453 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2454 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2455 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2456 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2457 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2459 defsymbol (&Qucs, "ucs");
2460 defsymbol (&Qucs_bmp, "ucs-bmp");
2461 defsymbol (&Qucs_smp, "ucs-smp");
2462 defsymbol (&Qucs_sip, "ucs-sip");
2463 defsymbol (&Qucs_cns, "ucs-cns");
2464 defsymbol (&Qucs_jis, "ucs-jis");
2465 defsymbol (&Qucs_ks, "ucs-ks");
2466 defsymbol (&Qucs_big5, "ucs-big5");
2467 defsymbol (&Qlatin_viscii, "latin-viscii");
2468 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2469 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2470 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2471 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2472 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2473 defsymbol (&Qideograph_gt, "ideograph-gt");
2474 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2475 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2476 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2477 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2478 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2479 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2480 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2481 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2482 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2483 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2484 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2485 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2486 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2487 defsymbol (&Qchinese_big5, "chinese-big5");
2488 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2489 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2490 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2491 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2492 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2493 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2494 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2495 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2496 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2497 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2498 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2499 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2500 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2501 defsymbol (&Qchina3_jef, "china3-jef");
2502 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2503 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2505 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2506 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2508 defsymbol (&Qcomposite, "composite");
2512 vars_of_mule_charset (void)
/* Allocates the zeroed charset lookup structure, registers it as a dump
   root, fills its lookup tables with Qnil, sets the starting points for
   leading-byte allocation, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.
   NOTE(review): charset_by_attributes is initialized twice, once with
   two subscripts and once with three; both a single
   next_allocated_leading_byte and separate 1-/2-byte counters are set;
   and leading_code_private_11 is assigned both before and after its
   DEFVAR_INT.  Presumably these are alternatives under preprocessor
   conditionals that are not visible in this excerpt -- confirm. */
2519 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2520 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2522 /* Table of charsets indexed by leading byte. */
2523 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2524 chlook->charset_by_leading_byte[i] = Qnil;
2527 /* Table of charsets indexed by type/final-byte. */
2528 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2529 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2530 chlook->charset_by_attributes[i][j] = Qnil;
2532 /* Table of charsets indexed by type/final-byte/direction. */
2533 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2534 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2535 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2536 chlook->charset_by_attributes[i][j][k] = Qnil;
2540 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2542 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2543 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2547 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2548 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2549 Leading-code of private TYPE9N charset of column-width 1.
2551 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2555 Vdefault_coded_charset_priority_list = Qnil;
2556 DEFVAR_LISP ("default-coded-charset-priority-list",
2557 &Vdefault_coded_charset_priority_list /*
2558 Default order of preferred coded-character-sets.
2564 complex_vars_of_mule_charset (void)
2566 staticpro (&Vcharset_hash_table);
2567 Vcharset_hash_table =
2568 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2570 /* Predefined character sets. We store them into variables for
2574 staticpro (&Vcharset_ucs);
2576 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2577 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2578 build_string ("UCS"),
2579 build_string ("UCS"),
2580 build_string ("ISO/IEC 10646"),
2582 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2583 staticpro (&Vcharset_ucs_bmp);
2585 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2586 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2587 build_string ("BMP"),
2588 build_string ("UCS-BMP"),
2589 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2591 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2592 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2593 staticpro (&Vcharset_ucs_smp);
2595 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2596 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2597 build_string ("SMP"),
2598 build_string ("UCS-SMP"),
2599 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2600 build_string ("UCS00-1"),
2601 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2602 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2603 staticpro (&Vcharset_ucs_sip);
2605 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2606 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2607 build_string ("SIP"),
2608 build_string ("UCS-SIP"),
2609 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2610 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2611 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2612 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2613 staticpro (&Vcharset_ucs_cns);
2615 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2616 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2617 build_string ("UCS for CNS"),
2618 build_string ("UCS for CNS 11643"),
2619 build_string ("ISO/IEC 10646 for CNS 11643"),
2621 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2622 staticpro (&Vcharset_ucs_jis);
2624 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2625 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2626 build_string ("UCS for JIS"),
2627 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2629 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2631 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2632 staticpro (&Vcharset_ucs_ks);
2634 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2635 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2636 build_string ("UCS for KS"),
2637 build_string ("UCS for CCS defined by KS"),
2638 build_string ("ISO/IEC 10646 for Korean Standards"),
2640 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2641 staticpro (&Vcharset_ucs_big5);
2643 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2644 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2645 build_string ("UCS for Big5"),
2646 build_string ("UCS for Big5"),
2647 build_string ("ISO/IEC 10646 for Big5"),
2649 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2651 # define MIN_CHAR_THAI 0
2652 # define MAX_CHAR_THAI 0
2653 /* # define MIN_CHAR_HEBREW 0 */
2654 /* # define MAX_CHAR_HEBREW 0 */
2655 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2656 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2658 staticpro (&Vcharset_ascii);
2660 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2661 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("ASCII"),
2663 build_string ("ASCII)"),
2664 build_string ("ASCII (ISO646 IRV)"),
2665 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2666 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_control_1);
2668 Vcharset_control_1 =
2669 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2670 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2671 build_string ("C1"),
2672 build_string ("Control characters"),
2673 build_string ("Control characters 128-191"),
2675 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_1);
2677 Vcharset_latin_iso8859_1 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2679 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-1"),
2681 build_string ("ISO8859-1 (Latin-1)"),
2682 build_string ("ISO8859-1 (Latin-1)"),
2683 build_string ("iso8859-1"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2685 staticpro (&Vcharset_latin_iso8859_2);
2686 Vcharset_latin_iso8859_2 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2688 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2689 build_string ("Latin-2"),
2690 build_string ("ISO8859-2 (Latin-2)"),
2691 build_string ("ISO8859-2 (Latin-2)"),
2692 build_string ("iso8859-2"),
2693 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_latin_iso8859_3);
2695 Vcharset_latin_iso8859_3 =
2696 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2697 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("Latin-3"),
2699 build_string ("ISO8859-3 (Latin-3)"),
2700 build_string ("ISO8859-3 (Latin-3)"),
2701 build_string ("iso8859-3"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_latin_iso8859_4);
2704 Vcharset_latin_iso8859_4 =
2705 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2706 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("Latin-4"),
2708 build_string ("ISO8859-4 (Latin-4)"),
2709 build_string ("ISO8859-4 (Latin-4)"),
2710 build_string ("iso8859-4"),
2711 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_thai_tis620);
2713 Vcharset_thai_tis620 =
2714 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2715 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2716 build_string ("TIS620"),
2717 build_string ("TIS620 (Thai)"),
2718 build_string ("TIS620.2529 (Thai)"),
2719 build_string ("tis620"),
2720 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2721 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2722 staticpro (&Vcharset_greek_iso8859_7);
2723 Vcharset_greek_iso8859_7 =
2724 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2725 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2726 build_string ("ISO8859-7"),
2727 build_string ("ISO8859-7 (Greek)"),
2728 build_string ("ISO8859-7 (Greek)"),
2729 build_string ("iso8859-7"),
2730 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2731 staticpro (&Vcharset_arabic_iso8859_6);
2732 Vcharset_arabic_iso8859_6 =
2733 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2734 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2735 build_string ("ISO8859-6"),
2736 build_string ("ISO8859-6 (Arabic)"),
2737 build_string ("ISO8859-6 (Arabic)"),
2738 build_string ("iso8859-6"),
2739 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2740 staticpro (&Vcharset_hebrew_iso8859_8);
2741 Vcharset_hebrew_iso8859_8 =
2742 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2743 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2744 build_string ("ISO8859-8"),
2745 build_string ("ISO8859-8 (Hebrew)"),
2746 build_string ("ISO8859-8 (Hebrew)"),
2747 build_string ("iso8859-8"),
2749 0 /* MIN_CHAR_HEBREW */,
2750 0 /* MAX_CHAR_HEBREW */, 0, 32,
2751 Qnil, CONVERSION_IDENTICAL);
2752 staticpro (&Vcharset_katakana_jisx0201);
2753 Vcharset_katakana_jisx0201 =
2754 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2755 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JISX0201 Kana"),
2757 build_string ("JISX0201.1976 (Japanese Kana)"),
2758 build_string ("JISX0201.1976 Japanese Kana"),
2759 build_string ("jisx0201\\.1976"),
2760 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2761 staticpro (&Vcharset_latin_jisx0201);
2762 Vcharset_latin_jisx0201 =
2763 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2764 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2765 build_string ("JISX0201 Roman"),
2766 build_string ("JISX0201.1976 (Japanese Roman)"),
2767 build_string ("JISX0201.1976 Japanese Roman"),
2768 build_string ("jisx0201\\.1976"),
2769 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_cyrillic_iso8859_5);
2771 Vcharset_cyrillic_iso8859_5 =
2772 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2773 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2774 build_string ("ISO8859-5"),
2775 build_string ("ISO8859-5 (Cyrillic)"),
2776 build_string ("ISO8859-5 (Cyrillic)"),
2777 build_string ("iso8859-5"),
2778 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2779 staticpro (&Vcharset_latin_iso8859_9);
2780 Vcharset_latin_iso8859_9 =
2781 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2782 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2783 build_string ("Latin-5"),
2784 build_string ("ISO8859-9 (Latin-5)"),
2785 build_string ("ISO8859-9 (Latin-5)"),
2786 build_string ("iso8859-9"),
2787 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2788 staticpro (&Vcharset_japanese_jisx0208_1978);
2789 Vcharset_japanese_jisx0208_1978 =
2790 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2791 Qjapanese_jisx0208_1978, 94, 2,
2792 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2793 build_string ("JIS X0208:1978"),
2794 build_string ("JIS X0208:1978 (Japanese)"),
2796 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2797 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2798 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2799 staticpro (&Vcharset_chinese_gb2312);
2800 Vcharset_chinese_gb2312 =
2801 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2802 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2803 build_string ("GB2312"),
2804 build_string ("GB2312)"),
2805 build_string ("GB2312 Chinese simplified"),
2806 build_string ("gb2312"),
2807 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2808 staticpro (&Vcharset_chinese_gb12345);
2809 Vcharset_chinese_gb12345 =
2810 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2811 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("G1"),
2813 build_string ("GB 12345)"),
2814 build_string ("GB 12345-1990"),
2815 build_string ("GB12345\\(\\.1990\\)?-0"),
2816 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_japanese_jisx0208);
2818 Vcharset_japanese_jisx0208 =
2819 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2820 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2821 build_string ("JISX0208"),
2822 build_string ("JIS X0208:1983 (Japanese)"),
2823 build_string ("JIS X0208:1983 Japanese Kanji"),
2824 build_string ("jisx0208\\.1983"),
2825 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_japanese_jisx0208_1990);
2828 Vcharset_japanese_jisx0208_1990 =
2829 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2830 Qjapanese_jisx0208_1990, 94, 2,
2831 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2832 build_string ("JISX0208-1990"),
2833 build_string ("JIS X0208:1990 (Japanese)"),
2834 build_string ("JIS X0208:1990 Japanese Kanji"),
2835 build_string ("jisx0208\\.1990"),
2837 MIN_CHAR_JIS_X0208_1990,
2838 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2839 Qnil, CONVERSION_IDENTICAL);
2841 staticpro (&Vcharset_korean_ksc5601);
2842 Vcharset_korean_ksc5601 =
2843 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2844 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2845 build_string ("KSC5601"),
2846 build_string ("KSC5601 (Korean"),
2847 build_string ("KSC5601 Korean Hangul and Hanja"),
2848 build_string ("ksc5601"),
2849 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2850 staticpro (&Vcharset_japanese_jisx0212);
2851 Vcharset_japanese_jisx0212 =
2852 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2853 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2854 build_string ("JISX0212"),
2855 build_string ("JISX0212 (Japanese)"),
2856 build_string ("JISX0212 Japanese Supplement"),
2857 build_string ("jisx0212"),
2858 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2860 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2861 staticpro (&Vcharset_chinese_cns11643_1);
2862 Vcharset_chinese_cns11643_1 =
2863 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2864 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2865 build_string ("CNS11643-1"),
2866 build_string ("CNS11643-1 (Chinese traditional)"),
2868 ("CNS 11643 Plane 1 Chinese traditional"),
2869 build_string (CHINESE_CNS_PLANE_RE("1")),
2870 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2871 staticpro (&Vcharset_chinese_cns11643_2);
2872 Vcharset_chinese_cns11643_2 =
2873 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2874 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2875 build_string ("CNS11643-2"),
2876 build_string ("CNS11643-2 (Chinese traditional)"),
2878 ("CNS 11643 Plane 2 Chinese traditional"),
2879 build_string (CHINESE_CNS_PLANE_RE("2")),
2880 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2882 staticpro (&Vcharset_latin_tcvn5712);
2883 Vcharset_latin_tcvn5712 =
2884 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2885 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2886 build_string ("TCVN 5712"),
2887 build_string ("TCVN 5712 (VSCII-2)"),
2888 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2889 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2890 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2891 staticpro (&Vcharset_latin_viscii_lower);
2892 Vcharset_latin_viscii_lower =
2893 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2894 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("VISCII lower"),
2896 build_string ("VISCII lower (Vietnamese)"),
2897 build_string ("VISCII lower (Vietnamese)"),
2898 build_string ("MULEVISCII-LOWER"),
2899 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2900 staticpro (&Vcharset_latin_viscii_upper);
2901 Vcharset_latin_viscii_upper =
2902 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2903 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2904 build_string ("VISCII upper"),
2905 build_string ("VISCII upper (Vietnamese)"),
2906 build_string ("VISCII upper (Vietnamese)"),
2907 build_string ("MULEVISCII-UPPER"),
2908 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2909 staticpro (&Vcharset_latin_viscii);
2910 Vcharset_latin_viscii =
2911 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2912 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2913 build_string ("VISCII"),
2914 build_string ("VISCII 1.1 (Vietnamese)"),
2915 build_string ("VISCII 1.1 (Vietnamese)"),
2916 build_string ("VISCII1\\.1"),
2917 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2918 staticpro (&Vcharset_chinese_big5);
2919 Vcharset_chinese_big5 =
2920 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2921 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2922 build_string ("Big5"),
2923 build_string ("Big5"),
2924 build_string ("Big5 Chinese traditional"),
2925 build_string ("big5-0"),
2927 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2928 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2930 staticpro (&Vcharset_chinese_big5_cdp);
2931 Vcharset_chinese_big5_cdp =
2932 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2933 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2934 build_string ("Big5-CDP"),
2935 build_string ("Big5 + CDP extension"),
2936 build_string ("Big5 with CDP extension"),
2937 build_string ("big5\\.cdp-0"),
2938 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2939 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2941 #define DEF_HANZIKU(n) \
2942 staticpro (&Vcharset_ideograph_hanziku_##n); \
2943 Vcharset_ideograph_hanziku_##n = \
2944 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2945 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2946 build_string ("HZK-"#n), \
2947 build_string ("HANZIKU-"#n), \
2948 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2950 ("hanziku-"#n"$"), \
2951 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2952 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2965 staticpro (&Vcharset_china3_jef);
2966 Vcharset_china3_jef =
2967 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2968 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2969 build_string ("JC3"),
2970 build_string ("JEF + CHINA3"),
2971 build_string ("JEF + CHINA3 private characters"),
2972 build_string ("china3jef-0"),
2973 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2974 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2975 staticpro (&Vcharset_ideograph_cbeta);
2976 Vcharset_ideograph_cbeta =
2977 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2979 build_string ("CB"),
2980 build_string ("CBETA"),
2981 build_string ("CBETA private characters"),
2982 build_string ("cbeta-0"),
2983 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2984 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2985 staticpro (&Vcharset_ideograph_gt);
2986 Vcharset_ideograph_gt =
2987 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2988 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2989 build_string ("GT"),
2990 build_string ("GT"),
2991 build_string ("GT"),
2993 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2994 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
2995 #define DEF_GT_PJ(n) \
2996 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2997 Vcharset_ideograph_gt_pj_##n = \
2998 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2999 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3000 build_string ("GT-PJ-"#n), \
3001 build_string ("GT (pseudo JIS encoding) part "#n), \
3002 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3004 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3005 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3018 staticpro (&Vcharset_ideograph_daikanwa_2);
3019 Vcharset_ideograph_daikanwa_2 =
3020 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3021 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3022 build_string ("Daikanwa Rev."),
3023 build_string ("Morohashi's Daikanwa Rev."),
3025 ("Daikanwa dictionary (revised version)"),
3026 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3027 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3028 staticpro (&Vcharset_ideograph_daikanwa);
3029 Vcharset_ideograph_daikanwa =
3030 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3031 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3032 build_string ("Daikanwa"),
3033 build_string ("Morohashi's Daikanwa Rev.2"),
3035 ("Daikanwa dictionary (second revised version)"),
3036 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3037 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3038 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3040 staticpro (&Vcharset_ethiopic_ucs);
3041 Vcharset_ethiopic_ucs =
3042 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3043 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3044 build_string ("Ethiopic (UCS)"),
3045 build_string ("Ethiopic (UCS)"),
3046 build_string ("Ethiopic of UCS"),
3047 build_string ("Ethiopic-Unicode"),
3048 Qnil, 0x1200, 0x137F, 0, 0,
3049 Qnil, CONVERSION_IDENTICAL);
3051 staticpro (&Vcharset_chinese_big5_1);
3052 Vcharset_chinese_big5_1 =
3053 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3054 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3055 build_string ("Big5"),
3056 build_string ("Big5 (Level-1)"),
3058 ("Big5 Level-1 Chinese traditional"),
3059 build_string ("big5"),
3060 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3061 staticpro (&Vcharset_chinese_big5_2);
3062 Vcharset_chinese_big5_2 =
3063 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3064 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("Big5"),
3066 build_string ("Big5 (Level-2)"),
3068 ("Big5 Level-2 Chinese traditional"),
3069 build_string ("big5"),
3070 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3072 #ifdef ENABLE_COMPOSITE_CHARS
3073 /* #### For simplicity, we put composite chars into a 96x96 charset.
3074 This is going to lead to problems because you can run out of
3075 room, esp. as we don't yet recycle numbers. */
3076 staticpro (&Vcharset_composite);
3077 Vcharset_composite =
3078 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3079 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3080 build_string ("Composite"),
3081 build_string ("Composite characters"),
3082 build_string ("Composite characters"),
3085 /* #### not dumped properly */
3086 composite_char_row_next = 32;
3087 composite_char_col_next = 32;
3089 Vcomposite_char_string2char_hash_table =
3090 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3091 Vcomposite_char_char2string_hash_table =
3092 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3093 staticpro (&Vcomposite_char_string2char_hash_table);
3094 staticpro (&Vcomposite_char_char2string_hash_table);
3095 #endif /* ENABLE_COMPOSITE_CHARS */