1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
194 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
198 if (XVECTOR_LENGTH (v) > ccs_len)
201 for (i = 0; i < XVECTOR_LENGTH (v); i++)
203 Lisp_Object c = XVECTOR_DATA(v)[i];
205 if (!NILP (c) && !CHARP (c))
209 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
221 put_char_ccs_code_point (Lisp_Object character,
222 Lisp_Object ccs, Lisp_Object value)
224 if (!EQ (XCHARSET_NAME (ccs), Qucs)
226 || (XCHAR (character) != XINT (value)))
228 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
229 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
233 { /* obsolete representation: value must be a list of bytes */
234 Lisp_Object ret = Fcar (value);
238 signal_simple_error ("Invalid value for coded-charset", value);
239 code_point = XINT (ret);
240 if (XCHARSET_GRAPHIC (ccs) == 1)
248 signal_simple_error ("Invalid value for coded-charset",
252 signal_simple_error ("Invalid value for coded-charset",
255 if (XCHARSET_GRAPHIC (ccs) == 1)
257 code_point = (code_point << 8) | j;
260 value = make_int (code_point);
262 else if (INTP (value))
264 code_point = XINT (value);
265 if (XCHARSET_GRAPHIC (ccs) == 1)
267 code_point &= 0x7F7F7F7F;
268 value = make_int (code_point);
272 signal_simple_error ("Invalid value for coded-charset", value);
276 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
279 decoding_table_remove_char (ccs, XINT (cpos));
284 XCHARSET_DECODING_TABLE (ccs)
285 = v = make_vector (ccs_len, Qnil);
288 decoding_table_put_char (ccs, code_point, character);
294 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
296 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
297 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
299 if (VECTORP (decoding_table))
301 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
305 decoding_table_remove_char (ccs, XINT (cpos));
308 if (CHAR_TABLEP (encoding_table))
310 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
318 int leading_code_private_11;
321 Lisp_Object Qcharsetp;
323 /* Qdoc_string, Qdimension, Qchars defined in general.c */
324 Lisp_Object Qregistry, Qfinal, Qgraphic;
325 Lisp_Object Qdirection;
326 Lisp_Object Qreverse_direction_charset;
327 Lisp_Object Qleading_byte;
328 Lisp_Object Qshort_name, Qlong_name;
330 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
331 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
348 Qjapanese_jisx0208_1978,
352 Qjapanese_jisx0208_1990,
370 Qvietnamese_viscii_lower,
371 Qvietnamese_viscii_upper,
374 Qideograph_hanziku_1,
375 Qideograph_hanziku_2,
376 Qideograph_hanziku_3,
377 Qideograph_hanziku_4,
378 Qideograph_hanziku_5,
379 Qideograph_hanziku_6,
380 Qideograph_hanziku_7,
381 Qideograph_hanziku_8,
382 Qideograph_hanziku_9,
383 Qideograph_hanziku_10,
384 Qideograph_hanziku_11,
385 Qideograph_hanziku_12,
388 Qideograph_daikanwa_2,
408 Lisp_Object Ql2r, Qr2l;
410 Lisp_Object Vcharset_hash_table;
412 /* Composite characters are characters constructed by overstriking two
413 or more regular characters.
415 1) The old Mule implementation involves storing composite characters
416 in a buffer as a tag followed by all of the actual characters
417 used to make up the composite character. I think this is a bad
418 idea; it greatly complicates code that wants to handle strings
419 one character at a time because it has to deal with the possibility
420 of great big ungainly characters. It's much more reasonable to
421 simply store an index into a table of composite characters.
423 2) The current implementation only allows for 16,384 separate
424 composite characters over the lifetime of the XEmacs process.
425 This could become a potential problem if the user
426 edited lots of different files that use composite characters.
427 Due to FSF bogosity, increasing the number of allowable
428 composite characters under Mule would decrease the number
429 of possible faces that can exist. Mule already has shrunk
430 this to 2048, and further shrinkage would become uncomfortable.
431 No such problems exist in XEmacs.
433 Composite characters could be represented as 0x80 C1 C2 C3,
434 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
435 for slightly under 2^20 (one million) composite characters
436 over the XEmacs process lifetime, and you only need to
437 increase the size of a Mule character from 19 to 21 bits.
438 Or you could use 0x80 C1 C2 C3 C4, allowing for about
439 85 million (slightly over 2^26) composite characters. */
442 /************************************************************************/
443 /* Basic Emchar functions */
444 /************************************************************************/
446 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
447 string in STR. Returns the number of bytes stored.
448 Do not call this directly. Use the macro set_charptr_emchar() instead.
452 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
467 else if ( c <= 0x7ff )
469 *p++ = (c >> 6) | 0xc0;
470 *p++ = (c & 0x3f) | 0x80;
472 else if ( c <= 0xffff )
474 *p++ = (c >> 12) | 0xe0;
475 *p++ = ((c >> 6) & 0x3f) | 0x80;
476 *p++ = (c & 0x3f) | 0x80;
478 else if ( c <= 0x1fffff )
480 *p++ = (c >> 18) | 0xf0;
481 *p++ = ((c >> 12) & 0x3f) | 0x80;
482 *p++ = ((c >> 6) & 0x3f) | 0x80;
483 *p++ = (c & 0x3f) | 0x80;
485 else if ( c <= 0x3ffffff )
487 *p++ = (c >> 24) | 0xf8;
488 *p++ = ((c >> 18) & 0x3f) | 0x80;
489 *p++ = ((c >> 12) & 0x3f) | 0x80;
490 *p++ = ((c >> 6) & 0x3f) | 0x80;
491 *p++ = (c & 0x3f) | 0x80;
495 *p++ = (c >> 30) | 0xfc;
496 *p++ = ((c >> 24) & 0x3f) | 0x80;
497 *p++ = ((c >> 18) & 0x3f) | 0x80;
498 *p++ = ((c >> 12) & 0x3f) | 0x80;
499 *p++ = ((c >> 6) & 0x3f) | 0x80;
500 *p++ = (c & 0x3f) | 0x80;
503 BREAKUP_CHAR (c, charset, c1, c2);
504 lb = CHAR_LEADING_BYTE (c);
505 if (LEADING_BYTE_PRIVATE_P (lb))
506 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
508 if (EQ (charset, Vcharset_control_1))
517 /* Return the first character from a Mule-encoded string in STR,
518 assuming it's non-ASCII. Do not call this directly.
519 Use the macro charptr_emchar() instead. */
522 non_ascii_charptr_emchar (const Bufbyte *str)
535 else if ( b >= 0xf8 )
540 else if ( b >= 0xf0 )
545 else if ( b >= 0xe0 )
550 else if ( b >= 0xc0 )
560 for( ; len > 0; len-- )
563 ch = ( ch << 6 ) | ( b & 0x3f );
567 Bufbyte i0 = *str, i1, i2 = 0;
570 if (i0 == LEADING_BYTE_CONTROL_1)
571 return (Emchar) (*++str - 0x20);
573 if (LEADING_BYTE_PREFIX_P (i0))
578 charset = CHARSET_BY_LEADING_BYTE (i0);
579 if (XCHARSET_DIMENSION (charset) == 2)
582 return MAKE_CHAR (charset, i1, i2);
586 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
587 Do not call this directly. Use the macro valid_char_p() instead. */
591 non_ascii_valid_char_p (Emchar ch)
595 /* Must have only lowest 19 bits set */
599 f1 = CHAR_FIELD1 (ch);
600 f2 = CHAR_FIELD2 (ch);
601 f3 = CHAR_FIELD3 (ch);
607 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
608 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
609 f2 > MAX_CHAR_FIELD2_PRIVATE)
614 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
615 f2 <= MAX_CHAR_FIELD2_PRIVATE))
619 NOTE: This takes advantage of the fact that
620 FIELD2_TO_OFFICIAL_LEADING_BYTE and
621 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
623 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
624 if (EQ (charset, Qnil))
626 return (XCHARSET_CHARS (charset) == 96);
632 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
633 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
634 f1 > MAX_CHAR_FIELD1_PRIVATE)
636 if (f2 < 0x20 || f3 < 0x20)
639 #ifdef ENABLE_COMPOSITE_CHARS
640 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
642 if (UNBOUNDP (Fgethash (make_int (ch),
643 Vcomposite_char_char2string_hash_table,
648 #endif /* ENABLE_COMPOSITE_CHARS */
650 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
651 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
654 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
656 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
659 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
661 if (EQ (charset, Qnil))
663 return (XCHARSET_CHARS (charset) == 96);
669 /************************************************************************/
670 /* Basic string functions */
671 /************************************************************************/
673 /* Copy the character pointed to by SRC into DST. Do not call this
674 directly. Use the macro charptr_copy_char() instead.
675 Return the number of bytes copied. */
678 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
680 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
682 for (i = bytes; i; i--, dst++, src++)
688 /************************************************************************/
689 /* streams of Emchars */
690 /************************************************************************/
692 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
693 The functions below are not meant to be called directly; use
694 the macros in insdel.h. */
697 Lstream_get_emchar_1 (Lstream *stream, int ch)
699 Bufbyte str[MAX_EMCHAR_LEN];
700 Bufbyte *strptr = str;
703 str[0] = (Bufbyte) ch;
705 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
707 int c = Lstream_getc (stream);
708 bufpos_checking_assert (c >= 0);
709 *++strptr = (Bufbyte) c;
711 return charptr_emchar (str);
715 Lstream_fput_emchar (Lstream *stream, Emchar ch)
717 Bufbyte str[MAX_EMCHAR_LEN];
718 Bytecount len = set_charptr_emchar (str, ch);
719 return Lstream_write (stream, str, len);
723 Lstream_funget_emchar (Lstream *stream, Emchar ch)
725 Bufbyte str[MAX_EMCHAR_LEN];
726 Bytecount len = set_charptr_emchar (str, ch);
727 Lstream_unread (stream, str, len);
731 /************************************************************************/
733 /************************************************************************/
736 mark_charset (Lisp_Object obj)
738 Lisp_Charset *cs = XCHARSET (obj);
740 mark_object (cs->short_name);
741 mark_object (cs->long_name);
742 mark_object (cs->doc_string);
743 mark_object (cs->registry);
744 mark_object (cs->ccl_program);
746 mark_object (cs->decoding_table);
747 mark_object (cs->mother);
753 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
755 Lisp_Charset *cs = XCHARSET (obj);
759 error ("printing unreadable object #<charset %s 0x%x>",
760 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
763 write_c_string ("#<charset ", printcharfun);
764 print_internal (CHARSET_NAME (cs), printcharfun, 0);
765 write_c_string (" ", printcharfun);
766 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
767 write_c_string (" ", printcharfun);
768 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
769 write_c_string (" ", printcharfun);
770 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
771 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
773 CHARSET_DIMENSION (cs),
774 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
775 CHARSET_COLUMNS (cs),
776 CHARSET_GRAPHIC (cs),
778 write_c_string (buf, printcharfun);
779 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
780 sprintf (buf, " 0x%x>", cs->header.uid);
781 write_c_string (buf, printcharfun);
784 static const struct lrecord_description charset_description[] = {
785 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
786 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
787 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
788 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
789 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
790 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
791 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
793 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
794 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
799 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
800 mark_charset, print_charset, 0, 0, 0,
804 /* Make a new charset. */
805 /* #### SJT Should generic properties be allowed? */
807 make_charset (Charset_ID id, Lisp_Object name,
808 unsigned short chars, unsigned char dimension,
809 unsigned char columns, unsigned char graphic,
810 Bufbyte final, unsigned char direction, Lisp_Object short_name,
811 Lisp_Object long_name, Lisp_Object doc,
813 Lisp_Object decoding_table,
814 Emchar min_code, Emchar max_code,
815 Emchar code_offset, unsigned char byte_offset,
816 Lisp_Object mother, unsigned char conversion)
819 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
823 XSETCHARSET (obj, cs);
825 CHARSET_ID (cs) = id;
826 CHARSET_NAME (cs) = name;
827 CHARSET_SHORT_NAME (cs) = short_name;
828 CHARSET_LONG_NAME (cs) = long_name;
829 CHARSET_CHARS (cs) = chars;
830 CHARSET_DIMENSION (cs) = dimension;
831 CHARSET_DIRECTION (cs) = direction;
832 CHARSET_COLUMNS (cs) = columns;
833 CHARSET_GRAPHIC (cs) = graphic;
834 CHARSET_FINAL (cs) = final;
835 CHARSET_DOC_STRING (cs) = doc;
836 CHARSET_REGISTRY (cs) = reg;
837 CHARSET_CCL_PROGRAM (cs) = Qnil;
838 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
840 CHARSET_DECODING_TABLE(cs) = Qnil;
841 CHARSET_MIN_CODE (cs) = min_code;
842 CHARSET_MAX_CODE (cs) = max_code;
843 CHARSET_CODE_OFFSET (cs) = code_offset;
844 CHARSET_BYTE_OFFSET (cs) = byte_offset;
845 CHARSET_MOTHER (cs) = mother;
846 CHARSET_CONVERSION (cs) = conversion;
850 if (id == LEADING_BYTE_ASCII)
851 CHARSET_REP_BYTES (cs) = 1;
853 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
855 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
860 /* some charsets do not have final characters. This includes
861 ASCII, Control-1, Composite, and the two faux private
863 unsigned char iso2022_type
864 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
866 if (code_offset == 0)
868 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
869 chlook->charset_by_attributes[iso2022_type][final] = obj;
873 (chlook->charset_by_attributes[iso2022_type][final][direction]));
874 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
878 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
879 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
881 /* Some charsets are "faux" and don't have names or really exist at
882 all except in the leading-byte table. */
884 Fputhash (name, obj, Vcharset_hash_table);
889 get_unallocated_leading_byte (int dimension)
894 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
897 lb = chlook->next_allocated_leading_byte++;
901 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
904 lb = chlook->next_allocated_1_byte_leading_byte++;
908 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
911 lb = chlook->next_allocated_2_byte_leading_byte++;
917 ("No more character sets free for this dimension",
918 make_int (dimension));
924 /* Number of Big5 characters which have the same code in 1st byte. */
926 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
929 decode_builtin_char (Lisp_Object charset, int code_point)
931 Lisp_Object mother = XCHARSET_MOTHER (charset);
934 if ( CHARSETP (mother) )
936 int code = code_point;
938 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
940 int row = code_point >> 8;
941 int cell = code_point & 255;
945 else if (row < 16 + 32 + 30)
946 code = (row - (16 + 32)) * 94 + cell - 33;
947 else if (row < 18 + 32 + 30)
949 else if (row < 18 + 32 + 60)
950 code = (row - (18 + 32)) * 94 + cell - 33;
952 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
954 int plane = code_point >> 16;
955 int row = (code_point >> 8) & 255;
956 int cell = code_point & 255;
960 else if (row < 16 + 32 + 30)
962 = (plane - 33) * 94 * 60
963 + (row - (16 + 32)) * 94
965 else if (row < 18 + 32 + 30)
967 else if (row < 18 + 32 + 60)
969 = (plane - 33) * 94 * 60
970 + (row - (18 + 32)) * 94
973 return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
975 else if (EQ (charset, Vcharset_chinese_big5))
977 int c1 = code_point >> 8;
978 int c2 = code_point & 0xFF;
981 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
983 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
984 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
986 I = (c1 - 0xA1) * BIG5_SAME_ROW
987 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
991 charset = Vcharset_chinese_big5_1;
995 charset = Vcharset_chinese_big5_2;
996 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
998 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1001 if ((final = XCHARSET_FINAL (charset)) >= '0')
1003 if (XCHARSET_DIMENSION (charset) == 1)
1005 switch (XCHARSET_CHARS (charset))
1009 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1012 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1020 switch (XCHARSET_CHARS (charset))
1023 return MIN_CHAR_94x94
1024 + (final - '0') * 94 * 94
1025 + (((code_point >> 8) & 0x7F) - 33) * 94
1026 + ((code_point & 0x7F) - 33);
1028 return MIN_CHAR_96x96
1029 + (final - '0') * 96 * 96
1030 + (((code_point >> 8) & 0x7F) - 32) * 96
1031 + ((code_point & 0x7F) - 32);
1038 else if (XCHARSET_MAX_CODE (charset))
1041 = (XCHARSET_DIMENSION (charset) == 1
1043 code_point - XCHARSET_BYTE_OFFSET (charset)
1045 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1046 * XCHARSET_CHARS (charset)
1047 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1048 + XCHARSET_CODE_OFFSET (charset);
1049 if ((cid < XCHARSET_MIN_CODE (charset))
1050 || (XCHARSET_MAX_CODE (charset) < cid))
1059 charset_code_point (Lisp_Object charset, Emchar ch)
1061 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1064 if ( CHAR_TABLEP (encoding_table)
1065 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1070 Lisp_Object mother = XCHARSET_MOTHER (charset);
1071 int min = XCHARSET_MIN_CODE (charset);
1072 int max = XCHARSET_MAX_CODE (charset);
1075 if ( CHARSETP (mother) )
1076 code = charset_code_point (mother, ch);
1079 if ( (min <= code) && (code <= max) )
1081 int d = code - XCHARSET_CODE_OFFSET (charset);
1083 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1086 int cell = d % 94 + 33;
1092 return (row << 8) | cell;
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1096 int plane = d / (94 * 60) + 33;
1097 int row = (d % (94 * 60)) / 94;
1098 int cell = d % 94 + 33;
1104 return (plane << 16) | (row << 8) | cell;
1106 else if (XCHARSET_CHARS (charset) == 94)
1108 if (XCHARSET_DIMENSION (charset) == 1)
1110 else if (XCHARSET_DIMENSION (charset) == 2)
1111 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1112 else if (XCHARSET_DIMENSION (charset) == 3)
1114 ( (d / (94 * 94) + 33) << 16)
1115 | ((d / 94 % 94 + 33) << 8)
1117 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1119 ( (d / (94 * 94 * 94) + 33) << 24)
1120 | ((d / (94 * 94) % 94 + 33) << 16)
1121 | ((d / 94 % 94 + 33) << 8)
1124 else if (XCHARSET_CHARS (charset) == 96)
1126 if (XCHARSET_DIMENSION (charset) == 1)
1128 else if (XCHARSET_DIMENSION (charset) == 2)
1129 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1130 else if (XCHARSET_DIMENSION (charset) == 3)
1132 ( (d / (96 * 96) + 32) << 16)
1133 | ((d / 96 % 96 + 32) << 8)
1135 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1137 ( (d / (96 * 96 * 96) + 32) << 24)
1138 | ((d / (96 * 96) % 96 + 32) << 16)
1139 | ((d / 96 % 96 + 32) << 8)
1143 return code - XCHARSET_CODE_OFFSET (charset);
1145 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1146 (XCHARSET_CODE_OFFSET (charset)
1147 == XCHARSET_MIN_CODE (charset)) )
1151 if (XCHARSET_DIMENSION (charset) == 1)
1153 if (XCHARSET_CHARS (charset) == 94)
1155 if (((d = ch - (MIN_CHAR_94
1156 + (XCHARSET_FINAL (charset) - '0') * 94))
1161 else if (XCHARSET_CHARS (charset) == 96)
1163 if (((d = ch - (MIN_CHAR_96
1164 + (XCHARSET_FINAL (charset) - '0') * 96))
1172 else if (XCHARSET_DIMENSION (charset) == 2)
1174 if (XCHARSET_CHARS (charset) == 94)
1176 if (((d = ch - (MIN_CHAR_94x94
1178 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1181 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1183 else if (XCHARSET_CHARS (charset) == 96)
1185 if (((d = ch - (MIN_CHAR_96x96
1187 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1190 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1201 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1203 if (c <= MAX_CHAR_BASIC_LATIN)
1205 *charset = Vcharset_ascii;
1210 *charset = Vcharset_control_1;
1215 *charset = Vcharset_latin_iso8859_1;
1219 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1221 *charset = Vcharset_hebrew_iso8859_8;
1222 return c - MIN_CHAR_HEBREW + 0x20;
1225 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1227 *charset = Vcharset_thai_tis620;
1228 return c - MIN_CHAR_THAI + 0x20;
1231 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1232 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1234 return list2 (Vcharset_katakana_jisx0201,
1235 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1238 else if (c <= MAX_CHAR_BMP)
1240 *charset = Vcharset_ucs_bmp;
1243 else if (c <= MAX_CHAR_SMP)
1245 *charset = Vcharset_ucs_smp;
1246 return c - MIN_CHAR_SMP;
1248 else if (c <= MAX_CHAR_SIP)
1250 *charset = Vcharset_ucs_sip;
1251 return c - MIN_CHAR_SIP;
1253 else if (c < MIN_CHAR_DAIKANWA)
1255 *charset = Vcharset_ucs;
1258 else if (c <= MAX_CHAR_DAIKANWA)
1260 *charset = Vcharset_ideograph_daikanwa;
1261 return c - MIN_CHAR_DAIKANWA;
1263 else if (c < MIN_CHAR_94)
1265 *charset = Vcharset_ucs;
1268 else if (c <= MAX_CHAR_94)
1270 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1271 ((c - MIN_CHAR_94) / 94) + '0',
1272 CHARSET_LEFT_TO_RIGHT);
1273 if (!NILP (*charset))
1274 return ((c - MIN_CHAR_94) % 94) + 33;
1277 *charset = Vcharset_ucs;
1281 else if (c <= MAX_CHAR_96)
1283 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1284 ((c - MIN_CHAR_96) / 96) + '0',
1285 CHARSET_LEFT_TO_RIGHT);
1286 if (!NILP (*charset))
1287 return ((c - MIN_CHAR_96) % 96) + 32;
1290 *charset = Vcharset_ucs;
1294 else if (c <= MAX_CHAR_94x94)
1297 = CHARSET_BY_ATTRIBUTES (94, 2,
1298 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1299 CHARSET_LEFT_TO_RIGHT);
1300 if (!NILP (*charset))
1301 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1302 | (((c - MIN_CHAR_94x94) % 94) + 33);
1305 *charset = Vcharset_ucs;
1309 else if (c <= MAX_CHAR_96x96)
1312 = CHARSET_BY_ATTRIBUTES (96, 2,
1313 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1314 CHARSET_LEFT_TO_RIGHT);
1315 if (!NILP (*charset))
1316 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1317 | (((c - MIN_CHAR_96x96) % 96) + 32);
1320 *charset = Vcharset_ucs;
1326 *charset = Vcharset_ucs;
1331 Lisp_Object Vdefault_coded_charset_priority_list;
1335 /************************************************************************/
1336 /* Basic charset Lisp functions */
1337 /************************************************************************/
1339 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1340 Return non-nil if OBJECT is a charset.
1344 return CHARSETP (object) ? Qt : Qnil;
1347 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1348 Retrieve the charset of the given name.
1349 If CHARSET-OR-NAME is a charset object, it is simply returned.
1350 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1351 nil is returned. Otherwise the associated charset object is returned.
1355 if (CHARSETP (charset_or_name))
1356 return charset_or_name;
1358 CHECK_SYMBOL (charset_or_name);
1359 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1362 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1363 Retrieve the charset of the given name.
1364 Same as `find-charset' except an error is signalled if there is no such
1365 charset instead of returning nil.
1369 Lisp_Object charset = Ffind_charset (name);
1372 signal_simple_error ("No such charset", name);
1376 /* We store the charsets in hash tables with the names as the key and the
1377 actual charset object as the value. Occasionally we need to use them
1378 in a list format. These routines provide us with that. */
1379 struct charset_list_closure
1381 Lisp_Object *charset_list;
1385 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1386 void *charset_list_closure)
1388 /* This function can GC */
1389 struct charset_list_closure *chcl =
1390 (struct charset_list_closure*) charset_list_closure;
1391 Lisp_Object *charset_list = chcl->charset_list;
1393 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1397 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1398 Return a list of the names of all defined charsets.
1402 Lisp_Object charset_list = Qnil;
1403 struct gcpro gcpro1;
1404 struct charset_list_closure charset_list_closure;
1406 GCPRO1 (charset_list);
1407 charset_list_closure.charset_list = &charset_list;
1408 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1409 &charset_list_closure);
1412 return charset_list;
1415 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1416 Return the name of charset CHARSET.
1420 return XCHARSET_NAME (Fget_charset (charset));
1423 /* #### SJT Should generic properties be allowed? */
1424 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1425 Define a new character set.
1426 This function is for use with Mule support.
1427 NAME is a symbol, the name by which the character set is normally referred.
1428 DOC-STRING is a string describing the character set.
1429 PROPS is a property list, describing the specific nature of the
1430 character set. Recognized properties are:
1432 'short-name Short version of the charset name (ex: Latin-1)
1433 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1434 'registry A regular expression matching the font registry field for
1436 'dimension Number of octets used to index a character in this charset.
1437 Either 1 or 2. Defaults to 1.
1438 If UTF-2000 feature is enabled, 3 or 4 are also available.
1439 'columns Number of columns used to display a character in this charset.
1440 Only used in TTY mode. (Under X, the actual width of a
1441 character can be derived from the font used to display the
1442 characters.) If unspecified, defaults to the dimension
1443 (this is almost always the correct value).
1444 'chars Number of characters in each dimension (94 or 96).
1445 Defaults to 94. Note that if the dimension is 2, the
1446 character set thus described is 94x94 or 96x96.
1447 If UTF-2000 feature is enabled, 128 or 256 are also available.
1448 'final Final byte of ISO 2022 escape sequence. Must be
1449 supplied. Each combination of (DIMENSION, CHARS) defines a
1450 separate namespace for final bytes. Note that ISO
1451 2022 restricts the final byte to the range
1452 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1453 dimension == 2. Note also that final bytes in the range
1454 0x30 - 0x3F are reserved for user-defined (not official)
1456 'graphic 0 (use left half of font on output) or 1 (use right half
1457 of font on output). Defaults to 0. For example, for
1458 a font whose registry is ISO8859-1, the left half
1459 (octets 0x20 - 0x7F) is the `ascii' character set, while
1460 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1461 character set. With 'graphic set to 0, the octets
1462 will have their high bit cleared; with it set to 1,
1463 the octets will have their high bit set.
1464 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1466 'ccl-program A compiled CCL program used to convert a character in
1467 this charset into an index into the font. This is in
1468 addition to the 'graphic property. The CCL program
1469 is passed the octets of the character, with the high
1470 bit cleared and set depending upon whether the value
1471 of the 'graphic property is 0 or 1.
1472 'mother [UTF-2000 only] Base coded-charset.
1473 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1474 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1475 'code-offset [UTF-2000 only] Offset for a code-point of a base
1477 'conversion [UTF-2000 only] Conversion for a code-point of a base
1478 coded-charset (94x60 or 94x94x60).
1480 (name, doc_string, props))
1482 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1483 int direction = CHARSET_LEFT_TO_RIGHT;
1484 Lisp_Object registry = Qnil;
1485 Lisp_Object charset;
1486 Lisp_Object ccl_program = Qnil;
1487 Lisp_Object short_name = Qnil, long_name = Qnil;
1488 Lisp_Object mother = Qnil;
1489 int min_code = 0, max_code = 0, code_offset = 0;
1490 int byte_offset = -1;
1493 CHECK_SYMBOL (name);
1494 if (!NILP (doc_string))
1495 CHECK_STRING (doc_string);
1497 charset = Ffind_charset (name);
1498 if (!NILP (charset))
1499 signal_simple_error ("Cannot redefine existing charset", name);
1502 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1504 if (EQ (keyword, Qshort_name))
1506 CHECK_STRING (value);
1510 if (EQ (keyword, Qlong_name))
1512 CHECK_STRING (value);
1516 else if (EQ (keyword, Qdimension))
1519 dimension = XINT (value);
1520 if (dimension < 1 ||
1527 signal_simple_error ("Invalid value for 'dimension", value);
1530 else if (EQ (keyword, Qchars))
1533 chars = XINT (value);
1534 if (chars != 94 && chars != 96
1536 && chars != 128 && chars != 256
1539 signal_simple_error ("Invalid value for 'chars", value);
1542 else if (EQ (keyword, Qcolumns))
1545 columns = XINT (value);
1546 if (columns != 1 && columns != 2)
1547 signal_simple_error ("Invalid value for 'columns", value);
1550 else if (EQ (keyword, Qgraphic))
1553 graphic = XINT (value);
1561 signal_simple_error ("Invalid value for 'graphic", value);
1564 else if (EQ (keyword, Qregistry))
1566 CHECK_STRING (value);
1570 else if (EQ (keyword, Qdirection))
1572 if (EQ (value, Ql2r))
1573 direction = CHARSET_LEFT_TO_RIGHT;
1574 else if (EQ (value, Qr2l))
1575 direction = CHARSET_RIGHT_TO_LEFT;
1577 signal_simple_error ("Invalid value for 'direction", value);
1580 else if (EQ (keyword, Qfinal))
1582 CHECK_CHAR_COERCE_INT (value);
1583 final = XCHAR (value);
1584 if (final < '0' || final > '~')
1585 signal_simple_error ("Invalid value for 'final", value);
1589 else if (EQ (keyword, Qmother))
1591 mother = Fget_charset (value);
1594 else if (EQ (keyword, Qmin_code))
1597 min_code = XUINT (value);
1600 else if (EQ (keyword, Qmax_code))
1603 max_code = XUINT (value);
1606 else if (EQ (keyword, Qcode_offset))
1609 code_offset = XUINT (value);
1612 else if (EQ (keyword, Qconversion))
1614 if (EQ (value, Q94x60))
1615 conversion = CONVERSION_94x60;
1616 else if (EQ (value, Q94x94x60))
1617 conversion = CONVERSION_94x94x60;
1619 signal_simple_error ("Unrecognized conversion", value);
1623 else if (EQ (keyword, Qccl_program))
1625 struct ccl_program test_ccl;
1627 if (setup_ccl_program (&test_ccl, value) < 0)
1628 signal_simple_error ("Invalid value for 'ccl-program", value);
1629 ccl_program = value;
1633 signal_simple_error ("Unrecognized property", keyword);
1639 error ("'final must be specified");
1641 if (dimension == 2 && final > 0x5F)
1643 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1646 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1647 CHARSET_LEFT_TO_RIGHT)) ||
1648 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1649 CHARSET_RIGHT_TO_LEFT)))
1651 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1653 id = get_unallocated_leading_byte (dimension);
1655 if (NILP (doc_string))
1656 doc_string = build_string ("");
1658 if (NILP (registry))
1659 registry = build_string ("");
1661 if (NILP (short_name))
1662 XSETSTRING (short_name, XSYMBOL (name)->name);
1664 if (NILP (long_name))
1665 long_name = doc_string;
1668 columns = dimension;
1670 if (byte_offset < 0)
1674 else if (chars == 96)
1680 charset = make_charset (id, name, chars, dimension, columns, graphic,
1681 final, direction, short_name, long_name,
1682 doc_string, registry,
1683 Qnil, min_code, max_code, code_offset, byte_offset,
1684 mother, conversion);
1685 if (!NILP (ccl_program))
1686 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1690 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1692 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1693 NEW-NAME is the name of the new charset. Return the new charset.
1695 (charset, new_name))
1697 Lisp_Object new_charset = Qnil;
1698 int id, chars, dimension, columns, graphic, final;
1700 Lisp_Object registry, doc_string, short_name, long_name;
1703 charset = Fget_charset (charset);
1704 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1705 signal_simple_error ("Charset already has reverse-direction charset",
1708 CHECK_SYMBOL (new_name);
1709 if (!NILP (Ffind_charset (new_name)))
1710 signal_simple_error ("Cannot redefine existing charset", new_name);
1712 cs = XCHARSET (charset);
1714 chars = CHARSET_CHARS (cs);
1715 dimension = CHARSET_DIMENSION (cs);
1716 columns = CHARSET_COLUMNS (cs);
1717 id = get_unallocated_leading_byte (dimension);
1719 graphic = CHARSET_GRAPHIC (cs);
1720 final = CHARSET_FINAL (cs);
1721 direction = CHARSET_RIGHT_TO_LEFT;
1722 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1723 direction = CHARSET_LEFT_TO_RIGHT;
1724 doc_string = CHARSET_DOC_STRING (cs);
1725 short_name = CHARSET_SHORT_NAME (cs);
1726 long_name = CHARSET_LONG_NAME (cs);
1727 registry = CHARSET_REGISTRY (cs);
1729 new_charset = make_charset (id, new_name, chars, dimension, columns,
1730 graphic, final, direction, short_name, long_name,
1731 doc_string, registry,
1733 CHARSET_DECODING_TABLE(cs),
1734 CHARSET_MIN_CODE(cs),
1735 CHARSET_MAX_CODE(cs),
1736 CHARSET_CODE_OFFSET(cs),
1737 CHARSET_BYTE_OFFSET(cs),
1739 CHARSET_CONVERSION (cs)
1741 Qnil, 0, 0, 0, 0, Qnil, 0
1745 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1746 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1751 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1752 Define symbol ALIAS as an alias for CHARSET.
1756 CHECK_SYMBOL (alias);
1757 charset = Fget_charset (charset);
1758 return Fputhash (alias, charset, Vcharset_hash_table);
1761 /* #### Reverse direction charsets not yet implemented. */
1763 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1765 Return the reverse-direction charset parallel to CHARSET, if any.
1766 This is the charset with the same properties (in particular, the same
1767 dimension, number of characters per dimension, and final byte) as
1768 CHARSET but whose characters are displayed in the opposite direction.
1772 charset = Fget_charset (charset);
1773 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1777 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1778 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1779 If DIRECTION is omitted, both directions will be checked (left-to-right
1780 will be returned if character sets exist for both directions).
1782 (dimension, chars, final, direction))
1784 int dm, ch, fi, di = -1;
1785 Lisp_Object obj = Qnil;
1787 CHECK_INT (dimension);
1788 dm = XINT (dimension);
1789 if (dm < 1 || dm > 2)
1790 signal_simple_error ("Invalid value for DIMENSION", dimension);
1794 if (ch != 94 && ch != 96)
1795 signal_simple_error ("Invalid value for CHARS", chars);
1797 CHECK_CHAR_COERCE_INT (final);
1799 if (fi < '0' || fi > '~')
1800 signal_simple_error ("Invalid value for FINAL", final);
1802 if (EQ (direction, Ql2r))
1803 di = CHARSET_LEFT_TO_RIGHT;
1804 else if (EQ (direction, Qr2l))
1805 di = CHARSET_RIGHT_TO_LEFT;
1806 else if (!NILP (direction))
1807 signal_simple_error ("Invalid value for DIRECTION", direction);
1809 if (dm == 2 && fi > 0x5F)
1811 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1815 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1817 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1820 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1823 return XCHARSET_NAME (obj);
1827 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1828 Return short name of CHARSET.
1832 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1835 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1836 Return long name of CHARSET.
1840 return XCHARSET_LONG_NAME (Fget_charset (charset));
1843 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1844 Return description of CHARSET.
1848 return XCHARSET_DOC_STRING (Fget_charset (charset));
1851 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1852 Return dimension of CHARSET.
1856 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1859 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1860 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1861 Recognized properties are those listed in `make-charset', as well as
1862 'name and 'doc-string.
1868 charset = Fget_charset (charset);
1869 cs = XCHARSET (charset);
1871 CHECK_SYMBOL (prop);
1872 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1873 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1874 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1875 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1876 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1877 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1878 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1879 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1880 Qnil : make_char (CHARSET_FINAL (cs));
1881 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1882 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1883 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1884 if (EQ (prop, Qdirection))
1885 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1886 if (EQ (prop, Qreverse_direction_charset))
1888 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1889 /* #### Is this translation OK? If so, error checking sufficient? */
1890 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1893 if (EQ (prop, Qmother))
1894 return CHARSET_MOTHER (cs);
1895 if (EQ (prop, Qmin_code))
1896 return make_int (CHARSET_MIN_CODE (cs));
1897 if (EQ (prop, Qmax_code))
1898 return make_int (CHARSET_MAX_CODE (cs));
1900 signal_simple_error ("Unrecognized charset property name", prop);
1901 return Qnil; /* not reached */
1904 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1905 Return charset identification number of CHARSET.
1909 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1912 /* #### We need to figure out which properties we really want to
1915 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1916 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1918 (charset, ccl_program))
1920 struct ccl_program test_ccl;
1922 charset = Fget_charset (charset);
1923 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1924 signal_simple_error ("Invalid ccl-program", ccl_program);
1925 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1930 invalidate_charset_font_caches (Lisp_Object charset)
1932 /* Invalidate font cache entries for charset on all devices. */
1933 Lisp_Object devcons, concons, hash_table;
1934 DEVICE_LOOP_NO_BREAK (devcons, concons)
1936 struct device *d = XDEVICE (XCAR (devcons));
1937 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1938 if (!UNBOUNDP (hash_table))
1939 Fclrhash (hash_table);
1943 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1944 Set the 'registry property of CHARSET to REGISTRY.
1946 (charset, registry))
1948 charset = Fget_charset (charset);
1949 CHECK_STRING (registry);
1950 XCHARSET_REGISTRY (charset) = registry;
1951 invalidate_charset_font_caches (charset);
1952 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1957 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1958 Return mapping-table of CHARSET.
1962 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1965 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1966 Set mapping-table of CHARSET to TABLE.
1970 struct Lisp_Charset *cs;
1974 charset = Fget_charset (charset);
1975 cs = XCHARSET (charset);
1979 CHARSET_DECODING_TABLE(cs) = Qnil;
1982 else if (VECTORP (table))
1984 int ccs_len = CHARSET_BYTE_SIZE (cs);
1985 int ret = decoding_table_check_elements (table,
1986 CHARSET_DIMENSION (cs),
1991 signal_simple_error ("Too big table", table);
1993 signal_simple_error ("Invalid element is found", table);
1995 signal_simple_error ("Something wrong", table);
1997 CHARSET_DECODING_TABLE(cs) = Qnil;
2000 signal_error (Qwrong_type_argument,
2001 list2 (build_translated_string ("vector-or-nil-p"),
2004 byte_offset = CHARSET_BYTE_OFFSET (cs);
2005 switch (CHARSET_DIMENSION (cs))
2008 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2010 Lisp_Object c = XVECTOR_DATA(table)[i];
2013 Fput_char_attribute (c, XCHARSET_NAME (charset),
2014 make_int (i + byte_offset));
2018 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2020 Lisp_Object v = XVECTOR_DATA(table)[i];
2026 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2028 Lisp_Object c = XVECTOR_DATA(v)[j];
2032 (c, XCHARSET_NAME (charset),
2033 make_int ( ( (i + byte_offset) << 8 )
2039 Fput_char_attribute (v, XCHARSET_NAME (charset),
2040 make_int (i + byte_offset));
2049 /************************************************************************/
2050 /* Lisp primitives for working with characters */
2051 /************************************************************************/
2054 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2055 Make a character from CHARSET and code-point CODE.
2056 If DEFINED_ONLY is non-nil, builtin character is not returned.
2057 If corresponding character is not found, nil is returned.
2059 (charset, code, defined_only))
2063 charset = Fget_charset (charset);
2066 if (XCHARSET_GRAPHIC (charset) == 1)
2068 if (NILP (defined_only))
2069 c = DECODE_CHAR (charset, c);
2071 c = DECODE_DEFINED_CHAR (charset, c);
2072 return c >= 0 ? make_char (c) : Qnil;
2075 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2076 Make a builtin character from CHARSET and code-point CODE.
2082 charset = Fget_charset (charset);
2084 if (EQ (charset, Vcharset_latin_viscii))
2086 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2092 (ret = Fget_char_attribute (chr,
2093 Vcharset_latin_viscii_lower,
2096 charset = Vcharset_latin_viscii_lower;
2100 (ret = Fget_char_attribute (chr,
2101 Vcharset_latin_viscii_upper,
2104 charset = Vcharset_latin_viscii_upper;
2111 if (XCHARSET_GRAPHIC (charset) == 1)
2114 c = decode_builtin_char (charset, c);
2115 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2119 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2120 Make a character from CHARSET and octets ARG1 and ARG2.
2121 ARG2 is required only for characters from two-dimensional charsets.
2122 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2123 character s with caron.
2125 (charset, arg1, arg2))
2129 int lowlim, highlim;
2131 charset = Fget_charset (charset);
2132 cs = XCHARSET (charset);
2134 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2135 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2137 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2139 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2140 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2143 /* It is useful (and safe, according to Olivier Galibert) to strip
2144 the 8th bit off ARG1 and ARG2 because it allows programmers to
2145 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2146 Latin 2 code of the character. */
2154 if (a1 < lowlim || a1 > highlim)
2155 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2157 if (CHARSET_DIMENSION (cs) == 1)
2161 ("Charset is of dimension one; second octet must be nil", arg2);
2162 return make_char (MAKE_CHAR (charset, a1, 0));
2171 a2 = XINT (arg2) & 0x7f;
2173 if (a2 < lowlim || a2 > highlim)
2174 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2176 return make_char (MAKE_CHAR (charset, a1, a2));
2179 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2180 Return the character set of CHARACTER.
2184 CHECK_CHAR_COERCE_INT (character);
2186 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2189 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2190 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2191 N defaults to 0 if omitted.
2195 Lisp_Object charset;
2198 CHECK_CHAR_COERCE_INT (character);
2200 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2202 if (NILP (n) || EQ (n, Qzero))
2203 return make_int (octet0);
2204 else if (EQ (n, make_int (1)))
2205 return make_int (octet1);
2207 signal_simple_error ("Octet number must be 0 or 1", n);
2211 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2212 Return code-point of CHARACTER in specified CHARSET.
2214 (character, charset))
2218 CHECK_CHAR_COERCE_INT (character);
2219 charset = Fget_charset (charset);
2220 code_point = charset_code_point (charset, XCHAR (character));
2221 if (code_point >= 0)
2222 return make_int (code_point);
2228 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2229 Return list of charset and one or two position-codes of CHARACTER.
2233 /* This function can GC */
2234 struct gcpro gcpro1, gcpro2;
2235 Lisp_Object charset = Qnil;
2236 Lisp_Object rc = Qnil;
2244 GCPRO2 (charset, rc);
2245 CHECK_CHAR_COERCE_INT (character);
2248 code_point = ENCODE_CHAR (XCHAR (character), charset);
2249 dimension = XCHARSET_DIMENSION (charset);
2250 while (dimension > 0)
2252 rc = Fcons (make_int (code_point & 255), rc);
2256 rc = Fcons (XCHARSET_NAME (charset), rc);
2258 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2260 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2262 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2266 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2275 #ifdef ENABLE_COMPOSITE_CHARS
2276 /************************************************************************/
2277 /* composite character functions */
2278 /************************************************************************/
2281 lookup_composite_char (Bufbyte *str, int len)
2283 Lisp_Object lispstr = make_string (str, len);
2284 Lisp_Object ch = Fgethash (lispstr,
2285 Vcomposite_char_string2char_hash_table,
2291 if (composite_char_row_next >= 128)
2292 signal_simple_error ("No more composite chars available", lispstr);
2293 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2294 composite_char_col_next);
2295 Fputhash (make_char (emch), lispstr,
2296 Vcomposite_char_char2string_hash_table);
2297 Fputhash (lispstr, make_char (emch),
2298 Vcomposite_char_string2char_hash_table);
2299 composite_char_col_next++;
2300 if (composite_char_col_next >= 128)
2302 composite_char_col_next = 32;
2303 composite_char_row_next++;
2312 composite_char_string (Emchar ch)
2314 Lisp_Object str = Fgethash (make_char (ch),
2315 Vcomposite_char_char2string_hash_table,
2317 assert (!UNBOUNDP (str));
2321 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2322 Convert a string into a single composite character.
2323 The character is the result of overstriking all the characters in
2328 CHECK_STRING (string);
2329 return make_char (lookup_composite_char (XSTRING_DATA (string),
2330 XSTRING_LENGTH (string)));
2333 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2334 Return a string of the characters comprising a composite character.
2342 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2343 signal_simple_error ("Must be composite char", ch);
2344 return composite_char_string (emch);
2346 #endif /* ENABLE_COMPOSITE_CHARS */
2349 /************************************************************************/
2350 /* initialization */
2351 /************************************************************************/
2354 syms_of_mule_charset (void)
2356 INIT_LRECORD_IMPLEMENTATION (charset);
2358 DEFSUBR (Fcharsetp);
2359 DEFSUBR (Ffind_charset);
2360 DEFSUBR (Fget_charset);
2361 DEFSUBR (Fcharset_list);
2362 DEFSUBR (Fcharset_name);
2363 DEFSUBR (Fmake_charset);
2364 DEFSUBR (Fmake_reverse_direction_charset);
2365 /* DEFSUBR (Freverse_direction_charset); */
2366 DEFSUBR (Fdefine_charset_alias);
2367 DEFSUBR (Fcharset_from_attributes);
2368 DEFSUBR (Fcharset_short_name);
2369 DEFSUBR (Fcharset_long_name);
2370 DEFSUBR (Fcharset_description);
2371 DEFSUBR (Fcharset_dimension);
2372 DEFSUBR (Fcharset_property);
2373 DEFSUBR (Fcharset_id);
2374 DEFSUBR (Fset_charset_ccl_program);
2375 DEFSUBR (Fset_charset_registry);
2377 DEFSUBR (Fcharset_mapping_table);
2378 DEFSUBR (Fset_charset_mapping_table);
2382 DEFSUBR (Fdecode_char);
2383 DEFSUBR (Fdecode_builtin_char);
2384 DEFSUBR (Fencode_char);
2386 DEFSUBR (Fmake_char);
2387 DEFSUBR (Fchar_charset);
2388 DEFSUBR (Fchar_octet);
2389 DEFSUBR (Fsplit_char);
2391 #ifdef ENABLE_COMPOSITE_CHARS
2392 DEFSUBR (Fmake_composite_char);
2393 DEFSUBR (Fcomposite_char_string);
2396 defsymbol (&Qcharsetp, "charsetp");
2397 defsymbol (&Qregistry, "registry");
2398 defsymbol (&Qfinal, "final");
2399 defsymbol (&Qgraphic, "graphic");
2400 defsymbol (&Qdirection, "direction");
2401 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2402 defsymbol (&Qshort_name, "short-name");
2403 defsymbol (&Qlong_name, "long-name");
2405 defsymbol (&Qmother, "mother");
2406 defsymbol (&Qmin_code, "min-code");
2407 defsymbol (&Qmax_code, "max-code");
2408 defsymbol (&Qcode_offset, "code-offset");
2409 defsymbol (&Qconversion, "conversion");
2410 defsymbol (&Q94x60, "94x60");
2411 defsymbol (&Q94x94x60, "94x94x60");
2414 defsymbol (&Ql2r, "l2r");
2415 defsymbol (&Qr2l, "r2l");
2417 /* Charsets, compatible with FSF 20.3
2418 Naming convention is Script-Charset[-Edition] */
2419 defsymbol (&Qascii, "ascii");
2420 defsymbol (&Qcontrol_1, "control-1");
2421 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2422 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2423 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2424 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2425 defsymbol (&Qthai_tis620, "thai-tis620");
2426 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2427 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2428 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2429 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2430 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2431 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2432 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2433 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2434 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2435 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2436 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2437 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2438 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2439 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2440 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2441 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2443 defsymbol (&Qucs, "ucs");
2444 defsymbol (&Qucs_bmp, "ucs-bmp");
2445 defsymbol (&Qucs_smp, "ucs-smp");
2446 defsymbol (&Qucs_sip, "ucs-sip");
2447 defsymbol (&Qucs_cns, "ucs-cns");
2448 defsymbol (&Qucs_jis, "ucs-jis");
2449 defsymbol (&Qucs_ks, "ucs-ks");
2450 defsymbol (&Qucs_big5, "ucs-big5");
2451 defsymbol (&Qlatin_viscii, "latin-viscii");
2452 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2453 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2454 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2455 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2456 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2457 defsymbol (&Qideograph_gt, "ideograph-gt");
2458 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2459 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2460 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2461 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2462 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2463 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2464 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2465 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2466 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2467 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2468 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2469 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2470 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2471 defsymbol (&Qchinese_big5, "chinese-big5");
2472 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2473 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2474 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2475 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2476 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2477 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2478 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2479 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2480 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2481 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2482 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2483 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2484 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2485 defsymbol (&Qchina3_jef, "china3-jef");
2486 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2487 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2489 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2490 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2492 defsymbol (&Qcomposite, "composite");
2496 vars_of_mule_charset (void)
2503 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2504 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2506 /* Table of charsets indexed by leading byte. */
2507 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2508 chlook->charset_by_leading_byte[i] = Qnil;
2511 /* Table of charsets indexed by type/final-byte. */
2512 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2513 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2514 chlook->charset_by_attributes[i][j] = Qnil;
2516 /* Table of charsets indexed by type/final-byte/direction. */
2517 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2518 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2519 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2520 chlook->charset_by_attributes[i][j][k] = Qnil;
2524 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2526 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2527 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2531 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2532 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2533 Leading-code of private TYPE9N charset of column-width 1.
2535 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2539 Vdefault_coded_charset_priority_list = Qnil;
2540 DEFVAR_LISP ("default-coded-charset-priority-list",
2541 &Vdefault_coded_charset_priority_list /*
2542 Default order of preferred coded-character-sets.
2548 complex_vars_of_mule_charset (void)
2550 staticpro (&Vcharset_hash_table);
2551 Vcharset_hash_table =
2552 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2554 /* Predefined character sets. We store them into variables for
2558 staticpro (&Vcharset_ucs);
2560 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2561 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2562 build_string ("UCS"),
2563 build_string ("UCS"),
2564 build_string ("ISO/IEC 10646"),
2566 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2567 staticpro (&Vcharset_ucs_bmp);
2569 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2570 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2571 build_string ("BMP"),
2572 build_string ("UCS-BMP"),
2573 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2575 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2576 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2577 staticpro (&Vcharset_ucs_smp);
2579 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2580 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2581 build_string ("SMP"),
2582 build_string ("UCS-SMP"),
2583 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2584 build_string ("UCS00-1"),
2585 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2586 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2587 staticpro (&Vcharset_ucs_sip);
2589 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2590 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2591 build_string ("SIP"),
2592 build_string ("UCS-SIP"),
2593 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2594 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2595 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2596 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2597 staticpro (&Vcharset_ucs_cns);
2599 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2600 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2601 build_string ("UCS for CNS"),
2602 build_string ("UCS for CNS 11643"),
2603 build_string ("ISO/IEC 10646 for CNS 11643"),
2606 Qnil, CONVERSION_IDENTICAL);
2607 staticpro (&Vcharset_ucs_jis);
2609 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2610 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2611 build_string ("UCS for JIS"),
2612 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2613 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2615 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2616 staticpro (&Vcharset_ucs_ks);
2618 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2619 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2620 build_string ("UCS for KS"),
2621 build_string ("UCS for CCS defined by KS"),
2622 build_string ("ISO/IEC 10646 for Korean Standards"),
2624 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2625 staticpro (&Vcharset_ucs_big5);
2627 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2628 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2629 build_string ("UCS for Big5"),
2630 build_string ("UCS for Big5"),
2631 build_string ("ISO/IEC 10646 for Big5"),
2633 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2635 # define MIN_CHAR_THAI 0
2636 # define MAX_CHAR_THAI 0
2637 /* # define MIN_CHAR_HEBREW 0 */
2638 /* # define MAX_CHAR_HEBREW 0 */
2639 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2640 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2642 staticpro (&Vcharset_ascii);
2644 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2645 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2646 build_string ("ASCII"),
2647 build_string ("ASCII)"),
2648 build_string ("ASCII (ISO646 IRV)"),
2649 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2650 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2651 staticpro (&Vcharset_control_1);
2652 Vcharset_control_1 =
2653 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2654 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2655 build_string ("C1"),
2656 build_string ("Control characters"),
2657 build_string ("Control characters 128-191"),
2659 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2660 staticpro (&Vcharset_latin_iso8859_1);
2661 Vcharset_latin_iso8859_1 =
2662 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2663 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2664 build_string ("Latin-1"),
2665 build_string ("ISO8859-1 (Latin-1)"),
2666 build_string ("ISO8859-1 (Latin-1)"),
2667 build_string ("iso8859-1"),
2668 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2669 staticpro (&Vcharset_latin_iso8859_2);
2670 Vcharset_latin_iso8859_2 =
2671 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2672 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2673 build_string ("Latin-2"),
2674 build_string ("ISO8859-2 (Latin-2)"),
2675 build_string ("ISO8859-2 (Latin-2)"),
2676 build_string ("iso8859-2"),
2677 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2678 staticpro (&Vcharset_latin_iso8859_3);
2679 Vcharset_latin_iso8859_3 =
2680 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2681 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2682 build_string ("Latin-3"),
2683 build_string ("ISO8859-3 (Latin-3)"),
2684 build_string ("ISO8859-3 (Latin-3)"),
2685 build_string ("iso8859-3"),
2686 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2687 staticpro (&Vcharset_latin_iso8859_4);
2688 Vcharset_latin_iso8859_4 =
2689 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2690 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2691 build_string ("Latin-4"),
2692 build_string ("ISO8859-4 (Latin-4)"),
2693 build_string ("ISO8859-4 (Latin-4)"),
2694 build_string ("iso8859-4"),
2695 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2696 staticpro (&Vcharset_thai_tis620);
2697 Vcharset_thai_tis620 =
2698 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2699 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2700 build_string ("TIS620"),
2701 build_string ("TIS620 (Thai)"),
2702 build_string ("TIS620.2529 (Thai)"),
2703 build_string ("tis620"),
2704 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2705 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2706 staticpro (&Vcharset_greek_iso8859_7);
2707 Vcharset_greek_iso8859_7 =
2708 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2709 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2710 build_string ("ISO8859-7"),
2711 build_string ("ISO8859-7 (Greek)"),
2712 build_string ("ISO8859-7 (Greek)"),
2713 build_string ("iso8859-7"),
2714 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2715 staticpro (&Vcharset_arabic_iso8859_6);
2716 Vcharset_arabic_iso8859_6 =
2717 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2718 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2719 build_string ("ISO8859-6"),
2720 build_string ("ISO8859-6 (Arabic)"),
2721 build_string ("ISO8859-6 (Arabic)"),
2722 build_string ("iso8859-6"),
2723 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2724 staticpro (&Vcharset_hebrew_iso8859_8);
2725 Vcharset_hebrew_iso8859_8 =
2726 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2727 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2728 build_string ("ISO8859-8"),
2729 build_string ("ISO8859-8 (Hebrew)"),
2730 build_string ("ISO8859-8 (Hebrew)"),
2731 build_string ("iso8859-8"),
2733 0 /* MIN_CHAR_HEBREW */,
2734 0 /* MAX_CHAR_HEBREW */, 0, 32,
2735 Qnil, CONVERSION_IDENTICAL);
2736 staticpro (&Vcharset_katakana_jisx0201);
2737 Vcharset_katakana_jisx0201 =
2738 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2739 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("JISX0201 Kana"),
2741 build_string ("JISX0201.1976 (Japanese Kana)"),
2742 build_string ("JISX0201.1976 Japanese Kana"),
2743 build_string ("jisx0201\\.1976"),
2744 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2745 staticpro (&Vcharset_latin_jisx0201);
2746 Vcharset_latin_jisx0201 =
2747 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2748 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2749 build_string ("JISX0201 Roman"),
2750 build_string ("JISX0201.1976 (Japanese Roman)"),
2751 build_string ("JISX0201.1976 Japanese Roman"),
2752 build_string ("jisx0201\\.1976"),
2753 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2754 staticpro (&Vcharset_cyrillic_iso8859_5);
2755 Vcharset_cyrillic_iso8859_5 =
2756 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2757 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2758 build_string ("ISO8859-5"),
2759 build_string ("ISO8859-5 (Cyrillic)"),
2760 build_string ("ISO8859-5 (Cyrillic)"),
2761 build_string ("iso8859-5"),
2762 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2763 staticpro (&Vcharset_latin_iso8859_9);
2764 Vcharset_latin_iso8859_9 =
2765 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2766 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2767 build_string ("Latin-5"),
2768 build_string ("ISO8859-9 (Latin-5)"),
2769 build_string ("ISO8859-9 (Latin-5)"),
2770 build_string ("iso8859-9"),
2771 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2772 staticpro (&Vcharset_japanese_jisx0208_1978);
2773 Vcharset_japanese_jisx0208_1978 =
2774 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2775 Qjapanese_jisx0208_1978, 94, 2,
2776 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2777 build_string ("JIS X0208:1978"),
2778 build_string ("JIS X0208:1978 (Japanese)"),
2780 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2781 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2782 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2783 staticpro (&Vcharset_chinese_gb2312);
2784 Vcharset_chinese_gb2312 =
2785 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2786 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2787 build_string ("GB2312"),
2788 build_string ("GB2312)"),
2789 build_string ("GB2312 Chinese simplified"),
2790 build_string ("gb2312"),
2791 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2792 staticpro (&Vcharset_chinese_gb12345);
2793 Vcharset_chinese_gb12345 =
2794 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2795 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2796 build_string ("G1"),
2797 build_string ("GB 12345)"),
2798 build_string ("GB 12345-1990"),
2799 build_string ("GB12345\\(\\.1990\\)?-0"),
2800 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2801 staticpro (&Vcharset_japanese_jisx0208);
2802 Vcharset_japanese_jisx0208 =
2803 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2804 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("JISX0208"),
2806 build_string ("JIS X0208:1983 (Japanese)"),
2807 build_string ("JIS X0208:1983 Japanese Kanji"),
2808 build_string ("jisx0208\\.1983"),
2809 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2811 staticpro (&Vcharset_japanese_jisx0208_1990);
2812 Vcharset_japanese_jisx0208_1990 =
2813 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2814 Qjapanese_jisx0208_1990, 94, 2,
2815 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2816 build_string ("JISX0208-1990"),
2817 build_string ("JIS X0208:1990 (Japanese)"),
2818 build_string ("JIS X0208:1990 Japanese Kanji"),
2819 build_string ("jisx0208\\.1990"),
2821 MIN_CHAR_JIS_X0208_1990,
2822 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2823 Qnil, CONVERSION_IDENTICAL);
2825 staticpro (&Vcharset_korean_ksc5601);
2826 Vcharset_korean_ksc5601 =
2827 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2828 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2829 build_string ("KSC5601"),
2830 build_string ("KSC5601 (Korean"),
2831 build_string ("KSC5601 Korean Hangul and Hanja"),
2832 build_string ("ksc5601"),
2833 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2834 staticpro (&Vcharset_japanese_jisx0212);
2835 Vcharset_japanese_jisx0212 =
2836 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2837 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2838 build_string ("JISX0212"),
2839 build_string ("JISX0212 (Japanese)"),
2840 build_string ("JISX0212 Japanese Supplement"),
2841 build_string ("jisx0212"),
2842 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2844 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2845 staticpro (&Vcharset_chinese_cns11643_1);
2846 Vcharset_chinese_cns11643_1 =
2847 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2848 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2849 build_string ("CNS11643-1"),
2850 build_string ("CNS11643-1 (Chinese traditional)"),
2852 ("CNS 11643 Plane 1 Chinese traditional"),
2853 build_string (CHINESE_CNS_PLANE_RE("1")),
2854 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2855 staticpro (&Vcharset_chinese_cns11643_2);
2856 Vcharset_chinese_cns11643_2 =
2857 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2858 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2859 build_string ("CNS11643-2"),
2860 build_string ("CNS11643-2 (Chinese traditional)"),
2862 ("CNS 11643 Plane 2 Chinese traditional"),
2863 build_string (CHINESE_CNS_PLANE_RE("2")),
2864 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2866 staticpro (&Vcharset_latin_tcvn5712);
2867 Vcharset_latin_tcvn5712 =
2868 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2869 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2870 build_string ("TCVN 5712"),
2871 build_string ("TCVN 5712 (VSCII-2)"),
2872 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2873 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2874 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2875 staticpro (&Vcharset_latin_viscii_lower);
2876 Vcharset_latin_viscii_lower =
2877 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2878 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2879 build_string ("VISCII lower"),
2880 build_string ("VISCII lower (Vietnamese)"),
2881 build_string ("VISCII lower (Vietnamese)"),
2882 build_string ("MULEVISCII-LOWER"),
2883 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2884 staticpro (&Vcharset_latin_viscii_upper);
2885 Vcharset_latin_viscii_upper =
2886 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2887 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2888 build_string ("VISCII upper"),
2889 build_string ("VISCII upper (Vietnamese)"),
2890 build_string ("VISCII upper (Vietnamese)"),
2891 build_string ("MULEVISCII-UPPER"),
2892 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2893 staticpro (&Vcharset_latin_viscii);
2894 Vcharset_latin_viscii =
2895 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2896 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2897 build_string ("VISCII"),
2898 build_string ("VISCII 1.1 (Vietnamese)"),
2899 build_string ("VISCII 1.1 (Vietnamese)"),
2900 build_string ("VISCII1\\.1"),
2901 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2902 staticpro (&Vcharset_chinese_big5);
2903 Vcharset_chinese_big5 =
2904 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2905 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2906 build_string ("Big5"),
2907 build_string ("Big5"),
2908 build_string ("Big5 Chinese traditional"),
2909 build_string ("big5"),
2911 0 /* MIN_CHAR_BIG5_CDP */,
2912 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2913 Qnil, CONVERSION_IDENTICAL);
2914 staticpro (&Vcharset_chinese_big5_cdp);
2915 Vcharset_chinese_big5_cdp =
2916 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2917 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2918 build_string ("Big5-CDP"),
2919 build_string ("Big5 + CDP extension"),
2920 build_string ("Big5 with CDP extension"),
2921 build_string ("big5\\.cdp-0"),
2922 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2923 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2924 #define DEF_HANZIKU(n) \
2925 staticpro (&Vcharset_ideograph_hanziku_##n); \
2926 Vcharset_ideograph_hanziku_##n = \
2927 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2928 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2929 build_string ("HZK-"#n), \
2930 build_string ("HANZIKU-"#n), \
2931 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2933 ("hanziku-"#n"$"), \
2934 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2935 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2948 staticpro (&Vcharset_china3_jef);
2949 Vcharset_china3_jef =
2950 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2951 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2952 build_string ("JC3"),
2953 build_string ("JEF + CHINA3"),
2954 build_string ("JEF + CHINA3 private characters"),
2955 build_string ("china3jef-0"),
2956 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2957 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2958 staticpro (&Vcharset_ideograph_cbeta);
2959 Vcharset_ideograph_cbeta =
2960 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2961 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2962 build_string ("CB"),
2963 build_string ("CBETA"),
2964 build_string ("CBETA private characters"),
2965 build_string ("cbeta-0"),
2966 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2967 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2968 staticpro (&Vcharset_ideograph_gt);
2969 Vcharset_ideograph_gt =
2970 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2971 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2972 build_string ("GT"),
2973 build_string ("GT"),
2974 build_string ("GT"),
2976 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2977 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
2978 #define DEF_GT_PJ(n) \
2979 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2980 Vcharset_ideograph_gt_pj_##n = \
2981 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2982 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2983 build_string ("GT-PJ-"#n), \
2984 build_string ("GT (pseudo JIS encoding) part "#n), \
2985 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2987 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2988 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3001 staticpro (&Vcharset_ideograph_daikanwa_2);
3002 Vcharset_ideograph_daikanwa_2 =
3003 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3004 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3005 build_string ("Daikanwa Rev."),
3006 build_string ("Morohashi's Daikanwa Rev."),
3008 ("Daikanwa dictionary (revised version)"),
3009 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3010 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3011 staticpro (&Vcharset_ideograph_daikanwa);
3012 Vcharset_ideograph_daikanwa =
3013 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3014 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3015 build_string ("Daikanwa"),
3016 build_string ("Morohashi's Daikanwa Rev.2"),
3018 ("Daikanwa dictionary (second revised version)"),
3019 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3020 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3021 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3023 staticpro (&Vcharset_ethiopic_ucs);
3024 Vcharset_ethiopic_ucs =
3025 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3026 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3027 build_string ("Ethiopic (UCS)"),
3028 build_string ("Ethiopic (UCS)"),
3029 build_string ("Ethiopic of UCS"),
3030 build_string ("Ethiopic-Unicode"),
3031 Qnil, 0x1200, 0x137F, 0, 0,
3032 Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_chinese_big5_1);
3035 Vcharset_chinese_big5_1 =
3036 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3037 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("Big5"),
3039 build_string ("Big5 (Level-1)"),
3041 ("Big5 Level-1 Chinese traditional"),
3042 build_string ("big5"),
3043 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3044 staticpro (&Vcharset_chinese_big5_2);
3045 Vcharset_chinese_big5_2 =
3046 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3047 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3048 build_string ("Big5"),
3049 build_string ("Big5 (Level-2)"),
3051 ("Big5 Level-2 Chinese traditional"),
3052 build_string ("big5"),
3053 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3055 #ifdef ENABLE_COMPOSITE_CHARS
3056 /* #### For simplicity, we put composite chars into a 96x96 charset.
3057 This is going to lead to problems because you can run out of
3058 room, esp. as we don't yet recycle numbers. */
3059 staticpro (&Vcharset_composite);
3060 Vcharset_composite =
3061 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3062 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Composite"),
3064 build_string ("Composite characters"),
3065 build_string ("Composite characters"),
3068 /* #### not dumped properly */
3069 composite_char_row_next = 32;
3070 composite_char_col_next = 32;
3072 Vcomposite_char_string2char_hash_table =
3073 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3074 Vcomposite_char_char2string_hash_table =
3075 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3076 staticpro (&Vcomposite_char_string2char_hash_table);
3077 staticpro (&Vcomposite_char_char2string_hash_table);
3078 #endif /* ENABLE_COMPOSITE_CHARS */