1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
/* Global Lisp_Object handles for the built-in charsets, one variable per
   charset.  Only the declarations appear here; the code that assigns them
   is not part of this extract. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_ucs_big5;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the other way. */
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at a struct charset_lookup.  The two records that follow
   describe that struct: the first lists its Lisp-object array member
   charset_by_leading_byte, the second gives the struct's size and refers
   back to the first.
   NOTE(review): both initializers are only partly visible in this extract
   (their remaining fields and closing braces are on missing lines). */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table giving, for each possible first byte (0x00 - 0x9f), the
   number of bytes occupied by the string representation of a character
   that starts with that byte.
   NOTE(review): the array is declared with 0xA0 (160) entries, but eleven
   rows of sixteen values are visible below.  The two rows that follow the
   "0x80 - 0x8f" comment are presumably alternatives selected by a
   preprocessor conditional that is not visible in this extract, and the
   closing brace of the initializer is likewise not shown -- confirm
   against the full file. */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check a (possibly nested) decoding-table vector V for a charset of
   dimension DIM, with CCS_LEN as the maximum permitted vector length.
   Visible checks: V is compared against CCS_LEN; every element of V is
   fetched in turn; elements that are neither nil nor a character are
   tested for; and a recursive call re-checks an element with DIM - 1.
   NOTE(review): the return statements and the declaration of the loop
   index are on lines missing from this extract, so the result codes
   cannot be stated here. */
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Associate CHARACTER with the code point VALUE in the coded charset CCS.
   From the visible lines:
   - VALUE may be a list of bytes (marked obsolete below) or an integer;
     other values signal "Invalid value for coded-charset".
   - A byte list is folded into a single integer, eight bits per byte, and
     VALUE is replaced by that integer.
   - For an integer VALUE there is a test on the charset's graphic
     property followed by a mask with 0x7F7F7F7F, after which VALUE is
     rebuilt from the masked code point.
   - The code point currently recorded for CHARACTER is looked up with
     Fget_char_attribute and removed from the decoding table, and then
     CODE_POINT is mapped to CHARACTER with decoding_table_put_char.
   NOTE(review): several conditions, braces and the return statement are
   on lines missing from this extract; the nesting described above is
   inferred from statement order and should be confirmed. */
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J below the bytes accumulated so far. */
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the top bit of each of the four octets. */
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS.
   When the charset's decoding table is a vector, the code point currently
   recorded for CHARACTER is looked up with Fget_char_attribute and passed
   to decoding_table_remove_char.  When the encoding table is a char
   table, CHARACTER's entry in it is set to Qunbound.
   NOTE(review): the guard around the decoding_table_remove_char call and
   the return statement are on lines missing from this extract. */
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
/* File-level state: an int named leading_code_private_11 (its use is not
   visible in this extract), Lisp symbol variables (Q...), and
   Vcharset_hash_table, the table in which charset objects are stored
   under their names.
   NOTE(review): the long list of charset-name symbols in the middle is
   only partly visible; its opening "Lisp_Object" keyword and its
   terminating semicolon are on missing lines. */
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
369 Qideograph_daikanwa_2,
389 Lisp_Object Ql2r, Qr2l;
391 Lisp_Object Vcharset_hash_table;
393 /* Composite characters are characters constructed by overstriking two
394 or more regular characters.
396 1) The old Mule implementation involves storing composite characters
397 in a buffer as a tag followed by all of the actual characters
398 used to make up the composite character. I think this is a bad
399 idea; it greatly complicates code that wants to handle strings
400 one character at a time because it has to deal with the possibility
401 of great big ungainly characters. It's much more reasonable to
402 simply store an index into a table of composite characters.
404 2) The current implementation only allows for 16,384 separate
405 composite characters over the lifetime of the XEmacs process.
406 This could become a potential problem if the user
407 edited lots of different files that use composite characters.
408 Due to FSF bogosity, increasing the number of allowable
409 composite characters under Mule would decrease the number
410 of possible faces that can exist. Mule already has shrunk
411 this to 2048, and further shrinkage would become uncomfortable.
412 No such problems exist in XEmacs.
414 Composite characters could be represented as 0x80 C1 C2 C3,
415 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
416 for slightly under 2^20 (one million) composite characters
417 over the XEmacs process lifetime, and you only need to
418 increase the size of a Mule character from 19 to 21 bits.
419 Or you could use 0x80 C1 C2 C3 C4, allowing for about
420 85 million (slightly over 2^26) composite characters. */
423 /************************************************************************/
424 /* Basic Emchar functions */
425 /************************************************************************/
427 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
428 string in STR. Returns the number of bytes stored.
429 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Store the encoded form of the non-ASCII character C at STR.
   Two encodings are visible below:
   - a UTF-8-style multi-byte layout, in which the first byte carries a
     length marker (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc) and each following
     byte carries six payload bits OR-ed with 0x80;
   - a leading-byte layout, in which C is split with BREAKUP_CHAR and a
     prefix byte is emitted first for private leading bytes.
   NOTE(review): the preprocessor conditional that chooses between the two
   layouts, the first branch of the size chain, and the return statement
   are on lines missing from this extract. */
433 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: up to 11 payload bits. */
448 else if ( c <= 0x7ff )
450 *p++ = (c >> 6) | 0xc0;
451 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: up to 16 payload bits. */
453 else if ( c <= 0xffff )
455 *p++ = (c >> 12) | 0xe0;
456 *p++ = ((c >> 6) & 0x3f) | 0x80;
457 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: up to 21 payload bits. */
459 else if ( c <= 0x1fffff )
461 *p++ = (c >> 18) | 0xf0;
462 *p++ = ((c >> 12) & 0x3f) | 0x80;
463 *p++ = ((c >> 6) & 0x3f) | 0x80;
464 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: up to 26 payload bits. */
466 else if ( c <= 0x3ffffff )
468 *p++ = (c >> 24) | 0xf8;
469 *p++ = ((c >> 18) & 0x3f) | 0x80;
470 *p++ = ((c >> 12) & 0x3f) | 0x80;
471 *p++ = ((c >> 6) & 0x3f) | 0x80;
472 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
476 *p++ = (c >> 30) | 0xfc;
477 *p++ = ((c >> 24) & 0x3f) | 0x80;
478 *p++ = ((c >> 18) & 0x3f) | 0x80;
479 *p++ = ((c >> 12) & 0x3f) | 0x80;
480 *p++ = ((c >> 6) & 0x3f) | 0x80;
481 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte layout starts here. */
484 BREAKUP_CHAR (c, charset, c1, c2);
485 lb = CHAR_LEADING_BYTE (c);
486 if (LEADING_BYTE_PRIVATE_P (lb))
487 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
489 if (EQ (charset, Vcharset_control_1))
498 /* Return the first character from a Mule-encoded string in STR,
499 assuming it's non-ASCII. Do not call this directly.
500 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.
   Two decoders are visible below:
   - one classifies the first byte B against the thresholds 0xf8, 0xf0,
     0xe0 and 0xc0 and then runs a loop, LEN times, that shifts the
     accumulated value left by six bits and adds the low six bits of a
     byte;
   - the other reads a leading byte, returns the next byte minus 0x20 for
     LEADING_BYTE_CONTROL_1, tests for a prefix byte, looks the charset up
     by leading byte, and builds the result with MAKE_CHAR.
   NOTE(review): the conditional selecting between the two decoders, the
   bodies of the threshold branches, and the statements that read I1 and
   I2 are on lines missing from this extract. */
503 non_ascii_charptr_emchar (const Bufbyte *str)
516 else if ( b >= 0xf8 )
521 else if ( b >= 0xf0 )
526 else if ( b >= 0xe0 )
531 else if ( b >= 0xc0 )
541 for( ; len > 0; len-- )
544 ch = ( ch << 6 ) | ( b & 0x3f );
548 Bufbyte i0 = *str, i1, i2 = 0;
551 if (i0 == LEADING_BYTE_CONTROL_1)
552 return (Emchar) (*++str - 0x20);
554 if (LEADING_BYTE_PREFIX_P (i0))
559 charset = CHARSET_BY_LEADING_BYTE (i0);
560 if (XCHARSET_DIMENSION (charset) == 2)
563 return MAKE_CHAR (charset, i1, i2);
567 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
568 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for the non-ASCII character CH.
   CH is split into three fields with CHAR_FIELD1, CHAR_FIELD2 and
   CHAR_FIELD3.  The visible checks fall into two groups:
   - one group range-checks F2 against the official and private FIELD2
     limits, looks at whether F3 is 0x20 or 0x7F, and otherwise looks the
     charset up by leading byte and returns whether it has 96 characters;
   - the other group range-checks F1 against the FIELD1 limits, tests
     whether F2 or F3 is below 0x20, optionally consults the composite
     character hash table, and ends with the same charset lookup and
     "96 characters" result.
   NOTE(review): the condition that selects between the two groups and
   all the early return statements are on lines missing from this
   extract.  The three lines starting at "NOTE:" belong to a comment whose
   delimiters are also missing. */
572 non_ascii_valid_char_p (Emchar ch)
576 /* Must have only lowest 19 bits set */
580 f1 = CHAR_FIELD1 (ch);
581 f2 = CHAR_FIELD2 (ch);
582 f3 = CHAR_FIELD3 (ch);
588 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
589 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
590 f2 > MAX_CHAR_FIELD2_PRIVATE)
595 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
596 f2 <= MAX_CHAR_FIELD2_PRIVATE))
600 NOTE: This takes advantage of the fact that
601 FIELD2_TO_OFFICIAL_LEADING_BYTE and
602 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
604 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
605 if (EQ (charset, Qnil))
607 return (XCHARSET_CHARS (charset) == 96);
613 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
614 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
615 f1 > MAX_CHAR_FIELD1_PRIVATE)
617 if (f2 < 0x20 || f3 < 0x20)
620 #ifdef ENABLE_COMPOSITE_CHARS
621 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
623 if (UNBOUNDP (Fgethash (make_int (ch),
624 Vcomposite_char_char2string_hash_table,
629 #endif /* ENABLE_COMPOSITE_CHARS */
631 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
632 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
635 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
637 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
640 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
642 if (EQ (charset, Qnil))
644 return (XCHARSET_CHARS (charset) == 96);
650 /************************************************************************/
651 /* Basic string functions */
652 /************************************************************************/
654 /* Copy the character pointed to by SRC into DST. Do not call this
655 directly. Use the macro charptr_copy_char() instead.
656 Return the number of bytes copied. */
/* Copy one encoded character from SRC to DST.  The byte count is taken
   from REP_BYTES_BY_FIRST_BYTE applied to the first source byte, and the
   loop runs that many times, advancing both pointers on each pass.
   NOTE(review): the loop body, the declaration of the loop counter and
   the return statement are on lines missing from this extract. */
659 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
661 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
663 for (i = bytes; i; i--, dst++, src++)
669 /************************************************************************/
670 /* streams of Emchars */
671 /************************************************************************/
673 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
674 The functions below are not meant to be called directly; use
675 the macros in insdel.h. */
/* Finish reading one character from STREAM, given that its first byte CH
   has already been read.  CH is stored at the start of a local buffer,
   the remaining REP_BYTES_BY_FIRST_BYTE (CH) - 1 bytes are read with
   Lstream_getc and appended, and the buffer is decoded with
   charptr_emchar.  Each byte read is asserted to be non-negative. */
678 Lstream_get_emchar_1 (Lstream *stream, int ch)
680 Bufbyte str[MAX_EMCHAR_LEN];
681 Bufbyte *strptr = str;
684 str[0] = (Bufbyte) ch;
686 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
688 int c = Lstream_getc (stream);
689 bufpos_checking_assert (c >= 0);
690 *++strptr = (Bufbyte) c;
692 return charptr_emchar (str);
/* Write the character CH to STREAM: CH is encoded into a local buffer
   with set_charptr_emchar and the resulting bytes are passed to
   Lstream_write, whose result is returned. */
696 Lstream_fput_emchar (Lstream *stream, Emchar ch)
698 Bufbyte str[MAX_EMCHAR_LEN];
699 Bytecount len = set_charptr_emchar (str, ch);
700 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: CH is encoded into a local
   buffer with set_charptr_emchar and the resulting bytes are handed to
   Lstream_unread. */
704 Lstream_funget_emchar (Lstream *stream, Emchar ch)
706 Bufbyte str[MAX_EMCHAR_LEN];
707 Bytecount len = set_charptr_emchar (str, ch);
708 Lstream_unread (stream, str, len);
712 /************************************************************************/
714 /************************************************************************/
/* Mark method for charset objects: calls mark_object on the Lisp-object
   slots of the charset held in OBJ.  The slots marked in the visible
   lines are short_name, long_name, doc_string, registry, ccl_program,
   decoding_table and mother.
   NOTE(review): some lines of this function, including its return
   statement, are missing from this extract, so further slots may be
   handled there. */
717 mark_charset (Lisp_Object obj)
719 Lisp_Charset *cs = XCHARSET (obj);
721 mark_object (cs->short_name);
722 mark_object (cs->long_name);
723 mark_object (cs->doc_string);
724 mark_object (cs->registry);
725 mark_object (cs->ccl_program);
727 mark_object (cs->decoding_table);
728 mark_object (cs->mother);
/* Print method for charset objects.  Writes to PRINTCHARFUN, in order:
   the text "#<charset ", the charset name, its short name, long name and
   doc string, a formatted summary (size and dimension, direction, column
   count, graphic and final byte), the registry, and finally the object
   uid in hexadecimal followed by ">".
   An error whose message starts "printing unreadable object" is also
   raised from this function; the condition guarding it is on a line
   missing from this extract.
   NOTE(review): buf is filled with sprintf, which does no bounds
   checking, and its declaration is not visible here -- confirm that it
   is large enough for the formatted text. */
734 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
736 Lisp_Charset *cs = XCHARSET (obj);
740 error ("printing unreadable object #<charset %s 0x%x>",
741 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
744 write_c_string ("#<charset ", printcharfun);
745 print_internal (CHARSET_NAME (cs), printcharfun, 0);
746 write_c_string (" ", printcharfun);
747 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
748 write_c_string (" ", printcharfun);
749 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
750 write_c_string (" ", printcharfun);
751 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
752 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
754 CHARSET_DIMENSION (cs),
755 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
756 CHARSET_COLUMNS (cs),
757 CHARSET_GRAPHIC (cs),
759 write_c_string (buf, printcharfun);
760 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
761 sprintf (buf, " 0x%x>", cs->header.uid);
762 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp-object member of the struct, located with offsetof.
   NOTE(review): the terminating entry and closing brace of the
   initializer are on lines missing from this extract. */
765 static const struct lrecord_description charset_description[] = {
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Register the "charset" record type, naming mark_charset and
   print_charset as two of its methods; the three arguments after them
   are 0.
   NOTE(review): the final arguments of the macro call are on lines
   missing from this extract. */
780 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
781 mark_charset, print_charset, 0, 0, 0,
785 /* Make a new charset. */
786 /* #### SJT Should generic properties be allowed? */
/* Allocate a new charset object and fill in its slots from the
   arguments.
   Visible behaviour:
   - Most slots are copied straight from the like-named argument; the
     CCL program and the reverse-direction charset start out as Qnil.
   - The decoding-table slot is set to Qunbound.  NOTE(review): the
     DECODING_TABLE argument is not used in any visible line -- confirm
     whether that is intended.
   - The representation byte count is 1 for LEADING_BYTE_ASCII; otherwise
     it is the dimension plus 1 or plus 2 (the conditions choosing between
     those are on missing lines).
   - The text further down registers the object in the lookup tables of
     chlook (by attributes and by leading byte) and finally stores it in
     Vcharset_hash_table under NAME.
   NOTE(review): in this extract the comment that begins "some charsets do
   not have final characters" has no visible terminator, and the return
   type, the REG parameter line, several conditionals and the return
   statement are also missing.  Compare with the full file before relying
   on the lower half of this function. */
788 make_charset (Charset_ID id, Lisp_Object name,
789 unsigned short chars, unsigned char dimension,
790 unsigned char columns, unsigned char graphic,
791 Bufbyte final, unsigned char direction, Lisp_Object short_name,
792 Lisp_Object long_name, Lisp_Object doc,
794 Lisp_Object decoding_table,
795 Emchar min_code, Emchar max_code,
796 Emchar code_offset, unsigned char byte_offset,
797 Lisp_Object mother, unsigned char conversion)
800 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
804 XSETCHARSET (obj, cs);
806 CHARSET_ID (cs) = id;
807 CHARSET_NAME (cs) = name;
808 CHARSET_SHORT_NAME (cs) = short_name;
809 CHARSET_LONG_NAME (cs) = long_name;
810 CHARSET_CHARS (cs) = chars;
811 CHARSET_DIMENSION (cs) = dimension;
812 CHARSET_DIRECTION (cs) = direction;
813 CHARSET_COLUMNS (cs) = columns;
814 CHARSET_GRAPHIC (cs) = graphic;
815 CHARSET_FINAL (cs) = final;
816 CHARSET_DOC_STRING (cs) = doc;
817 CHARSET_REGISTRY (cs) = reg;
818 CHARSET_CCL_PROGRAM (cs) = Qnil;
819 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
821 CHARSET_DECODING_TABLE(cs) = Qunbound;
822 CHARSET_MIN_CODE (cs) = min_code;
823 CHARSET_MAX_CODE (cs) = max_code;
824 CHARSET_CODE_OFFSET (cs) = code_offset;
825 CHARSET_BYTE_OFFSET (cs) = byte_offset;
826 CHARSET_MOTHER (cs) = mother;
827 CHARSET_CONVERSION (cs) = conversion;
831 if (id == LEADING_BYTE_ASCII)
832 CHARSET_REP_BYTES (cs) = 1;
834 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
836 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
841 /* some charsets do not have final characters. This includes
842 ASCII, Control-1, Composite, and the two faux private
844 unsigned char iso2022_type
845 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
847 if (code_offset == 0)
849 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
850 chlook->charset_by_attributes[iso2022_type][final] = obj;
854 (chlook->charset_by_attributes[iso2022_type][final][direction]));
855 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
859 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
860 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
862 /* Some charsets are "faux" and don't have names or really exist at
863 all except in the leading-byte table. */
865 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of the given
   DIMENSION.  Three counters in chlook are visible: a general one, one
   for 1-byte and one for 2-byte leading bytes.  Each is compared with its
   upper limit before being post-incremented to produce LB.  The message
   "No more character sets free for this dimension" is signalled with
   DIMENSION as its argument.
   NOTE(review): the conditions selecting a counter, the statement
   executed when a limit is exceeded, and the return statement are on
   lines missing from this extract. */
870 get_unallocated_leading_byte (int dimension)
875 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
878 lb = chlook->next_allocated_leading_byte++;
882 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
885 lb = chlook->next_allocated_1_byte_leading_byte++;
889 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
892 lb = chlook->next_allocated_2_byte_leading_byte++;
898 ("No more character sets free for this dimension",
899 make_int (dimension));
905 /* Number of Big5 characters which have the same code in 1st byte:
   (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157. */
907 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character explicitly defined for CODE_POINT in the coded
   charset CCS.
   The decoding table is narrowed with get_ccs_octet_table using one octet
   of CODE_POINT at a time; when the result is a character it is returned.
   Otherwise, if CCS has a mother charset and its conversion is
   CONVERSION_IDENTICAL, the lookup is forwarded to the mother: through
   DECODE_CHAR when the mother is Vcharset_ucs, and through a recursive
   call otherwise.
   NOTE(review): the loop around the octet lookup, the handling of other
   conversions and the final return value are on lines missing from this
   extract. */
910 decode_defined_char (Lisp_Object ccs, int code_point)
912 int dim = XCHARSET_DIMENSION (ccs);
913 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
921 = get_ccs_octet_table (decoding_table, ccs,
922 (code_point >> (dim * 8)) & 255);
924 if (CHARP (decoding_table))
925 return XCHAR (decoding_table);
928 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
930 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
932 if ( EQ (mother, Vcharset_ucs) )
933 return DECODE_CHAR (mother, code_point);
935 return decode_defined_char (mother, code_point);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   numeric properties.
   Visible cases:
   - CHARSET has a positive max code and a mother charset: for the
     conversions CONVERSION_94x60 and CONVERSION_94x94x60 the code point
     is split into (plane,) row and cell and re-linearised, with separate
     arithmetic for different row ranges; the result plus the charset's
     code offset is then decoded through the mother.
   - CHARSET has a positive max code and no mother: a character id is
     computed from the code point octets, the byte offset, the number of
     characters per dimension and the code offset, and is then compared
     with the charset's min and max codes.
   - Otherwise, when the final byte is at least '0', the result is an
     offset from MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or
     MIN_CHAR_96x96 (the 1-dimensional bases are inferred from the
     multipliers 94 and 96; the lines naming them are missing), derived
     from the final byte and the low seven bits of each octet.
   NOTE(review): many branch bodies, case labels and return statements are
   on lines missing from this extract; the grouping above is inferred from
   statement order and should be confirmed. */
942 decode_builtin_char (Lisp_Object charset, int code_point)
944 Lisp_Object mother = XCHARSET_MOTHER (charset);
947 if ( XCHARSET_MAX_CODE (charset) > 0 )
949 if ( CHARSETP (mother) )
951 int code = code_point;
953 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
955 int row = code_point >> 8;
956 int cell = code_point & 255;
960 else if (row < 16 + 32 + 30)
961 code = (row - (16 + 32)) * 94 + cell - 33;
962 else if (row < 18 + 32 + 30)
964 else if (row < 18 + 32 + 60)
965 code = (row - (18 + 32)) * 94 + cell - 33;
967 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
969 int plane = code_point >> 16;
970 int row = (code_point >> 8) & 255;
971 int cell = code_point & 255;
975 else if (row < 16 + 32 + 30)
977 = (plane - 33) * 94 * 60
978 + (row - (16 + 32)) * 94
980 else if (row < 18 + 32 + 30)
982 else if (row < 18 + 32 + 60)
984 = (plane - 33) * 94 * 60
985 + (row - (18 + 32)) * 94
989 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
994 = (XCHARSET_DIMENSION (charset) == 1
996 code_point - XCHARSET_BYTE_OFFSET (charset)
998 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
999 * XCHARSET_CHARS (charset)
1000 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1001 + XCHARSET_CODE_OFFSET (charset);
1002 if ((cid < XCHARSET_MIN_CODE (charset))
1003 || (XCHARSET_MAX_CODE (charset) < cid))
1008 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1010 if (XCHARSET_DIMENSION (charset) == 1)
1012 switch (XCHARSET_CHARS (charset))
1016 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1019 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1027 switch (XCHARSET_CHARS (charset))
1030 return MIN_CHAR_94x94
1031 + (final - '0') * 94 * 94
1032 + (((code_point >> 8) & 0x7F) - 33) * 94
1033 + ((code_point & 0x7F) - 33);
1035 return MIN_CHAR_96x96
1036 + (final - '0') * 96 * 96
1037 + (((code_point >> 8) & 0x7F) - 32) * 96
1038 + ((code_point & 0x7F) - 32);
/* Compute the code point of the character CH in CHARSET.
   Visible steps:
   - The charset's encoding table, when it is a char table, is consulted
     first with get_char_id_table.
   - Otherwise, if CHARSET has a mother charset, a code is obtained by
     calling this function on the mother; DEFINED_ONLY is forced to 1 for
     that call when CHARSET's final byte is at least '0'.
   - A code that passes the range test is reduced by the charset's code
     offset to D, and D is converted to octets according to the charset's
     conversion type.  Positions are packed one per octet, starting at 33
     for the 94-based conversions and at 32 for the 96-based ones.  An
     unrecognised conversion prints "Unknown CCS-conversion %d is
     specified!".
   - A final group of branches handles charsets whose final byte is at
     least '0' and whose min code is 0: D is CH minus the base of the
     matching MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96
     block, and the 2-dimensional cases return the packed row and cell.
   NOTE(review): the role of DEFINED_ONLY beyond the visible test, the
   return values of several branches, and the failure result are on lines
   missing from this extract. */
1050 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1052 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1055 if ( CHAR_TABLEP (encoding_table)
1056 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1061 Lisp_Object mother = XCHARSET_MOTHER (charset);
1062 int min = XCHARSET_MIN_CODE (charset);
1063 int max = XCHARSET_MAX_CODE (charset);
1066 if ( CHARSETP (mother) )
1068 if (XCHARSET_FINAL (charset) >= '0')
1069 code = charset_code_point (mother, ch, 1);
1071 code = charset_code_point (mother, ch, defined_only);
1073 else if (defined_only)
1075 else if ( ((max == 0) && CHARSETP (mother)
1076 && (XCHARSET_FINAL (charset) == 0))
1077 || ((min <= ch) && (ch <= max)) )
1079 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1080 || ((min <= code) && (code <= max)) )
1082 int d = code - XCHARSET_CODE_OFFSET (charset);
1084 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1086 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1088 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1090 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1093 int cell = d % 94 + 33;
1099 return (row << 8) | cell;
1101 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1102 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1103 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1104 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1105 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1107 int plane = d / (94 * 60) + 33;
1108 int row = (d % (94 * 60)) / 94;
1109 int cell = d % 94 + 33;
1115 return (plane << 16) | (row << 8) | cell;
1117 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1119 ( (d / (94 * 94) + 33) << 16)
1120 | ((d / 94 % 94 + 33) << 8)
1122 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1124 ( (d / (96 * 96) + 32) << 16)
1125 | ((d / 96 % 96 + 32) << 8)
1127 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1129 ( (d / (94 * 94 * 94) + 33) << 24)
1130 | ((d / (94 * 94) % 94 + 33) << 16)
1131 | ((d / 94 % 94 + 33) << 8)
1133 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1135 ( (d / (96 * 96 * 96) + 32) << 24)
1136 | ((d / (96 * 96) % 96 + 32) << 16)
1137 | ((d / 96 % 96 + 32) << 8)
1141 printf ("Unknown CCS-conversion %d is specified!",
1142 XCHARSET_CONVERSION (charset));
1146 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1147 ( XCHARSET_MIN_CODE (charset) == 0 )
1149 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1150 (XCHARSET_CODE_OFFSET (charset)
1151 == XCHARSET_MIN_CODE (charset))
1156 if (XCHARSET_DIMENSION (charset) == 1)
1158 if (XCHARSET_CHARS (charset) == 94)
1160 if (((d = ch - (MIN_CHAR_94
1161 + (XCHARSET_FINAL (charset) - '0') * 94))
1166 else if (XCHARSET_CHARS (charset) == 96)
1168 if (((d = ch - (MIN_CHAR_96
1169 + (XCHARSET_FINAL (charset) - '0') * 96))
1177 else if (XCHARSET_DIMENSION (charset) == 2)
1179 if (XCHARSET_CHARS (charset) == 94)
1181 if (((d = ch - (MIN_CHAR_94x94
1183 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1186 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1188 else if (XCHARSET_CHARS (charset) == 96)
1190 if (((d = ch - (MIN_CHAR_96x96
1192 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1195 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map the character C to a built-in charset and a code point.  The chosen
   charset is stored through CHARSET and the code point is returned.
   The visible branches test C against successive ranges:
   - up to MAX_CHAR_BASIC_LATIN: Vcharset_ascii;
   - next: Vcharset_control_1 and Vcharset_latin_iso8859_1 (their range
     tests are on missing lines);
   - the Hebrew and Thai ranges: the matching ISO 8859-8 / TIS 620
     charset, with code point C minus the range start plus 0x20;
   - up to MAX_CHAR_BMP, MAX_CHAR_SMP and MAX_CHAR_SIP: the ucs_bmp,
     ucs_smp and ucs_sip charsets;
   - the Daikanwa range: Vcharset_ideograph_daikanwa;
   - the MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks: the charset found with
     CHARSET_BY_ATTRIBUTES for the computed final byte, falling back to
     Vcharset_ucs when no such charset exists;
   - everything else: Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns
   list2 (charset, int) instead of setting CHARSET and returning an
   integer like every other branch.  It is presumably compiled out by a
   preprocessor conditional that is not visible in this extract --
   confirm.  Several return statements are also on missing lines. */
1206 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1208 if (c <= MAX_CHAR_BASIC_LATIN)
1210 *charset = Vcharset_ascii;
1215 *charset = Vcharset_control_1;
1220 *charset = Vcharset_latin_iso8859_1;
1224 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1226 *charset = Vcharset_hebrew_iso8859_8;
1227 return c - MIN_CHAR_HEBREW + 0x20;
1230 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1232 *charset = Vcharset_thai_tis620;
1233 return c - MIN_CHAR_THAI + 0x20;
1236 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1237 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1239 return list2 (Vcharset_katakana_jisx0201,
1240 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1243 else if (c <= MAX_CHAR_BMP)
1245 *charset = Vcharset_ucs_bmp;
1248 else if (c <= MAX_CHAR_SMP)
1250 *charset = Vcharset_ucs_smp;
1251 return c - MIN_CHAR_SMP;
1253 else if (c <= MAX_CHAR_SIP)
1255 *charset = Vcharset_ucs_sip;
1256 return c - MIN_CHAR_SIP;
1258 else if (c < MIN_CHAR_DAIKANWA)
1260 *charset = Vcharset_ucs;
1263 else if (c <= MAX_CHAR_DAIKANWA)
1265 *charset = Vcharset_ideograph_daikanwa;
1266 return c - MIN_CHAR_DAIKANWA;
1268 else if (c < MIN_CHAR_94)
1270 *charset = Vcharset_ucs;
1273 else if (c <= MAX_CHAR_94)
1275 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1276 ((c - MIN_CHAR_94) / 94) + '0',
1277 CHARSET_LEFT_TO_RIGHT);
1278 if (!NILP (*charset))
1279 return ((c - MIN_CHAR_94) % 94) + 33;
1282 *charset = Vcharset_ucs;
1286 else if (c <= MAX_CHAR_96)
1288 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1289 ((c - MIN_CHAR_96) / 96) + '0',
1290 CHARSET_LEFT_TO_RIGHT);
1291 if (!NILP (*charset))
1292 return ((c - MIN_CHAR_96) % 96) + 32;
1295 *charset = Vcharset_ucs;
1299 else if (c <= MAX_CHAR_94x94)
1302 = CHARSET_BY_ATTRIBUTES (94, 2,
1303 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1304 CHARSET_LEFT_TO_RIGHT);
1305 if (!NILP (*charset))
1306 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1307 | (((c - MIN_CHAR_94x94) % 94) + 33);
1310 *charset = Vcharset_ucs;
1314 else if (c <= MAX_CHAR_96x96)
1317 = CHARSET_BY_ATTRIBUTES (96, 2,
1318 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1319 CHARSET_LEFT_TO_RIGHT);
1320 if (!NILP (*charset))
1321 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1322 | (((c - MIN_CHAR_96x96) % 96) + 32);
1325 *charset = Vcharset_ucs;
1331 *charset = Vcharset_ucs;
1336 Lisp_Object Vdefault_coded_charset_priority_list;
1340 /************************************************************************/
1341 /* Basic charset Lisp functions */
1342 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): returns Qt when
   CHARSETP holds for OBJECT and Qnil otherwise.
   NOTE(review): the end of the docstring comment and the argument list
   are on lines missing from this extract. */
1344 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1345 Return non-nil if OBJECT is a charset.
1349 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument).  A charset object
   is returned unchanged; any other argument is checked to be a symbol and
   looked up in Vcharset_hash_table, with Qnil as the result when there is
   no entry.
   NOTE(review): the end of the docstring comment and the argument list
   are on lines missing from this extract. */
1352 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1353 Retrieve the charset of the given name.
1354 If CHARSET-OR-NAME is a charset object, it is simply returned.
1355 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1356 nil is returned. Otherwise the associated charset object is returned.
1360 if (CHARSETP (charset_or_name))
1361 return charset_or_name;
1363 CHECK_SYMBOL (charset_or_name);
1364 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument).  Calls
   Ffind_charset on NAME and signals "No such charset" with NAME as the
   offending value.
   NOTE(review): the test that guards the error, the return statement,
   the end of the docstring comment and the argument list are on lines
   missing from this extract. */
1367 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1368 Retrieve the charset of the given name.
1369 Same as `find-charset' except an error is signalled if there is no such
1370 charset instead of returning nil.
1374 Lisp_Object charset = Ffind_charset (name);
1377 signal_simple_error ("No such charset", name);
1381 /* We store the charsets in hash tables with the names as the key and the
1382 actual charset object as the value. Occasionally we need to use them
1383 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: it carries a pointer to the
   Lisp list that is being accumulated.
   NOTE(review): the braces of the struct are on lines missing from this
   extract. */
1384 struct charset_list_closure
1386 Lisp_Object *charset_list;
/* Mapper callback: conses KEY onto the front of the list reached through
   the closure.  VALUE is not used in the visible lines.
   NOTE(review): the return type and return statement are on lines
   missing from this extract. */
1390 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1391 void *charset_list_closure)
1393 /* This function can GC */
1394 struct charset_list_closure *chcl =
1395 (struct charset_list_closure*) charset_list_closure;
1396 Lisp_Object *charset_list = chcl->charset_list;
1398 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list protected with GCPRO1, runs add_charset_to_list_mapper over every
   entry of Vcharset_hash_table with elisp_maphash, and returns the list
   that the mapper built.
   NOTE(review): the matching UNGCPRO, the end of the docstring comment
   and the argument list are on lines missing from this extract. */
1402 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1403 Return a list of the names of all defined charsets.
1407 Lisp_Object charset_list = Qnil;
1408 struct gcpro gcpro1;
1409 struct charset_list_closure charset_list_closure;
1411 GCPRO1 (charset_list);
1412 charset_list_closure.charset_list = &charset_list;
1413 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1414 &charset_list_closure);
1417 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument): resolves CHARSET
   with Fget_charset and returns the name stored in the resulting charset
   object.
   NOTE(review): the end of the docstring comment and the argument list
   are on lines missing from this extract. */
1420 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1421 Return the name of charset CHARSET.
1425 return XCHARSET_NAME (Fget_charset (charset));
1428 /* #### SJT Should generic properties be allowed? */
/* C implementation of `make-charset'.  Outline of the visible code:

   1. NAME must be a symbol that does not already name a charset;
      DOC-STRING must be a string when it is non-nil.
   2. PROPS is walked as a property list.  Each recognized keyword is
      validated and stored in a local variable; anything else signals
      "Unrecognized property".
   3. 'final is mandatory and must not exceed 0x5F when the dimension
      is 2.  A (chars, dimension, final) combination that is already
      registered for either direction is rejected.
   4. Defaults are filled in: empty strings for a nil doc string or
      registry, the print name of NAME for short_name, and doc_string
      for long_name.
   5. An id obtained from get_unallocated_leading_byte is passed with
      all of the above to make_charset; a supplied CCL program is then
      stored in the new charset.

   NOTE(review): the test for Qlong_name is a plain `if', not `else if',
   so it begins a second chain.  A 'short-name keyword is consumed by
   the first `if' and then, matching nothing in the second chain,
   appears to reach the "Unrecognized property" error.  This looks like
   a missing `else' -- confirm against the complete source, since
   several lines of this function are absent from this listing.

   NOTE(review): the docstring names the properties 'code-min and
   'code-max, but the keywords tested below are Qmin_code and Qmax_code,
   which syms_of_mule_charset defines as "min-code" and "max-code" --
   the docstring presumably needs correcting. */
1429 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1430 Define a new character set.
1431 This function is for use with Mule support.
1432 NAME is a symbol, the name by which the character set is normally referred.
1433 DOC-STRING is a string describing the character set.
1434 PROPS is a property list, describing the specific nature of the
1435 character set. Recognized properties are:
1437 'short-name Short version of the charset name (ex: Latin-1)
1438 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1439 'registry A regular expression matching the font registry field for
1441 'dimension Number of octets used to index a character in this charset.
1442 Either 1 or 2. Defaults to 1.
1443 If UTF-2000 feature is enabled, 3 or 4 are also available.
1444 'columns Number of columns used to display a character in this charset.
1445 Only used in TTY mode. (Under X, the actual width of a
1446 character can be derived from the font used to display the
1447 characters.) If unspecified, defaults to the dimension
1448 (this is almost always the correct value).
1449 'chars Number of characters in each dimension (94 or 96).
1450 Defaults to 94. Note that if the dimension is 2, the
1451 character set thus described is 94x94 or 96x96.
1452 If UTF-2000 feature is enabled, 128 or 256 are also available.
1453 'final Final byte of ISO 2022 escape sequence. Must be
1454 supplied. Each combination of (DIMENSION, CHARS) defines a
1455 separate namespace for final bytes. Note that ISO
1456 2022 restricts the final byte to the range
1457 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1458 dimension == 2. Note also that final bytes in the range
1459 0x30 - 0x3F are reserved for user-defined (not official)
1461 'graphic 0 (use left half of font on output) or 1 (use right half
1462 of font on output). Defaults to 0. For example, for
1463 a font whose registry is ISO8859-1, the left half
1464 (octets 0x20 - 0x7F) is the `ascii' character set, while
1465 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1466 character set. With 'graphic set to 0, the octets
1467 will have their high bit cleared; with it set to 1,
1468 the octets will have their high bit set.
1469 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1471 'ccl-program A compiled CCL program used to convert a character in
1472 this charset into an index into the font. This is in
1473 addition to the 'graphic property. The CCL program
1474 is passed the octets of the character, with the high
1475 bit cleared and set depending upon whether the value
1476 of the 'graphic property is 0 or 1.
1477 'mother [UTF-2000 only] Base coded-charset.
1478 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1479 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1480 'code-offset [UTF-2000 only] Offset for a code-point of a base
1482 'conversion [UTF-2000 only] Conversion for a code-point of a base
1483 coded-charset (94x60 or 94x94x60).
1485 (name, doc_string, props))
1487 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1488 int direction = CHARSET_LEFT_TO_RIGHT;
1489 Lisp_Object registry = Qnil;
1490 Lisp_Object charset;
1491 Lisp_Object ccl_program = Qnil;
1492 Lisp_Object short_name = Qnil, long_name = Qnil;
1493 Lisp_Object mother = Qnil;
1494 int min_code = 0, max_code = 0, code_offset = 0;
1495 int byte_offset = -1;
1498 CHECK_SYMBOL (name);
1499 if (!NILP (doc_string))
1500 CHECK_STRING (doc_string);
1502 charset = Ffind_charset (name);
1503 if (!NILP (charset))
1504 signal_simple_error ("Cannot redefine existing charset", name);
1507 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1509 if (EQ (keyword, Qshort_name))
1511 CHECK_STRING (value);
1515 if (EQ (keyword, Qlong_name))
1517 CHECK_STRING (value);
1521 else if (EQ (keyword, Qdimension))
1524 dimension = XINT (value);
1525 if (dimension < 1 ||
1532 signal_simple_error ("Invalid value for 'dimension", value);
1535 else if (EQ (keyword, Qchars))
1538 chars = XINT (value);
1539 if (chars != 94 && chars != 96
1541 && chars != 128 && chars != 256
1544 signal_simple_error ("Invalid value for 'chars", value);
1547 else if (EQ (keyword, Qcolumns))
1550 columns = XINT (value);
1551 if (columns != 1 && columns != 2)
1552 signal_simple_error ("Invalid value for 'columns", value);
1555 else if (EQ (keyword, Qgraphic))
1558 graphic = XINT (value);
1566 signal_simple_error ("Invalid value for 'graphic", value);
1569 else if (EQ (keyword, Qregistry))
1571 CHECK_STRING (value);
1575 else if (EQ (keyword, Qdirection))
1577 if (EQ (value, Ql2r))
1578 direction = CHARSET_LEFT_TO_RIGHT;
1579 else if (EQ (value, Qr2l))
1580 direction = CHARSET_RIGHT_TO_LEFT;
1582 signal_simple_error ("Invalid value for 'direction", value);
1585 else if (EQ (keyword, Qfinal))
1587 CHECK_CHAR_COERCE_INT (value);
1588 final = XCHAR (value);
1589 if (final < '0' || final > '~')
1590 signal_simple_error ("Invalid value for 'final", value);
1594 else if (EQ (keyword, Qmother))
1596 mother = Fget_charset (value);
1599 else if (EQ (keyword, Qmin_code))
1602 min_code = XUINT (value);
1605 else if (EQ (keyword, Qmax_code))
1608 max_code = XUINT (value);
1611 else if (EQ (keyword, Qcode_offset))
1614 code_offset = XUINT (value);
1617 else if (EQ (keyword, Qconversion))
1619 if (EQ (value, Q94x60))
1620 conversion = CONVERSION_94x60;
1621 else if (EQ (value, Q94x94x60))
1622 conversion = CONVERSION_94x94x60;
1624 signal_simple_error ("Unrecognized conversion", value);
1628 else if (EQ (keyword, Qccl_program))
1630 struct ccl_program test_ccl;
1632 if (setup_ccl_program (&test_ccl, value) < 0)
1633 signal_simple_error ("Invalid value for 'ccl-program", value);
1634 ccl_program = value;
1638 signal_simple_error ("Unrecognized property", keyword);
1644 error ("'final must be specified");
1646 if (dimension == 2 && final > 0x5F)
1648 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1651 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1652 CHARSET_LEFT_TO_RIGHT)) ||
1653 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1654 CHARSET_RIGHT_TO_LEFT)))
1656 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1658 id = get_unallocated_leading_byte (dimension);
1660 if (NILP (doc_string))
1661 doc_string = build_string ("");
1663 if (NILP (registry))
1664 registry = build_string ("");
1666 if (NILP (short_name))
1667 XSETSTRING (short_name, XSYMBOL (name)->name);
1669 if (NILP (long_name))
1670 long_name = doc_string;
1673 columns = dimension;
1675 if (byte_offset < 0)
1679 else if (chars == 96)
1685 charset = make_charset (id, name, chars, dimension, columns, graphic,
1686 final, direction, short_name, long_name,
1687 doc_string, registry,
1688 Qnil, min_code, max_code, code_offset, byte_offset,
1689 mother, conversion);
1690 if (!NILP (ccl_program))
1691 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Create NEW-NAME as a copy of CHARSET with the direction flipped.
   An error is signalled if CHARSET already has a reverse-direction
   charset or if NEW-NAME already names a charset.  chars, dimension,
   columns, graphic, final, doc string, short/long name and registry are
   copied from CHARSET; the id is freshly obtained from
   get_unallocated_leading_byte.  Afterwards the two charsets are linked
   to each other through their reverse-direction slots.

   NOTE(review): two alternative tails of the make_charset argument list
   are visible -- one copying the decoding table, min/max code, code and
   byte offsets and conversion from CHARSET, the other passing Qnil and
   zeros.  Presumably a preprocessor conditional on lines absent from
   this listing selects between them -- confirm. */
1695 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1697 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1698 NEW-NAME is the name of the new charset. Return the new charset.
1700 (charset, new_name))
1702 Lisp_Object new_charset = Qnil;
1703 int id, chars, dimension, columns, graphic, final;
1705 Lisp_Object registry, doc_string, short_name, long_name;
1708 charset = Fget_charset (charset);
1709 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1710 signal_simple_error ("Charset already has reverse-direction charset",
1713 CHECK_SYMBOL (new_name);
1714 if (!NILP (Ffind_charset (new_name)))
1715 signal_simple_error ("Cannot redefine existing charset", new_name);
1717 cs = XCHARSET (charset);
1719 chars = CHARSET_CHARS (cs);
1720 dimension = CHARSET_DIMENSION (cs);
1721 columns = CHARSET_COLUMNS (cs);
1722 id = get_unallocated_leading_byte (dimension);
1724 graphic = CHARSET_GRAPHIC (cs);
1725 final = CHARSET_FINAL (cs);
1726 direction = CHARSET_RIGHT_TO_LEFT;
1727 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1728 direction = CHARSET_LEFT_TO_RIGHT;
1729 doc_string = CHARSET_DOC_STRING (cs);
1730 short_name = CHARSET_SHORT_NAME (cs);
1731 long_name = CHARSET_LONG_NAME (cs);
1732 registry = CHARSET_REGISTRY (cs);
1734 new_charset = make_charset (id, new_name, chars, dimension, columns,
1735 graphic, final, direction, short_name, long_name,
1736 doc_string, registry,
1738 CHARSET_DECODING_TABLE(cs),
1739 CHARSET_MIN_CODE(cs),
1740 CHARSET_MAX_CODE(cs),
1741 CHARSET_CODE_OFFSET(cs),
1742 CHARSET_BYTE_OFFSET(cs),
1744 CHARSET_CONVERSION (cs)
1746 Qnil, 0, 0, 0, 0, Qnil, 0
1750 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1751 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Enter ALIAS into Vcharset_hash_table as an additional key for the
   charset object that CHARSET resolves to.  ALIAS must be a symbol;
   CHARSET is resolved with Fget_charset.  The value of the Fputhash
   call is returned.
   NOTE(review): no visible check prevents ALIAS from being a name that
   is already in the table; presumably Fputhash then replaces the
   existing entry -- confirm that rebinding an existing charset name
   this way is intended. */
1756 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1757 Define symbol ALIAS as an alias for CHARSET.
1761 CHECK_SYMBOL (alias);
1762 charset = Fget_charset (charset);
1763 return Fputhash (alias, charset, Vcharset_hash_table);
1766 /* #### Reverse direction charsets not yet implemented. */
/* Resolve CHARSET with Fget_charset and return the contents of its
   reverse-direction-charset slot unchanged.
   NOTE(review): syms_of_mule_charset shows no DEFSUBR for this function
   (only a commented-out DEFSUBR for Freverse_direction_charset), so
   this primitive is presumably compiled out or otherwise not exposed to
   Lisp -- confirm against the complete source. */
1768 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1770 Return the reverse-direction charset parallel to CHARSET, if any.
1771 This is the charset with the same properties (in particular, the same
1772 dimension, number of characters per dimension, and final byte) as
1773 CHARSET but whose characters are displayed in the opposite direction.
1777 charset = Fget_charset (charset);
1778 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Look a charset up by its ISO 2022 attributes via
   CHARSET_BY_ATTRIBUTES.  Accepted arguments, per the visible checks:
   DIMENSION 1 or 2; CHARS 94 or 96; FINAL in '0' .. '~', and not above
   0x5F when DIMENSION is 2; DIRECTION l2r, r2l or nil.  di remains -1
   when DIRECTION is nil; the left-to-right and right-to-left lookups
   are both visible for that case.  The last visible statement returns
   the NAME of the charset found (XCHARSET_NAME), not the charset
   object.
   NOTE(review): these limits are narrower than what the `make-charset'
   docstring allows when UTF-2000 is enabled (dimension 3 or 4, chars
   128 or 256) -- confirm that this is intended. */
1782 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1783 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1784 If DIRECTION is omitted, both directions will be checked (left-to-right
1785 will be returned if character sets exist for both directions).
1787 (dimension, chars, final, direction))
1789 int dm, ch, fi, di = -1;
1790 Lisp_Object obj = Qnil;
1792 CHECK_INT (dimension);
1793 dm = XINT (dimension);
1794 if (dm < 1 || dm > 2)
1795 signal_simple_error ("Invalid value for DIMENSION", dimension);
1799 if (ch != 94 && ch != 96)
1800 signal_simple_error ("Invalid value for CHARS", chars);
1802 CHECK_CHAR_COERCE_INT (final);
1804 if (fi < '0' || fi > '~')
1805 signal_simple_error ("Invalid value for FINAL", final);
1807 if (EQ (direction, Ql2r))
1808 di = CHARSET_LEFT_TO_RIGHT;
1809 else if (EQ (direction, Qr2l))
1810 di = CHARSET_RIGHT_TO_LEFT;
1811 else if (!NILP (direction))
1812 signal_simple_error ("Invalid value for DIRECTION", direction);
1814 if (dm == 2 && fi > 0x5F)
1816 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1820 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1822 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1825 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1828 return XCHARSET_NAME (obj);
/* Resolve CHARSET with Fget_charset and return the short-name slot of
   the resulting charset object (XCHARSET_SHORT_NAME). */
1832 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1833 Return short name of CHARSET.
1837 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return the long-name slot of
   the resulting charset object (XCHARSET_LONG_NAME). */
1840 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1841 Return long name of CHARSET.
1845 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its doc-string slot
   (XCHARSET_DOC_STRING); the "description" reported to Lisp is the
   charset's doc string. */
1848 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1849 Return description of CHARSET.
1853 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its dimension, boxed as
   a Lisp integer with make_int. */
1856 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1857 Return dimension of CHARSET.
1861 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Dispatch on PROP with a sequence of EQ tests and return the matching
   field of the charset.  CHARSET is resolved before PROP is checked to
   be a symbol.  Integer fields are boxed with make_int.  Special cases
   visible below:
     'final yields nil when the stored final byte is 0, otherwise the
       byte as a character;
     'direction yields the symbol l2r or r2l;
     'reverse-direction-charset yields the NAME of the reverse charset
       when the slot holds a charset, else the slot contents as is.
   Any other symbol signals "Unrecognized charset property name".
   NOTE(review): of the UTF-2000 properties accepted by `make-charset',
   only 'mother, 'min-code and 'max-code have visible branches here;
   'code-offset and 'conversion apparently cannot be queried --
   confirm whether that is intended. */
1864 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1865 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1866 Recognized properties are those listed in `make-charset', as well as
1867 'name and 'doc-string.
1873 charset = Fget_charset (charset);
1874 cs = XCHARSET (charset);
1876 CHECK_SYMBOL (prop);
1877 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1878 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1879 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1880 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1881 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1882 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1883 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1884 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1885 Qnil : make_char (CHARSET_FINAL (cs));
1886 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1887 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1888 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1889 if (EQ (prop, Qdirection))
1890 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1891 if (EQ (prop, Qreverse_direction_charset))
1893 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1894 /* #### Is this translation OK? If so, error checking sufficient? */
1895 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1898 if (EQ (prop, Qmother))
1899 return CHARSET_MOTHER (cs);
1900 if (EQ (prop, Qmin_code))
1901 return make_int (CHARSET_MIN_CODE (cs));
1902 if (EQ (prop, Qmax_code))
1903 return make_int (CHARSET_MAX_CODE (cs));
1905 signal_simple_error ("Unrecognized charset property name", prop);
1906 return Qnil; /* not reached */
/* Resolve CHARSET with Fget_charset and return its leading byte
   (XCHARSET_LEADING_BYTE) as a Lisp integer; that value is what this
   primitive reports as the charset's identification number. */
1909 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1910 Return charset identification number of CHARSET.
1914 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1917 /* #### We need to figure out which properties we really want to
/* Validate CCL-PROGRAM by running setup_ccl_program on a scratch
   struct ccl_program -- a negative result signals "Invalid
   ccl-program" -- and only then store it in the charset's ccl-program
   slot.  CHARSET is resolved first, so an unknown charset is reported
   before the program is examined.  The scratch structure is used for
   validation only; it is not stored anywhere in the visible code. */
1920 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1921 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1923 (charset, ccl_program))
1925 struct ccl_program test_ccl;
1927 charset = Fget_charset (charset);
1928 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1929 signal_simple_error ("Invalid ccl-program", ccl_program);
1930 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Helper for Fset_charset_registry.  For every device visited by
   DEVICE_LOOP_NO_BREAK, CHARSET is looked up in that device's
   charset_font_cache; when an entry exists, the hash table stored
   there is emptied with Fclrhash.  The cache entry itself is kept. */
1935 invalidate_charset_font_caches (Lisp_Object charset)
1937 /* Invalidate font cache entries for charset on all devices. */
1938 Lisp_Object devcons, concons, hash_table;
1939 DEVICE_LOOP_NO_BREAK (devcons, concons)
1941 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound serves as the "no entry" marker, tested with UNBOUNDP. */
1942 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1943 if (!UNBOUNDP (hash_table))
1944 Fclrhash (hash_table);
/* Store REGISTRY, which must be a string, in the charset's registry
   slot.  Afterwards the per-device font caches for the charset are
   cleared (invalidate_charset_font_caches) and a change of the default
   face's font property is reported through face_property_was_changed,
   presumably so that fonts are looked up again with the new registry
   -- confirm. */
1948 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1949 Set the 'registry property of CHARSET to REGISTRY.
1951 (charset, registry))
1953 charset = Fget_charset (charset);
1954 CHECK_STRING (registry);
1955 XCHARSET_REGISTRY (charset) = registry;
1956 invalidate_charset_font_caches (charset);
1957 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Resolve CHARSET with Fget_charset and return the contents of its
   decoding-table slot (XCHARSET_DECODING_TABLE); that slot is what this
   primitive calls the mapping table. */
1962 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1963 Return mapping-table of CHARSET.
1967 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Install TABLE as the mapping of CHARSET.  Visible outline:

   - A vector TABLE is checked with decoding_table_check_elements
     against the charset's dimension and byte size; the visible error
     messages are "Too big table", "Invalid element is found" and
     "Something wrong".  A TABLE of any other unaccepted type signals
     wrong-type-argument with "vector-or-nil-p".
   - The table is then walked according to the charset's dimension.
     For a flat table, each element is registered with
     Fput_char_attribute under the charset's name with the code
     i + byte_offset.  For a nested table, elements of an inner vector
     get a code built from (i + byte_offset) << 8 and the inner index,
     while a non-vector element is registered like the flat case.

   NOTE(review): CHARSET_DECODING_TABLE is visibly set to Qnil both in
   the first branch and in the vector branch; an assignment of TABLE
   itself is not visible in this listing.  Presumably the decoding
   table is rebuilt as a side effect of the Fput_char_attribute calls
   -- confirm against the complete source. */
1970 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1971 Set mapping-table of CHARSET to TABLE.
1975 struct Lisp_Charset *cs;
1979 charset = Fget_charset (charset);
1980 cs = XCHARSET (charset);
1984 CHARSET_DECODING_TABLE(cs) = Qnil;
1987 else if (VECTORP (table))
1989 int ccs_len = CHARSET_BYTE_SIZE (cs);
1990 int ret = decoding_table_check_elements (table,
1991 CHARSET_DIMENSION (cs),
1996 signal_simple_error ("Too big table", table);
1998 signal_simple_error ("Invalid element is found", table);
2000 signal_simple_error ("Something wrong", table);
2002 CHARSET_DECODING_TABLE(cs) = Qnil;
2005 signal_error (Qwrong_type_argument,
2006 list2 (build_translated_string ("vector-or-nil-p"),
2009 byte_offset = CHARSET_BYTE_OFFSET (cs);
2010 switch (CHARSET_DIMENSION (cs))
2013 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2015 Lisp_Object c = XVECTOR_DATA(table)[i];
2018 Fput_char_attribute (c, XCHARSET_NAME (charset),
2019 make_int (i + byte_offset));
2023 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2025 Lisp_Object v = XVECTOR_DATA(table)[i];
2031 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2033 Lisp_Object c = XVECTOR_DATA(v)[j];
2037 (c, XCHARSET_NAME (charset),
2038 make_int ( ( (i + byte_offset) << 8 )
2044 Fput_char_attribute (v, XCHARSET_NAME (charset),
2045 make_int (i + byte_offset));
2054 /************************************************************************/
2055 /* Lisp primitives for working with characters */
2056 /************************************************************************/
/* Resolve CHARSET, then turn the code point into a character: with
   DECODE_CHAR when DEFINED_ONLY is nil, with decode_defined_char
   otherwise.  A negative result means that no character was found and
   is reported to Lisp as nil; any other result is boxed with
   make_char.  Charsets whose graphic value is 1 get an adjustment of
   the code point first (NOTE(review): the adjusting statement itself
   is not visible in this listing). */
2059 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2060 Make a character from CHARSET and code-point CODE.
2061 If DEFINED_ONLY is non-nil, builtin character is not returned.
2062 If corresponding character is not found, nil is returned.
2064 (charset, code, defined_only))
2068 charset = Fget_charset (charset);
2071 if (XCHARSET_GRAPHIC (charset) == 1)
2073 if (NILP (defined_only))
2074 c = DECODE_CHAR (charset, c);
2076 c = decode_defined_char (charset, c);
2077 return c >= 0 ? make_char (c) : Qnil;
/* Like `decode-char', but the code point goes through
   decode_builtin_char.  When that yields a negative value the function
   falls back to Fdecode_char with DEFINED_ONLY nil.

   latin-viscii is special-cased: the character is first obtained with
   Fdecode_char and then queried with Fget_char_attribute for
   latin-viscii-lower and latin-viscii-upper, and CHARSET is switched
   to the one that applies.
   NOTE(review): the conditions guarding those switches and the
   matching update of the code point are on lines absent from this
   listing -- confirm against the complete source. */
2080 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2081 Make a builtin character from CHARSET and code-point CODE.
2087 charset = Fget_charset (charset);
2089 if (EQ (charset, Vcharset_latin_viscii))
2091 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2097 (ret = Fget_char_attribute (chr,
2098 Vcharset_latin_viscii_lower,
2101 charset = Vcharset_latin_viscii_lower;
2105 (ret = Fget_char_attribute (chr,
2106 Vcharset_latin_viscii_upper,
2109 charset = Vcharset_latin_viscii_upper;
2116 if (XCHARSET_GRAPHIC (charset) == 1)
2119 c = decode_builtin_char (charset, c);
2120 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Build a character from a charset and one or two octets.  The valid
   octet range depends on the charset:
     ascii                 0 .. 127
     control-1             0 .. 31
     256-character sets    0 .. 255
     94-character sets     33 .. 126
     anything else         32 .. 127
   Each octet is range-checked with args_out_of_range_3.  ARG2 is
   masked with 0x7f before its check.  For a charset of dimension one
   the character is made from ARG1 alone, and a supplied second octet
   signals an error.
   NOTE(review): the statements that read ARG1 into a1 are not visible
   in this listing; the comment below says its 8th bit is stripped in
   the same way as for ARG2. */
2124 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2125 Make a character from CHARSET and octets ARG1 and ARG2.
2126 ARG2 is required only for characters from two-dimensional charsets.
2127 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2128 character s with caron.
2130 (charset, arg1, arg2))
2134 int lowlim, highlim;
2136 charset = Fget_charset (charset);
2137 cs = XCHARSET (charset);
2139 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2140 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2142 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2144 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2145 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2148 /* It is useful (and safe, according to Olivier Galibert) to strip
2149 the 8th bit off ARG1 and ARG2 because it allows programmers to
2150 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2151 Latin 2 code of the character. */
2159 if (a1 < lowlim || a1 > highlim)
2160 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2162 if (CHARSET_DIMENSION (cs) == 1)
2166 ("Charset is of dimension one; second octet must be nil", arg2);
2167 return make_char (MAKE_CHAR (charset, a1, 0));
2176 a2 = XINT (arg2) & 0x7f;
2178 if (a2 < lowlim || a2 > highlim)
2179 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2181 return make_char (MAKE_CHAR (charset, a1, a2));
/* CHARACTER may be a character or an integer that can be coerced to
   one (CHECK_CHAR_COERCE_INT).  The value returned is the NAME of the
   character's charset (XCHARSET_NAME applied to CHAR_CHARSET), not the
   charset object. */
2184 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2185 Return the character set of CHARACTER.
2189 CHECK_CHAR_COERCE_INT (character);
2191 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* BREAKUP_CHAR splits CHARACTER into its charset and the two octets
   octet0 and octet1.  N of nil or 0 selects octet0, N of 1 selects
   octet1; any other N signals "Octet number must be 0 or 1".  The
   charset produced by BREAKUP_CHAR is not otherwise used in the
   visible code. */
2194 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2195 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2196 N defaults to 0 if omitted.
2200 Lisp_Object charset;
2203 CHECK_CHAR_COERCE_INT (character);
2205 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2207 if (NILP (n) || EQ (n, Qzero))
2208 return make_int (octet0);
2209 else if (EQ (n, make_int (1)))
2210 return make_int (octet1);
2212 signal_simple_error ("Octet number must be 0 or 1", n);
/* CHARACTER is coerced to a character and CHARSET resolved with
   Fget_charset; charset_code_point is then called with DEFINED_ONLY
   converted to a C truth value.  A non-negative code point is returned
   as a Lisp integer.
   NOTE(review): the value returned for a negative code point is on a
   line not visible in this listing, and the docstring does not
   describe DEFINED_ONLY or the failure result. */
2216 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2217 Return code-point of CHARACTER in specified CHARSET.
2219 (character, charset, defined_only))
2223 CHECK_CHAR_COERCE_INT (character);
2224 charset = Fget_charset (charset);
2225 code_point = charset_code_point (charset, XCHAR (character),
2226 !NILP (defined_only));
2227 if (code_point >= 0)
2228 return make_int (code_point);
/* Two implementations are visible below:
   - one encodes CHARACTER with ENCODE_CHAR and conses the low byte of
     code_point onto rc once per dimension, finally pushing the charset
     name onto the front;
   - the other splits CHARACTER with BREAKUP_CHAR and builds a three-
     element list (name, c1, c2) for a charset of dimension 2, or a
     two-element list (name, c1) otherwise.
   NOTE(review): presumably a preprocessor conditional on lines absent
   from this listing selects between the two, and the loop's updates
   of code_point and dimension are likewise not visible -- confirm.
   charset and rc are protected with GCPRO2 because the function
   allocates conses. */
2234 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2235 Return list of charset and one or two position-codes of CHARACTER.
2239 /* This function can GC */
2240 struct gcpro gcpro1, gcpro2;
2241 Lisp_Object charset = Qnil;
2242 Lisp_Object rc = Qnil;
2250 GCPRO2 (charset, rc);
2251 CHECK_CHAR_COERCE_INT (character);
2254 code_point = ENCODE_CHAR (XCHAR (character), charset);
2255 dimension = XCHARSET_DIMENSION (charset);
2256 while (dimension > 0)
2258 rc = Fcons (make_int (code_point & 255), rc);
2262 rc = Fcons (XCHARSET_NAME (charset), rc);
2264 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2266 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2268 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2272 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2281 #ifdef ENABLE_COMPOSITE_CHARS
2282 /************************************************************************/
2283 /* composite character functions */
2284 /************************************************************************/
/* Map the LEN bytes at STR to a composite character, allocating a new
   one on first use.  The bytes are turned into a Lisp string and looked
   up in Vcomposite_char_string2char_hash_table.  When a new character
   is needed, it is made from the next free (row, column) position of
   Vcharset_composite and entered into both the char-to-string and the
   string-to-char tables.  Running out of rows (row >= 128) signals
   "No more composite chars available". */
2287 lookup_composite_char (Bufbyte *str, int len)
2289 Lisp_Object lispstr = make_string (str, len);
2290 Lisp_Object ch = Fgethash (lispstr,
2291 Vcomposite_char_string2char_hash_table,
2297 if (composite_char_row_next >= 128)
2298 signal_simple_error ("No more composite chars available", lispstr);
2299 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2300 composite_char_col_next);
2301 Fputhash (make_char (emch), lispstr,
2302 Vcomposite_char_char2string_hash_table);
2303 Fputhash (lispstr, make_char (emch),
2304 Vcomposite_char_string2char_hash_table);
/* Advance to the next free position; when the column reaches 128 it
   restarts at 32 in the following row. */
2305 composite_char_col_next++;
2306 if (composite_char_col_next >= 128)
2308 composite_char_col_next = 32;
2309 composite_char_row_next++;
/* Inverse of lookup_composite_char: fetch the string registered for CH
   from Vcomposite_char_char2string_hash_table.  The assert states the
   precondition that CH has an entry in that table. */
2318 composite_char_string (Emchar ch)
2320 Lisp_Object str = Fgethash (make_char (ch),
2321 Vcomposite_char_char2string_hash_table,
2323 assert (!UNBOUNDP (str));
/* STRING must be a Lisp string.  Its data pointer and length are
   handed to lookup_composite_char, and the resulting Emchar is boxed
   with make_char. */
2327 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2328 Convert a string into a single composite character.
2329 The character is the result of overstriking all the characters in
2334 CHECK_STRING (string);
2335 return make_char (lookup_composite_char (XSTRING_DATA (string),
2336 XSTRING_LENGTH (string)));
/* Characters whose leading byte is not LEADING_BYTE_COMPOSITE are
   rejected with "Must be composite char"; for the rest the string
   obtained from composite_char_string (emch) is returned. */
2339 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2340 Return a string of the characters comprising a composite character.
2348 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2349 signal_simple_error ("Must be composite char", ch);
2350 return composite_char_string (emch);
2352 #endif /* ENABLE_COMPOSITE_CHARS */
2355 /************************************************************************/
2356 /* initialization */
2357 /************************************************************************/
/* Symbol-level initialization for this file: set up the charset
   lrecord implementation, register the Lisp primitives with DEFSUBR,
   and intern the symbols for charset properties and charset names
   with defsymbol.
   NOTE(review): blocks of this function are presumably guarded by
   preprocessor conditionals whose lines (other than the visible
   ENABLE_COMPOSITE_CHARS one) are absent from this listing. */
2360 syms_of_mule_charset (void)
2362 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp-visible primitives defined in this file. */
2364 DEFSUBR (Fcharsetp);
2365 DEFSUBR (Ffind_charset);
2366 DEFSUBR (Fget_charset);
2367 DEFSUBR (Fcharset_list);
2368 DEFSUBR (Fcharset_name);
2369 DEFSUBR (Fmake_charset);
2370 DEFSUBR (Fmake_reverse_direction_charset);
2371 /* DEFSUBR (Freverse_direction_charset); */
2372 DEFSUBR (Fdefine_charset_alias);
2373 DEFSUBR (Fcharset_from_attributes);
2374 DEFSUBR (Fcharset_short_name);
2375 DEFSUBR (Fcharset_long_name);
2376 DEFSUBR (Fcharset_description);
2377 DEFSUBR (Fcharset_dimension);
2378 DEFSUBR (Fcharset_property);
2379 DEFSUBR (Fcharset_id);
2380 DEFSUBR (Fset_charset_ccl_program);
2381 DEFSUBR (Fset_charset_registry);
2383 DEFSUBR (Fcharset_mapping_table);
2384 DEFSUBR (Fset_charset_mapping_table);
2388 DEFSUBR (Fdecode_char);
2389 DEFSUBR (Fdecode_builtin_char);
2390 DEFSUBR (Fencode_char);
2392 DEFSUBR (Fmake_char);
2393 DEFSUBR (Fchar_charset);
2394 DEFSUBR (Fchar_octet);
2395 DEFSUBR (Fsplit_char);
2397 #ifdef ENABLE_COMPOSITE_CHARS
2398 DEFSUBR (Fmake_composite_char);
2399 DEFSUBR (Fcomposite_char_string);
/* Symbols for charsetp and for charset property names and values. */
2402 defsymbol (&Qcharsetp, "charsetp");
2403 defsymbol (&Qregistry, "registry");
2404 defsymbol (&Qfinal, "final");
2405 defsymbol (&Qgraphic, "graphic");
2406 defsymbol (&Qdirection, "direction");
2407 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2408 defsymbol (&Qshort_name, "short-name");
2409 defsymbol (&Qlong_name, "long-name");
2411 defsymbol (&Qmother, "mother");
2412 defsymbol (&Qmin_code, "min-code");
2413 defsymbol (&Qmax_code, "max-code");
2414 defsymbol (&Qcode_offset, "code-offset");
2415 defsymbol (&Qconversion, "conversion");
2416 defsymbol (&Q94x60, "94x60");
2417 defsymbol (&Q94x94x60, "94x94x60");
2420 defsymbol (&Ql2r, "l2r");
2421 defsymbol (&Qr2l, "r2l");
2423 /* Charsets, compatible with FSF 20.3
2424 Naming convention is Script-Charset[-Edition] */
2425 defsymbol (&Qascii, "ascii");
2426 defsymbol (&Qcontrol_1, "control-1");
2427 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2428 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2429 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2430 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2431 defsymbol (&Qthai_tis620, "thai-tis620");
2432 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2433 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2434 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2435 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2436 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2437 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2438 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2439 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2440 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2441 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2442 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2443 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2444 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2445 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2446 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2447 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2449 defsymbol (&Qucs, "ucs");
2450 defsymbol (&Qucs_bmp, "ucs-bmp");
2451 defsymbol (&Qucs_smp, "ucs-smp");
2452 defsymbol (&Qucs_sip, "ucs-sip");
2453 defsymbol (&Qucs_gb, "ucs-gb");
2454 defsymbol (&Qucs_cns, "ucs-cns");
2455 defsymbol (&Qucs_jis, "ucs-jis");
2456 defsymbol (&Qucs_ks, "ucs-ks");
2457 defsymbol (&Qucs_big5, "ucs-big5");
2458 defsymbol (&Qlatin_viscii, "latin-viscii");
2459 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2460 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2461 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2462 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2463 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2464 defsymbol (&Qjis_x0208, "=jis-x0208");
2465 defsymbol (&Qideograph_gt, "ideograph-gt");
2466 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2467 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2468 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2469 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2470 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2471 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2472 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2473 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2474 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2475 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2476 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2477 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2478 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2479 defsymbol (&Qchinese_big5, "chinese-big5");
2480 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2481 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2482 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2483 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2484 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2485 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2486 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2487 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2488 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2489 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2490 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2491 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2492 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2493 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2494 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2496 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2497 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2499 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocate the zeroed
   charset_lookup structure and register it with the dumper, set every
   slot of its lookup tables to Qnil, initialise the leading-byte
   allocation counters, and define the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.

   NOTE(review): charset_by_attributes is cleared by both a two-level
   and a three-level loop, and two different sets of next_allocated_*
   counters are assigned.  Presumably these are alternatives under
   preprocessor conditionals whose lines are absent from this listing
   -- confirm.  leading_code_private_11 is likewise assigned both
   before and after its DEFVAR_INT. */
2503 vars_of_mule_charset (void)
2510 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2511 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2513 /* Table of charsets indexed by leading byte. */
2514 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2515 chlook->charset_by_leading_byte[i] = Qnil;
2518 /* Table of charsets indexed by type/final-byte. */
2519 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2520 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2521 chlook->charset_by_attributes[i][j] = Qnil;
2523 /* Table of charsets indexed by type/final-byte/direction. */
2524 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2525 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2526 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2527 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Starting points from which new leading bytes are handed out. */
2531 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2533 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2534 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2538 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2539 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2540 Leading-code of private TYPE9N charset of column-width 1.
2542 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2546 Vdefault_coded_charset_priority_list = Qnil;
2547 DEFVAR_LISP ("default-coded-charset-priority-list",
2548 &Vdefault_coded_charset_priority_list /*
2549 Default order of preferred coded-character-sets.
2555 complex_vars_of_mule_charset (void)
2557 staticpro (&Vcharset_hash_table);
2558 Vcharset_hash_table =
2559 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2561 /* Predefined character sets. We store them into variables for
2565 staticpro (&Vcharset_ucs);
2567 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2568 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("UCS"),
2570 build_string ("UCS"),
2571 build_string ("ISO/IEC 10646"),
2573 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2574 staticpro (&Vcharset_ucs_bmp);
2576 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2577 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2578 build_string ("BMP"),
2579 build_string ("UCS-BMP"),
2580 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2582 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2583 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2584 staticpro (&Vcharset_ucs_smp);
2586 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2587 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2588 build_string ("SMP"),
2589 build_string ("UCS-SMP"),
2590 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2591 build_string ("UCS00-1"),
2592 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2593 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2594 staticpro (&Vcharset_ucs_sip);
2596 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2597 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2598 build_string ("SIP"),
2599 build_string ("UCS-SIP"),
2600 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2601 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2602 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2603 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_ucs_gb);
2606 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2607 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2608 build_string ("UCS for GB"),
2609 build_string ("UCS for GB"),
2610 build_string ("ISO/IEC 10646 for GB"),
2612 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2613 staticpro (&Vcharset_ucs_cns);
2615 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2616 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2617 build_string ("UCS for CNS"),
2618 build_string ("UCS for CNS 11643"),
2619 build_string ("ISO/IEC 10646 for CNS 11643"),
2621 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2622 staticpro (&Vcharset_ucs_jis);
2624 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2625 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2626 build_string ("UCS for JIS"),
2627 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2629 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2631 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2632 staticpro (&Vcharset_ucs_ks);
2634 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2635 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2636 build_string ("UCS for KS"),
2637 build_string ("UCS for CCS defined by KS"),
2638 build_string ("ISO/IEC 10646 for Korean Standards"),
2640 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2641 staticpro (&Vcharset_ucs_big5);
2643 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2644 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2645 build_string ("UCS for Big5"),
2646 build_string ("UCS for Big5"),
2647 build_string ("ISO/IEC 10646 for Big5"),
2649 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2651 # define MIN_CHAR_THAI 0
2652 # define MAX_CHAR_THAI 0
2653 /* # define MIN_CHAR_HEBREW 0 */
2654 /* # define MAX_CHAR_HEBREW 0 */
2655 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2656 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2658 staticpro (&Vcharset_ascii);
2660 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2661 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("ASCII"),
2663 build_string ("ASCII)"),
2664 build_string ("ASCII (ISO646 IRV)"),
2665 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2666 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_control_1);
2668 Vcharset_control_1 =
2669 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2670 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2671 build_string ("C1"),
2672 build_string ("Control characters"),
2673 build_string ("Control characters 128-191"),
2675 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_1);
2677 Vcharset_latin_iso8859_1 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2679 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-1"),
2681 build_string ("ISO8859-1 (Latin-1)"),
2682 build_string ("ISO8859-1 (Latin-1)"),
2683 build_string ("iso8859-1"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2685 staticpro (&Vcharset_latin_iso8859_2);
2686 Vcharset_latin_iso8859_2 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2688 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2689 build_string ("Latin-2"),
2690 build_string ("ISO8859-2 (Latin-2)"),
2691 build_string ("ISO8859-2 (Latin-2)"),
2692 build_string ("iso8859-2"),
2693 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_latin_iso8859_3);
2695 Vcharset_latin_iso8859_3 =
2696 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2697 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("Latin-3"),
2699 build_string ("ISO8859-3 (Latin-3)"),
2700 build_string ("ISO8859-3 (Latin-3)"),
2701 build_string ("iso8859-3"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_latin_iso8859_4);
2704 Vcharset_latin_iso8859_4 =
2705 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2706 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("Latin-4"),
2708 build_string ("ISO8859-4 (Latin-4)"),
2709 build_string ("ISO8859-4 (Latin-4)"),
2710 build_string ("iso8859-4"),
2711 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2712 staticpro (&Vcharset_thai_tis620);
2713 Vcharset_thai_tis620 =
2714 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2715 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2716 build_string ("TIS620"),
2717 build_string ("TIS620 (Thai)"),
2718 build_string ("TIS620.2529 (Thai)"),
2719 build_string ("tis620"),
2720 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2721 staticpro (&Vcharset_greek_iso8859_7);
2722 Vcharset_greek_iso8859_7 =
2723 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2724 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2725 build_string ("ISO8859-7"),
2726 build_string ("ISO8859-7 (Greek)"),
2727 build_string ("ISO8859-7 (Greek)"),
2728 build_string ("iso8859-7"),
2729 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2730 staticpro (&Vcharset_arabic_iso8859_6);
2731 Vcharset_arabic_iso8859_6 =
2732 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2733 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2734 build_string ("ISO8859-6"),
2735 build_string ("ISO8859-6 (Arabic)"),
2736 build_string ("ISO8859-6 (Arabic)"),
2737 build_string ("iso8859-6"),
2738 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2739 staticpro (&Vcharset_hebrew_iso8859_8);
2740 Vcharset_hebrew_iso8859_8 =
2741 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2742 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2743 build_string ("ISO8859-8"),
2744 build_string ("ISO8859-8 (Hebrew)"),
2745 build_string ("ISO8859-8 (Hebrew)"),
2746 build_string ("iso8859-8"),
2748 0 /* MIN_CHAR_HEBREW */,
2749 0 /* MAX_CHAR_HEBREW */, 0, 32,
2750 Qnil, CONVERSION_IDENTICAL);
2751 staticpro (&Vcharset_katakana_jisx0201);
2752 Vcharset_katakana_jisx0201 =
2753 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2754 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2755 build_string ("JISX0201 Kana"),
2756 build_string ("JISX0201.1976 (Japanese Kana)"),
2757 build_string ("JISX0201.1976 Japanese Kana"),
2758 build_string ("jisx0201\\.1976"),
2759 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2760 staticpro (&Vcharset_latin_jisx0201);
2761 Vcharset_latin_jisx0201 =
2762 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2763 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2764 build_string ("JISX0201 Roman"),
2765 build_string ("JISX0201.1976 (Japanese Roman)"),
2766 build_string ("JISX0201.1976 Japanese Roman"),
2767 build_string ("jisx0201\\.1976"),
2768 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2769 staticpro (&Vcharset_cyrillic_iso8859_5);
2770 Vcharset_cyrillic_iso8859_5 =
2771 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2772 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2773 build_string ("ISO8859-5"),
2774 build_string ("ISO8859-5 (Cyrillic)"),
2775 build_string ("ISO8859-5 (Cyrillic)"),
2776 build_string ("iso8859-5"),
2777 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2778 staticpro (&Vcharset_latin_iso8859_9);
2779 Vcharset_latin_iso8859_9 =
2780 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2781 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2782 build_string ("Latin-5"),
2783 build_string ("ISO8859-9 (Latin-5)"),
2784 build_string ("ISO8859-9 (Latin-5)"),
2785 build_string ("iso8859-9"),
2786 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2788 staticpro (&Vcharset_jis_x0208);
2789 Vcharset_jis_x0208 =
2790 make_charset (LEADING_BYTE_JIS_X0208,
2792 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2793 build_string ("JIS X0208"),
2794 build_string ("JIS X0208 Common"),
2795 build_string ("JIS X0208 Common part"),
2796 build_string ("jisx0208\\.1990"),
2798 MIN_CHAR_JIS_X0208_1990,
2799 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2800 Qnil, CONVERSION_94x94);
2802 staticpro (&Vcharset_japanese_jisx0208_1978);
2803 Vcharset_japanese_jisx0208_1978 =
2804 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2805 Qjapanese_jisx0208_1978, 94, 2,
2806 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2807 build_string ("JIS X0208:1978"),
2808 build_string ("JIS X0208:1978 (Japanese)"),
2810 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2811 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2818 CONVERSION_IDENTICAL);
2819 staticpro (&Vcharset_chinese_gb2312);
2820 Vcharset_chinese_gb2312 =
2821 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2822 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2823 build_string ("GB2312"),
2824 build_string ("GB2312)"),
2825 build_string ("GB2312 Chinese simplified"),
2826 build_string ("gb2312"),
2827 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2828 staticpro (&Vcharset_chinese_gb12345);
2829 Vcharset_chinese_gb12345 =
2830 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2831 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2832 build_string ("G1"),
2833 build_string ("GB 12345)"),
2834 build_string ("GB 12345-1990"),
2835 build_string ("GB12345\\(\\.1990\\)?-0"),
2836 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2837 staticpro (&Vcharset_japanese_jisx0208);
2838 Vcharset_japanese_jisx0208 =
2839 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2840 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2841 build_string ("JISX0208"),
2842 build_string ("JIS X0208:1983 (Japanese)"),
2843 build_string ("JIS X0208:1983 Japanese Kanji"),
2844 build_string ("jisx0208\\.1983"),
2851 CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_japanese_jisx0208_1990);
2854 Vcharset_japanese_jisx0208_1990 =
2855 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2856 Qjapanese_jisx0208_1990, 94, 2,
2857 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2858 build_string ("JISX0208-1990"),
2859 build_string ("JIS X0208:1990 (Japanese)"),
2860 build_string ("JIS X0208:1990 Japanese Kanji"),
2861 build_string ("jisx0208\\.1990"),
2863 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2864 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2865 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2866 Vcharset_jis_x0208 /* Qnil */,
2867 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2869 staticpro (&Vcharset_korean_ksc5601);
2870 Vcharset_korean_ksc5601 =
2871 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2872 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2873 build_string ("KSC5601"),
2874 build_string ("KSC5601 (Korean"),
2875 build_string ("KSC5601 Korean Hangul and Hanja"),
2876 build_string ("ksc5601"),
2877 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2878 staticpro (&Vcharset_japanese_jisx0212);
2879 Vcharset_japanese_jisx0212 =
2880 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2881 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2882 build_string ("JISX0212"),
2883 build_string ("JISX0212 (Japanese)"),
2884 build_string ("JISX0212 Japanese Supplement"),
2885 build_string ("jisx0212"),
2886 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2888 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2889 staticpro (&Vcharset_chinese_cns11643_1);
2890 Vcharset_chinese_cns11643_1 =
2891 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2892 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2893 build_string ("CNS11643-1"),
2894 build_string ("CNS11643-1 (Chinese traditional)"),
2896 ("CNS 11643 Plane 1 Chinese traditional"),
2897 build_string (CHINESE_CNS_PLANE_RE("1")),
2898 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2899 staticpro (&Vcharset_chinese_cns11643_2);
2900 Vcharset_chinese_cns11643_2 =
2901 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2902 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2903 build_string ("CNS11643-2"),
2904 build_string ("CNS11643-2 (Chinese traditional)"),
2906 ("CNS 11643 Plane 2 Chinese traditional"),
2907 build_string (CHINESE_CNS_PLANE_RE("2")),
2908 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2910 staticpro (&Vcharset_latin_tcvn5712);
2911 Vcharset_latin_tcvn5712 =
2912 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2913 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2914 build_string ("TCVN 5712"),
2915 build_string ("TCVN 5712 (VSCII-2)"),
2916 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2917 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2918 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2919 staticpro (&Vcharset_latin_viscii_lower);
2920 Vcharset_latin_viscii_lower =
2921 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2922 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2923 build_string ("VISCII lower"),
2924 build_string ("VISCII lower (Vietnamese)"),
2925 build_string ("VISCII lower (Vietnamese)"),
2926 build_string ("MULEVISCII-LOWER"),
2927 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2928 staticpro (&Vcharset_latin_viscii_upper);
2929 Vcharset_latin_viscii_upper =
2930 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2931 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2932 build_string ("VISCII upper"),
2933 build_string ("VISCII upper (Vietnamese)"),
2934 build_string ("VISCII upper (Vietnamese)"),
2935 build_string ("MULEVISCII-UPPER"),
2936 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2937 staticpro (&Vcharset_latin_viscii);
2938 Vcharset_latin_viscii =
2939 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2940 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2941 build_string ("VISCII"),
2942 build_string ("VISCII 1.1 (Vietnamese)"),
2943 build_string ("VISCII 1.1 (Vietnamese)"),
2944 build_string ("VISCII1\\.1"),
2945 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2946 staticpro (&Vcharset_chinese_big5);
2947 Vcharset_chinese_big5 =
2948 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2949 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2950 build_string ("Big5"),
2951 build_string ("Big5"),
2952 build_string ("Big5 Chinese traditional"),
2953 build_string ("big5-0"),
2955 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2956 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2958 staticpro (&Vcharset_chinese_big5_cdp);
2959 Vcharset_chinese_big5_cdp =
2960 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2961 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2962 build_string ("Big5-CDP"),
2963 build_string ("Big5 + CDP extension"),
2964 build_string ("Big5 with CDP extension"),
2965 build_string ("big5\\.cdp-0"),
2966 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2967 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2969 #define DEF_HANZIKU(n) \
2970 staticpro (&Vcharset_ideograph_hanziku_##n); \
2971 Vcharset_ideograph_hanziku_##n = \
2972 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2973 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2974 build_string ("HZK-"#n), \
2975 build_string ("HANZIKU-"#n), \
2976 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2978 ("hanziku-"#n"$"), \
2979 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2980 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2993 staticpro (&Vcharset_ideograph_cbeta);
2994 Vcharset_ideograph_cbeta =
2995 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2996 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2997 build_string ("CB"),
2998 build_string ("CBETA"),
2999 build_string ("CBETA private characters"),
3000 build_string ("cbeta-0"),
3001 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3002 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3003 staticpro (&Vcharset_ideograph_gt);
3004 Vcharset_ideograph_gt =
3005 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3006 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3007 build_string ("GT"),
3008 build_string ("GT"),
3009 build_string ("GT"),
3011 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3012 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3013 #define DEF_GT_PJ(n) \
3014 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3015 Vcharset_ideograph_gt_pj_##n = \
3016 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3017 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3018 build_string ("GT-PJ-"#n), \
3019 build_string ("GT (pseudo JIS encoding) part "#n), \
3020 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3022 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3023 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3036 staticpro (&Vcharset_ideograph_daikanwa_2);
3037 Vcharset_ideograph_daikanwa_2 =
3038 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3039 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3040 build_string ("Daikanwa Rev."),
3041 build_string ("Morohashi's Daikanwa Rev."),
3043 ("Daikanwa dictionary (revised version)"),
3044 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3045 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3046 staticpro (&Vcharset_ideograph_daikanwa);
3047 Vcharset_ideograph_daikanwa =
3048 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3049 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3050 build_string ("Daikanwa"),
3051 build_string ("Morohashi's Daikanwa Rev.2"),
3053 ("Daikanwa dictionary (second revised version)"),
3054 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3055 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3056 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3058 staticpro (&Vcharset_ethiopic_ucs);
3059 Vcharset_ethiopic_ucs =
3060 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3061 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3062 build_string ("Ethiopic (UCS)"),
3063 build_string ("Ethiopic (UCS)"),
3064 build_string ("Ethiopic of UCS"),
3065 build_string ("Ethiopic-Unicode"),
3066 Qnil, 0x1200, 0x137F, 0, 0,
3067 Qnil, CONVERSION_IDENTICAL);
3069 staticpro (&Vcharset_chinese_big5_1);
3070 Vcharset_chinese_big5_1 =
3071 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3072 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3073 build_string ("Big5"),
3074 build_string ("Big5 (Level-1)"),
3076 ("Big5 Level-1 Chinese traditional"),
3077 build_string ("big5"),
3078 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3079 staticpro (&Vcharset_chinese_big5_2);
3080 Vcharset_chinese_big5_2 =
3081 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3082 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3083 build_string ("Big5"),
3084 build_string ("Big5 (Level-2)"),
3086 ("Big5 Level-2 Chinese traditional"),
3087 build_string ("big5"),
3088 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3090 #ifdef ENABLE_COMPOSITE_CHARS
3091 /* #### For simplicity, we put composite chars into a 96x96 charset.
3092 This is going to lead to problems because you can run out of
3093 room, esp. as we don't yet recycle numbers. */
3094 staticpro (&Vcharset_composite);
3095 Vcharset_composite =
3096 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3097 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3098 build_string ("Composite"),
3099 build_string ("Composite characters"),
3100 build_string ("Composite characters"),
3103 /* #### not dumped properly */
3104 composite_char_row_next = 32;
3105 composite_char_col_next = 32;
3107 Vcomposite_char_string2char_hash_table =
3108 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3109 Vcomposite_char_char2string_hash_table =
3110 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3111 staticpro (&Vcomposite_char_string2char_hash_table);
3112 staticpro (&Vcomposite_char_char2string_hash_table);
3113 #endif /* ENABLE_COMPOSITE_CHARS */