1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2011
7 This file is part of XEmacs.
9 XEmacs is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with XEmacs; see the file COPYING. If not, write to
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
26 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
45 /* The various pre-defined charsets. */
47 Lisp_Object Vcharset_ascii;
48 Lisp_Object Vcharset_control_1;
49 Lisp_Object Vcharset_latin_iso8859_1;
50 Lisp_Object Vcharset_latin_iso8859_2;
51 Lisp_Object Vcharset_latin_iso8859_3;
52 Lisp_Object Vcharset_latin_iso8859_4;
53 Lisp_Object Vcharset_thai_tis620;
54 Lisp_Object Vcharset_greek_iso8859_7;
55 Lisp_Object Vcharset_arabic_iso8859_6;
56 Lisp_Object Vcharset_hebrew_iso8859_8;
57 Lisp_Object Vcharset_katakana_jisx0201;
58 Lisp_Object Vcharset_latin_jisx0201;
59 Lisp_Object Vcharset_cyrillic_iso8859_5;
60 Lisp_Object Vcharset_latin_iso8859_9;
61 /* Lisp_Object Vcharset_japanese_jisx0208_1978; */
62 Lisp_Object Vcharset_chinese_gb2312;
63 Lisp_Object Vcharset_chinese_gb12345;
64 Lisp_Object Vcharset_japanese_jisx0208;
65 Lisp_Object Vcharset_japanese_jisx0208_1990;
66 Lisp_Object Vcharset_korean_ksc5601;
67 Lisp_Object Vcharset_japanese_jisx0212;
68 Lisp_Object Vcharset_chinese_cns11643_1;
69 Lisp_Object Vcharset_chinese_cns11643_2;
71 Lisp_Object Vcharset_system_char_id;
72 Lisp_Object Vcharset_ucs;
73 Lisp_Object Vcharset_ucs_bmp;
74 Lisp_Object Vcharset_ucs_smp;
75 Lisp_Object Vcharset_ucs_sip;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ethiopic_ucs;
84 Lisp_Object Vcharset_chinese_big5_1;
85 Lisp_Object Vcharset_chinese_big5_2;
87 #ifdef ENABLE_COMPOSITE_CHARS
88 Lisp_Object Vcharset_composite;
90 /* Hash tables for composite chars. One maps string representing
91 composed chars to their equivalent chars; one goes the other way. */
93 Lisp_Object Vcomposite_char_char2string_hash_table;
94 Lisp_Object Vcomposite_char_string2char_hash_table;
96 static int composite_char_row_next;
97 static int composite_char_col_next;
99 #endif /* ENABLE_COMPOSITE_CHARS */
101 struct charset_lookup *chlook;
103 static const struct lrecord_description charset_lookup_description_1[] = {
104 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
106 NUM_LEADING_BYTES+4*128
113 static const struct struct_description charset_lookup_description = {
114 sizeof (struct charset_lookup),
115 charset_lookup_description_1
119 /* Table of number of bytes in the string representation of a character
120 indexed by the first byte of that representation.
122 rep_bytes_by_first_byte(c) is more efficient than the equivalent
123 canonical computation:
125 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table giving the total length in bytes of a character's string
   representation, indexed by the first byte of that representation.
   The table is declared with 0xA0 entries, i.e. it covers first bytes
   0x00 through 0x9f in rows of sixteen.  */
127 const Bytecount rep_bytes_by_first_byte[0xA0] =
128 { /* 0x00 - 0x7f are for straight ASCII */
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two rows follow for the same sixteen slots; together
   with the other rows they would exceed the declared 0xA0 entries, so
   presumably only one of them is compiled (selected by a preprocessor
   conditional not visible here).  They differ only in the last entry
   (0x8f): 3 in the first row, 2 in the second.  Confirm against the
   full source.  */
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 /* 0x90 - 0x9d are for Dimension-2 official charsets */
144 /* 0x9e is for Dimension-1 private charsets */
145 /* 0x9f is for Dimension-2 private charsets */
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Recursively inspect V, a (sub-)vector of a charset decoding table.
   DIM is the number of table levels still to descend and CCS_LEN the
   vector length that must not be exceeded at any level.

   Visible logic: XVECTOR_LENGTH (v) is compared against CCS_LEN, then
   every element is examined; for an element that is neither nil nor a
   character, a recursive call with DIM - 1 is made.

   NOTE(review): the return statements and the handling of the last
   level are not visible in this view; confirm the meaning of the int
   result against the full source.  */
152 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
154 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
158 if (XVECTOR_LENGTH (v) > ccs_len)
161 for (i = 0; i < XVECTOR_LENGTH (v); i++)
163 Lisp_Object c = XVECTOR_DATA(v)[i];
165 if (!NILP (c) && !CHARP (c))
169 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record CHARACTER as the decoding of CODE_POINT in coded charset CCS.

   The decoding table is handled as a tree of per-octet tables with one
   level per charset dimension.  For each level the sub-table belonging
   to the corresponding octet of CODE_POINT (most significant octet
   first) is fetched with get_ccs_octet_table; the leaf is then updated
   with put_ccs_octet_table and each updated sub-table is stored back
   into its parent, ending with XCHARSET_DECODING_TABLE (ccs) itself.
   Dimensions 1 through 4 are written out explicitly.  */
181 decoding_table_put_char (Lisp_Object ccs,
182 int code_point, Lisp_Object character)
185 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
186 int dim = XCHARSET_DIMENSION (ccs);
189 XCHARSET_DECODING_TABLE (ccs)
190 = put_ccs_octet_table (table1, ccs, code_point, character);
194 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
196 table2 = put_ccs_octet_table (table2, ccs,
197 (unsigned char)code_point, character);
198 XCHARSET_DECODING_TABLE (ccs)
199 = put_ccs_octet_table (table1, ccs,
200 (unsigned char)(code_point >> 8), table2);
205 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
207 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
209 table3 = put_ccs_octet_table (table3, ccs,
210 (unsigned char)code_point, character);
211 table2 = put_ccs_octet_table (table2, ccs,
212 (unsigned char)(code_point >> 8), table3);
213 XCHARSET_DECODING_TABLE (ccs)
214 = put_ccs_octet_table (table1, ccs,
215 (unsigned char)(code_point >> 16), table2);
217 else /* if (dim == 4) */
220 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
222 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
224 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
226 table4 = put_ccs_octet_table (table4, ccs,
227 (unsigned char)code_point, character);
228 table3 = put_ccs_octet_table (table3, ccs,
229 (unsigned char)(code_point >> 8), table4);
230 table2 = put_ccs_octet_table (table2, ccs,
231 (unsigned char)(code_point >> 16), table3);
232 XCHARSET_DECODING_TABLE (ccs)
233 = put_ccs_octet_table (table1, ccs,
234 (unsigned char)(code_point >> 24), table2);
/* NOTE(review): the statements below declare `dim' a second time and
   index nested Lisp vectors directly instead of using the octet-table
   helpers, so they presumably form an alternative implementation kept
   under a preprocessor conditional that is not visible here.  In this
   variant each octet of CODE_POINT, minus the charset's byte offset,
   indexes one vector level; a missing intermediate level is created
   with make_vector (ccs_len, Qnil), and CHARACTER is stored in the
   final slot.  */
237 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
238 int dim = XCHARSET_DIMENSION (ccs);
239 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
242 int ccs_len = XVECTOR_LENGTH (v);
247 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
248 nv = XVECTOR_DATA(v)[i];
253 if (EQ (nv, character))
256 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
263 XVECTOR_DATA(v)[i] = character;
/* Associate CHARACTER with code point VALUE in coded charset CCS.

   The main body is guarded so that it is skipped when CCS is named
   Qrep_ucs, VALUE is an integer below 0xF0000 and VALUE equals the
   character's own code (XCHAR (character)).

   VALUE is accepted in two forms:
     - a list of bytes (marked "obsolete representation" below), folded
       into a single integer by shifting in eight bits per element;
     - an integer.
   When XCHARSET_GRAPHIC (ccs) is 1 the integer form is masked with
   0x7F7F7F7F, i.e. the high bit of every octet is cleared.  Any other
   kind of VALUE signals "Invalid value for coded-charset".

   A code point previously recorded for CHARACTER (looked up with
   Fget_char_attribute) is removed from the decoding table before the
   new mapping is entered with decoding_table_put_char.
   NOTE(review): the return value and the conditions around several of
   these steps are on lines not visible in this view.  */
268 put_char_ccs_code_point (Lisp_Object character,
269 Lisp_Object ccs, Lisp_Object value)
271 if ( !( EQ (XCHARSET_NAME (ccs), Qrep_ucs)
272 && INTP (value) && (XINT (value) < 0xF0000)
273 && XCHAR (character) == XINT (value) )
276 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
280 { /* obsolete representation: value must be a list of bytes */
281 Lisp_Object ret = Fcar (value);
285 signal_simple_error ("Invalid value for coded-charset", value);
286 code_point = XINT (ret);
287 if (XCHARSET_GRAPHIC (ccs) == 1)
295 signal_simple_error ("Invalid value for coded-charset",
299 signal_simple_error ("Invalid value for coded-charset",
302 if (XCHARSET_GRAPHIC (ccs) == 1)
304 code_point = (code_point << 8) | j;
307 value = make_int (code_point);
309 else if (INTP (value))
311 code_point = XINT (value);
312 if (XCHARSET_GRAPHIC (ccs) == 1)
314 code_point &= 0x7F7F7F7F;
315 value = make_int (code_point);
319 signal_simple_error ("Invalid value for coded-charset", value);
323 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
326 decoding_table_remove_char (ccs, XINT (cpos));
329 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS.

   If the charset's decoding table is a vector, the code point currently
   recorded for CHARACTER (via Fget_char_attribute) is removed from it
   with decoding_table_remove_char.  If the encoding table is a char
   table, CHARACTER's entry in it is reset to Qunbound.
   NOTE(review): the return value is on a line not visible here.  */
335 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
337 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
338 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
340 if (VECTORP (decoding_table))
342 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
346 decoding_table_remove_char (ccs, XINT (cpos));
349 if (CHAR_TABLEP (encoding_table))
351 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
359 int leading_code_private_11;
362 Lisp_Object Qcharsetp;
364 /* Qdoc_string, Qdimension, Qchars defined in general.c */
365 Lisp_Object Qregistry, Qfinal, Qgraphic;
366 Lisp_Object Qdirection;
367 Lisp_Object Qreverse_direction_charset;
368 Lisp_Object Qleading_byte;
369 Lisp_Object Qshort_name, Qlong_name;
372 Lisp_Object Qto_iso_ir;
373 Lisp_Object Qpartial;
374 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
375 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
392 /* Qrep_jis_x0208_1978, */
410 Qvietnamese_viscii_lower,
411 Qvietnamese_viscii_upper,
421 Lisp_Object Ql2r, Qr2l;
423 Lisp_Object Vcharset_hash_table;
425 /* Composite characters are characters constructed by overstriking two
426 or more regular characters.
428 1) The old Mule implementation involves storing composite characters
429 in a buffer as a tag followed by all of the actual characters
430 used to make up the composite character. I think this is a bad
431 idea; it greatly complicates code that wants to handle strings
432 one character at a time because it has to deal with the possibility
433 of great big ungainly characters. It's much more reasonable to
434 simply store an index into a table of composite characters.
436 2) The current implementation only allows for 16,384 separate
437 composite characters over the lifetime of the XEmacs process.
438 This could become a potential problem if the user
439 edited lots of different files that use composite characters.
440 Due to FSF bogosity, increasing the number of allowable
441 composite characters under Mule would decrease the number
442 of possible faces that can exist. Mule already has shrunk
443 this to 2048, and further shrinkage would become uncomfortable.
444 No such problems exist in XEmacs.
446 Composite characters could be represented as 0x80 C1 C2 C3,
447 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
448 for slightly under 2^20 (one million) composite characters
449 over the XEmacs process lifetime, and you only need to
450 increase the size of a Mule character from 19 to 21 bits.
451 Or you could use 0x80 C1 C2 C3 C4, allowing for about
452 85 million (slightly over 2^26) composite characters. */
455 /************************************************************************/
456 /* Basic Emchar functions */
457 /************************************************************************/
459 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
460 string in STR. Returns the number of bytes stored.
461 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multibyte representation of the non-ASCII character C to
   STR through the running pointer `p'.

   Two encodings are visible in this body:

   1. A UTF-8-style encoding chosen by the magnitude of C:
        C <= 0x7ff      -> 2 bytes (lead 0xc0)
        C <= 0xffff     -> 3 bytes (lead 0xe0)
        C <= 0x1fffff   -> 4 bytes (lead 0xf0)
        C <= 0x3ffffff  -> 5 bytes (lead 0xf8)
        otherwise       -> 6 bytes (lead 0xfc)
      Every following byte carries six payload bits OR-ed with 0x80.

   2. A leading-byte encoding: C is split with BREAKUP_CHAR into
      charset and position codes, and private leading bytes are
      preceded by PRIVATE_LEADING_BYTE_PREFIX (lb).

   NOTE(review): presumably a preprocessor conditional (not visible
   here) selects one of the two; the first branch of the size chain and
   the return statement are also not visible.  */
465 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
480 else if ( c <= 0x7ff )
482 *p++ = (c >> 6) | 0xc0;
483 *p++ = (c & 0x3f) | 0x80;
485 else if ( c <= 0xffff )
487 *p++ = (c >> 12) | 0xe0;
488 *p++ = ((c >> 6) & 0x3f) | 0x80;
489 *p++ = (c & 0x3f) | 0x80;
491 else if ( c <= 0x1fffff )
493 *p++ = (c >> 18) | 0xf0;
494 *p++ = ((c >> 12) & 0x3f) | 0x80;
495 *p++ = ((c >> 6) & 0x3f) | 0x80;
496 *p++ = (c & 0x3f) | 0x80;
498 else if ( c <= 0x3ffffff )
500 *p++ = (c >> 24) | 0xf8;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
508 *p++ = (c >> 30) | 0xfc;
509 *p++ = ((c >> 24) & 0x3f) | 0x80;
510 *p++ = ((c >> 18) & 0x3f) | 0x80;
511 *p++ = ((c >> 12) & 0x3f) | 0x80;
512 *p++ = ((c >> 6) & 0x3f) | 0x80;
513 *p++ = (c & 0x3f) | 0x80;
516 BREAKUP_CHAR (c, charset, c1, c2);
517 lb = CHAR_LEADING_BYTE (c);
518 if (LEADING_BYTE_PRIVATE_P (lb))
519 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
521 if (EQ (charset, Vcharset_control_1))
530 /* Return the first character from a Mule-encoded string in STR,
531 assuming it's non-ASCII. Do not call this directly.
532 Use the macro charptr_emchar() instead. */
/* Decode the first character of the multibyte string STR, which is
   assumed to be non-ASCII.

   Two decoders are visible in this body:

   1. A UTF-8-style decoder: the first byte `b' is classified by the
      thresholds 0xf8, 0xf0, 0xe0 and 0xc0, and a loop then folds the
      low six bits of each following byte into `ch'.

   2. A leading-byte decoder: LEADING_BYTE_CONTROL_1 yields the next
      byte minus 0x20; otherwise the charset is found with
      CHARSET_BY_LEADING_BYTE (after handling a prefix byte), a second
      position byte is used when the charset has dimension 2, and the
      result is built with MAKE_CHAR.

   NOTE(review): presumably a preprocessor conditional (not visible
   here) selects one of the two; how `len' is set for each lead-byte
   class is also on lines not visible.  */
535 non_ascii_charptr_emchar (const Bufbyte *str)
548 else if ( b >= 0xf8 )
553 else if ( b >= 0xf0 )
558 else if ( b >= 0xe0 )
563 else if ( b >= 0xc0 )
573 for( ; len > 0; len-- )
576 ch = ( ch << 6 ) | ( b & 0x3f );
580 Bufbyte i0 = *str, i1, i2 = 0;
583 if (i0 == LEADING_BYTE_CONTROL_1)
584 return (Emchar) (*++str - 0x20);
586 if (LEADING_BYTE_PREFIX_P (i0))
591 charset = CHARSET_BY_LEADING_BYTE (i0);
592 if (XCHARSET_DIMENSION (charset) == 2)
595 return MAKE_CHAR (charset, i1, i2);
599 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
600 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII character CH, based on its three
   fields f1, f2 and f3 (CHAR_FIELD1..3).

   Visible checks:
     - in the first part, f2 must lie in the official or the private
       FIELD2 range; in the second part the same is required of f1,
       and f2 and f3 must both be at least 0x20;
     - position values 0x20 and 0x7F get special treatment: when one
       occurs, or when the charset is in the private range, the charset
       is looked up by leading byte, a nil result is tested for, and
       the answer is whether XCHARSET_CHARS (charset) is 96;
     - with ENABLE_COMPOSITE_CHARS, a character whose leading byte is
       LEADING_BYTE_COMPOSITE is looked up in
       Vcomposite_char_char2string_hash_table.

   NOTE(review): the condition separating the two parts and the values
   returned by the individual `if' statements are on lines not visible
   in this view.  */
604 non_ascii_valid_char_p (Emchar ch)
608 /* Must have only lowest 19 bits set */
612 f1 = CHAR_FIELD1 (ch);
613 f2 = CHAR_FIELD2 (ch);
614 f3 = CHAR_FIELD3 (ch);
620 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
621 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
622 f2 > MAX_CHAR_FIELD2_PRIVATE)
627 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
628 f2 <= MAX_CHAR_FIELD2_PRIVATE))
632 NOTE: This takes advantage of the fact that
633 FIELD2_TO_OFFICIAL_LEADING_BYTE and
634 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
636 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
637 if (EQ (charset, Qnil))
639 return (XCHARSET_CHARS (charset) == 96);
645 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
646 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
647 f1 > MAX_CHAR_FIELD1_PRIVATE)
649 if (f2 < 0x20 || f3 < 0x20)
652 #ifdef ENABLE_COMPOSITE_CHARS
653 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
655 if (UNBOUNDP (Fgethash (make_int (ch),
656 Vcomposite_char_char2string_hash_table,
661 #endif /* ENABLE_COMPOSITE_CHARS */
663 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
664 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
667 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
669 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
672 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
674 if (EQ (charset, Qnil))
676 return (XCHARSET_CHARS (charset) == 96);
682 /************************************************************************/
683 /* Basic string functions */
684 /************************************************************************/
686 /* Copy the character pointed to by SRC into DST. Do not call this
687 directly. Use the macro charptr_copy_char() instead.
688 Return the number of bytes copied. */
/* Copy one multibyte character from SRC to DST.  The number of bytes
   is taken from the first byte of SRC via REP_BYTES_BY_FIRST_BYTE; the
   loop runs once per byte and advances both pointers.
   NOTE(review): the loop body and the return statement are on lines
   not visible in this view.  */
691 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
693 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
695 for (i = bytes; i; i--, dst++, src++)
701 /************************************************************************/
702 /* streams of Emchars */
703 /************************************************************************/
705 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
706 The functions below are not meant to be called directly; use
707 the macros in insdel.h. */
/* Finish reading a multibyte character from STREAM whose first byte CH
   has already been read.  CH is stored as the first byte of a local
   buffer of MAX_EMCHAR_LEN bytes; REP_BYTES_BY_FIRST_BYTE (ch) - 1
   further bytes are fetched with Lstream_getc, each asserted to be
   non-negative, and the assembled bytes are decoded with
   charptr_emchar.  */
710 Lstream_get_emchar_1 (Lstream *stream, int ch)
712 Bufbyte str[MAX_EMCHAR_LEN];
713 Bufbyte *strptr = str;
716 str[0] = (Bufbyte) ch;
718 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
720 int c = Lstream_getc (stream);
721 bufpos_checking_assert (c >= 0);
722 *++strptr = (Bufbyte) c;
724 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer with
   set_charptr_emchar and the resulting bytes are passed to
   Lstream_write, whose result is returned.  */
728 Lstream_fput_emchar (Lstream *stream, Emchar ch)
730 Bufbyte str[MAX_EMCHAR_LEN];
731 Bytecount len = set_charptr_emchar (str, ch);
732 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local
   buffer with set_charptr_emchar and the resulting bytes are handed to
   Lstream_unread.  */
736 Lstream_funget_emchar (Lstream *stream, Emchar ch)
738 Bufbyte str[MAX_EMCHAR_LEN];
739 Bytecount len = set_charptr_emchar (str, ch);
740 Lstream_unread (stream, str, len);
744 /************************************************************************/
746 /************************************************************************/
/* Mark method for charset objects (it is the marker passed to
   DEFINE_LRECORD_IMPLEMENTATION for "charset").  Calls mark_object on
   the Lisp-valued members of the charset visible below.
   NOTE(review): the return statement is on a line not visible here.  */
749 mark_charset (Lisp_Object obj)
751 Lisp_Charset *cs = XCHARSET (obj);
753 mark_object (cs->short_name);
754 mark_object (cs->long_name);
755 mark_object (cs->doc_string);
756 mark_object (cs->registry);
757 mark_object (cs->ccl_program);
759 mark_object (cs->decoding_table);
760 mark_object (cs->mother);
/* Print method for charset objects.  Output goes to PRINTCHARFUN and
   has the form

     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING <attrs> reg=REGISTRY 0xUID>

   where <attrs> is produced by the sprintf format below (dimension,
   direction as "l2r" or "r2l", columns, graphic and final character,
   among others).  An error "printing unreadable object" is raised
   under a condition that is on a line not visible here.
   NOTE(review): `buf' is filled with sprintf; its declaration and size
   are not visible in this view, so the bound cannot be checked here.  */
766 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
768 Lisp_Charset *cs = XCHARSET (obj);
772 error ("printing unreadable object #<charset %s 0x%x>",
773 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
776 write_c_string ("#<charset ", printcharfun);
777 print_internal (CHARSET_NAME (cs), printcharfun, 0);
778 write_c_string (" ", printcharfun);
779 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
780 write_c_string (" ", printcharfun);
781 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
782 write_c_string (" ", printcharfun);
783 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
784 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
786 CHARSET_DIMENSION (cs),
787 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
788 CHARSET_COLUMNS (cs),
789 CHARSET_GRAPHIC (cs),
791 write_c_string (buf, printcharfun);
792 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
793 sprintf (buf, " 0x%x>", cs->header.uid);
794 write_c_string (buf, printcharfun);
/* Record description for Lisp_Charset: one XD_LISP_OBJECT entry, given
   by member offset, for each Lisp_Object-valued member listed below.
   NOTE(review): presumably consumed by the object dumper; the
   terminating entry is on a line not visible in this view.  */
797 static const struct lrecord_description charset_description[] = {
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
802 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
803 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
806 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
807 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Define the "charset" lrecord type, with mark_charset as its mark
   method and print_charset as its print method; the three following
   method slots are left as 0.  */
812 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
813 mark_charset, print_charset, 0, 0, 0,
817 /* Make a new charset. */
818 /* #### SJT Should generic properties be allowed? */
/* Allocate and register a new charset object.

   A Lisp_Charset lcrecord is allocated and its fields are filled from
   the arguments (ID, NAME, CHARS, DIMENSION, COLUMNS, GRAPHIC, FINAL,
   DIRECTION, the name/doc strings, code range and offsets, MOTHER and
   CONVERSION).  The CCL program and reverse-direction charset start as
   Qnil.  CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII and otherwise
   DIMENSION + 1 or DIMENSION + 2 (the selecting conditions are on
   lines not visible here).

   The new object is then entered into the lookup tables:
   chlook->charset_by_attributes (indexed by an ISO 2022 type derived
   from DIMENSION and CHARS, and by FINAL), chlook->charset_by_leading_byte
   (indexed by ID - MIN_LEADING_BYTE) and Vcharset_hash_table (keyed by
   NAME).  The asserts require the affected slots to be empty.

   NOTE(review): the DECODING_TABLE argument is not stored on any
   visible line; the field is initialised to Qunbound instead.  Confirm
   against the full source whether the argument is used elsewhere.  */
820 make_charset (Charset_ID id, Lisp_Object name,
821 unsigned short chars, unsigned char dimension,
822 unsigned char columns, unsigned char graphic,
823 Bufbyte final, unsigned char direction, Lisp_Object short_name,
824 Lisp_Object long_name, Lisp_Object doc,
827 Lisp_Object decoding_table,
828 Emchar min_code, Emchar max_code,
829 Emchar code_offset, unsigned char byte_offset,
830 Lisp_Object mother, unsigned char conversion,
834 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
838 XSETCHARSET (obj, cs);
840 CHARSET_ID (cs) = id;
841 CHARSET_NAME (cs) = name;
842 CHARSET_SHORT_NAME (cs) = short_name;
843 CHARSET_LONG_NAME (cs) = long_name;
844 CHARSET_CHARS (cs) = chars;
845 CHARSET_DIMENSION (cs) = dimension;
846 CHARSET_DIRECTION (cs) = direction;
847 CHARSET_COLUMNS (cs) = columns;
848 CHARSET_GRAPHIC (cs) = graphic;
849 CHARSET_FINAL (cs) = final;
850 CHARSET_DOC_STRING (cs) = doc;
851 CHARSET_REGISTRY (cs) = reg;
852 CHARSET_CCL_PROGRAM (cs) = Qnil;
853 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
855 CHARSET_ISO_IR (cs) = iso_ir;
856 CHARSET_DECODING_TABLE(cs) = Qunbound;
857 CHARSET_MIN_CODE (cs) = min_code;
858 CHARSET_MAX_CODE (cs) = max_code;
859 CHARSET_CODE_OFFSET (cs) = code_offset;
860 CHARSET_BYTE_OFFSET (cs) = byte_offset;
861 CHARSET_MOTHER (cs) = mother;
862 CHARSET_CONVERSION (cs) = conversion;
866 if (id == LEADING_BYTE_ASCII)
867 CHARSET_REP_BYTES (cs) = 1;
869 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
871 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
876 /* some charsets do not have final characters. This includes
877 ASCII, Control-1, Composite, and the two faux private
879 unsigned char iso2022_type
880 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
882 if ( ( !partial ) && ( code_offset == 0 ) )
884 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
885 chlook->charset_by_attributes[iso2022_type][final] = obj;
889 (chlook->charset_by_attributes[iso2022_type][final][direction]));
890 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
894 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
895 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
897 /* Some charsets are "faux" and don't have names or really exist at
898 all except in the leading-byte table. */
900 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION.

   Three counters kept in `chlook' are visible: a general one
   (next_allocated_leading_byte, limited by MAX_LEADING_BYTE_PRIVATE)
   and separate ones for 1-byte and 2-byte private charsets (limited by
   MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2).  The
   chosen counter is checked against its limit and post-incremented.
   When MAX_LEADING_BYTE_PRIVATE_2 is 255 the exhaustion test compares
   the 2-byte counter with 0 instead of with the limit.
   When nothing is left, the error "No more character sets free for
   this dimension" is signalled with DIMENSION as its datum.
   NOTE(review): which counter applies to which build or dimension is
   decided on lines not visible in this view.  */
905 get_unallocated_leading_byte (int dimension)
910 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
913 lb = chlook->next_allocated_leading_byte++;
917 if (chlook->next_allocated_1_byte_leading_byte >
918 MAX_LEADING_BYTE_PRIVATE_1)
921 lb = chlook->next_allocated_1_byte_leading_byte++;
925 /* awfully fragile, but correct */
926 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
927 if (chlook->next_allocated_2_byte_leading_byte == 0)
929 if (chlook->next_allocated_2_byte_leading_byte >
930 MAX_LEADING_BYTE_PRIVATE_2)
934 lb = chlook->next_allocated_2_byte_leading_byte++;
940 ("No more character sets free for this dimension",
941 make_int (dimension));
947 /* Number of Big5 code points that share the same first (lead) byte. */
949 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Convert CODE_POINT of a derived charset into the corresponding code
   of its mother charset, according to CONV_TYPE.

   Visible conversions:
     CONVERSION_IDENTICAL  - tested first (result line not visible).
     CONVERSION_94x60      - CODE_POINT is split into row (high byte)
                             and cell (low byte); rows below 16+32+30
                             map to (row - 48) * 94 + cell - 33 and
                             rows below 18+32+60 map to
                             (row - 50) * 94 + cell - 33.
     CONVERSION_94x94x60   - as above with an additional plane byte
                             contributing (plane - 33) * 94 * 60.
     CONVERSION_BIG5_1/_2  - the 94x94 code is turned into a linear
                             index I, which is re-expressed as a Big5
                             two-byte code: b1 = I / 157 + 0xA1 and
                             b2 = I % 157, shifted by 0x40 when below
                             0x3F and by 0x62 otherwise.  BIG5_2 first
                             adds BIG5_SAME_ROW * (0xC9 - 0xA1) to I.
   (157 is the value of 0xFF - 0xA1 + 0x7F - 0x40.)

   NOTE(review): the results for the row ranges between the listed
   ones, and the fall-through result, are on lines not visible here.  */
952 decode_ccs_conversion (int conv_type, int code_point)
954 if ( conv_type == CONVERSION_IDENTICAL )
958 if ( conv_type == CONVERSION_94x60 )
960 int row = code_point >> 8;
961 int cell = code_point & 255;
965 else if (row < 16 + 32 + 30)
966 return (row - (16 + 32)) * 94 + cell - 33;
967 else if (row < 18 + 32 + 30)
969 else if (row < 18 + 32 + 60)
970 return (row - (18 + 32)) * 94 + cell - 33;
972 else if ( conv_type == CONVERSION_94x94x60 )
974 int plane = code_point >> 16;
975 int row = (code_point >> 8) & 255;
976 int cell = code_point & 255;
980 else if (row < 16 + 32 + 30)
982 (plane - 33) * 94 * 60
983 + (row - (16 + 32)) * 94
985 else if (row < 18 + 32 + 30)
987 else if (row < 18 + 32 + 60)
989 (plane - 33) * 94 * 60
990 + (row - (18 + 32)) * 94
993 else if ( conv_type == CONVERSION_BIG5_1 )
996 = (((code_point >> 8) & 0x7F) - 33) * 94
997 + (( code_point & 0x7F) - 33);
998 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
999 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1001 b2 += b2 < 0x3F ? 0x40 : 0x62;
1002 return (b1 << 8) | b2;
1004 else if ( conv_type == CONVERSION_BIG5_2 )
1007 = (((code_point >> 8) & 0x7F) - 33) * 94
1008 + (( code_point & 0x7F) - 33)
1009 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1010 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1011 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1013 b2 += b2 < 0x3F ? 0x40 : 0x62;
1014 return (b1 << 8) | b2;
/* Look up the character explicitly defined for CODE_POINT in coded
   charset CCS.

   The decoding table is descended one octet of CODE_POINT at a time
   with get_ccs_octet_table.  If the entry found is a character, it is
   returned.  If it is Qunloaded, load_char_decoding_entry_maybe is
   tried.  Otherwise, unless WITHOUT_INHERITANCE is set, and if CCS has
   a mother charset, CODE_POINT is converted with decode_ccs_conversion,
   XCHARSET_CODE_OFFSET (ccs) is added, and the result is decoded in
   the mother charset: via DECODE_CHAR when the mother is Vcharset_ucs,
   recursively otherwise.

   `char_id' starts at -1.  NOTE(review): presumably that is the value
   returned when nothing is found; the final return is on a line not
   visible here.  */
1020 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1022 int dim = XCHARSET_DIMENSION (ccs);
1023 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1024 Emchar char_id = -1;
1031 = get_ccs_octet_table (decoding_table, ccs,
1032 (code_point >> (dim * 8)) & 255);
1034 if (CHARP (decoding_table))
1035 return XCHAR (decoding_table);
1037 if (EQ (decoding_table, Qunloaded))
1039 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1041 #endif /* HAVE_CHISE */
1044 else if ( !without_inheritance
1045 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1048 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1052 code += XCHARSET_CODE_OFFSET(ccs);
1053 if ( EQ (mother, Vcharset_ucs) )
1054 return DECODE_CHAR (mother, code, without_inheritance);
1056 return decode_defined_char (mother, code,
1057 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET arithmetically, i.e.
   without consulting the decoding table.

   When XCHARSET_MAX_CODE (charset) is positive:
     - with a mother charset, CODE_POINT is converted with
       decode_ccs_conversion and decoded in the mother after adding
       XCHARSET_CODE_OFFSET (charset);
     - otherwise a character id is computed from the byte offset, the
       number of characters per dimension and the code offset (the
       one- and two-dimensional forms are visible), and then compared
       against the charset's [MIN_CODE, MAX_CODE] range.

   Otherwise, when the final byte is at least '0', the character is
   placed in a block selected by (final - '0'): 94 or 96 positions per
   final for dimension 1, and 94*94 or 96*96 positions starting at
   MIN_CHAR_94x94 / MIN_CHAR_96x96 for dimension 2.

   NOTE(review): the values returned on failure and the `case' labels
   of the two switches are on lines not visible in this view.  */
1064 decode_builtin_char (Lisp_Object charset, int code_point)
1066 Lisp_Object mother = XCHARSET_MOTHER (charset);
1069 if ( XCHARSET_MAX_CODE (charset) > 0 )
1071 if ( CHARSETP (mother) )
1074 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1079 decode_builtin_char (mother,
1080 code + XCHARSET_CODE_OFFSET(charset));
1087 = (XCHARSET_DIMENSION (charset) == 1
1089 code_point - XCHARSET_BYTE_OFFSET (charset)
1091 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1092 * XCHARSET_CHARS (charset)
1093 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1094 + XCHARSET_CODE_OFFSET (charset);
1095 if ((cid < XCHARSET_MIN_CODE (charset))
1096 || (XCHARSET_MAX_CODE (charset) < cid))
1101 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1103 if (XCHARSET_DIMENSION (charset) == 1)
1105 switch (XCHARSET_CHARS (charset))
1109 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1112 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1120 switch (XCHARSET_CHARS (charset))
1123 return MIN_CHAR_94x94
1124 + (final - '0') * 94 * 94
1125 + (((code_point >> 8) & 0x7F) - 33) * 94
1126 + ((code_point & 0x7F) - 33);
1128 return MIN_CHAR_96x96
1129 + (final - '0') * 96 * 96
1130 + (((code_point >> 8) & 0x7F) - 32) * 96
1131 + ((code_point & 0x7F) - 32);
/* Find the code point of character CH in CHARSET.  ACCEPTED_MODE
   controls which sources may be used; the value CHAR_DEFINED_ONLY and
   the test ACCEPTED_MODE >= 0 are the distinctions visible here.

   Visible stages:
     1. If ACCEPTED_MODE >= 0 and the charset has a char-table encoding
        table, that table is consulted first.
     2. A base code is obtained: from the mother charset (recursively,
        with CHAR_DEFINED_ONLY when the final byte is at least '0'), or
        from CH itself when it lies within [MIN_CODE, MAX_CODE] or the
        charset has no range but has a mother.
     3. If the base code is acceptable, XCHARSET_CODE_OFFSET is
        subtracted and the result `d' is re-expressed as a code point
        according to XCHARSET_CONVERSION: the 94/96-based layouts of
        one to four bytes, the 94x60 and 94x94x60 layouts, and the two
        Big5 splits (the inverse of the Big5 arithmetic in
        decode_ccs_conversion).  An unknown conversion is reported with
        printf.
     4. For charsets with a final byte of at least '0' and a zero
        minimum code, the code point is computed from the offset of CH
        within the MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 /
        MIN_CHAR_96x96 blocks.

   NOTE(review): many branch bodies, including the values returned on
   failure, are on lines not visible in this view; callers visible in
   this file treat a result >= 0 as success.  */
1143 charset_code_point (Lisp_Object charset, Emchar ch, int accepted_mode)
1147 if ( accepted_mode >= 0 )
1149 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1151 if ( CHAR_TABLEP (encoding_table)
1152 #ifdef HAVE_LIBCHISE
1153 && !UNBOUNDP (ret = get_char_id_table_ce (XCHAR_TABLE
1157 && !UNBOUNDP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1167 Lisp_Object mother = XCHARSET_MOTHER (charset);
1168 int min = XCHARSET_MIN_CODE (charset);
1169 int max = XCHARSET_MAX_CODE (charset);
1172 if ( CHARSETP (mother) && ( accepted_mode >= 0)
1173 && ( XCHARSET_FINAL (charset) >= '0' )
1175 code = charset_code_point (mother, ch, CHAR_DEFINED_ONLY);
1176 else if ( CHARSETP (mother)
1177 && ( XCHARSET_FINAL (charset) < '0' )
1179 code = charset_code_point (mother, ch, accepted_mode);
1180 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1182 else if ( ((max == 0) && CHARSETP (mother)
1183 && (XCHARSET_FINAL (charset) == 0))
1184 || ((min <= ch) && (ch <= max)) )
1187 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1188 || ((min <= code) && (code <= max)) )
1190 int d = code - XCHARSET_CODE_OFFSET (charset);
1192 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1194 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1196 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1198 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1201 int cell = d % 94 + 33;
1207 return (row << 8) | cell;
1209 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1211 int B1 = d >> 8, B2 = d & 0xFF;
1213 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1214 - (B2 < 0x7F ? 0x40 : 0x62);
1218 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1221 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1223 int B1 = d >> 8, B2 = d & 0xFF;
1225 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1226 - (B2 < 0x7F ? 0x40 : 0x62);
1230 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1231 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1234 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1235 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1236 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1237 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1238 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1240 int plane = d / (94 * 60) + 33;
1241 int row = (d % (94 * 60)) / 94;
1242 int cell = d % 94 + 33;
1248 return (plane << 16) | (row << 8) | cell;
1250 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1252 ( (d / (94 * 94) + 33) << 16)
1253 | ((d / 94 % 94 + 33) << 8)
1255 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1257 ( (d / (96 * 96) + 32) << 16)
1258 | ((d / 96 % 96 + 32) << 8)
1260 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1262 ( (d / (94 * 94 * 94) + 33) << 24)
1263 | ((d / (94 * 94) % 94 + 33) << 16)
1264 | ((d / 94 % 94 + 33) << 8)
1266 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1268 ( (d / (96 * 96 * 96) + 32) << 24)
1269 | ((d / (96 * 96) % 96 + 32) << 16)
1270 | ((d / 96 % 96 + 32) << 8)
1274 printf ("Unknown CCS-conversion %d is specified!",
1275 XCHARSET_CONVERSION (charset));
1279 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1281 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1282 ( XCHARSET_MIN_CODE (charset) == 0 )
1284 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1285 (XCHARSET_CODE_OFFSET (charset)
1286 == XCHARSET_MIN_CODE (charset))
1291 if (XCHARSET_DIMENSION (charset) == 1)
1293 if (XCHARSET_CHARS (charset) == 94)
1295 if (((d = ch - (MIN_CHAR_94
1296 + (XCHARSET_FINAL (charset) - '0') * 94))
1301 else if (XCHARSET_CHARS (charset) == 96)
1303 if (((d = ch - (MIN_CHAR_96
1304 + (XCHARSET_FINAL (charset) - '0') * 96))
1312 else if (XCHARSET_DIMENSION (charset) == 2)
1314 if (XCHARSET_CHARS (charset) == 94)
1316 if (((d = ch - (MIN_CHAR_94x94
1318 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1321 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1323 else if (XCHARSET_CHARS (charset) == 96)
1325 if (((d = ch - (MIN_CHAR_96x96
1327 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1330 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a charset and code point for character CH, storing the
   charset through CHARSET.

   First pass: every entry of Vdefault_coded_charset_priority_list is
   resolved with Ffind_charset; for charsets of dimension at most 2,
   charset_code_point (*charset, ch, 0) is tried and a result >= 0 is
   tested for.  When Vdisplay_coded_charset_priority_use_inheritance is
   non-nil and ..._use_hierarchy_order is nil, the charset's children
   are searched right away with encode_char_2_search_children.

   Second pass (only when both variables are non-nil): the list is
   walked again and only the children are searched.

   If neither pass succeeds, the result of encode_builtin_char_1 is
   returned.
   NOTE(review): the statements executed when a code point is found
   are on lines not visible in this view.  */
1341 encode_char_2 (Emchar ch, Lisp_Object* charset)
1343 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
1346 while (!NILP (charsets))
1348 *charset = Ffind_charset (Fcar (charsets));
1349 if ( !NILP (*charset)
1350 && (XCHARSET_DIMENSION (*charset) <= 2) )
1352 code_point = charset_code_point (*charset, ch, 0);
1353 if (code_point >= 0)
1356 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1357 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1359 code_point = encode_char_2_search_children (ch, charset);
1360 if (code_point >= 0)
1364 charsets = Fcdr (charsets);
1367 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1368 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1370 charsets = Vdefault_coded_charset_priority_list;
1371 while (!NILP (charsets))
1373 *charset = Ffind_charset (Fcar (charsets));
1374 if ( !NILP (*charset)
1375 && (XCHARSET_DIMENSION (*charset) <= 2) )
1377 code_point = encode_char_2_search_children (ch, charset);
1378 if (code_point >= 0)
1381 charsets = Fcdr (charsets);
1385 /* otherwise --- maybe for bootstrap */
1386 return encode_builtin_char_1 (ch, charset);
/* Map character C to a built-in charset purely by its numeric range,
   storing the charset through CHARSET and returning the code point.

   Visible ranges, in order: basic Latin (Vcharset_ascii), control-1,
   Latin-1, Hebrew, Thai, half-width katakana, the BMP, SMP and SIP
   (the latter two returning C minus the start of the plane), the
   general UCS charset for anything below MIN_CHAR_94, and then the
   94, 96, 94x94 and 96x96 blocks.  In those blocks the charset is
   looked up with CHARSET_BY_ATTRIBUTES using a final byte derived
   from the block index; if no such charset is registered, the result
   falls back to Vcharset_ucs.

   NOTE(review): the half-width katakana branch returns the value of
   list2 (...), a Lisp object, whereas every other visible branch
   returns an integer code point.  Presumably that branch (and perhaps
   its neighbours) is disabled on lines not visible here; confirm
   against the full source.  */
1390 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1392 if (c <= MAX_CHAR_BASIC_LATIN)
1394 *charset = Vcharset_ascii;
1399 *charset = Vcharset_control_1;
1404 *charset = Vcharset_latin_iso8859_1;
1408 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1410 *charset = Vcharset_hebrew_iso8859_8;
1411 return c - MIN_CHAR_HEBREW + 0x20;
1414 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1416 *charset = Vcharset_thai_tis620;
1417 return c - MIN_CHAR_THAI + 0x20;
1420 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1421 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1423 return list2 (Vcharset_katakana_jisx0201,
1424 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1427 else if (c <= MAX_CHAR_BMP)
1429 *charset = Vcharset_ucs_bmp;
1432 else if (c <= MAX_CHAR_SMP)
1434 *charset = Vcharset_ucs_smp;
1435 return c - MIN_CHAR_SMP;
1437 else if (c <= MAX_CHAR_SIP)
1439 *charset = Vcharset_ucs_sip;
1440 return c - MIN_CHAR_SIP;
1442 else if (c < MIN_CHAR_94)
1444 *charset = Vcharset_ucs;
1447 else if (c <= MAX_CHAR_94)
1449 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1450 ((c - MIN_CHAR_94) / 94) + '0',
1451 CHARSET_LEFT_TO_RIGHT);
1452 if (!NILP (*charset))
1453 return ((c - MIN_CHAR_94) % 94) + 33;
1456 *charset = Vcharset_ucs;
1460 else if (c <= MAX_CHAR_96)
1462 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1463 ((c - MIN_CHAR_96) / 96) + '0',
1464 CHARSET_LEFT_TO_RIGHT);
1465 if (!NILP (*charset))
1466 return ((c - MIN_CHAR_96) % 96) + 32;
1469 *charset = Vcharset_ucs;
1473 else if (c <= MAX_CHAR_94x94)
1476 = CHARSET_BY_ATTRIBUTES (94, 2,
1477 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1478 CHARSET_LEFT_TO_RIGHT);
1479 if (!NILP (*charset))
1480 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1481 | (((c - MIN_CHAR_94x94) % 94) + 33);
1484 *charset = Vcharset_ucs;
1488 else if (c <= MAX_CHAR_96x96)
1491 = CHARSET_BY_ATTRIBUTES (96, 2,
1492 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1493 CHARSET_LEFT_TO_RIGHT);
1494 if (!NILP (*charset))
1495 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1496 | (((c - MIN_CHAR_96x96) % 96) + 32);
1499 *charset = Vcharset_ucs;
1505 *charset = Vcharset_ucs;
/* Variables consulted when a coded charset is searched for a character.
   The priority list is walked with Fcar/Fcdr and each element is handed
   to Ffind_charset; the hierarchy-order flag is tested with NILP.
   NOTE(review): no use of the inheritance flag is visible in this part
   of the file -- confirm its role against its callers. */
1510 Lisp_Object Vdefault_coded_charset_priority_list;
1511 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1512 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1516 /************************************************************************/
1517 /* Basic charset Lisp functions */
1518 /************************************************************************/
/* Type predicate: yields Qt when CHARSETP accepts OBJECT, Qnil otherwise. */
1520 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1521 Return non-nil if OBJECT is a charset.
1525 return CHARSETP (object) ? Qt : Qnil;
/* A charset object is returned unchanged.  Any other argument must pass
   CHECK_SYMBOL and is then looked up in Vcharset_hash_table, with Qnil
   as the value returned for a missing key. */
1528 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1529 Retrieve the charset of the given name.
1530 If CHARSET-OR-NAME is a charset object, it is simply returned.
1531 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1532 nil is returned. Otherwise the associated charset object is returned.
1536 if (CHARSETP (charset_or_name))
1537 return charset_or_name;
1539 CHECK_SYMBOL (charset_or_name);
1540 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Strict variant of Ffind_charset: the lookup itself is delegated to
   Ffind_charset, and failure is reported through signal_simple_error
   with the message "No such charset" and NAME as the datum. */
1543 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1544 Retrieve the charset of the given name.
1545 Same as `find-charset' except an error is signalled if there is no such
1546 charset instead of returning nil.
1550 Lisp_Object charset = Ffind_charset (name);
1553 signal_simple_error ("No such charset", name);
/* Closure passed to the hash-table mapper below: it carries a pointer
   to the Lisp list that is being accumulated. */
1557 /* We store the charsets in hash tables with the names as the key and the
1558 actual charset object as the value. Occasionally we need to use them
1559 in a list format. These routines provide us with that. */
1560 struct charset_list_closure
1562 Lisp_Object *charset_list;
/* Hash-table mapping callback.  The closure arrives as a void pointer
   and is cast back to struct charset_list_closure; KEY is then consed
   onto the front of the list that the closure points to.  VALUE is not
   used by the visible code (the XCHARSET_NAME (value) alternative is
   commented out). */
1566 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1567 void *charset_list_closure)
1569 /* This function can GC */
1570 struct charset_list_closure *chcl =
1571 (struct charset_list_closure*) charset_list_closure;
1572 Lisp_Object *charset_list = chcl->charset_list;
1574 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Builds the result by mapping add_charset_to_list_mapper over
   Vcharset_hash_table with elisp_maphash.  The accumulator starts as
   Qnil, is registered with GCPRO1 before the mapping starts, and is
   reached by the mapper through the closure's pointer. */
1578 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1579 Return a list of the names of all defined charsets.
1583 Lisp_Object charset_list = Qnil;
1584 struct gcpro gcpro1;
1585 struct charset_list_closure charset_list_closure;
1587 GCPRO1 (charset_list);
1588 charset_list_closure.charset_list = &charset_list;
1589 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1590 &charset_list_closure);
1593 return charset_list;
/* Resolves CHARSET through Fget_charset (which signals for an unknown
   charset) and reads the name with XCHARSET_NAME. */
1596 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1597 Return the name of charset CHARSET.
1601 return XCHARSET_NAME (Fget_charset (charset));
/* Implementation outline of make-charset, as far as the visible lines
   show it:
   - NAME must be a symbol and DOC-STRING, when non-nil, a string;
     an already defined NAME is rejected.
   - PROPS is walked one keyword/value pair at a time; each recognized
     keyword validates its value and stores it in a local, and any other
     keyword signals "Unrecognized property".
   - The final byte and the DIMENSION/CHARS/FINAL combination are checked
     against existing charsets in both directions.
   - Missing doc string, registry, short name and long name get defaults,
     then make_charset builds the object and the CCL program, if one was
     given, is attached afterwards. */
1604 /* #### SJT Should generic properties be allowed? */
1605 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1606 Define a new character set.
1607 This function is for use with Mule support.
1608 NAME is a symbol, the name by which the character set is normally referred.
1609 DOC-STRING is a string describing the character set.
1610 PROPS is a property list, describing the specific nature of the
1611 character set. Recognized properties are:
1613 'short-name Short version of the charset name (ex: Latin-1)
1614 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1615 'registry A regular expression matching the font registry field for
1617 'dimension Number of octets used to index a character in this charset.
1618 Either 1 or 2. Defaults to 1.
1619 If UTF-2000 feature is enabled, 3 or 4 are also available.
1620 'columns Number of columns used to display a character in this charset.
1621 Only used in TTY mode. (Under X, the actual width of a
1622 character can be derived from the font used to display the
1623 characters.) If unspecified, defaults to the dimension
1624 (this is almost always the correct value).
1625 'chars Number of characters in each dimension (94 or 96).
1626 Defaults to 94. Note that if the dimension is 2, the
1627 character set thus described is 94x94 or 96x96.
1628 If UTF-2000 feature is enabled, 128 or 256 are also available.
1629 'final Final byte of ISO 2022 escape sequence. Must be
1630 supplied. Each combination of (DIMENSION, CHARS) defines a
1631 separate namespace for final bytes. Note that ISO
1632 2022 restricts the final byte to the range
1633 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1634 dimension == 2. Note also that final bytes in the range
1635 0x30 - 0x3F are reserved for user-defined (not official)
1637 'graphic 0 (use left half of font on output) or 1 (use right half
1638 of font on output). Defaults to 0. For example, for
1639 a font whose registry is ISO8859-1, the left half
1640 (octets 0x20 - 0x7F) is the `ascii' character set, while
1641 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1642 character set. With 'graphic set to 0, the octets
1643 will have their high bit cleared; with it set to 1,
1644 the octets will have their high bit set.
1645 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1647 'ccl-program A compiled CCL program used to convert a character in
1648 this charset into an index into the font. This is in
1649 addition to the 'graphic property. The CCL program
1650 is passed the octets of the character, with the high
1651 bit cleared and set depending upon whether the value
1652 of the 'graphic property is 0 or 1.
1653 'iso-ir ISO-IR number (for representative coded-charset).
1654 '=>iso-ir [UTF-2000 only] Corresponding ISO-IR number.
1655 'mother [UTF-2000 only] Base coded-charset.
1656 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1657 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1658 'code-offset [UTF-2000 only] Offset for a code-point of a base
1660 'conversion [UTF-2000 only] Conversion for a code-point of a base
1661 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1662 'partial [UTF-2000 only] If t, specify as a partial coded-charset.
1664 (name, doc_string, props))
1666 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1668 int direction = CHARSET_LEFT_TO_RIGHT;
1669 Lisp_Object registry = Qnil;
1670 Lisp_Object charset;
1671 Lisp_Object ccl_program = Qnil;
1672 Lisp_Object short_name = Qnil, long_name = Qnil;
1673 Lisp_Object mother = Qnil;
1675 int min_code = 0, max_code = 0, code_offset = 0;
1676 int byte_offset = -1;
1679 CHECK_SYMBOL (name);
1680 if (!NILP (doc_string))
1681 CHECK_STRING (doc_string);
1683 charset = Ffind_charset (name);
1684 if (!NILP (charset))
1685 signal_simple_error ("Cannot redefine existing charset", name);
1688 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1690 if (EQ (keyword, Qshort_name))
1692 CHECK_STRING (value);
1696 else if (EQ (keyword, Qlong_name))
1698 CHECK_STRING (value);
1702 else if (EQ (keyword, Qiso_ir))
1706 iso_ir = XINT (value);
1712 else if (EQ (keyword, Qto_iso_ir))
1715 iso_ir = XINT (value);
1719 else if (EQ (keyword, Qdimension))
1722 dimension = XINT (value);
1723 if (dimension < 1 ||
1730 signal_simple_error ("Invalid value for 'dimension", value);
1733 else if (EQ (keyword, Qchars))
1736 chars = XINT (value);
1737 if (chars != 94 && chars != 96
1739 && chars != 128 && chars != 256
1742 signal_simple_error ("Invalid value for 'chars", value);
1745 else if (EQ (keyword, Qcolumns))
1748 columns = XINT (value);
1749 if (columns != 1 && columns != 2)
1750 signal_simple_error ("Invalid value for 'columns", value);
1753 else if (EQ (keyword, Qgraphic))
1756 graphic = XINT (value);
1764 signal_simple_error ("Invalid value for 'graphic", value);
1767 else if (EQ (keyword, Qregistry))
1769 CHECK_STRING (value);
1773 else if (EQ (keyword, Qdirection))
1775 if (EQ (value, Ql2r))
1776 direction = CHARSET_LEFT_TO_RIGHT;
1777 else if (EQ (value, Qr2l))
1778 direction = CHARSET_RIGHT_TO_LEFT;
1780 signal_simple_error ("Invalid value for 'direction", value);
1783 else if (EQ (keyword, Qfinal))
1785 CHECK_CHAR_COERCE_INT (value);
1786 final = XCHAR (value);
1787 if (final < '0' || final > '~')
1788 signal_simple_error ("Invalid value for 'final", value);
1792 else if (EQ (keyword, Qpartial))
1794 partial = !NILP (value);
1797 else if (EQ (keyword, Qmother))
1799 mother = Fget_charset (value);
1802 else if (EQ (keyword, Qmin_code))
1805 min_code = XUINT (value);
1808 else if (EQ (keyword, Qmax_code))
1811 max_code = XUINT (value);
1814 else if (EQ (keyword, Qcode_offset))
1817 code_offset = XUINT (value);
1820 else if (EQ (keyword, Qconversion))
1822 if (EQ (value, Q94x60))
1823 conversion = CONVERSION_94x60;
1824 else if (EQ (value, Q94x94x60))
1825 conversion = CONVERSION_94x94x60;
1826 else if (EQ (value, Qbig5_1))
1827 conversion = CONVERSION_BIG5_1;
1828 else if (EQ (value, Qbig5_2))
1829 conversion = CONVERSION_BIG5_2;
1831 signal_simple_error ("Unrecognized conversion", value);
1835 else if (EQ (keyword, Qccl_program))
1837 struct ccl_program test_ccl;
1839 if (setup_ccl_program (&test_ccl, value) < 0)
1840 signal_simple_error ("Invalid value for 'ccl-program", value);
1841 ccl_program = value;
1845 signal_simple_error ("Unrecognized property", keyword);
1851 error ("'final must be specified");
1853 if (dimension == 2 && final > 0x5F)
1855 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1858 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1859 CHARSET_LEFT_TO_RIGHT)) ||
1860 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1861 CHARSET_RIGHT_TO_LEFT)))
1863 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1866 id = get_unallocated_leading_byte (dimension);
1868 if (NILP (doc_string))
1869 doc_string = build_string ("");
1871 if (NILP (registry))
1872 registry = build_string ("");
1874 if (NILP (short_name))
1875 XSETSTRING (short_name, XSYMBOL (name)->name);
1877 if (NILP (long_name))
1878 long_name = doc_string;
1881 columns = dimension;
1883 if (byte_offset < 0)
1887 else if (chars == 96)
1893 if ( (conversion == 0) && NILP (mother) && (min_code > 0) )
1901 conversion = CONVERSION_94;
1904 conversion = CONVERSION_94x94;
1907 conversion = CONVERSION_94x94x94;
1910 conversion = CONVERSION_94x94x94x94;
1918 conversion = CONVERSION_96;
1921 conversion = CONVERSION_96x96;
1924 conversion = CONVERSION_96x96x96;
1927 conversion = CONVERSION_96x96x96x96;
1934 charset = make_charset (id, name, chars, dimension, columns, graphic,
1935 final, direction, short_name, long_name,
1936 doc_string, registry, iso_ir,
1937 Qnil, min_code, max_code, code_offset, byte_offset,
1938 mother, conversion, partial);
1939 if (!NILP (ccl_program))
1940 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Copies the attributes of CHARSET (chars, dimension, columns, graphic,
   final byte, names, doc string, registry) into a new charset named
   NEW-NAME, with a freshly allocated id and the opposite direction.
   Afterwards the two charsets are linked to each other as
   reverse-direction partners.  An error is signalled when CHARSET
   already has such a partner or when NEW-NAME names an existing
   charset. */
1944 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1946 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1947 NEW-NAME is the name of the new charset. Return the new charset.
1949 (charset, new_name))
1951 Lisp_Object new_charset = Qnil;
1952 int id, chars, dimension, columns, graphic, final;
1954 Lisp_Object registry, doc_string, short_name, long_name;
1957 charset = Fget_charset (charset);
1958 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1959 signal_simple_error ("Charset already has reverse-direction charset",
1962 CHECK_SYMBOL (new_name);
1963 if (!NILP (Ffind_charset (new_name)))
1964 signal_simple_error ("Cannot redefine existing charset", new_name);
1966 cs = XCHARSET (charset);
1968 chars = CHARSET_CHARS (cs);
1969 dimension = CHARSET_DIMENSION (cs);
1970 columns = CHARSET_COLUMNS (cs);
1971 id = get_unallocated_leading_byte (dimension);
1973 graphic = CHARSET_GRAPHIC (cs);
1974 final = CHARSET_FINAL (cs);
1975 direction = CHARSET_RIGHT_TO_LEFT;
1976 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1977 direction = CHARSET_LEFT_TO_RIGHT;
1978 doc_string = CHARSET_DOC_STRING (cs);
1979 short_name = CHARSET_SHORT_NAME (cs);
1980 long_name = CHARSET_LONG_NAME (cs);
1981 registry = CHARSET_REGISTRY (cs);
1983 new_charset = make_charset (id, new_name, chars, dimension, columns,
1984 graphic, final, direction, short_name, long_name,
1985 doc_string, registry,
1988 CHARSET_DECODING_TABLE(cs),
1989 CHARSET_MIN_CODE(cs),
1990 CHARSET_MAX_CODE(cs),
1991 CHARSET_CODE_OFFSET(cs),
1992 CHARSET_BYTE_OFFSET(cs),
1994 CHARSET_CONVERSION (cs)
1996 Qnil, 0, 0, 0, 0, Qnil, 0
2000 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2001 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* ALIAS must be a symbol.  CHARSET is resolved with Fget_charset and the
   resulting object is stored in Vcharset_hash_table under ALIAS; the
   value of that Fputhash call is returned. */
2006 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2007 Define symbol ALIAS as an alias for CHARSET.
2011 CHECK_SYMBOL (alias);
2012 charset = Fget_charset (charset);
2013 return Fputhash (alias, charset, Vcharset_hash_table);
/* Accessor: resolves CHARSET with Fget_charset and returns whatever
   XCHARSET_REVERSE_DIRECTION_CHARSET holds for it. */
2016 /* #### Reverse direction charsets not yet implemented. */
2018 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2020 Return the reverse-direction charset parallel to CHARSET, if any.
2021 This is the charset with the same properties (in particular, the same
2022 dimension, number of characters per dimension, and final byte) as
2023 CHARSET but whose characters are displayed in the opposite direction.
2027 charset = Fget_charset (charset);
2028 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Validates the arguments and then queries CHARSET_BY_ATTRIBUTES.
   The visible checks accept DIMENSION 1 or 2, CHARS 94 or 96, FINAL
   between '0' and '~' (and no greater than 0x5F when DIMENSION is 2),
   and DIRECTION l2r, r2l or nil.  The local `di' starts at -1 and is
   only assigned for l2r or r2l.  A charset that is found is reported by
   name, via XCHARSET_NAME. */
2032 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2033 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2034 If DIRECTION is omitted, both directions will be checked (left-to-right
2035 will be returned if character sets exist for both directions).
2037 (dimension, chars, final, direction))
2039 int dm, ch, fi, di = -1;
2040 Lisp_Object obj = Qnil;
2042 CHECK_INT (dimension);
2043 dm = XINT (dimension);
2044 if (dm < 1 || dm > 2)
2045 signal_simple_error ("Invalid value for DIMENSION", dimension);
2049 if (ch != 94 && ch != 96)
2050 signal_simple_error ("Invalid value for CHARS", chars);
2052 CHECK_CHAR_COERCE_INT (final);
2054 if (fi < '0' || fi > '~')
2055 signal_simple_error ("Invalid value for FINAL", final);
2057 if (EQ (direction, Ql2r))
2058 di = CHARSET_LEFT_TO_RIGHT;
2059 else if (EQ (direction, Qr2l))
2060 di = CHARSET_RIGHT_TO_LEFT;
2061 else if (!NILP (direction))
2062 signal_simple_error ("Invalid value for DIRECTION", direction);
2064 if (dm == 2 && fi > 0x5F)
2066 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2070 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2072 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2075 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2078 return XCHARSET_NAME (obj);
/* Accessor: resolves CHARSET with Fget_charset, then reads
   XCHARSET_SHORT_NAME. */
2082 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2083 Return short name of CHARSET.
2087 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Accessor: resolves CHARSET with Fget_charset, then reads
   XCHARSET_LONG_NAME. */
2090 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2091 Return long name of CHARSET.
2095 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Accessor: the description is the charset's doc string, read with
   XCHARSET_DOC_STRING after resolving CHARSET with Fget_charset. */
2098 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2099 Return description of CHARSET.
2103 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Accessor: returns XCHARSET_DIMENSION of the resolved charset, boxed
   with make_int. */
2106 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2107 Return dimension of CHARSET.
2111 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property reader.  CHARSET is resolved with Fget_charset and
   PROP must be a symbol; PROP is then compared with EQ against each
   known property symbol in turn and the matching field is returned.
   Integer fields are boxed with make_int, a final byte of 0 is reported
   as Qnil, and a reverse-direction charset is reported by name when it
   is a charset object.  An unknown PROP signals "Unrecognized charset
   property name". */
2114 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2115 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2116 Recognized properties are those listed in `make-charset', as well as
2117 'name and 'doc-string.
2123 charset = Fget_charset (charset);
2124 cs = XCHARSET (charset);
2126 CHECK_SYMBOL (prop);
2127 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2128 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2129 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2130 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2131 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2132 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2133 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2134 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2135 Qnil : make_char (CHARSET_FINAL (cs));
2136 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2137 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2138 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2139 if (EQ (prop, Qdirection))
2140 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2141 if (EQ (prop, Qreverse_direction_charset))
2143 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2144 /* #### Is this translation OK? If so, error checking sufficient? */
2145 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2148 if (EQ (prop, Qiso_ir)||
2149 EQ (prop, Qto_iso_ir))
2151 if ( CHARSET_ISO_IR (cs) > 0 )
2152 return make_int (CHARSET_ISO_IR (cs));
2156 if (EQ (prop, Qmother))
2157 return CHARSET_MOTHER (cs);
2158 if (EQ (prop, Qmin_code))
2159 return make_int (CHARSET_MIN_CODE (cs));
2160 if (EQ (prop, Qmax_code))
2161 return make_int (CHARSET_MAX_CODE (cs));
2163 signal_simple_error ("Unrecognized charset property name", prop);
2164 return Qnil; /* not reached */
/* The identification number is the value of XCHARSET_LEADING_BYTE for
   the resolved charset, boxed with make_int. */
2167 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2168 Return charset identification number of CHARSET.
2172 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* set-charset-ccl-program: CCL-PROGRAM is first handed to
   setup_ccl_program together with a scratch struct ccl_program; a
   negative result signals "Invalid ccl-program".  The assignment to
   XCHARSET_CCL_PROGRAM comes after that check. */
2175 /* #### We need to figure out which properties we really want to
2178 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2179 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2181 (charset, ccl_program))
2183 struct ccl_program test_ccl;
2185 charset = Fget_charset (charset);
2186 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2187 signal_simple_error ("Invalid ccl-program", ccl_program);
2188 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look CHARSET up in
   that device's charset_font_cache.  Qunbound is the lookup default, so
   a device without an entry is recognized with UNBOUNDP and left alone;
   an entry that exists is emptied with Fclrhash. */
2193 invalidate_charset_font_caches (Lisp_Object charset)
2195 /* Invalidate font cache entries for charset on all devices. */
2196 Lisp_Object devcons, concons, hash_table;
2197 DEVICE_LOOP_NO_BREAK (devcons, concons)
2199 struct device *d = XDEVICE (XCAR (devcons));
2200 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2201 if (!UNBOUNDP (hash_table))
2202 Fclrhash (hash_table);
/* REGISTRY must be a string.  After it is stored in XCHARSET_REGISTRY,
   the per-device font caches for the charset are flushed with
   invalidate_charset_font_caches and face_property_was_changed is
   called for the font property of Vdefault_face. */
2206 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2207 Set the 'registry property of CHARSET to REGISTRY.
2209 (charset, registry))
2211 charset = Fget_charset (charset);
2212 CHECK_STRING (registry);
2213 XCHARSET_REGISTRY (charset) = registry;
2214 invalidate_charset_font_caches (charset);
2215 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Accessor: the mapping table is the charset's decoding table, read
   with XCHARSET_DECODING_TABLE after resolving CHARSET. */
2220 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2221 Return mapping-table of CHARSET.
2225 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Outline, as far as the visible lines show it: a vector TABLE is
   checked with decoding_table_check_elements, whose failures are
   reported as "Too big table", "Invalid element is found" or
   "Something wrong"; any other non-accepted TABLE signals
   Qwrong_type_argument.  The decoding table is reset to Qnil and the
   entries of TABLE are then registered one by one through
   Fput_char_attribute under the charset's name.  The code point passed
   is the vector index plus CHARSET_BYTE_OFFSET; for nested vectors the
   outer index plus offset is shifted left by 8 first. */
2228 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2229 Set mapping-table of CHARSET to TABLE.
2233 struct Lisp_Charset *cs;
2237 charset = Fget_charset (charset);
2238 cs = XCHARSET (charset);
2242 CHARSET_DECODING_TABLE(cs) = Qnil;
2245 else if (VECTORP (table))
2247 int ccs_len = CHARSET_BYTE_SIZE (cs);
2248 int ret = decoding_table_check_elements (table,
2249 CHARSET_DIMENSION (cs),
2254 signal_simple_error ("Too big table", table);
2256 signal_simple_error ("Invalid element is found", table);
2258 signal_simple_error ("Something wrong", table);
2260 CHARSET_DECODING_TABLE(cs) = Qnil;
2263 signal_error (Qwrong_type_argument,
2264 list2 (build_translated_string ("vector-or-nil-p"),
2267 byte_offset = CHARSET_BYTE_OFFSET (cs);
2268 switch (CHARSET_DIMENSION (cs))
2271 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2273 Lisp_Object c = XVECTOR_DATA(table)[i];
2276 Fput_char_attribute (c, XCHARSET_NAME (charset),
2277 make_int (i + byte_offset));
2281 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2283 Lisp_Object v = XVECTOR_DATA(table)[i];
2289 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2291 Lisp_Object c = XVECTOR_DATA(v)[j];
2295 (c, XCHARSET_NAME (charset),
2296 make_int ( ( (i + byte_offset) << 8 )
2302 Fput_char_attribute (v, XCHARSET_NAME (charset),
2303 make_int (i + byte_offset));
/* Writes the decoding table of CHARSET to external storage.
   With HAVE_LIBCHISE the target is a CCS obtained from
   default_chise_data_source; otherwise it is a database file, named by
   char_attribute_system_db_file and opened with mode "w+".
   The table is walked with one nested loop per dimension (cell, row,
   plane, group); every loop runs from CHARSET_BYTE_OFFSET up to, but not
   including, that offset plus CHARSET_BYTE_SIZE, and each level is
   fetched with get_ccs_octet_table.  The key written for an entry
   combines the loop indices by shifting (row << 8, plane << 16,
   group << 24).  At the end the libchise build calls chise_ccs_sync and
   the database build returns the result of Fclose_database. */
2311 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2312 Save mapping-table of CHARSET.
2316 struct Lisp_Charset *cs;
2317 int byte_min, byte_max;
2318 #ifdef HAVE_LIBCHISE
2320 #else /* HAVE_LIBCHISE */
2322 Lisp_Object db_file;
2323 #endif /* not HAVE_LIBCHISE */
2325 charset = Fget_charset (charset);
2326 cs = XCHARSET (charset);
2328 #ifdef HAVE_LIBCHISE
2329 if ( open_chise_data_source_maybe () )
2333 = chise_ds_get_ccs (default_chise_data_source,
2334 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2337 printf ("Can't open decoding-table %s\n",
2338 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2341 #else /* HAVE_LIBCHISE */
2342 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2343 Qsystem_char_id, 1);
2344 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2345 #endif /* not HAVE_LIBCHISE */
2347 byte_min = CHARSET_BYTE_OFFSET (cs);
2348 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2349 switch (CHARSET_DIMENSION (cs))
2353 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2356 for (cell = byte_min; cell < byte_max; cell++)
2358 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2362 #ifdef HAVE_LIBCHISE
2363 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2364 #else /* HAVE_LIBCHISE */
2365 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2366 Fprin1_to_string (c, Qnil),
2368 #endif /* not HAVE_LIBCHISE */
2375 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2378 for (row = byte_min; row < byte_max; row++)
2380 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2383 for (cell = byte_min; cell < byte_max; cell++)
2385 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2389 #ifdef HAVE_LIBCHISE
2390 chise_ccs_set_decoded_char
2392 (row << 8) | cell, XCHAR (c));
2393 #else /* HAVE_LIBCHISE */
2394 Fput_database (Fprin1_to_string (make_int ((row << 8)
2397 Fprin1_to_string (c, Qnil),
2399 #endif /* not HAVE_LIBCHISE */
2407 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2410 for (plane = byte_min; plane < byte_max; plane++)
2413 = get_ccs_octet_table (table_p, charset, plane);
2416 for (row = byte_min; row < byte_max; row++)
2419 = get_ccs_octet_table (table_r, charset, row);
2422 for (cell = byte_min; cell < byte_max; cell++)
2424 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2429 #ifdef HAVE_LIBCHISE
2430 chise_ccs_set_decoded_char
2435 #else /* HAVE_LIBCHISE */
2436 Fput_database (Fprin1_to_string
2437 (make_int ((plane << 16)
2441 Fprin1_to_string (c, Qnil),
2443 #endif /* not HAVE_LIBCHISE */
2452 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2455 for (group = byte_min; group < byte_max; group++)
2458 = get_ccs_octet_table (table_g, charset, group);
2461 for (plane = byte_min; plane < byte_max; plane++)
2464 = get_ccs_octet_table (table_p, charset, plane);
2467 for (row = byte_min; row < byte_max; row++)
2470 = get_ccs_octet_table (table_r, charset, row);
2473 for (cell = byte_min; cell < byte_max; cell++)
2476 = get_ccs_octet_table (table_c, charset, cell);
2480 #ifdef HAVE_LIBCHISE
2481 chise_ccs_set_decoded_char
2487 #else /* HAVE_LIBCHISE */
2488 Fput_database (Fprin1_to_string
2489 (make_int (( group << 24)
2494 Fprin1_to_string (c, Qnil),
2496 #endif /* not HAVE_LIBCHISE */
2504 #ifdef HAVE_LIBCHISE
2505 chise_ccs_sync (dt_ccs);
2507 #else /* HAVE_LIBCHISE */
2508 return Fclose_database (db);
2509 #endif /* not HAVE_LIBCHISE */
/* Resolves CCS with Fget_charset and locates its backing store: a
   libchise CCS looked up by the charset's name when HAVE_LIBCHISE is
   defined, otherwise a database file named by
   char_attribute_system_db_file.  The visible lines then test
   chise_ccs_setup_db (...) == 0, or Ffile_exists_p on the file, and
   store Qunloaded as the charset's decoding table.
   NOTE(review): the statement joining that test to the assignment is
   not visible here -- confirm the exact condition. */
2512 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2513 Reset mapping-table of CCS with database file.
2517 #ifdef HAVE_LIBCHISE
2518 CHISE_CCS chise_ccs;
2520 Lisp_Object db_file;
2523 ccs = Fget_charset (ccs);
2525 #ifdef HAVE_LIBCHISE
2526 if ( open_chise_data_source_maybe () )
2529 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2530 XSTRING_DATA (Fsymbol_name
2531 (XCHARSET_NAME(ccs))));
2532 if (chise_ccs == NULL)
2535 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2536 Qsystem_char_id, 0);
2540 #ifdef HAVE_LIBCHISE
2541 chise_ccs_setup_db (chise_ccs, 0) == 0
2543 !NILP (Ffile_exists_p (db_file))
2547 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetch the character stored for CODE_POINT of the charset CCS from
   external storage and record the outcome in the charset's decoding
   table with decoding_table_put_char.
   With HAVE_LIBCHISE the lookup is chise_ds_decode_char on
   default_chise_data_source, keyed by the charset's name.  Otherwise a
   database file is opened with mode "r", queried with Fget_database
   using the printed form of CODE_POINT as the key, and closed again on
   both visible paths.
   Either the decoded character or Qnil is stored.
   NOTE(review): the tests that select between those two outcomes are
   not visible here -- confirm them in the full source. */
2554 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2556 #ifdef HAVE_LIBCHISE
2557 CHISE_Char_ID char_id;
2559 if ( open_chise_data_source_maybe () )
2563 = chise_ds_decode_char (default_chise_data_source,
2564 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2568 decoding_table_put_char (ccs, code_point, make_char (char_id));
2570 decoding_table_put_char (ccs, code_point, Qnil);
2573 /* chise_ccst_close (dt_ccs); */
2575 #else /* HAVE_LIBCHISE */
2578 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2581 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2585 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2592 decoding_table_put_char (ccs, code_point, ret);
2593 Fclose_database (db);
2597 decoding_table_put_char (ccs, code_point, Qnil);
2598 Fclose_database (db);
2601 #endif /* not HAVE_LIBCHISE */
/* save-charset-properties (libchise builds only).
   Each property is written with the same three-step sequence:
   chise_ds_get_property to obtain the property handle,
   chise_feature_set_property_value on the feature named after the
   charset, then chise_property_sync.
   When CHARSET is a symbol that differs from the charset's own name, a
   property is first written on the feature named by that symbol, with
   the printed form of the real name as its value.
   The visible writes cover the doc string, "type", "chars" and
   "dimension", plus the final byte and the mother charset when they are
   set.  The block guarded by CHARSET_MAX_CODE != 0 formats the code
   range and "mother-code-offset" as "#x%X" strings and writes
   "mother-code-conversion" as "identical", a printed symbol, or
   "unknown". */
2604 #ifdef HAVE_LIBCHISE
2605 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2606 Save properties of CHARSET.
2610 struct Lisp_Charset *cs;
2611 CHISE_Property property;
2613 unsigned char* feature_name;
2615 ccs = Fget_charset (charset);
2616 cs = XCHARSET (ccs);
2618 if ( open_chise_data_source_maybe () )
2621 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2623 property = chise_ds_get_property (default_chise_data_source,
2625 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2626 chise_feature_set_property_value
2627 (chise_ds_get_feature (default_chise_data_source, feature_name),
2628 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2630 chise_property_sync (property);
2632 charset = XCHARSET_NAME (ccs);
2633 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2635 property = chise_ds_get_property (default_chise_data_source,
2637 chise_feature_set_property_value
2638 (chise_ds_get_feature (default_chise_data_source, feature_name),
2639 property, XSTRING_DATA (Fprin1_to_string
2640 (CHARSET_DOC_STRING (cs), Qnil)));
2641 chise_property_sync (property);
2643 property = chise_ds_get_property (default_chise_data_source, "type");
2644 chise_feature_set_property_value
2645 (chise_ds_get_feature (default_chise_data_source, feature_name),
2647 chise_property_sync (property);
2649 property = chise_ds_get_property (default_chise_data_source, "chars");
2650 chise_feature_set_property_value
2651 (chise_ds_get_feature (default_chise_data_source, feature_name),
2652 property, XSTRING_DATA (Fprin1_to_string (make_int
2653 (CHARSET_CHARS (cs)),
2655 chise_property_sync (property);
2657 property = chise_ds_get_property (default_chise_data_source, "dimension");
2658 chise_feature_set_property_value
2659 (chise_ds_get_feature (default_chise_data_source, feature_name),
2660 property, XSTRING_DATA (Fprin1_to_string (make_int
2661 (CHARSET_DIMENSION (cs)),
2663 chise_property_sync (property);
2665 if ( CHARSET_FINAL (cs) != 0 )
2667 property = chise_ds_get_property (default_chise_data_source,
2669 chise_feature_set_property_value
2670 (chise_ds_get_feature (default_chise_data_source, feature_name),
2671 property, XSTRING_DATA (Fprin1_to_string (make_int
2672 (CHARSET_FINAL (cs)),
2674 chise_property_sync (property);
2677 if ( !NILP (CHARSET_MOTHER (cs)) )
2679 Lisp_Object mother = CHARSET_MOTHER (cs);
2681 if ( CHARSETP (mother) )
2682 mother = XCHARSET_NAME (mother);
2684 property = chise_ds_get_property (default_chise_data_source,
2686 chise_feature_set_property_value
2687 (chise_ds_get_feature (default_chise_data_source, feature_name),
2688 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2689 chise_property_sync (property);
2692 if ( CHARSET_MAX_CODE (cs) != 0 )
2696 property = chise_ds_get_property (default_chise_data_source,
2698 if ( CHARSET_MIN_CODE (cs) == 0 )
2699 chise_feature_set_property_value
2700 (chise_ds_get_feature (default_chise_data_source, feature_name),
2704 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2705 chise_feature_set_property_value
2706 (chise_ds_get_feature (default_chise_data_source, feature_name),
2709 chise_property_sync (property);
2711 property = chise_ds_get_property (default_chise_data_source,
2713 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2714 chise_feature_set_property_value
2715 (chise_ds_get_feature (default_chise_data_source, feature_name),
2717 chise_property_sync (property);
2719 property = chise_ds_get_property (default_chise_data_source,
2720 "mother-code-offset");
2721 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2722 chise_feature_set_property_value
2723 (chise_ds_get_feature (default_chise_data_source, feature_name),
2727 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2728 chise_feature_set_property_value
2729 (chise_ds_get_feature (default_chise_data_source, feature_name),
2732 chise_property_sync (property);
2734 property = chise_ds_get_property (default_chise_data_source,
2735 "mother-code-conversion");
2736 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2737 chise_feature_set_property_value
2738 (chise_ds_get_feature (default_chise_data_source, feature_name),
2739 property, "identical");
2742 Lisp_Object sym = Qnil;
2744 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2746 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2748 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2750 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2753 chise_feature_set_property_value
2754 (chise_ds_get_feature (default_chise_data_source, feature_name),
2755 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2757 chise_feature_set_property_value
2758 (chise_ds_get_feature (default_chise_data_source, feature_name),
2759 property, "unknown");
2761 chise_property_sync (property);
2765 #endif /* HAVE_LIBCHISE */
2767 #endif /* HAVE_CHISE */
2768 #endif /* UTF2000 */
2771 /************************************************************************/
2772 /* Lisp primitives for working with characters */
2773 /************************************************************************/
/* CHARSET is resolved with Fget_charset.  When DEFINED-ONLY is nil the
   code point is decoded with DECODE_CHAR, otherwise with
   decode_defined_char; both receive WITHOUT-INHERITANCE reduced to a C
   truth value.  A negative result is returned as Qnil, anything else is
   boxed with make_char.
   NOTE(review): the statement guarded by the XCHARSET_GRAPHIC test
   (graphic 0 or 1) is not visible here -- confirm what it does to the
   code point. */
2776 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2777 Make a character from CHARSET and code-point CODE.
2778 If DEFINED_ONLY is non-nil, builtin character is not returned.
2779 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2780 If corresponding character is not found, nil is returned.
2782 (charset, code, defined_only, without_inheritance))
2786 charset = Fget_charset (charset);
2789 if ( (XCHARSET_GRAPHIC (charset) == 0) ||
2790 (XCHARSET_GRAPHIC (charset) == 1) )
2792 if (NILP (defined_only))
2793 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2795 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2796 return c >= 0 ? make_char (c) : Qnil;
/* CHARSET is resolved with Fget_charset.  The latin-viscii charset gets
   special treatment: the character is decoded with Fdecode_char first,
   its attribute for Vcharset_latin_viscii_lower or
   Vcharset_latin_viscii_upper is fetched with Fget_char_attribute, and
   CHARSET is replaced by that lower or upper charset.  The builtin
   decoding itself is decode_builtin_char; when it yields a negative
   value the result of Fdecode_char is used instead. */
2799 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2800 Make a builtin character from CHARSET and code-point CODE.
2807 charset = Fget_charset (charset);
2809 if (EQ (charset, Vcharset_latin_viscii))
2811 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2817 (ret = Fget_char_attribute (chr,
2818 Vcharset_latin_viscii_lower,
2821 charset = Vcharset_latin_viscii_lower;
2825 (ret = Fget_char_attribute (chr,
2826 Vcharset_latin_viscii_upper,
2829 charset = Vcharset_latin_viscii_upper;
2836 if (XCHARSET_GRAPHIC (charset) == 1)
2839 ch = decode_builtin_char (charset, c);
2841 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
/* Lisp primitive `make-char'.
   The permitted octet range depends on the charset:
     Vcharset_ascii          0 .. 127
     Vcharset_control_1      0 .. 31
     CHARSET_CHARS == 256    0 .. 255
     CHARSET_CHARS == 94    33 .. 126
     anything else          32 .. 127
   An octet outside [lowlim, highlim] is reported through
   args_out_of_range_3.  For a charset of dimension 1 the result is
   MAKE_CHAR (charset, a1, 0).  Otherwise the second octet is masked
   with 0x7f, range-checked the same way, and the result is
   MAKE_CHAR (charset, a1, a2). */
2845 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2846 Make a character from CHARSET and octets ARG1 and ARG2.
2847 ARG2 is required only for characters from two-dimensional charsets.
2848 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2849 character s with caron.
2851 (charset, arg1, arg2))
2855 int lowlim, highlim;
2857 charset = Fget_charset (charset);
2858 cs = XCHARSET (charset);
2860 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2861 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2863 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2865 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2866 else /* CHARSET_CHARS (cs) == 96 */ lowlim = 32, highlim = 127;
2869 /* It is useful (and safe, according to Olivier Galibert) to strip
2870 the 8th bit off ARG1 and ARG2 because it allows programmers to
2871 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2872 Latin 2 code of the character. */
2880 if (a1 < lowlim || a1 > highlim)
2881 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2883 if (CHARSET_DIMENSION (cs) == 1)
2887 ("Charset is of dimension one; second octet must be nil", arg2);
2888 return make_char (MAKE_CHAR (charset, a1, 0));
2897 a2 = XINT (arg2) & 0x7f;
2899 if (a2 < lowlim || a2 > highlim)
2900 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2902 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   CHARACTER is validated with CHECK_CHAR_COERCE_INT.  The value returned
   is XCHARSET_NAME of the charset found by CHAR_CHARSET, i.e. the
   charset's name rather than the charset object itself. */
2905 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2906 Return the character set of CHARACTER.
2910 CHECK_CHAR_COERCE_INT (character);
2912 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.
   CHARACTER is validated with CHECK_CHAR_COERCE_INT and decomposed by
   BREAKUP_CHAR into a charset and two octets.  N being nil or Qzero
   selects octet0, N being the integer 1 selects octet1; any other N
   reaches signal_simple_error with "Octet number must be 0 or 1". */
2915 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2916 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2917 N defaults to 0 if omitted.
2921 Lisp_Object charset;
2924 CHECK_CHAR_COERCE_INT (character);
2926 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2928 if (NILP (n) || EQ (n, Qzero))
2929 return make_int (octet0);
2930 else if (EQ (n, make_int (1)))
2931 return make_int (octet1);
2933 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.
   CHARACTER is validated with CHECK_CHAR_COERCE_INT and CHARSET is
   canonicalized through Fget_charset.  The code-point comes from
   charset_code_point, which is passed !NILP (defined_only) as its last
   argument.  A non-negative code-point is returned as a Lisp integer.
   NOTE(review): the result for a negative code-point is presumably nil;
   confirm against the full function body. */
2937 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2938 Return code-point of CHARACTER in specified CHARSET.
2940 (character, charset, defined_only))
2944 CHECK_CHAR_COERCE_INT (character);
2945 charset = Fget_charset (charset);
2946 code_point = charset_code_point (charset, XCHAR (character),
2947 !NILP (defined_only));
2948 if (code_point >= 0)
2949 return make_int (code_point);
/* Lisp primitive `split-char'.
   The locals charset and rc are protected with GCPRO2 before CHARACTER
   is validated.  Two ways of building the result list appear below:
     - ENCODE_CHAR yields a code-point; one (code_point & 255) value is
       consed onto rc per charset dimension, and XCHARSET_NAME (charset)
       is consed on last so that it ends up at the head of the list.
     - BREAKUP_CHAR yields c1 and c2; a dimension-2 charset gives
       list3 (name, c1, c2), any other gives list2 (name, c1).
   NOTE(review): these look like alternative preprocessor branches;
   confirm which configuration selects which. */
2955 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2956 Return list of charset and one or two position-codes of CHARACTER.
2960 /* This function can GC */
2961 struct gcpro gcpro1, gcpro2;
2962 Lisp_Object charset = Qnil;
2963 Lisp_Object rc = Qnil;
2971 GCPRO2 (charset, rc);
2972 CHECK_CHAR_COERCE_INT (character);
2975 code_point = ENCODE_CHAR (XCHAR (character), charset);
2976 dimension = XCHARSET_DIMENSION (charset);
2977 while (dimension > 0)
2979 rc = Fcons (make_int (code_point & 255), rc);
2983 rc = Fcons (XCHARSET_NAME (charset), rc);
2985 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2987 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2989 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2993 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3002 #ifdef ENABLE_COMPOSITE_CHARS
3003 /************************************************************************/
3004 /* composite character functions */
3005 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path
   (presumably taken only on a miss; confirm) signals "No more composite
   chars available" once composite_char_row_next has reached 128.
   Otherwise it builds a character in Vcharset_composite from the current
   row/column counters and records it in both the char->string and
   string->char hash tables.  The column counter is then advanced; when it
   reaches 128 it is reset to 32 and the row counter is incremented. */
3008 lookup_composite_char (Bufbyte *str, int len)
3010 Lisp_Object lispstr = make_string (str, len);
3011 Lisp_Object ch = Fgethash (lispstr,
3012 Vcomposite_char_string2char_hash_table,
3018 if (composite_char_row_next >= 128)
3019 signal_simple_error ("No more composite chars available", lispstr);
3020 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3021 composite_char_col_next);
3022 Fputhash (make_char (emch), lispstr,
3023 Vcomposite_char_char2string_hash_table);
3024 Fputhash (lispstr, make_char (emch),
3025 Vcomposite_char_string2char_hash_table);
3026 composite_char_col_next++;
3027 if (composite_char_col_next >= 128)
3029 composite_char_col_next = 32;
3030 composite_char_row_next++;
/* Look CH up in Vcomposite_char_char2string_hash_table and assert that
   the lookup did not come back unbound, i.e. CH must already have been
   registered.  NOTE(review): presumably the string found is the return
   value; confirm. */
3039 composite_char_string (Emchar ch)
3041 Lisp_Object str = Fgethash (make_char (ch),
3042 Vcomposite_char_char2string_hash_table,
3044 assert (!UNBOUNDP (str));
/* `make-composite-char': after CHECK_STRING on STRING, pass its data and
   length to lookup_composite_char and return the resulting character
   wrapped with make_char. */
3048 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3049 Convert a string into a single composite character.
3050 The character is the result of overstriking all the characters in
3055 CHECK_STRING (string);
3056 return make_char (lookup_composite_char (XSTRING_DATA (string),
3057 XSTRING_LENGTH (string)));
/* `composite-char-string': signal "Must be composite char" when the
   leading byte of the character is not LEADING_BYTE_COMPOSITE; otherwise
   return composite_char_string for that character. */
3060 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3061 Return a string of the characters comprising a composite character.
3069 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3070 signal_simple_error ("Must be composite char", ch);
3071 return composite_char_string (emch);
3073 #endif /* ENABLE_COMPOSITE_CHARS */
3076 /************************************************************************/
3077 /* initialization */
3078 /************************************************************************/
/* Symbol-level initialization for the charset module.
   Installs the lrecord implementation for `charset', registers the
   Lisp primitives with DEFSUBR, and interns with defsymbol every Q...
   symbol listed below under its Lisp-visible name. */
3081 syms_of_mule_charset (void)
3083 INIT_LRECORD_IMPLEMENTATION (charset);
/* Register the Lisp primitives (subrs). */
3085 DEFSUBR (Fcharsetp);
3086 DEFSUBR (Ffind_charset);
3087 DEFSUBR (Fget_charset);
3088 DEFSUBR (Fcharset_list);
3089 DEFSUBR (Fcharset_name);
3090 DEFSUBR (Fmake_charset);
3091 DEFSUBR (Fmake_reverse_direction_charset);
3092 /* DEFSUBR (Freverse_direction_charset); */
3093 DEFSUBR (Fdefine_charset_alias);
3094 DEFSUBR (Fcharset_from_attributes);
3095 DEFSUBR (Fcharset_short_name);
3096 DEFSUBR (Fcharset_long_name);
3097 DEFSUBR (Fcharset_description);
3098 DEFSUBR (Fcharset_dimension);
3099 DEFSUBR (Fcharset_property);
3100 DEFSUBR (Fcharset_id);
3101 DEFSUBR (Fset_charset_ccl_program);
3102 DEFSUBR (Fset_charset_registry);
3105 DEFSUBR (Fcharset_mapping_table);
3106 DEFSUBR (Fset_charset_mapping_table);
3108 DEFSUBR (Fsave_charset_mapping_table);
3109 DEFSUBR (Freset_charset_mapping_table);
3110 #ifdef HAVE_LIBCHISE
3111 DEFSUBR (Fsave_charset_properties);
3112 #endif /* HAVE_LIBCHISE */
3113 #endif /* HAVE_CHISE */
3114 DEFSUBR (Fdecode_char);
3115 DEFSUBR (Fdecode_builtin_char);
3116 DEFSUBR (Fencode_char);
3119 DEFSUBR (Fmake_char);
3120 DEFSUBR (Fchar_charset);
3121 DEFSUBR (Fchar_octet);
3122 DEFSUBR (Fsplit_char);
3124 #ifdef ENABLE_COMPOSITE_CHARS
3125 DEFSUBR (Fmake_composite_char);
3126 DEFSUBR (Fcomposite_char_string);
/* Intern the symbols that the C code refers to through Q... variables. */
3129 defsymbol (&Qcharsetp, "charsetp");
3130 defsymbol (&Qregistry, "registry");
3131 defsymbol (&Qfinal, "final");
3132 defsymbol (&Qgraphic, "graphic");
3133 defsymbol (&Qdirection, "direction");
3134 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3135 defsymbol (&Qshort_name, "short-name");
3136 defsymbol (&Qlong_name, "long-name");
3137 defsymbol (&Qiso_ir, "iso-ir");
3139 defsymbol (&Qto_iso_ir, "=>iso-ir");
3140 defsymbol (&Qpartial, "partial");
3141 defsymbol (&Qmother, "mother");
3142 defsymbol (&Qmin_code, "min-code");
3143 defsymbol (&Qmax_code, "max-code");
3144 defsymbol (&Qcode_offset, "code-offset");
3145 defsymbol (&Qconversion, "conversion");
3146 defsymbol (&Q94x60, "94x60");
3147 defsymbol (&Q94x94x60, "94x94x60");
3148 defsymbol (&Qbig5_1, "big5-1");
3149 defsymbol (&Qbig5_2, "big5-2");
3152 defsymbol (&Ql2r, "l2r");
3153 defsymbol (&Qr2l, "r2l");
3155 /* Charsets, compatible with FSF 20.3
3156 Naming convention is Script-Charset[-Edition] */
3157 defsymbol (&Qascii, "ascii");
3158 defsymbol (&Qcontrol_1, "control-1");
3159 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3160 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3161 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3162 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3163 defsymbol (&Qthai_tis620, "thai-tis620");
3164 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3165 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3166 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3167 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3168 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3169 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3170 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3171 /* defsymbol (&Qrep_jis_x0208_1978, "=jis-x0208@1978"); */
3172 defsymbol (&Qrep_gb2312, "=gb2312");
3173 defsymbol (&Qrep_gb12345, "=gb12345");
3174 defsymbol (&Qrep_jis_x0208_1983, "=jis-x0208@1983");
3175 defsymbol (&Qrep_ks_x1001, "=ks-x1001");
3176 defsymbol (&Qrep_jis_x0212, "=jis-x0212");
3177 defsymbol (&Qrep_cns11643_1, "=cns11643-1");
3178 defsymbol (&Qrep_cns11643_2, "=cns11643-2");
3180 defsymbol (&Qsystem_char_id, "system-char-id");
3181 defsymbol (&Qrep_ucs, "=ucs");
3182 defsymbol (&Qucs, "ucs");
3183 defsymbol (&Qucs_bmp, "ucs-bmp");
3184 defsymbol (&Qucs_smp, "ucs-smp");
3185 defsymbol (&Qucs_sip, "ucs-sip");
3186 defsymbol (&Qlatin_viscii, "latin-viscii");
3187 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3188 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3189 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3190 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3191 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3192 defsymbol (&Qrep_jis_x0208, "=jis-x0208");
3193 defsymbol (&Qrep_jis_x0208_1990, "=jis-x0208@1990");
3194 defsymbol (&Qrep_big5, "=big5");
3195 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3197 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3198 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3200 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for the charset module.
   Allocates a zeroed struct charset_lookup, registers it with
   dump_add_root_struct_ptr, and fills its lookup tables with Qnil.
   It then seeds the next-allocated leading-byte counters from the
   MIN_LEADING_BYTE_PRIVATE* constants and declares these Lisp variables:
     leading-code-private-11
     default-coded-charset-priority-list                  (initially nil)
     display-coded-charset-priority-use-inheritance       (initially t)
     display-coded-charset-priority-use-hierarchy-order   (initially t)
   NOTE(review): charset_by_attributes is cleared once as a 2-level and
   once as a 3-level table, and the leading-byte counters appear in two
   forms; these look like alternative preprocessor branches.  Confirm
   against the build configuration. */
3204 vars_of_mule_charset (void)
3211 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3212 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3214 /* Table of charsets indexed by leading byte. */
3215 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3216 chlook->charset_by_leading_byte[i] = Qnil;
3219 /* Table of charsets indexed by type/final-byte. */
3220 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3221 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3222 chlook->charset_by_attributes[i][j] = Qnil;
3224 /* Table of charsets indexed by type/final-byte/direction. */
3225 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3226 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3227 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3228 chlook->charset_by_attributes[i][j][k] = Qnil;
3232 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3234 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3235 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3239 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3240 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3241 Leading-code of private TYPE9N charset of column-width 1.
3243 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3247 Vdefault_coded_charset_priority_list = Qnil;
3248 DEFVAR_LISP ("default-coded-charset-priority-list",
3249 &Vdefault_coded_charset_priority_list /*
3250 Default order of preferred coded-character-sets.
3252 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3253 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3254 &Vdisplay_coded_charset_priority_use_inheritance /*
3255 If non-nil, use character inheritance.
3257 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3258 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3259 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3260 If non-nil, prefer nearest character in hierarchy order.
3266 complex_vars_of_mule_charset (void)
3268 staticpro (&Vcharset_hash_table);
3269 Vcharset_hash_table =
3270 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3272 /* Predefined character sets. We store them into variables for
3276 staticpro (&Vcharset_system_char_id);
3277 Vcharset_system_char_id =
3278 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3279 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3280 build_string ("SCID"),
3281 build_string ("CHAR-ID"),
3282 build_string ("System char-id"),
3285 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3287 staticpro (&Vcharset_ucs);
3289 make_charset (LEADING_BYTE_UCS, Qrep_ucs, 256, 4,
3290 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3291 build_string ("UCS"),
3292 build_string ("UCS"),
3293 build_string ("ISO/IEC 10646"),
3296 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3298 staticpro (&Vcharset_ucs_bmp);
3300 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3301 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3302 build_string ("BMP"),
3303 build_string ("UCS-BMP"),
3304 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3306 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3307 - LEADING_BYTE_UCS_BMP,
3308 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3310 staticpro (&Vcharset_ucs_smp);
3312 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3313 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3314 build_string ("SMP"),
3315 build_string ("UCS-SMP"),
3316 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3317 build_string ("UCS00-1"),
3319 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3320 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL,
3322 staticpro (&Vcharset_ucs_sip);
3324 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3325 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3326 build_string ("SIP"),
3327 build_string ("UCS-SIP"),
3328 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3329 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3331 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3332 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL,
3335 # define MIN_CHAR_THAI 0
3336 # define MAX_CHAR_THAI 0
3337 /* # define MIN_CHAR_HEBREW 0 */
3338 /* # define MAX_CHAR_HEBREW 0 */
3339 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3340 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3342 staticpro (&Vcharset_ascii);
3344 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3345 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3346 build_string ("ASCII"),
3347 build_string ("ASCII)"),
3348 build_string ("ASCII (ISO646 IRV)"),
3349 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3350 - LEADING_BYTE_ASCII,
3351 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL,
3353 staticpro (&Vcharset_control_1);
3354 Vcharset_control_1 =
3355 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3356 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3357 build_string ("C1"),
3358 build_string ("Control characters"),
3359 build_string ("Control characters 128-191"),
3361 - LEADING_BYTE_CONTROL_1,
3362 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL,
3364 staticpro (&Vcharset_latin_iso8859_1);
3365 Vcharset_latin_iso8859_1 =
3366 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3367 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3368 build_string ("Latin-1"),
3369 build_string ("ISO8859-1 (Latin-1)"),
3370 build_string ("ISO8859-1 (Latin-1)"),
3371 build_string ("iso8859-1"),
3372 - LEADING_BYTE_LATIN_ISO8859_1,
3373 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3375 staticpro (&Vcharset_latin_iso8859_2);
3376 Vcharset_latin_iso8859_2 =
3377 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3378 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3379 build_string ("Latin-2"),
3380 build_string ("ISO8859-2 (Latin-2)"),
3381 build_string ("ISO8859-2 (Latin-2)"),
3382 build_string ("iso8859-2"),
3383 - LEADING_BYTE_LATIN_ISO8859_2,
3384 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3386 staticpro (&Vcharset_latin_iso8859_3);
3387 Vcharset_latin_iso8859_3 =
3388 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3389 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3390 build_string ("Latin-3"),
3391 build_string ("ISO8859-3 (Latin-3)"),
3392 build_string ("ISO8859-3 (Latin-3)"),
3393 build_string ("iso8859-3"),
3394 - LEADING_BYTE_LATIN_ISO8859_3,
3395 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3397 staticpro (&Vcharset_latin_iso8859_4);
3398 Vcharset_latin_iso8859_4 =
3399 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3400 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3401 build_string ("Latin-4"),
3402 build_string ("ISO8859-4 (Latin-4)"),
3403 build_string ("ISO8859-4 (Latin-4)"),
3404 build_string ("iso8859-4"),
3405 - LEADING_BYTE_LATIN_ISO8859_4,
3406 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3408 staticpro (&Vcharset_thai_tis620);
3409 Vcharset_thai_tis620 =
3410 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3411 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3412 build_string ("TIS620"),
3413 build_string ("TIS620 (Thai)"),
3414 build_string ("TIS620.2529 (Thai)"),
3415 build_string ("tis620"),
3416 - LEADING_BYTE_THAI_TIS620,
3417 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3419 staticpro (&Vcharset_greek_iso8859_7);
3420 Vcharset_greek_iso8859_7 =
3421 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3422 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3423 build_string ("ISO8859-7"),
3424 build_string ("ISO8859-7 (Greek)"),
3425 build_string ("ISO8859-7 (Greek)"),
3426 build_string ("iso8859-7"),
3427 - LEADING_BYTE_GREEK_ISO8859_7,
3428 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3430 staticpro (&Vcharset_arabic_iso8859_6);
3431 Vcharset_arabic_iso8859_6 =
3432 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3433 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3434 build_string ("ISO8859-6"),
3435 build_string ("ISO8859-6 (Arabic)"),
3436 build_string ("ISO8859-6 (Arabic)"),
3437 build_string ("iso8859-6"),
3438 - LEADING_BYTE_ARABIC_ISO8859_6,
3439 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3441 staticpro (&Vcharset_hebrew_iso8859_8);
3442 Vcharset_hebrew_iso8859_8 =
3443 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3444 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3445 build_string ("ISO8859-8"),
3446 build_string ("ISO8859-8 (Hebrew)"),
3447 build_string ("ISO8859-8 (Hebrew)"),
3448 build_string ("iso8859-8"),
3449 - LEADING_BYTE_HEBREW_ISO8859_8,
3451 0 /* MIN_CHAR_HEBREW */,
3452 0 /* MAX_CHAR_HEBREW */, 0, 32,
3453 Qnil, CONVERSION_IDENTICAL,
3455 staticpro (&Vcharset_katakana_jisx0201);
3456 Vcharset_katakana_jisx0201 =
3457 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3458 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3459 build_string ("JISX0201 Kana"),
3460 build_string ("JISX0201.1976 (Japanese Kana)"),
3461 build_string ("JISX0201.1976 Japanese Kana"),
3462 build_string ("jisx0201\\.1976"),
3463 - LEADING_BYTE_KATAKANA_JISX0201,
3464 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3466 staticpro (&Vcharset_latin_jisx0201);
3467 Vcharset_latin_jisx0201 =
3468 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3469 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3470 build_string ("JISX0201 Roman"),
3471 build_string ("JISX0201.1976 (Japanese Roman)"),
3472 build_string ("JISX0201.1976 Japanese Roman"),
3473 build_string ("jisx0201\\.1976"),
3474 - LEADING_BYTE_LATIN_JISX0201,
3475 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3477 staticpro (&Vcharset_cyrillic_iso8859_5);
3478 Vcharset_cyrillic_iso8859_5 =
3479 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3480 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3481 build_string ("ISO8859-5"),
3482 build_string ("ISO8859-5 (Cyrillic)"),
3483 build_string ("ISO8859-5 (Cyrillic)"),
3484 build_string ("iso8859-5"),
3485 - LEADING_BYTE_CYRILLIC_ISO8859_5,
3486 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3488 staticpro (&Vcharset_latin_iso8859_9);
3489 Vcharset_latin_iso8859_9 =
3490 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3491 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3492 build_string ("Latin-5"),
3493 build_string ("ISO8859-9 (Latin-5)"),
3494 build_string ("ISO8859-9 (Latin-5)"),
3495 build_string ("iso8859-9"),
3496 - LEADING_BYTE_LATIN_ISO8859_9,
3497 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3500 staticpro (&Vcharset_jis_x0208);
3501 Vcharset_jis_x0208 =
3502 make_charset (LEADING_BYTE_JIS_X0208,
3503 Qrep_jis_x0208, 94, 2,
3504 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3505 build_string ("JIS X0208"),
3506 build_string ("JIS X0208 Common"),
3507 build_string ("JIS X0208 Common part"),
3508 build_string ("jisx0208"),
3509 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3511 MIN_CHAR_JIS_X0208_1990,
3512 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3513 Qnil, CONVERSION_94x94,
3517 staticpro (&Vcharset_japanese_jisx0208_1978);
3518 Vcharset_japanese_jisx0208_1978 =
3519 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3520 Qrep_jis_x0208_1978, 94, 2,
3521 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3522 build_string ("JIS X0208:1978"),
3523 build_string ("JIS X0208:1978 (Japanese)"),
3525 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3526 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3527 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3534 CONVERSION_IDENTICAL,
3537 staticpro (&Vcharset_chinese_gb2312);
3538 Vcharset_chinese_gb2312 =
3539 make_charset (LEADING_BYTE_CHINESE_GB2312, Qrep_gb2312, 94, 2,
3540 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3541 build_string ("GB2312"),
3542 build_string ("GB2312)"),
3543 build_string ("GB2312 Chinese simplified"),
3544 build_string ("gb2312"),
3545 - LEADING_BYTE_CHINESE_GB2312,
3546 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3548 staticpro (&Vcharset_chinese_gb12345);
3549 Vcharset_chinese_gb12345 =
3550 make_charset (LEADING_BYTE_CHINESE_GB12345, Qrep_gb12345, 94, 2,
3551 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3552 build_string ("G1"),
3553 build_string ("GB 12345)"),
3554 build_string ("GB 12345-1990"),
3555 build_string ("GB12345\\(\\.1990\\)?-0"),
3557 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3559 staticpro (&Vcharset_japanese_jisx0208);
3560 Vcharset_japanese_jisx0208 =
3561 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qrep_jis_x0208_1983, 94, 2,
3562 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3563 build_string ("JISX0208"),
3564 build_string ("JIS X0208:1983 (Japanese)"),
3565 build_string ("JIS X0208:1983 Japanese Kanji"),
3566 build_string ("jisx0208\\.1983"),
3567 - LEADING_BYTE_JAPANESE_JISX0208,
3574 CONVERSION_IDENTICAL,
3577 staticpro (&Vcharset_japanese_jisx0208_1990);
3578 Vcharset_japanese_jisx0208_1990 =
3579 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3580 Qrep_jis_x0208_1990, 94, 2,
3581 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3582 build_string ("JISX0208-1990"),
3583 build_string ("JIS X0208:1990 (Japanese)"),
3584 build_string ("JIS X0208:1990 Japanese Kanji"),
3585 build_string ("jisx0208\\.1990"),
3586 - LEADING_BYTE_JAPANESE_JISX0208_1990,
3588 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3589 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3590 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3591 Vcharset_jis_x0208 /* Qnil */,
3592 CONVERSION_IDENTICAL /* CONVERSION_94x94 */,
3595 staticpro (&Vcharset_korean_ksc5601);
3596 Vcharset_korean_ksc5601 =
3597 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qrep_ks_x1001, 94, 2,
3598 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3599 build_string ("KSC5601"),
3600 build_string ("KSC5601 (Korean"),
3601 build_string ("KSC5601 Korean Hangul and Hanja"),
3602 build_string ("ksc5601"),
3603 - LEADING_BYTE_KOREAN_KSC5601,
3604 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3606 staticpro (&Vcharset_japanese_jisx0212);
3607 Vcharset_japanese_jisx0212 =
3608 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qrep_jis_x0212, 94, 2,
3609 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3610 build_string ("JISX0212"),
3611 build_string ("JISX0212 (Japanese)"),
3612 build_string ("JISX0212 Japanese Supplement"),
3613 build_string ("jisx0212"),
3614 - LEADING_BYTE_JAPANESE_JISX0212,
3615 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3618 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3619 staticpro (&Vcharset_chinese_cns11643_1);
3620 Vcharset_chinese_cns11643_1 =
3621 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qrep_cns11643_1, 94, 2,
3622 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3623 build_string ("CNS11643-1"),
3624 build_string ("CNS11643-1 (Chinese traditional)"),
3626 ("CNS 11643 Plane 1 Chinese traditional"),
3627 build_string (CHINESE_CNS_PLANE_RE("1")),
3628 - LEADING_BYTE_CHINESE_CNS11643_1,
3629 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3631 staticpro (&Vcharset_chinese_cns11643_2);
3632 Vcharset_chinese_cns11643_2 =
3633 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qrep_cns11643_2, 94, 2,
3634 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3635 build_string ("CNS11643-2"),
3636 build_string ("CNS11643-2 (Chinese traditional)"),
3638 ("CNS 11643 Plane 2 Chinese traditional"),
3639 build_string (CHINESE_CNS_PLANE_RE("2")),
3640 - LEADING_BYTE_CHINESE_CNS11643_2,
3641 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3644 staticpro (&Vcharset_latin_tcvn5712);
3645 Vcharset_latin_tcvn5712 =
3646 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3647 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3648 build_string ("TCVN 5712"),
3649 build_string ("TCVN 5712 (VSCII-2)"),
3650 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3651 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3652 - LEADING_BYTE_LATIN_TCVN5712,
3653 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3655 staticpro (&Vcharset_latin_viscii_lower);
3656 Vcharset_latin_viscii_lower =
3657 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3658 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3659 build_string ("VISCII lower"),
3660 build_string ("VISCII lower (Vietnamese)"),
3661 build_string ("VISCII lower (Vietnamese)"),
3662 build_string ("MULEVISCII-LOWER"),
3664 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3666 staticpro (&Vcharset_latin_viscii_upper);
3667 Vcharset_latin_viscii_upper =
3668 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3669 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3670 build_string ("VISCII upper"),
3671 build_string ("VISCII upper (Vietnamese)"),
3672 build_string ("VISCII upper (Vietnamese)"),
3673 build_string ("MULEVISCII-UPPER"),
3675 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3677 staticpro (&Vcharset_latin_viscii);
3678 Vcharset_latin_viscii =
3679 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3680 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3681 build_string ("VISCII"),
3682 build_string ("VISCII 1.1 (Vietnamese)"),
3683 build_string ("VISCII 1.1 (Vietnamese)"),
3684 build_string ("VISCII1\\.1"),
3686 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL,
3688 staticpro (&Vcharset_chinese_big5);
3689 Vcharset_chinese_big5 =
3690 make_charset (LEADING_BYTE_CHINESE_BIG5, Qrep_big5, 256, 2,
3691 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3692 build_string ("Big5"),
3693 build_string ("Big5"),
3694 build_string ("Big5 Chinese traditional"),
3695 build_string ("big5-0"),
3698 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3699 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL,
3702 staticpro (&Vcharset_ethiopic_ucs);
3703 Vcharset_ethiopic_ucs =
3704 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3705 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3706 build_string ("Ethiopic (UCS)"),
3707 build_string ("Ethiopic (UCS)"),
3708 build_string ("Ethiopic of UCS"),
3709 build_string ("Ethiopic-Unicode"),
3711 Qnil, 0x1200, 0x137F, 0, 0,
3712 Qnil, CONVERSION_IDENTICAL,
3715 staticpro (&Vcharset_chinese_big5_1);
3716 Vcharset_chinese_big5_1 =
3717 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3718 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3719 build_string ("Big5"),
3720 build_string ("Big5 (Level-1)"),
3722 ("Big5 Level-1 Chinese traditional"),
3723 build_string ("big5"),
3725 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3726 Vcharset_chinese_big5, CONVERSION_BIG5_1,
3728 staticpro (&Vcharset_chinese_big5_2);
3729 Vcharset_chinese_big5_2 =
3730 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3731 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3732 build_string ("Big5"),
3733 build_string ("Big5 (Level-2)"),
3735 ("Big5 Level-2 Chinese traditional"),
3736 build_string ("big5"),
3738 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3739 Vcharset_chinese_big5, CONVERSION_BIG5_2,
3742 #ifdef ENABLE_COMPOSITE_CHARS
3743 /* #### For simplicity, we put composite chars into a 96x96 charset.
3744 This is going to lead to problems because you can run out of
3745 room, esp. as we don't yet recycle numbers. */
3746 staticpro (&Vcharset_composite);
3747 Vcharset_composite =
3748 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3749 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3750 build_string ("Composite"),
3751 build_string ("Composite characters"),
3752 build_string ("Composite characters"),
3755 /* #### not dumped properly */
3756 composite_char_row_next = 32;
3757 composite_char_col_next = 32;
3759 Vcomposite_char_string2char_hash_table =
3760 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3761 Vcomposite_char_char2string_hash_table =
3762 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3763 staticpro (&Vcomposite_char_string2char_hash_table);
3764 staticpro (&Vcomposite_char_char2string_hash_table);
3765 #endif /* ENABLE_COMPOSITE_CHARS */