1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2011
7 This file is part of XEmacs.
9 XEmacs is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with XEmacs; see the file COPYING. If not, write to
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
26 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
45 /* The various pre-defined charsets. */
47 Lisp_Object Vcharset_ascii;
48 Lisp_Object Vcharset_control_1;
49 Lisp_Object Vcharset_latin_iso8859_1;
50 Lisp_Object Vcharset_latin_iso8859_2;
51 Lisp_Object Vcharset_latin_iso8859_3;
52 Lisp_Object Vcharset_latin_iso8859_4;
53 Lisp_Object Vcharset_thai_tis620;
54 Lisp_Object Vcharset_greek_iso8859_7;
55 Lisp_Object Vcharset_arabic_iso8859_6;
56 Lisp_Object Vcharset_hebrew_iso8859_8;
57 Lisp_Object Vcharset_katakana_jisx0201;
58 Lisp_Object Vcharset_latin_jisx0201;
59 Lisp_Object Vcharset_cyrillic_iso8859_5;
60 Lisp_Object Vcharset_latin_iso8859_9;
61 /* Lisp_Object Vcharset_japanese_jisx0208_1978; */
62 Lisp_Object Vcharset_chinese_gb2312;
63 Lisp_Object Vcharset_chinese_gb12345;
64 Lisp_Object Vcharset_japanese_jisx0208;
65 Lisp_Object Vcharset_japanese_jisx0208_1990;
66 Lisp_Object Vcharset_korean_ksc5601;
67 Lisp_Object Vcharset_japanese_jisx0212;
68 Lisp_Object Vcharset_chinese_cns11643_1;
69 Lisp_Object Vcharset_chinese_cns11643_2;
71 Lisp_Object Vcharset_system_char_id;
72 Lisp_Object Vcharset_ucs;
73 Lisp_Object Vcharset_ucs_bmp;
74 Lisp_Object Vcharset_ucs_smp;
75 Lisp_Object Vcharset_ucs_sip;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ethiopic_ucs;
84 Lisp_Object Vcharset_chinese_big5_1;
85 Lisp_Object Vcharset_chinese_big5_2;
87 #ifdef ENABLE_COMPOSITE_CHARS
88 Lisp_Object Vcharset_composite;
90 /* Hash tables for composite chars. One maps string representing
91 composed chars to their equivalent chars; one goes the
93 Lisp_Object Vcomposite_char_char2string_hash_table;
94 Lisp_Object Vcomposite_char_string2char_hash_table;
96 static int composite_char_row_next;
97 static int composite_char_col_next;
99 #endif /* ENABLE_COMPOSITE_CHARS */
101 struct charset_lookup *chlook;
103 static const struct lrecord_description charset_lookup_description_1[] = {
104 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
106 NUM_LEADING_BYTES+4*128
113 static const struct struct_description charset_lookup_description = {
114 sizeof (struct charset_lookup),
115 charset_lookup_description_1
119 /* Table of number of bytes in the string representation of a character
120 indexed by the first byte of that representation.
122 rep_bytes_by_first_byte(c) is more efficient than the equivalent
123 canonical computation:
125 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
127 const Bytecount rep_bytes_by_first_byte[0xA0] =
128 { /* 0x00 - 0x7f are for straight ASCII */
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 /* 0x80 - 0x8f are for Dimension-1 official charsets */
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 /* 0x90 - 0x9d are for Dimension-2 official charsets */
144 /* 0x9e is for Dimension-1 private charsets */
145 /* 0x9f is for Dimension-2 private charsets */
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
152 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
154 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
158 if (XVECTOR_LENGTH (v) > ccs_len)
161 for (i = 0; i < XVECTOR_LENGTH (v); i++)
163 Lisp_Object c = XVECTOR_DATA(v)[i];
165 if (!NILP (c) && !CHARP (c))
169 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
181 decoding_table_put_char (Lisp_Object ccs,
182 int code_point, Lisp_Object character)
185 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
186 int dim = XCHARSET_DIMENSION (ccs);
189 XCHARSET_DECODING_TABLE (ccs)
190 = put_ccs_octet_table (table1, ccs, code_point, character);
194 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
196 table2 = put_ccs_octet_table (table2, ccs,
197 (unsigned char)code_point, character);
198 XCHARSET_DECODING_TABLE (ccs)
199 = put_ccs_octet_table (table1, ccs,
200 (unsigned char)(code_point >> 8), table2);
205 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
207 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
209 table3 = put_ccs_octet_table (table3, ccs,
210 (unsigned char)code_point, character);
211 table2 = put_ccs_octet_table (table2, ccs,
212 (unsigned char)(code_point >> 8), table3);
213 XCHARSET_DECODING_TABLE (ccs)
214 = put_ccs_octet_table (table1, ccs,
215 (unsigned char)(code_point >> 16), table2);
217 else /* if (dim == 4) */
220 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
222 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
224 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
226 table4 = put_ccs_octet_table (table4, ccs,
227 (unsigned char)code_point, character);
228 table3 = put_ccs_octet_table (table3, ccs,
229 (unsigned char)(code_point >> 8), table4);
230 table2 = put_ccs_octet_table (table2, ccs,
231 (unsigned char)(code_point >> 16), table3);
232 XCHARSET_DECODING_TABLE (ccs)
233 = put_ccs_octet_table (table1, ccs,
234 (unsigned char)(code_point >> 24), table2);
237 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
238 int dim = XCHARSET_DIMENSION (ccs);
239 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
242 int ccs_len = XVECTOR_LENGTH (v);
247 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
248 nv = XVECTOR_DATA(v)[i];
253 if (EQ (nv, character))
256 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
263 XVECTOR_DATA(v)[i] = character;
268 put_char_ccs_code_point (Lisp_Object character,
269 Lisp_Object ccs, Lisp_Object value)
271 if ( !( EQ (XCHARSET_NAME (ccs), Qrep_ucs)
272 && INTP (value) && (XINT (value) < 0xF0000)
273 && XCHAR (character) == XINT (value) )
276 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
280 { /* obsolete representation: value must be a list of bytes */
281 Lisp_Object ret = Fcar (value);
285 signal_simple_error ("Invalid value for coded-charset", value);
286 code_point = XINT (ret);
287 if (XCHARSET_GRAPHIC (ccs) == 1)
295 signal_simple_error ("Invalid value for coded-charset",
299 signal_simple_error ("Invalid value for coded-charset",
302 if (XCHARSET_GRAPHIC (ccs) == 1)
304 code_point = (code_point << 8) | j;
307 value = make_int (code_point);
309 else if (INTP (value))
311 code_point = XINT (value);
312 if (XCHARSET_GRAPHIC (ccs) == 1)
314 code_point &= 0x7F7F7F7F;
315 value = make_int (code_point);
319 signal_simple_error ("Invalid value for coded-charset", value);
323 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
326 decoding_table_remove_char (ccs, XINT (cpos));
329 decoding_table_put_char (ccs, code_point, character);
335 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
337 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
338 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
340 if (VECTORP (decoding_table))
342 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
346 decoding_table_remove_char (ccs, XINT (cpos));
349 if (CHAR_TABLEP (encoding_table))
351 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
359 int leading_code_private_11;
362 Lisp_Object Qcharsetp;
364 /* Qdoc_string, Qdimension, Qchars defined in general.c */
365 Lisp_Object Qregistry, Qfinal, Qgraphic;
366 Lisp_Object Qdirection;
367 Lisp_Object Qreverse_direction_charset;
368 Lisp_Object Qleading_byte;
369 Lisp_Object Qshort_name, Qlong_name;
372 Lisp_Object Qto_iso_ir;
373 Lisp_Object Qpartial;
374 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
375 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
392 /* Qrep_jis_x0208_1978, */
410 Qvietnamese_viscii_lower,
411 Qvietnamese_viscii_upper,
421 Lisp_Object Ql2r, Qr2l;
423 Lisp_Object Vcharset_hash_table;
425 /* Composite characters are characters constructed by overstriking two
426 or more regular characters.
428 1) The old Mule implementation involves storing composite characters
429 in a buffer as a tag followed by all of the actual characters
430 used to make up the composite character. I think this is a bad
431 idea; it greatly complicates code that wants to handle strings
432 one character at a time because it has to deal with the possibility
433 of great big ungainly characters. It's much more reasonable to
434 simply store an index into a table of composite characters.
436 2) The current implementation only allows for 16,384 separate
437 composite characters over the lifetime of the XEmacs process.
438 This could become a potential problem if the user
439 edited lots of different files that use composite characters.
440 Due to FSF bogosity, increasing the number of allowable
441 composite characters under Mule would decrease the number
442 of possible faces that can exist. Mule already has shrunk
443 this to 2048, and further shrinkage would become uncomfortable.
444 No such problems exist in XEmacs.
446 Composite characters could be represented as 0x80 C1 C2 C3,
447 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
448 for slightly under 2^20 (one million) composite characters
449 over the XEmacs process lifetime, and you only need to
450 increase the size of a Mule character from 19 to 21 bits.
451 Or you could use 0x80 C1 C2 C3 C4, allowing for about
452 85 million (slightly over 2^26) composite characters. */
455 /************************************************************************/
456 /* Basic Emchar functions */
457 /************************************************************************/
459 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
460 string in STR. Returns the number of bytes stored.
461 Do not call this directly. Use the macro set_charptr_emchar() instead.
465 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
480 else if ( c <= 0x7ff )
482 *p++ = (c >> 6) | 0xc0;
483 *p++ = (c & 0x3f) | 0x80;
485 else if ( c <= 0xffff )
487 *p++ = (c >> 12) | 0xe0;
488 *p++ = ((c >> 6) & 0x3f) | 0x80;
489 *p++ = (c & 0x3f) | 0x80;
491 else if ( c <= 0x1fffff )
493 *p++ = (c >> 18) | 0xf0;
494 *p++ = ((c >> 12) & 0x3f) | 0x80;
495 *p++ = ((c >> 6) & 0x3f) | 0x80;
496 *p++ = (c & 0x3f) | 0x80;
498 else if ( c <= 0x3ffffff )
500 *p++ = (c >> 24) | 0xf8;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
508 *p++ = (c >> 30) | 0xfc;
509 *p++ = ((c >> 24) & 0x3f) | 0x80;
510 *p++ = ((c >> 18) & 0x3f) | 0x80;
511 *p++ = ((c >> 12) & 0x3f) | 0x80;
512 *p++ = ((c >> 6) & 0x3f) | 0x80;
513 *p++ = (c & 0x3f) | 0x80;
516 BREAKUP_CHAR (c, charset, c1, c2);
517 lb = CHAR_LEADING_BYTE (c);
518 if (LEADING_BYTE_PRIVATE_P (lb))
519 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
521 if (EQ (charset, Vcharset_control_1))
530 /* Return the first character from a Mule-encoded string in STR,
531 assuming it's non-ASCII. Do not call this directly.
532 Use the macro charptr_emchar() instead. */
535 non_ascii_charptr_emchar (const Bufbyte *str)
548 else if ( b >= 0xf8 )
553 else if ( b >= 0xf0 )
558 else if ( b >= 0xe0 )
563 else if ( b >= 0xc0 )
573 for( ; len > 0; len-- )
576 ch = ( ch << 6 ) | ( b & 0x3f );
580 Bufbyte i0 = *str, i1, i2 = 0;
583 if (i0 == LEADING_BYTE_CONTROL_1)
584 return (Emchar) (*++str - 0x20);
586 if (LEADING_BYTE_PREFIX_P (i0))
591 charset = CHARSET_BY_LEADING_BYTE (i0);
592 if (XCHARSET_DIMENSION (charset) == 2)
595 return MAKE_CHAR (charset, i1, i2);
599 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
600 Do not call this directly. Use the macro valid_char_p() instead. */
604 non_ascii_valid_char_p (Emchar ch)
608 /* Must have only lowest 19 bits set */
612 f1 = CHAR_FIELD1 (ch);
613 f2 = CHAR_FIELD2 (ch);
614 f3 = CHAR_FIELD3 (ch);
620 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
621 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
622 f2 > MAX_CHAR_FIELD2_PRIVATE)
627 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
628 f2 <= MAX_CHAR_FIELD2_PRIVATE))
632 NOTE: This takes advantage of the fact that
633 FIELD2_TO_OFFICIAL_LEADING_BYTE and
634 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
636 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
637 if (EQ (charset, Qnil))
639 return (XCHARSET_CHARS (charset) == 96);
645 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
646 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
647 f1 > MAX_CHAR_FIELD1_PRIVATE)
649 if (f2 < 0x20 || f3 < 0x20)
652 #ifdef ENABLE_COMPOSITE_CHARS
653 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
655 if (UNBOUNDP (Fgethash (make_int (ch),
656 Vcomposite_char_char2string_hash_table,
661 #endif /* ENABLE_COMPOSITE_CHARS */
663 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
664 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
667 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
669 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
672 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
674 if (EQ (charset, Qnil))
676 return (XCHARSET_CHARS (charset) == 96);
682 /************************************************************************/
683 /* Basic string functions */
684 /************************************************************************/
686 /* Copy the character pointed to by SRC into DST. Do not call this
687 directly. Use the macro charptr_copy_char() instead.
688 Return the number of bytes copied. */
691 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
693 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
695 for (i = bytes; i; i--, dst++, src++)
701 /************************************************************************/
702 /* streams of Emchars */
703 /************************************************************************/
705 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
706 The functions below are not meant to be called directly; use
707 the macros in insdel.h. */
710 Lstream_get_emchar_1 (Lstream *stream, int ch)
712 Bufbyte str[MAX_EMCHAR_LEN];
713 Bufbyte *strptr = str;
716 str[0] = (Bufbyte) ch;
718 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
720 int c = Lstream_getc (stream);
721 bufpos_checking_assert (c >= 0);
722 *++strptr = (Bufbyte) c;
724 return charptr_emchar (str);
728 Lstream_fput_emchar (Lstream *stream, Emchar ch)
730 Bufbyte str[MAX_EMCHAR_LEN];
731 Bytecount len = set_charptr_emchar (str, ch);
732 return Lstream_write (stream, str, len);
736 Lstream_funget_emchar (Lstream *stream, Emchar ch)
738 Bufbyte str[MAX_EMCHAR_LEN];
739 Bytecount len = set_charptr_emchar (str, ch);
740 Lstream_unread (stream, str, len);
744 /************************************************************************/
746 /************************************************************************/
749 mark_charset (Lisp_Object obj)
751 Lisp_Charset *cs = XCHARSET (obj);
753 mark_object (cs->short_name);
754 mark_object (cs->long_name);
755 mark_object (cs->doc_string);
756 mark_object (cs->registry);
757 mark_object (cs->ccl_program);
759 mark_object (cs->decoding_table);
760 mark_object (cs->mother);
766 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
768 Lisp_Charset *cs = XCHARSET (obj);
772 error ("printing unreadable object #<charset %s 0x%x>",
773 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
776 write_c_string ("#<charset ", printcharfun);
777 print_internal (CHARSET_NAME (cs), printcharfun, 0);
778 write_c_string (" ", printcharfun);
779 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
780 write_c_string (" ", printcharfun);
781 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
782 write_c_string (" ", printcharfun);
783 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
784 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
786 CHARSET_DIMENSION (cs),
787 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
788 CHARSET_COLUMNS (cs),
789 CHARSET_GRAPHIC (cs),
791 write_c_string (buf, printcharfun);
792 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
793 sprintf (buf, " 0x%x>", cs->header.uid);
794 write_c_string (buf, printcharfun);
797 static const struct lrecord_description charset_description[] = {
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
802 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
803 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
806 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
807 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
812 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
813 mark_charset, print_charset, 0, 0, 0,
817 /* Make a new charset. */
818 /* #### SJT Should generic properties be allowed? */
820 make_charset (Charset_ID id, Lisp_Object name,
821 unsigned short chars, unsigned char dimension,
822 unsigned char columns, unsigned char graphic,
823 Bufbyte final, unsigned char direction, Lisp_Object short_name,
824 Lisp_Object long_name, Lisp_Object doc,
827 Lisp_Object decoding_table,
828 Emchar min_code, Emchar max_code,
829 Emchar code_offset, unsigned char byte_offset,
830 Lisp_Object mother, unsigned char conversion,
834 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
838 XSETCHARSET (obj, cs);
840 CHARSET_ID (cs) = id;
841 CHARSET_NAME (cs) = name;
842 CHARSET_SHORT_NAME (cs) = short_name;
843 CHARSET_LONG_NAME (cs) = long_name;
844 CHARSET_CHARS (cs) = chars;
845 CHARSET_DIMENSION (cs) = dimension;
846 CHARSET_DIRECTION (cs) = direction;
847 CHARSET_COLUMNS (cs) = columns;
848 CHARSET_GRAPHIC (cs) = graphic;
849 CHARSET_FINAL (cs) = final;
850 CHARSET_DOC_STRING (cs) = doc;
851 CHARSET_REGISTRY (cs) = reg;
852 CHARSET_CCL_PROGRAM (cs) = Qnil;
853 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
855 CHARSET_ISO_IR (cs) = iso_ir;
856 CHARSET_DECODING_TABLE(cs) = Qunbound;
857 CHARSET_MIN_CODE (cs) = min_code;
858 CHARSET_MAX_CODE (cs) = max_code;
859 CHARSET_CODE_OFFSET (cs) = code_offset;
860 CHARSET_BYTE_OFFSET (cs) = byte_offset;
861 CHARSET_MOTHER (cs) = mother;
862 CHARSET_CONVERSION (cs) = conversion;
866 if (id == LEADING_BYTE_ASCII)
867 CHARSET_REP_BYTES (cs) = 1;
869 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
871 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
876 /* some charsets do not have final characters. This includes
877 ASCII, Control-1, Composite, and the two faux private
879 unsigned char iso2022_type
880 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
882 if ( ( !partial ) && ( code_offset == 0 ) )
884 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
885 chlook->charset_by_attributes[iso2022_type][final] = obj;
889 (chlook->charset_by_attributes[iso2022_type][final][direction]));
890 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
894 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
895 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
897 /* Some charsets are "faux" and don't have names or really exist at
898 all except in the leading-byte table. */
900 Fputhash (name, obj, Vcharset_hash_table);
905 get_unallocated_leading_byte (int dimension)
910 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
913 lb = chlook->next_allocated_leading_byte++;
917 if (chlook->next_allocated_1_byte_leading_byte >
918 MAX_LEADING_BYTE_PRIVATE_1)
921 lb = chlook->next_allocated_1_byte_leading_byte++;
925 /* awfully fragile, but correct */
926 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
927 if (chlook->next_allocated_2_byte_leading_byte == 0)
929 if (chlook->next_allocated_2_byte_leading_byte >
930 MAX_LEADING_BYTE_PRIVATE_2)
934 lb = chlook->next_allocated_2_byte_leading_byte++;
940 ("No more character sets free for this dimension",
941 make_int (dimension));
947 /* Number of Big5 characters which have the same code in 1st byte. */
949 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
952 decode_ccs_conversion (int conv_type, int code_point)
954 if ( conv_type == CONVERSION_IDENTICAL )
958 if ( conv_type == CONVERSION_94x60 )
960 int row = code_point >> 8;
961 int cell = code_point & 255;
965 else if (row < 16 + 32 + 30)
966 return (row - (16 + 32)) * 94 + cell - 33;
967 else if (row < 18 + 32 + 30)
969 else if (row < 18 + 32 + 60)
970 return (row - (18 + 32)) * 94 + cell - 33;
972 else if ( conv_type == CONVERSION_94x94x60 )
974 int plane = code_point >> 16;
975 int row = (code_point >> 8) & 255;
976 int cell = code_point & 255;
980 else if (row < 16 + 32 + 30)
982 (plane - 33) * 94 * 60
983 + (row - (16 + 32)) * 94
985 else if (row < 18 + 32 + 30)
987 else if (row < 18 + 32 + 60)
989 (plane - 33) * 94 * 60
990 + (row - (18 + 32)) * 94
993 else if ( conv_type == CONVERSION_BIG5_1 )
996 = (((code_point >> 8) & 0x7F) - 33) * 94
997 + (( code_point & 0x7F) - 33);
998 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
999 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1001 b2 += b2 < 0x3F ? 0x40 : 0x62;
1002 return (b1 << 8) | b2;
1004 else if ( conv_type == CONVERSION_BIG5_2 )
1007 = (((code_point >> 8) & 0x7F) - 33) * 94
1008 + (( code_point & 0x7F) - 33)
1009 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1010 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1011 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1013 b2 += b2 < 0x3F ? 0x40 : 0x62;
1014 return (b1 << 8) | b2;
1020 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1022 int dim = XCHARSET_DIMENSION (ccs);
1023 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1024 Emchar char_id = -1;
1031 = get_ccs_octet_table (decoding_table, ccs,
1032 (code_point >> (dim * 8)) & 255);
1034 if (CHARP (decoding_table))
1035 return XCHAR (decoding_table);
1037 if (EQ (decoding_table, Qunloaded))
1039 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1041 #endif /* HAVE_CHISE */
1044 else if ( !without_inheritance
1045 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1048 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1052 code += XCHARSET_CODE_OFFSET(ccs);
1053 if ( EQ (mother, Vcharset_ucs) )
1054 return DECODE_CHAR (mother, code, without_inheritance);
1056 return decode_defined_char (mother, code,
1057 without_inheritance);
1064 decode_builtin_char (Lisp_Object charset, int code_point)
1066 Lisp_Object mother = XCHARSET_MOTHER (charset);
1069 if ( XCHARSET_MAX_CODE (charset) > 0 )
1071 if ( CHARSETP (mother) )
1074 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1079 decode_builtin_char (mother,
1080 code + XCHARSET_CODE_OFFSET(charset));
1087 = (XCHARSET_DIMENSION (charset) == 1
1089 code_point - XCHARSET_BYTE_OFFSET (charset)
1091 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1092 * XCHARSET_CHARS (charset)
1093 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1094 + XCHARSET_CODE_OFFSET (charset);
1095 if ((cid < XCHARSET_MIN_CODE (charset))
1096 || (XCHARSET_MAX_CODE (charset) < cid))
1101 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1103 if (XCHARSET_DIMENSION (charset) == 1)
1105 switch (XCHARSET_CHARS (charset))
1109 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1112 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1120 switch (XCHARSET_CHARS (charset))
1123 return MIN_CHAR_94x94
1124 + (final - '0') * 94 * 94
1125 + (((code_point >> 8) & 0x7F) - 33) * 94
1126 + ((code_point & 0x7F) - 33);
1128 return MIN_CHAR_96x96
1129 + (final - '0') * 96 * 96
1130 + (((code_point >> 8) & 0x7F) - 32) * 96
1131 + ((code_point & 0x7F) - 32);
1143 charset_code_point (Lisp_Object charset, Emchar ch, int accepted_mode)
1147 if ( accepted_mode >= 0 )
1149 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1151 if ( CHAR_TABLEP (encoding_table)
1152 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1157 Lisp_Object mother = XCHARSET_MOTHER (charset);
1158 int min = XCHARSET_MIN_CODE (charset);
1159 int max = XCHARSET_MAX_CODE (charset);
1162 if ( CHARSETP (mother) && ( accepted_mode >= 0)
1163 && ( XCHARSET_FINAL (charset) >= '0' )
1165 code = charset_code_point (mother, ch, CHAR_DEFINED_ONLY);
1166 else if ( CHARSETP (mother)
1167 && ( XCHARSET_FINAL (charset) < '0' )
1169 code = charset_code_point (mother, ch, accepted_mode);
1170 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1172 else if ( ((max == 0) && CHARSETP (mother)
1173 && (XCHARSET_FINAL (charset) == 0))
1174 || ((min <= ch) && (ch <= max)) )
1177 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1178 || ((min <= code) && (code <= max)) )
1180 int d = code - XCHARSET_CODE_OFFSET (charset);
1182 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1184 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1186 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1188 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1191 int cell = d % 94 + 33;
1197 return (row << 8) | cell;
1199 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1201 int B1 = d >> 8, B2 = d & 0xFF;
1203 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1204 - (B2 < 0x7F ? 0x40 : 0x62);
1208 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1211 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1213 int B1 = d >> 8, B2 = d & 0xFF;
1215 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1216 - (B2 < 0x7F ? 0x40 : 0x62);
1220 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1221 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1224 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1225 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1226 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1227 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1228 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1230 int plane = d / (94 * 60) + 33;
1231 int row = (d % (94 * 60)) / 94;
1232 int cell = d % 94 + 33;
1238 return (plane << 16) | (row << 8) | cell;
1240 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1242 ( (d / (94 * 94) + 33) << 16)
1243 | ((d / 94 % 94 + 33) << 8)
1245 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1247 ( (d / (96 * 96) + 32) << 16)
1248 | ((d / 96 % 96 + 32) << 8)
1250 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1252 ( (d / (94 * 94 * 94) + 33) << 24)
1253 | ((d / (94 * 94) % 94 + 33) << 16)
1254 | ((d / 94 % 94 + 33) << 8)
1256 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1258 ( (d / (96 * 96 * 96) + 32) << 24)
1259 | ((d / (96 * 96) % 96 + 32) << 16)
1260 | ((d / 96 % 96 + 32) << 8)
1264 printf ("Unknown CCS-conversion %d is specified!",
1265 XCHARSET_CONVERSION (charset));
1269 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1271 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1272 ( XCHARSET_MIN_CODE (charset) == 0 )
1274 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1275 (XCHARSET_CODE_OFFSET (charset)
1276 == XCHARSET_MIN_CODE (charset))
1281 if (XCHARSET_DIMENSION (charset) == 1)
1283 if (XCHARSET_CHARS (charset) == 94)
1285 if (((d = ch - (MIN_CHAR_94
1286 + (XCHARSET_FINAL (charset) - '0') * 94))
1291 else if (XCHARSET_CHARS (charset) == 96)
1293 if (((d = ch - (MIN_CHAR_96
1294 + (XCHARSET_FINAL (charset) - '0') * 96))
1302 else if (XCHARSET_DIMENSION (charset) == 2)
1304 if (XCHARSET_CHARS (charset) == 94)
1306 if (((d = ch - (MIN_CHAR_94x94
1308 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1311 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1313 else if (XCHARSET_CHARS (charset) == 96)
1315 if (((d = ch - (MIN_CHAR_96x96
1317 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1320 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1331 encode_char_2 (Emchar ch, Lisp_Object* charset)
1333 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
1336 while (!NILP (charsets))
1338 *charset = Ffind_charset (Fcar (charsets));
1339 if ( !NILP (*charset)
1340 && (XCHARSET_DIMENSION (*charset) <= 2) )
1342 code_point = charset_code_point (*charset, ch, 0);
1343 if (code_point >= 0)
1346 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1347 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1349 code_point = encode_char_2_search_children (ch, charset);
1350 if (code_point >= 0)
1354 charsets = Fcdr (charsets);
1357 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1358 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1360 charsets = Vdefault_coded_charset_priority_list;
1361 while (!NILP (charsets))
1363 *charset = Ffind_charset (Fcar (charsets));
1364 if ( !NILP (*charset)
1365 && (XCHARSET_DIMENSION (*charset) <= 2) )
1367 code_point = encode_char_2_search_children (ch, charset);
1368 if (code_point >= 0)
1371 charsets = Fcdr (charsets);
1375 /* otherwise --- maybe for bootstrap */
1376 return encode_builtin_char_1 (ch, charset);
1380 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1382 if (c <= MAX_CHAR_BASIC_LATIN)
1384 *charset = Vcharset_ascii;
1389 *charset = Vcharset_control_1;
1394 *charset = Vcharset_latin_iso8859_1;
1398 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1400 *charset = Vcharset_hebrew_iso8859_8;
1401 return c - MIN_CHAR_HEBREW + 0x20;
1404 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1406 *charset = Vcharset_thai_tis620;
1407 return c - MIN_CHAR_THAI + 0x20;
1410 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1411 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1413 return list2 (Vcharset_katakana_jisx0201,
1414 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1417 else if (c <= MAX_CHAR_BMP)
1419 *charset = Vcharset_ucs_bmp;
1422 else if (c <= MAX_CHAR_SMP)
1424 *charset = Vcharset_ucs_smp;
1425 return c - MIN_CHAR_SMP;
1427 else if (c <= MAX_CHAR_SIP)
1429 *charset = Vcharset_ucs_sip;
1430 return c - MIN_CHAR_SIP;
1432 else if (c < MIN_CHAR_94)
1434 *charset = Vcharset_ucs;
1437 else if (c <= MAX_CHAR_94)
1439 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1440 ((c - MIN_CHAR_94) / 94) + '0',
1441 CHARSET_LEFT_TO_RIGHT);
1442 if (!NILP (*charset))
1443 return ((c - MIN_CHAR_94) % 94) + 33;
1446 *charset = Vcharset_ucs;
1450 else if (c <= MAX_CHAR_96)
1452 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1453 ((c - MIN_CHAR_96) / 96) + '0',
1454 CHARSET_LEFT_TO_RIGHT);
1455 if (!NILP (*charset))
1456 return ((c - MIN_CHAR_96) % 96) + 32;
1459 *charset = Vcharset_ucs;
1463 else if (c <= MAX_CHAR_94x94)
1466 = CHARSET_BY_ATTRIBUTES (94, 2,
1467 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1468 CHARSET_LEFT_TO_RIGHT);
1469 if (!NILP (*charset))
1470 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1471 | (((c - MIN_CHAR_94x94) % 94) + 33);
1474 *charset = Vcharset_ucs;
1478 else if (c <= MAX_CHAR_96x96)
1481 = CHARSET_BY_ATTRIBUTES (96, 2,
1482 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1483 CHARSET_LEFT_TO_RIGHT);
1484 if (!NILP (*charset))
1485 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1486 | (((c - MIN_CHAR_96x96) % 96) + 32);
1489 *charset = Vcharset_ucs;
1495 *charset = Vcharset_ucs;
1500 Lisp_Object Vdefault_coded_charset_priority_list;
1501 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1502 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1506 /************************************************************************/
1507 /* Basic charset Lisp functions */
1508 /************************************************************************/
1510 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1511 Return non-nil if OBJECT is a charset.
1515 return CHARSETP (object) ? Qt : Qnil;
1518 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1519 Retrieve the charset of the given name.
1520 If CHARSET-OR-NAME is a charset object, it is simply returned.
1521 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1522 nil is returned. Otherwise the associated charset object is returned.
1526 if (CHARSETP (charset_or_name))
1527 return charset_or_name;
1529 CHECK_SYMBOL (charset_or_name);
1530 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1533 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1534 Retrieve the charset of the given name.
1535 Same as `find-charset' except an error is signalled if there is no such
1536 charset instead of returning nil.
1540 Lisp_Object charset = Ffind_charset (name);
1543 signal_simple_error ("No such charset", name);
1547 /* We store the charsets in hash tables with the names as the key and the
1548 actual charset object as the value. Occasionally we need to use them
1549 in a list format. These routines provide us with that. */
1550 struct charset_list_closure
1552 Lisp_Object *charset_list;
1556 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1557 void *charset_list_closure)
1559 /* This function can GC */
1560 struct charset_list_closure *chcl =
1561 (struct charset_list_closure*) charset_list_closure;
1562 Lisp_Object *charset_list = chcl->charset_list;
1564 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1568 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1569 Return a list of the names of all defined charsets.
1573 Lisp_Object charset_list = Qnil;
1574 struct gcpro gcpro1;
1575 struct charset_list_closure charset_list_closure;
1577 GCPRO1 (charset_list);
1578 charset_list_closure.charset_list = &charset_list;
1579 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1580 &charset_list_closure);
1583 return charset_list;
1586 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1587 Return the name of charset CHARSET.
1591 return XCHARSET_NAME (Fget_charset (charset));
1594 /* #### SJT Should generic properties be allowed? */
1595 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1596 Define a new character set.
1597 This function is for use with Mule support.
1598 NAME is a symbol, the name by which the character set is normally referred.
1599 DOC-STRING is a string describing the character set.
1600 PROPS is a property list, describing the specific nature of the
1601 character set. Recognized properties are:
1603 'short-name Short version of the charset name (ex: Latin-1)
1604 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1605 'registry A regular expression matching the font registry field for
1607 'dimension Number of octets used to index a character in this charset.
1608 Either 1 or 2. Defaults to 1.
1609 If UTF-2000 feature is enabled, 3 or 4 are also available.
1610 'columns Number of columns used to display a character in this charset.
1611 Only used in TTY mode. (Under X, the actual width of a
1612 character can be derived from the font used to display the
1613 characters.) If unspecified, defaults to the dimension
1614 (this is almost always the correct value).
1615 'chars Number of characters in each dimension (94 or 96).
1616 Defaults to 94. Note that if the dimension is 2, the
1617 character set thus described is 94x94 or 96x96.
1618 If UTF-2000 feature is enabled, 128 or 256 are also available.
1619 'final Final byte of ISO 2022 escape sequence. Must be
1620 supplied. Each combination of (DIMENSION, CHARS) defines a
1621 separate namespace for final bytes. Note that ISO
1622 2022 restricts the final byte to the range
1623 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1624 dimension == 2. Note also that final bytes in the range
1625 0x30 - 0x3F are reserved for user-defined (not official)
1627 'graphic 0 (use left half of font on output) or 1 (use right half
1628 of font on output). Defaults to 0. For example, for
1629 a font whose registry is ISO8859-1, the left half
1630 (octets 0x20 - 0x7F) is the `ascii' character set, while
1631 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1632 character set. With 'graphic set to 0, the octets
1633 will have their high bit cleared; with it set to 1,
1634 the octets will have their high bit set.
1635 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1637 'ccl-program A compiled CCL program used to convert a character in
1638 this charset into an index into the font. This is in
1639 addition to the 'graphic property. The CCL program
1640 is passed the octets of the character, with the high
1641 bit cleared and set depending upon whether the value
1642 of the 'graphic property is 0 or 1.
1643 'iso-ir ISO-IR number (for representative coded-charset).
1644 '=>iso-ir [UTF-2000 only] Corresponding ISO-IR number.
1645 'mother [UTF-2000 only] Base coded-charset.
1646 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1647 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1648 'code-offset [UTF-2000 only] Offset for a code-point of a base
1650 'conversion [UTF-2000 only] Conversion for a code-point of a base
1651 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1652 'partial [UTF-2000 only] If t, specify as a partial coded-charset.
1654 (name, doc_string, props))
1656 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1658 int direction = CHARSET_LEFT_TO_RIGHT;
1659 Lisp_Object registry = Qnil;
1660 Lisp_Object charset;
1661 Lisp_Object ccl_program = Qnil;
1662 Lisp_Object short_name = Qnil, long_name = Qnil;
1663 Lisp_Object mother = Qnil;
1665 int min_code = 0, max_code = 0, code_offset = 0;
1666 int byte_offset = -1;
1669 CHECK_SYMBOL (name);
1670 if (!NILP (doc_string))
1671 CHECK_STRING (doc_string);
1673 charset = Ffind_charset (name);
1674 if (!NILP (charset))
1675 signal_simple_error ("Cannot redefine existing charset", name);
1678 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1680 if (EQ (keyword, Qshort_name))
1682 CHECK_STRING (value);
1686 else if (EQ (keyword, Qlong_name))
1688 CHECK_STRING (value);
1692 else if (EQ (keyword, Qiso_ir))
1696 iso_ir = XINT (value);
1702 else if (EQ (keyword, Qto_iso_ir))
1705 iso_ir = XINT (value);
1709 else if (EQ (keyword, Qdimension))
1712 dimension = XINT (value);
1713 if (dimension < 1 ||
1720 signal_simple_error ("Invalid value for 'dimension", value);
1723 else if (EQ (keyword, Qchars))
1726 chars = XINT (value);
1727 if (chars != 94 && chars != 96
1729 && chars != 128 && chars != 256
1732 signal_simple_error ("Invalid value for 'chars", value);
1735 else if (EQ (keyword, Qcolumns))
1738 columns = XINT (value);
1739 if (columns != 1 && columns != 2)
1740 signal_simple_error ("Invalid value for 'columns", value);
1743 else if (EQ (keyword, Qgraphic))
1746 graphic = XINT (value);
1754 signal_simple_error ("Invalid value for 'graphic", value);
1757 else if (EQ (keyword, Qregistry))
1759 CHECK_STRING (value);
1763 else if (EQ (keyword, Qdirection))
1765 if (EQ (value, Ql2r))
1766 direction = CHARSET_LEFT_TO_RIGHT;
1767 else if (EQ (value, Qr2l))
1768 direction = CHARSET_RIGHT_TO_LEFT;
1770 signal_simple_error ("Invalid value for 'direction", value);
1773 else if (EQ (keyword, Qfinal))
1775 CHECK_CHAR_COERCE_INT (value);
1776 final = XCHAR (value);
1777 if (final < '0' || final > '~')
1778 signal_simple_error ("Invalid value for 'final", value);
1782 else if (EQ (keyword, Qpartial))
1784 partial = !NILP (value);
1787 else if (EQ (keyword, Qmother))
1789 mother = Fget_charset (value);
1792 else if (EQ (keyword, Qmin_code))
1795 min_code = XUINT (value);
1798 else if (EQ (keyword, Qmax_code))
1801 max_code = XUINT (value);
1804 else if (EQ (keyword, Qcode_offset))
1807 code_offset = XUINT (value);
1810 else if (EQ (keyword, Qconversion))
1812 if (EQ (value, Q94x60))
1813 conversion = CONVERSION_94x60;
1814 else if (EQ (value, Q94x94x60))
1815 conversion = CONVERSION_94x94x60;
1816 else if (EQ (value, Qbig5_1))
1817 conversion = CONVERSION_BIG5_1;
1818 else if (EQ (value, Qbig5_2))
1819 conversion = CONVERSION_BIG5_2;
1821 signal_simple_error ("Unrecognized conversion", value);
1825 else if (EQ (keyword, Qccl_program))
1827 struct ccl_program test_ccl;
1829 if (setup_ccl_program (&test_ccl, value) < 0)
1830 signal_simple_error ("Invalid value for 'ccl-program", value);
1831 ccl_program = value;
1835 signal_simple_error ("Unrecognized property", keyword);
1841 error ("'final must be specified");
1843 if (dimension == 2 && final > 0x5F)
1845 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1848 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1849 CHARSET_LEFT_TO_RIGHT)) ||
1850 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1851 CHARSET_RIGHT_TO_LEFT)))
1853 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1856 id = get_unallocated_leading_byte (dimension);
1858 if (NILP (doc_string))
1859 doc_string = build_string ("");
1861 if (NILP (registry))
1862 registry = build_string ("");
1864 if (NILP (short_name))
1865 XSETSTRING (short_name, XSYMBOL (name)->name);
1867 if (NILP (long_name))
1868 long_name = doc_string;
1871 columns = dimension;
1873 if (byte_offset < 0)
1877 else if (chars == 96)
1883 if ( (conversion == 0) && NILP (mother) && (min_code > 0) )
1891 conversion = CONVERSION_94;
1894 conversion = CONVERSION_94x94;
1897 conversion = CONVERSION_94x94x94;
1900 conversion = CONVERSION_94x94x94x94;
1908 conversion = CONVERSION_96;
1911 conversion = CONVERSION_96x96;
1914 conversion = CONVERSION_96x96x96;
1917 conversion = CONVERSION_96x96x96x96;
1924 charset = make_charset (id, name, chars, dimension, columns, graphic,
1925 final, direction, short_name, long_name,
1926 doc_string, registry, iso_ir,
1927 Qnil, min_code, max_code, code_offset, byte_offset,
1928 mother, conversion, partial);
1929 if (!NILP (ccl_program))
1930 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1934 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1936 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1937 NEW-NAME is the name of the new charset. Return the new charset.
1939 (charset, new_name))
1941 Lisp_Object new_charset = Qnil;
1942 int id, chars, dimension, columns, graphic, final;
1944 Lisp_Object registry, doc_string, short_name, long_name;
1947 charset = Fget_charset (charset);
1948 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1949 signal_simple_error ("Charset already has reverse-direction charset",
1952 CHECK_SYMBOL (new_name);
1953 if (!NILP (Ffind_charset (new_name)))
1954 signal_simple_error ("Cannot redefine existing charset", new_name);
1956 cs = XCHARSET (charset);
1958 chars = CHARSET_CHARS (cs);
1959 dimension = CHARSET_DIMENSION (cs);
1960 columns = CHARSET_COLUMNS (cs);
1961 id = get_unallocated_leading_byte (dimension);
1963 graphic = CHARSET_GRAPHIC (cs);
1964 final = CHARSET_FINAL (cs);
1965 direction = CHARSET_RIGHT_TO_LEFT;
1966 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1967 direction = CHARSET_LEFT_TO_RIGHT;
1968 doc_string = CHARSET_DOC_STRING (cs);
1969 short_name = CHARSET_SHORT_NAME (cs);
1970 long_name = CHARSET_LONG_NAME (cs);
1971 registry = CHARSET_REGISTRY (cs);
1973 new_charset = make_charset (id, new_name, chars, dimension, columns,
1974 graphic, final, direction, short_name, long_name,
1975 doc_string, registry,
1978 CHARSET_DECODING_TABLE(cs),
1979 CHARSET_MIN_CODE(cs),
1980 CHARSET_MAX_CODE(cs),
1981 CHARSET_CODE_OFFSET(cs),
1982 CHARSET_BYTE_OFFSET(cs),
1984 CHARSET_CONVERSION (cs)
1986 Qnil, 0, 0, 0, 0, Qnil, 0
1990 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1991 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1996 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1997 Define symbol ALIAS as an alias for CHARSET.
2001 CHECK_SYMBOL (alias);
2002 charset = Fget_charset (charset);
2003 return Fputhash (alias, charset, Vcharset_hash_table);
2006 /* #### Reverse direction charsets not yet implemented. */
2008 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2010 Return the reverse-direction charset parallel to CHARSET, if any.
2011 This is the charset with the same properties (in particular, the same
2012 dimension, number of characters per dimension, and final byte) as
2013 CHARSET but whose characters are displayed in the opposite direction.
2017 charset = Fget_charset (charset);
2018 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2022 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2023 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2024 If DIRECTION is omitted, both directions will be checked (left-to-right
2025 will be returned if character sets exist for both directions).
2027 (dimension, chars, final, direction))
2029 int dm, ch, fi, di = -1;
2030 Lisp_Object obj = Qnil;
2032 CHECK_INT (dimension);
2033 dm = XINT (dimension);
2034 if (dm < 1 || dm > 2)
2035 signal_simple_error ("Invalid value for DIMENSION", dimension);
2039 if (ch != 94 && ch != 96)
2040 signal_simple_error ("Invalid value for CHARS", chars);
2042 CHECK_CHAR_COERCE_INT (final);
2044 if (fi < '0' || fi > '~')
2045 signal_simple_error ("Invalid value for FINAL", final);
2047 if (EQ (direction, Ql2r))
2048 di = CHARSET_LEFT_TO_RIGHT;
2049 else if (EQ (direction, Qr2l))
2050 di = CHARSET_RIGHT_TO_LEFT;
2051 else if (!NILP (direction))
2052 signal_simple_error ("Invalid value for DIRECTION", direction);
2054 if (dm == 2 && fi > 0x5F)
2056 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2060 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2062 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2065 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2068 return XCHARSET_NAME (obj);
2072 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2073 Return short name of CHARSET.
2077 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2080 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2081 Return long name of CHARSET.
2085 return XCHARSET_LONG_NAME (Fget_charset (charset));
2088 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2089 Return description of CHARSET.
2093 return XCHARSET_DOC_STRING (Fget_charset (charset));
2096 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2097 Return dimension of CHARSET.
2101 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2104 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2105 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2106 Recognized properties are those listed in `make-charset', as well as
2107 'name and 'doc-string.
2113 charset = Fget_charset (charset);
2114 cs = XCHARSET (charset);
2116 CHECK_SYMBOL (prop);
2117 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2118 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2119 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2120 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2121 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2122 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2123 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2124 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2125 Qnil : make_char (CHARSET_FINAL (cs));
2126 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2127 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2128 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2129 if (EQ (prop, Qdirection))
2130 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2131 if (EQ (prop, Qreverse_direction_charset))
2133 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2134 /* #### Is this translation OK? If so, error checking sufficient? */
2135 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2138 if (EQ (prop, Qiso_ir)||
2139 EQ (prop, Qto_iso_ir))
2141 if ( CHARSET_ISO_IR (cs) > 0 )
2142 return make_int (CHARSET_ISO_IR (cs));
2146 if (EQ (prop, Qmother))
2147 return CHARSET_MOTHER (cs);
2148 if (EQ (prop, Qmin_code))
2149 return make_int (CHARSET_MIN_CODE (cs));
2150 if (EQ (prop, Qmax_code))
2151 return make_int (CHARSET_MAX_CODE (cs));
2153 signal_simple_error ("Unrecognized charset property name", prop);
2154 return Qnil; /* not reached */
2157 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2158 Return charset identification number of CHARSET.
2162 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2165 /* #### We need to figure out which properties we really want to
2168 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2169 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2171 (charset, ccl_program))
2173 struct ccl_program test_ccl;
2175 charset = Fget_charset (charset);
2176 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2177 signal_simple_error ("Invalid ccl-program", ccl_program);
2178 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2183 invalidate_charset_font_caches (Lisp_Object charset)
2185 /* Invalidate font cache entries for charset on all devices. */
2186 Lisp_Object devcons, concons, hash_table;
2187 DEVICE_LOOP_NO_BREAK (devcons, concons)
2189 struct device *d = XDEVICE (XCAR (devcons));
2190 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2191 if (!UNBOUNDP (hash_table))
2192 Fclrhash (hash_table);
2196 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2197 Set the 'registry property of CHARSET to REGISTRY.
2199 (charset, registry))
2201 charset = Fget_charset (charset);
2202 CHECK_STRING (registry);
2203 XCHARSET_REGISTRY (charset) = registry;
2204 invalidate_charset_font_caches (charset);
2205 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2210 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2211 Return mapping-table of CHARSET.
2215 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2218 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2219 Set mapping-table of CHARSET to TABLE.
2223 struct Lisp_Charset *cs;
2227 charset = Fget_charset (charset);
2228 cs = XCHARSET (charset);
2232 CHARSET_DECODING_TABLE(cs) = Qnil;
2235 else if (VECTORP (table))
2237 int ccs_len = CHARSET_BYTE_SIZE (cs);
2238 int ret = decoding_table_check_elements (table,
2239 CHARSET_DIMENSION (cs),
2244 signal_simple_error ("Too big table", table);
2246 signal_simple_error ("Invalid element is found", table);
2248 signal_simple_error ("Something wrong", table);
2250 CHARSET_DECODING_TABLE(cs) = Qnil;
2253 signal_error (Qwrong_type_argument,
2254 list2 (build_translated_string ("vector-or-nil-p"),
2257 byte_offset = CHARSET_BYTE_OFFSET (cs);
2258 switch (CHARSET_DIMENSION (cs))
2261 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2263 Lisp_Object c = XVECTOR_DATA(table)[i];
2266 Fput_char_attribute (c, XCHARSET_NAME (charset),
2267 make_int (i + byte_offset));
2271 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2273 Lisp_Object v = XVECTOR_DATA(table)[i];
2279 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2281 Lisp_Object c = XVECTOR_DATA(v)[j];
2285 (c, XCHARSET_NAME (charset),
2286 make_int ( ( (i + byte_offset) << 8 )
2292 Fput_char_attribute (v, XCHARSET_NAME (charset),
2293 make_int (i + byte_offset));
2301 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2302 Save mapping-table of CHARSET.
2306 struct Lisp_Charset *cs;
2307 int byte_min, byte_max;
2308 #ifdef HAVE_LIBCHISE
2310 #else /* HAVE_LIBCHISE */
2312 Lisp_Object db_file;
2313 #endif /* not HAVE_LIBCHISE */
2315 charset = Fget_charset (charset);
2316 cs = XCHARSET (charset);
2318 #ifdef HAVE_LIBCHISE
2319 if ( open_chise_data_source_maybe () )
2323 = chise_ds_get_ccs (default_chise_data_source,
2324 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2327 printf ("Can't open decoding-table %s\n",
2328 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2331 #else /* HAVE_LIBCHISE */
2332 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2333 Qsystem_char_id, 1);
2334 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2335 #endif /* not HAVE_LIBCHISE */
2337 byte_min = CHARSET_BYTE_OFFSET (cs);
2338 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2339 switch (CHARSET_DIMENSION (cs))
2343 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2346 for (cell = byte_min; cell < byte_max; cell++)
2348 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2352 #ifdef HAVE_LIBCHISE
2353 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2354 #else /* HAVE_LIBCHISE */
2355 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2356 Fprin1_to_string (c, Qnil),
2358 #endif /* not HAVE_LIBCHISE */
2365 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2368 for (row = byte_min; row < byte_max; row++)
2370 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2373 for (cell = byte_min; cell < byte_max; cell++)
2375 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2379 #ifdef HAVE_LIBCHISE
2380 chise_ccs_set_decoded_char
2382 (row << 8) | cell, XCHAR (c));
2383 #else /* HAVE_LIBCHISE */
2384 Fput_database (Fprin1_to_string (make_int ((row << 8)
2387 Fprin1_to_string (c, Qnil),
2389 #endif /* not HAVE_LIBCHISE */
2397 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2400 for (plane = byte_min; plane < byte_max; plane++)
2403 = get_ccs_octet_table (table_p, charset, plane);
2406 for (row = byte_min; row < byte_max; row++)
2409 = get_ccs_octet_table (table_r, charset, row);
2412 for (cell = byte_min; cell < byte_max; cell++)
2414 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2419 #ifdef HAVE_LIBCHISE
2420 chise_ccs_set_decoded_char
2425 #else /* HAVE_LIBCHISE */
2426 Fput_database (Fprin1_to_string
2427 (make_int ((plane << 16)
2431 Fprin1_to_string (c, Qnil),
2433 #endif /* not HAVE_LIBCHISE */
2442 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2445 for (group = byte_min; group < byte_max; group++)
2448 = get_ccs_octet_table (table_g, charset, group);
2451 for (plane = byte_min; plane < byte_max; plane++)
2454 = get_ccs_octet_table (table_p, charset, plane);
2457 for (row = byte_min; row < byte_max; row++)
2460 = get_ccs_octet_table (table_r, charset, row);
2463 for (cell = byte_min; cell < byte_max; cell++)
2466 = get_ccs_octet_table (table_c, charset, cell);
2470 #ifdef HAVE_LIBCHISE
2471 chise_ccs_set_decoded_char
2477 #else /* HAVE_LIBCHISE */
2478 Fput_database (Fprin1_to_string
2479 (make_int (( group << 24)
2484 Fprin1_to_string (c, Qnil),
2486 #endif /* not HAVE_LIBCHISE */
2494 #ifdef HAVE_LIBCHISE
2495 chise_ccs_sync (dt_ccs);
2497 #else /* HAVE_LIBCHISE */
2498 return Fclose_database (db);
2499 #endif /* not HAVE_LIBCHISE */
2502 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2503 Reset mapping-table of CCS with database file.
2507 #ifdef HAVE_LIBCHISE
2508 CHISE_CCS chise_ccs;
2510 Lisp_Object db_file;
2513 ccs = Fget_charset (ccs);
2515 #ifdef HAVE_LIBCHISE
2516 if ( open_chise_data_source_maybe () )
2519 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2520 XSTRING_DATA (Fsymbol_name
2521 (XCHARSET_NAME(ccs))));
2522 if (chise_ccs == NULL)
2525 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2526 Qsystem_char_id, 0);
2530 #ifdef HAVE_LIBCHISE
2531 chise_ccs_setup_db (chise_ccs, 0) == 0
2533 !NILP (Ffile_exists_p (db_file))
2537 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2544 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2546 #ifdef HAVE_LIBCHISE
2547 CHISE_Char_ID char_id;
2549 if ( open_chise_data_source_maybe () )
2553 = chise_ds_decode_char (default_chise_data_source,
2554 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2557 decoding_table_put_char (ccs, code_point, make_char (char_id));
2559 decoding_table_put_char (ccs, code_point, Qnil);
2561 /* chise_ccst_close (dt_ccs); */
2563 #else /* HAVE_LIBCHISE */
2566 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2569 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2573 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2580 decoding_table_put_char (ccs, code_point, ret);
2581 Fclose_database (db);
2585 decoding_table_put_char (ccs, code_point, Qnil);
2586 Fclose_database (db);
2589 #endif /* not HAVE_LIBCHISE */
2592 #ifdef HAVE_LIBCHISE
2593 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2594 Save properties of CHARSET.
2598 struct Lisp_Charset *cs;
2599 CHISE_Property property;
2601 unsigned char* feature_name;
2603 ccs = Fget_charset (charset);
2604 cs = XCHARSET (ccs);
2606 if ( open_chise_data_source_maybe () )
2609 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2611 property = chise_ds_get_property (default_chise_data_source,
2613 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2614 chise_feature_set_property_value
2615 (chise_ds_get_feature (default_chise_data_source, feature_name),
2616 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2618 chise_property_sync (property);
2620 charset = XCHARSET_NAME (ccs);
2621 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2623 property = chise_ds_get_property (default_chise_data_source,
2625 chise_feature_set_property_value
2626 (chise_ds_get_feature (default_chise_data_source, feature_name),
2627 property, XSTRING_DATA (Fprin1_to_string
2628 (CHARSET_DOC_STRING (cs), Qnil)));
2629 chise_property_sync (property);
2631 property = chise_ds_get_property (default_chise_data_source, "type");
2632 chise_feature_set_property_value
2633 (chise_ds_get_feature (default_chise_data_source, feature_name),
2635 chise_property_sync (property);
2637 property = chise_ds_get_property (default_chise_data_source, "chars");
2638 chise_feature_set_property_value
2639 (chise_ds_get_feature (default_chise_data_source, feature_name),
2640 property, XSTRING_DATA (Fprin1_to_string (make_int
2641 (CHARSET_CHARS (cs)),
2643 chise_property_sync (property);
2645 property = chise_ds_get_property (default_chise_data_source, "dimension");
2646 chise_feature_set_property_value
2647 (chise_ds_get_feature (default_chise_data_source, feature_name),
2648 property, XSTRING_DATA (Fprin1_to_string (make_int
2649 (CHARSET_DIMENSION (cs)),
2651 chise_property_sync (property);
2653 if ( CHARSET_FINAL (cs) != 0 )
2655 property = chise_ds_get_property (default_chise_data_source,
2657 chise_feature_set_property_value
2658 (chise_ds_get_feature (default_chise_data_source, feature_name),
2659 property, XSTRING_DATA (Fprin1_to_string (make_int
2660 (CHARSET_FINAL (cs)),
2662 chise_property_sync (property);
2665 if ( !NILP (CHARSET_MOTHER (cs)) )
2667 Lisp_Object mother = CHARSET_MOTHER (cs);
2669 if ( CHARSETP (mother) )
2670 mother = XCHARSET_NAME (mother);
2672 property = chise_ds_get_property (default_chise_data_source,
2674 chise_feature_set_property_value
2675 (chise_ds_get_feature (default_chise_data_source, feature_name),
2676 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2677 chise_property_sync (property);
2680 if ( CHARSET_MAX_CODE (cs) != 0 )
2684 property = chise_ds_get_property (default_chise_data_source,
2686 if ( CHARSET_MIN_CODE (cs) == 0 )
2687 chise_feature_set_property_value
2688 (chise_ds_get_feature (default_chise_data_source, feature_name),
2692 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2693 chise_feature_set_property_value
2694 (chise_ds_get_feature (default_chise_data_source, feature_name),
2697 chise_property_sync (property);
2699 property = chise_ds_get_property (default_chise_data_source,
2701 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2702 chise_feature_set_property_value
2703 (chise_ds_get_feature (default_chise_data_source, feature_name),
2705 chise_property_sync (property);
2707 property = chise_ds_get_property (default_chise_data_source,
2708 "mother-code-offset");
2709 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2710 chise_feature_set_property_value
2711 (chise_ds_get_feature (default_chise_data_source, feature_name),
2715 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2716 chise_feature_set_property_value
2717 (chise_ds_get_feature (default_chise_data_source, feature_name),
2720 chise_property_sync (property);
2722 property = chise_ds_get_property (default_chise_data_source,
2723 "mother-code-conversion");
2724 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2725 chise_feature_set_property_value
2726 (chise_ds_get_feature (default_chise_data_source, feature_name),
2727 property, "identical");
2730 Lisp_Object sym = Qnil;
2732 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2734 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2736 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2738 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2741 chise_feature_set_property_value
2742 (chise_ds_get_feature (default_chise_data_source, feature_name),
2743 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2745 chise_feature_set_property_value
2746 (chise_ds_get_feature (default_chise_data_source, feature_name),
2747 property, "unknown");
2749 chise_property_sync (property);
2753 #endif /* HAVE_LIBCHISE */
2755 #endif /* HAVE_CHISE */
2756 #endif /* UTF2000 */
2759 /************************************************************************/
2760 /* Lisp primitives for working with characters */
2761 /************************************************************************/
2764 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2765 Make a character from CHARSET and code-point CODE.
2766 If DEFINED_ONLY is non-nil, builtin character is not returned.
2767 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2768 If corresponding character is not found, nil is returned.
2770 (charset, code, defined_only, without_inheritance))
2774 charset = Fget_charset (charset);
2777 if ( (XCHARSET_GRAPHIC (charset) == 0) ||
2778 (XCHARSET_GRAPHIC (charset) == 1) )
2780 if (NILP (defined_only))
2781 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2783 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2784 return c >= 0 ? make_char (c) : Qnil;
2787 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2788 Make a builtin character from CHARSET and code-point CODE.
2795 charset = Fget_charset (charset);
2797 if (EQ (charset, Vcharset_latin_viscii))
2799 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2805 (ret = Fget_char_attribute (chr,
2806 Vcharset_latin_viscii_lower,
2809 charset = Vcharset_latin_viscii_lower;
2813 (ret = Fget_char_attribute (chr,
2814 Vcharset_latin_viscii_upper,
2817 charset = Vcharset_latin_viscii_upper;
2824 if (XCHARSET_GRAPHIC (charset) == 1)
2827 ch = decode_builtin_char (charset, c);
2829 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
2833 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2834 Make a character from CHARSET and octets ARG1 and ARG2.
2835 ARG2 is required only for characters from two-dimensional charsets.
2836 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2837 character s with caron.
2839 (charset, arg1, arg2))
2843 int lowlim, highlim;
2845 charset = Fget_charset (charset);
2846 cs = XCHARSET (charset);
2848 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2849 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2851 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2853 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2854 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2857 /* It is useful (and safe, according to Olivier Galibert) to strip
2858 the 8th bit off ARG1 and ARG2 because it allows programmers to
2859 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2860 Latin 2 code of the character. */
2868 if (a1 < lowlim || a1 > highlim)
2869 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2871 if (CHARSET_DIMENSION (cs) == 1)
2875 ("Charset is of dimension one; second octet must be nil", arg2);
2876 return make_char (MAKE_CHAR (charset, a1, 0));
2885 a2 = XINT (arg2) & 0x7f;
2887 if (a2 < lowlim || a2 > highlim)
2888 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2890 return make_char (MAKE_CHAR (charset, a1, a2));
2893 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2894 Return the character set of CHARACTER.
2898 CHECK_CHAR_COERCE_INT (character);
2900 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2903 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2904 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2905 N defaults to 0 if omitted.
2909 Lisp_Object charset;
2912 CHECK_CHAR_COERCE_INT (character);
2914 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2916 if (NILP (n) || EQ (n, Qzero))
2917 return make_int (octet0);
2918 else if (EQ (n, make_int (1)))
2919 return make_int (octet1);
2921 signal_simple_error ("Octet number must be 0 or 1", n);
2925 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2926 Return code-point of CHARACTER in specified CHARSET.
2928 (character, charset, defined_only))
2932 CHECK_CHAR_COERCE_INT (character);
2933 charset = Fget_charset (charset);
2934 code_point = charset_code_point (charset, XCHAR (character),
2935 !NILP (defined_only));
2936 if (code_point >= 0)
2937 return make_int (code_point);
2943 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2944 Return list of charset and one or two position-codes of CHARACTER.
2948 /* This function can GC */
2949 struct gcpro gcpro1, gcpro2;
2950 Lisp_Object charset = Qnil;
2951 Lisp_Object rc = Qnil;
2959 GCPRO2 (charset, rc);
2960 CHECK_CHAR_COERCE_INT (character);
2963 code_point = ENCODE_CHAR (XCHAR (character), charset);
2964 dimension = XCHARSET_DIMENSION (charset);
2965 while (dimension > 0)
2967 rc = Fcons (make_int (code_point & 255), rc);
2971 rc = Fcons (XCHARSET_NAME (charset), rc);
2973 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2975 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2977 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2981 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2990 #ifdef ENABLE_COMPOSITE_CHARS
2991 /************************************************************************/
2992 /* composite character functions */
2993 /************************************************************************/
2996 lookup_composite_char (Bufbyte *str, int len)
2998 Lisp_Object lispstr = make_string (str, len);
2999 Lisp_Object ch = Fgethash (lispstr,
3000 Vcomposite_char_string2char_hash_table,
3006 if (composite_char_row_next >= 128)
3007 signal_simple_error ("No more composite chars available", lispstr);
3008 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3009 composite_char_col_next);
3010 Fputhash (make_char (emch), lispstr,
3011 Vcomposite_char_char2string_hash_table);
3012 Fputhash (lispstr, make_char (emch),
3013 Vcomposite_char_string2char_hash_table);
3014 composite_char_col_next++;
3015 if (composite_char_col_next >= 128)
3017 composite_char_col_next = 32;
3018 composite_char_row_next++;
3027 composite_char_string (Emchar ch)
3029 Lisp_Object str = Fgethash (make_char (ch),
3030 Vcomposite_char_char2string_hash_table,
3032 assert (!UNBOUNDP (str));
3036 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3037 Convert a string into a single composite character.
3038 The character is the result of overstriking all the characters in
3043 CHECK_STRING (string);
3044 return make_char (lookup_composite_char (XSTRING_DATA (string),
3045 XSTRING_LENGTH (string)));
3048 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3049 Return a string of the characters comprising a composite character.
3057 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3058 signal_simple_error ("Must be composite char", ch);
3059 return composite_char_string (emch);
3061 #endif /* ENABLE_COMPOSITE_CHARS */
3064 /************************************************************************/
3065 /* initialization */
3066 /************************************************************************/
3069 syms_of_mule_charset (void)
3071 INIT_LRECORD_IMPLEMENTATION (charset);
3073 DEFSUBR (Fcharsetp);
3074 DEFSUBR (Ffind_charset);
3075 DEFSUBR (Fget_charset);
3076 DEFSUBR (Fcharset_list);
3077 DEFSUBR (Fcharset_name);
3078 DEFSUBR (Fmake_charset);
3079 DEFSUBR (Fmake_reverse_direction_charset);
3080 /* DEFSUBR (Freverse_direction_charset); */
3081 DEFSUBR (Fdefine_charset_alias);
3082 DEFSUBR (Fcharset_from_attributes);
3083 DEFSUBR (Fcharset_short_name);
3084 DEFSUBR (Fcharset_long_name);
3085 DEFSUBR (Fcharset_description);
3086 DEFSUBR (Fcharset_dimension);
3087 DEFSUBR (Fcharset_property);
3088 DEFSUBR (Fcharset_id);
3089 DEFSUBR (Fset_charset_ccl_program);
3090 DEFSUBR (Fset_charset_registry);
3093 DEFSUBR (Fcharset_mapping_table);
3094 DEFSUBR (Fset_charset_mapping_table);
3096 DEFSUBR (Fsave_charset_mapping_table);
3097 DEFSUBR (Freset_charset_mapping_table);
3098 #ifdef HAVE_LIBCHISE
3099 DEFSUBR (Fsave_charset_properties);
3100 #endif /* HAVE_LIBCHISE */
3101 #endif /* HAVE_CHISE */
3102 DEFSUBR (Fdecode_char);
3103 DEFSUBR (Fdecode_builtin_char);
3104 DEFSUBR (Fencode_char);
3107 DEFSUBR (Fmake_char);
3108 DEFSUBR (Fchar_charset);
3109 DEFSUBR (Fchar_octet);
3110 DEFSUBR (Fsplit_char);
3112 #ifdef ENABLE_COMPOSITE_CHARS
3113 DEFSUBR (Fmake_composite_char);
3114 DEFSUBR (Fcomposite_char_string);
3117 defsymbol (&Qcharsetp, "charsetp");
3118 defsymbol (&Qregistry, "registry");
3119 defsymbol (&Qfinal, "final");
3120 defsymbol (&Qgraphic, "graphic");
3121 defsymbol (&Qdirection, "direction");
3122 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3123 defsymbol (&Qshort_name, "short-name");
3124 defsymbol (&Qlong_name, "long-name");
3125 defsymbol (&Qiso_ir, "iso-ir");
3127 defsymbol (&Qto_iso_ir, "=>iso-ir");
3128 defsymbol (&Qpartial, "partial");
3129 defsymbol (&Qmother, "mother");
3130 defsymbol (&Qmin_code, "min-code");
3131 defsymbol (&Qmax_code, "max-code");
3132 defsymbol (&Qcode_offset, "code-offset");
3133 defsymbol (&Qconversion, "conversion");
3134 defsymbol (&Q94x60, "94x60");
3135 defsymbol (&Q94x94x60, "94x94x60");
3136 defsymbol (&Qbig5_1, "big5-1");
3137 defsymbol (&Qbig5_2, "big5-2");
3140 defsymbol (&Ql2r, "l2r");
3141 defsymbol (&Qr2l, "r2l");
3143 /* Charsets, compatible with FSF 20.3
3144 Naming convention is Script-Charset[-Edition] */
3145 defsymbol (&Qascii, "ascii");
3146 defsymbol (&Qcontrol_1, "control-1");
3147 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3148 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3149 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3150 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3151 defsymbol (&Qthai_tis620, "thai-tis620");
3152 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3153 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3154 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3155 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3156 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3157 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3158 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3159 /* defsymbol (&Qrep_jis_x0208_1978, "=jis-x0208@1978"); */
3160 defsymbol (&Qrep_gb2312, "=gb2312");
3161 defsymbol (&Qrep_gb12345, "=gb12345");
3162 defsymbol (&Qrep_jis_x0208_1983, "=jis-x0208@1983");
3163 defsymbol (&Qrep_ks_x1001, "=ks-x1001");
3164 defsymbol (&Qrep_jis_x0212, "=jis-x0212");
3165 defsymbol (&Qrep_cns11643_1, "=cns11643-1");
3166 defsymbol (&Qrep_cns11643_2, "=cns11643-2");
3168 defsymbol (&Qsystem_char_id, "system-char-id");
3169 defsymbol (&Qrep_ucs, "=ucs");
3170 defsymbol (&Qucs, "ucs");
3171 defsymbol (&Qucs_bmp, "ucs-bmp");
3172 defsymbol (&Qucs_smp, "ucs-smp");
3173 defsymbol (&Qucs_sip, "ucs-sip");
3174 defsymbol (&Qlatin_viscii, "latin-viscii");
3175 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3176 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3177 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3178 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3179 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3180 defsymbol (&Qrep_jis_x0208, "=jis-x0208");
3181 defsymbol (&Qrep_jis_x0208_1990, "=jis-x0208@1990");
3182 defsymbol (&Qrep_big5, "=big5");
3183 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3185 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3186 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3188 defsymbol (&Qcomposite, "composite");
3192 vars_of_mule_charset (void)
3199 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3200 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3202 /* Table of charsets indexed by leading byte. */
3203 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3204 chlook->charset_by_leading_byte[i] = Qnil;
3207 /* Table of charsets indexed by type/final-byte. */
3208 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3209 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3210 chlook->charset_by_attributes[i][j] = Qnil;
3212 /* Table of charsets indexed by type/final-byte/direction. */
3213 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3214 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3215 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3216 chlook->charset_by_attributes[i][j][k] = Qnil;
3220 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3222 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3223 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3227 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3228 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3229 Leading-code of private TYPE9N charset of column-width 1.
3231 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3235 Vdefault_coded_charset_priority_list = Qnil;
3236 DEFVAR_LISP ("default-coded-charset-priority-list",
3237 &Vdefault_coded_charset_priority_list /*
3238 Default order of preferred coded-character-sets.
3240 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3241 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3242 &Vdisplay_coded_charset_priority_use_inheritance /*
3243 If non-nil, use character inheritance.
3245 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3246 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3247 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3248 If non-nil, prefer nearest character in hierarchy order.
3254 complex_vars_of_mule_charset (void)
3256 staticpro (&Vcharset_hash_table);
3257 Vcharset_hash_table =
3258 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3260 /* Predefined character sets. We store them into variables for
3264 staticpro (&Vcharset_system_char_id);
3265 Vcharset_system_char_id =
3266 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3267 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3268 build_string ("SCID"),
3269 build_string ("CHAR-ID"),
3270 build_string ("System char-id"),
3273 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3275 staticpro (&Vcharset_ucs);
3277 make_charset (LEADING_BYTE_UCS, Qrep_ucs, 256, 4,
3278 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("UCS"),
3280 build_string ("UCS"),
3281 build_string ("ISO/IEC 10646"),
3284 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3286 staticpro (&Vcharset_ucs_bmp);
3288 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3289 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3290 build_string ("BMP"),
3291 build_string ("UCS-BMP"),
3292 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3294 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3295 - LEADING_BYTE_UCS_BMP,
3296 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3298 staticpro (&Vcharset_ucs_smp);
3300 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3301 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3302 build_string ("SMP"),
3303 build_string ("UCS-SMP"),
3304 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3305 build_string ("UCS00-1"),
3307 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3308 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL,
3310 staticpro (&Vcharset_ucs_sip);
3312 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3313 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3314 build_string ("SIP"),
3315 build_string ("UCS-SIP"),
3316 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3317 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3319 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3320 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL,
3323 # define MIN_CHAR_THAI 0
3324 # define MAX_CHAR_THAI 0
3325 /* # define MIN_CHAR_HEBREW 0 */
3326 /* # define MAX_CHAR_HEBREW 0 */
3327 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3328 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3330 staticpro (&Vcharset_ascii);
3332 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3333 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3334 build_string ("ASCII"),
3335 build_string ("ASCII)"),
3336 build_string ("ASCII (ISO646 IRV)"),
3337 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3338 - LEADING_BYTE_ASCII,
3339 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL,
3341 staticpro (&Vcharset_control_1);
3342 Vcharset_control_1 =
3343 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3344 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3345 build_string ("C1"),
3346 build_string ("Control characters"),
3347 build_string ("Control characters 128-191"),
3349 - LEADING_BYTE_CONTROL_1,
3350 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL,
3352 staticpro (&Vcharset_latin_iso8859_1);
3353 Vcharset_latin_iso8859_1 =
3354 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3355 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3356 build_string ("Latin-1"),
3357 build_string ("ISO8859-1 (Latin-1)"),
3358 build_string ("ISO8859-1 (Latin-1)"),
3359 build_string ("iso8859-1"),
3360 - LEADING_BYTE_LATIN_ISO8859_1,
3361 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3363 staticpro (&Vcharset_latin_iso8859_2);
3364 Vcharset_latin_iso8859_2 =
3365 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3366 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3367 build_string ("Latin-2"),
3368 build_string ("ISO8859-2 (Latin-2)"),
3369 build_string ("ISO8859-2 (Latin-2)"),
3370 build_string ("iso8859-2"),
3371 - LEADING_BYTE_LATIN_ISO8859_2,
3372 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3374 staticpro (&Vcharset_latin_iso8859_3);
3375 Vcharset_latin_iso8859_3 =
3376 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3377 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3378 build_string ("Latin-3"),
3379 build_string ("ISO8859-3 (Latin-3)"),
3380 build_string ("ISO8859-3 (Latin-3)"),
3381 build_string ("iso8859-3"),
3382 - LEADING_BYTE_LATIN_ISO8859_3,
3383 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3385 staticpro (&Vcharset_latin_iso8859_4);
3386 Vcharset_latin_iso8859_4 =
3387 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3388 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3389 build_string ("Latin-4"),
3390 build_string ("ISO8859-4 (Latin-4)"),
3391 build_string ("ISO8859-4 (Latin-4)"),
3392 build_string ("iso8859-4"),
3393 - LEADING_BYTE_LATIN_ISO8859_4,
3394 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3396 staticpro (&Vcharset_thai_tis620);
3397 Vcharset_thai_tis620 =
3398 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3399 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3400 build_string ("TIS620"),
3401 build_string ("TIS620 (Thai)"),
3402 build_string ("TIS620.2529 (Thai)"),
3403 build_string ("tis620"),
3404 - LEADING_BYTE_THAI_TIS620,
3405 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3407 staticpro (&Vcharset_greek_iso8859_7);
3408 Vcharset_greek_iso8859_7 =
3409 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3410 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3411 build_string ("ISO8859-7"),
3412 build_string ("ISO8859-7 (Greek)"),
3413 build_string ("ISO8859-7 (Greek)"),
3414 build_string ("iso8859-7"),
3415 - LEADING_BYTE_GREEK_ISO8859_7,
3416 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3418 staticpro (&Vcharset_arabic_iso8859_6);
3419 Vcharset_arabic_iso8859_6 =
3420 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3421 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3422 build_string ("ISO8859-6"),
3423 build_string ("ISO8859-6 (Arabic)"),
3424 build_string ("ISO8859-6 (Arabic)"),
3425 build_string ("iso8859-6"),
3426 - LEADING_BYTE_ARABIC_ISO8859_6,
3427 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3429 staticpro (&Vcharset_hebrew_iso8859_8);
3430 Vcharset_hebrew_iso8859_8 =
3431 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3432 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3433 build_string ("ISO8859-8"),
3434 build_string ("ISO8859-8 (Hebrew)"),
3435 build_string ("ISO8859-8 (Hebrew)"),
3436 build_string ("iso8859-8"),
3437 - LEADING_BYTE_HEBREW_ISO8859_8,
3439 0 /* MIN_CHAR_HEBREW */,
3440 0 /* MAX_CHAR_HEBREW */, 0, 32,
3441 Qnil, CONVERSION_IDENTICAL,
3443 staticpro (&Vcharset_katakana_jisx0201);
3444 Vcharset_katakana_jisx0201 =
3445 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3446 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3447 build_string ("JISX0201 Kana"),
3448 build_string ("JISX0201.1976 (Japanese Kana)"),
3449 build_string ("JISX0201.1976 Japanese Kana"),
3450 build_string ("jisx0201\\.1976"),
3451 - LEADING_BYTE_KATAKANA_JISX0201,
3452 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3454 staticpro (&Vcharset_latin_jisx0201);
3455 Vcharset_latin_jisx0201 =
3456 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3457 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3458 build_string ("JISX0201 Roman"),
3459 build_string ("JISX0201.1976 (Japanese Roman)"),
3460 build_string ("JISX0201.1976 Japanese Roman"),
3461 build_string ("jisx0201\\.1976"),
3462 - LEADING_BYTE_LATIN_JISX0201,
3463 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3465 staticpro (&Vcharset_cyrillic_iso8859_5);
3466 Vcharset_cyrillic_iso8859_5 =
3467 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3468 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3469 build_string ("ISO8859-5"),
3470 build_string ("ISO8859-5 (Cyrillic)"),
3471 build_string ("ISO8859-5 (Cyrillic)"),
3472 build_string ("iso8859-5"),
3473 - LEADING_BYTE_CYRILLIC_ISO8859_5,
3474 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3476 staticpro (&Vcharset_latin_iso8859_9);
3477 Vcharset_latin_iso8859_9 =
3478 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3479 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3480 build_string ("Latin-5"),
3481 build_string ("ISO8859-9 (Latin-5)"),
3482 build_string ("ISO8859-9 (Latin-5)"),
3483 build_string ("iso8859-9"),
3484 - LEADING_BYTE_LATIN_ISO8859_9,
3485 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3488 staticpro (&Vcharset_jis_x0208);
3489 Vcharset_jis_x0208 =
3490 make_charset (LEADING_BYTE_JIS_X0208,
3491 Qrep_jis_x0208, 94, 2,
3492 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3493 build_string ("JIS X0208"),
3494 build_string ("JIS X0208 Common"),
3495 build_string ("JIS X0208 Common part"),
3496 build_string ("jisx0208\\.1990"),
3497 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3499 MIN_CHAR_JIS_X0208_1990,
3500 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3501 Qnil, CONVERSION_94x94,
3505 staticpro (&Vcharset_japanese_jisx0208_1978);
3506 Vcharset_japanese_jisx0208_1978 =
3507 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3508 Qrep_jis_x0208_1978, 94, 2,
3509 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3510 build_string ("JIS X0208:1978"),
3511 build_string ("JIS X0208:1978 (Japanese)"),
3513 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3514 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3515 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3522 CONVERSION_IDENTICAL,
3525 staticpro (&Vcharset_chinese_gb2312);
3526 Vcharset_chinese_gb2312 =
3527 make_charset (LEADING_BYTE_CHINESE_GB2312, Qrep_gb2312, 94, 2,
3528 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3529 build_string ("GB2312"),
3530 build_string ("GB2312)"),
3531 build_string ("GB2312 Chinese simplified"),
3532 build_string ("gb2312"),
3533 - LEADING_BYTE_CHINESE_GB2312,
3534 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3536 staticpro (&Vcharset_chinese_gb12345);
3537 Vcharset_chinese_gb12345 =
3538 make_charset (LEADING_BYTE_CHINESE_GB12345, Qrep_gb12345, 94, 2,
3539 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3540 build_string ("G1"),
3541 build_string ("GB 12345)"),
3542 build_string ("GB 12345-1990"),
3543 build_string ("GB12345\\(\\.1990\\)?-0"),
3545 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3547 staticpro (&Vcharset_japanese_jisx0208);
3548 Vcharset_japanese_jisx0208 =
3549 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qrep_jis_x0208_1983, 94, 2,
3550 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3551 build_string ("JISX0208"),
3552 build_string ("JIS X0208:1983 (Japanese)"),
3553 build_string ("JIS X0208:1983 Japanese Kanji"),
3554 build_string ("jisx0208\\.1983"),
3555 - LEADING_BYTE_JAPANESE_JISX0208,
3562 CONVERSION_IDENTICAL,
3565 staticpro (&Vcharset_japanese_jisx0208_1990);
3566 Vcharset_japanese_jisx0208_1990 =
3567 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3568 Qrep_jis_x0208_1990, 94, 2,
3569 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3570 build_string ("JISX0208-1990"),
3571 build_string ("JIS X0208:1990 (Japanese)"),
3572 build_string ("JIS X0208:1990 Japanese Kanji"),
3573 build_string ("jisx0208\\.1990"),
3574 - LEADING_BYTE_JAPANESE_JISX0208_1990,
3576 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3577 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3578 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3579 Vcharset_jis_x0208 /* Qnil */,
3580 CONVERSION_IDENTICAL /* CONVERSION_94x94 */,
3583 staticpro (&Vcharset_korean_ksc5601);
3584 Vcharset_korean_ksc5601 =
3585 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qrep_ks_x1001, 94, 2,
3586 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3587 build_string ("KSC5601"),
3588 build_string ("KSC5601 (Korean"),
3589 build_string ("KSC5601 Korean Hangul and Hanja"),
3590 build_string ("ksc5601"),
3591 - LEADING_BYTE_KOREAN_KSC5601,
3592 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3594 staticpro (&Vcharset_japanese_jisx0212);
3595 Vcharset_japanese_jisx0212 =
3596 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qrep_jis_x0212, 94, 2,
3597 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3598 build_string ("JISX0212"),
3599 build_string ("JISX0212 (Japanese)"),
3600 build_string ("JISX0212 Japanese Supplement"),
3601 build_string ("jisx0212"),
3602 - LEADING_BYTE_JAPANESE_JISX0212,
3603 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3606 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3607 staticpro (&Vcharset_chinese_cns11643_1);
3608 Vcharset_chinese_cns11643_1 =
3609 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qrep_cns11643_1, 94, 2,
3610 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3611 build_string ("CNS11643-1"),
3612 build_string ("CNS11643-1 (Chinese traditional)"),
3614 ("CNS 11643 Plane 1 Chinese traditional"),
3615 build_string (CHINESE_CNS_PLANE_RE("1")),
3616 - LEADING_BYTE_CHINESE_CNS11643_1,
3617 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3619 staticpro (&Vcharset_chinese_cns11643_2);
3620 Vcharset_chinese_cns11643_2 =
3621 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qrep_cns11643_2, 94, 2,
3622 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3623 build_string ("CNS11643-2"),
3624 build_string ("CNS11643-2 (Chinese traditional)"),
3626 ("CNS 11643 Plane 2 Chinese traditional"),
3627 build_string (CHINESE_CNS_PLANE_RE("2")),
3628 - LEADING_BYTE_CHINESE_CNS11643_2,
3629 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3632 staticpro (&Vcharset_latin_tcvn5712);
3633 Vcharset_latin_tcvn5712 =
3634 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3635 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3636 build_string ("TCVN 5712"),
3637 build_string ("TCVN 5712 (VSCII-2)"),
3638 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3639 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3640 - LEADING_BYTE_LATIN_TCVN5712,
3641 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3643 staticpro (&Vcharset_latin_viscii_lower);
3644 Vcharset_latin_viscii_lower =
3645 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3646 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3647 build_string ("VISCII lower"),
3648 build_string ("VISCII lower (Vietnamese)"),
3649 build_string ("VISCII lower (Vietnamese)"),
3650 build_string ("MULEVISCII-LOWER"),
3652 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3654 staticpro (&Vcharset_latin_viscii_upper);
3655 Vcharset_latin_viscii_upper =
3656 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3657 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3658 build_string ("VISCII upper"),
3659 build_string ("VISCII upper (Vietnamese)"),
3660 build_string ("VISCII upper (Vietnamese)"),
3661 build_string ("MULEVISCII-UPPER"),
3663 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3665 staticpro (&Vcharset_latin_viscii);
3666 Vcharset_latin_viscii =
3667 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3668 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3669 build_string ("VISCII"),
3670 build_string ("VISCII 1.1 (Vietnamese)"),
3671 build_string ("VISCII 1.1 (Vietnamese)"),
3672 build_string ("VISCII1\\.1"),
3674 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL,
3676 staticpro (&Vcharset_chinese_big5);
3677 Vcharset_chinese_big5 =
3678 make_charset (LEADING_BYTE_CHINESE_BIG5, Qrep_big5, 256, 2,
3679 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3680 build_string ("Big5"),
3681 build_string ("Big5"),
3682 build_string ("Big5 Chinese traditional"),
3683 build_string ("big5-0"),
3686 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3687 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL,
3690 staticpro (&Vcharset_ethiopic_ucs);
3691 Vcharset_ethiopic_ucs =
3692 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3693 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3694 build_string ("Ethiopic (UCS)"),
3695 build_string ("Ethiopic (UCS)"),
3696 build_string ("Ethiopic of UCS"),
3697 build_string ("Ethiopic-Unicode"),
3699 Qnil, 0x1200, 0x137F, 0, 0,
3700 Qnil, CONVERSION_IDENTICAL,
3703 staticpro (&Vcharset_chinese_big5_1);
3704 Vcharset_chinese_big5_1 =
3705 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3706 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3707 build_string ("Big5"),
3708 build_string ("Big5 (Level-1)"),
3710 ("Big5 Level-1 Chinese traditional"),
3711 build_string ("big5"),
3713 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3714 Vcharset_chinese_big5, CONVERSION_BIG5_1,
3716 staticpro (&Vcharset_chinese_big5_2);
3717 Vcharset_chinese_big5_2 =
3718 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3719 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3720 build_string ("Big5"),
3721 build_string ("Big5 (Level-2)"),
3723 ("Big5 Level-2 Chinese traditional"),
3724 build_string ("big5"),
3726 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3727 Vcharset_chinese_big5, CONVERSION_BIG5_2,
3730 #ifdef ENABLE_COMPOSITE_CHARS
3731 /* #### For simplicity, we put composite chars into a 96x96 charset.
3732 This is going to lead to problems because you can run out of
3733 room, esp. as we don't yet recycle numbers. */
3734 staticpro (&Vcharset_composite);
3735 Vcharset_composite =
3736 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3737 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3738 build_string ("Composite"),
3739 build_string ("Composite characters"),
3740 build_string ("Composite characters"),
3743 /* #### not dumped properly */
3744 composite_char_row_next = 32;
3745 composite_char_col_next = 32;
3747 Vcomposite_char_string2char_hash_table =
3748 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3749 Vcomposite_char_char2string_hash_table =
3750 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3751 staticpro (&Vcomposite_char_string2char_hash_table);
3752 staticpro (&Vcomposite_char_char2string_hash_table);
3753 #endif /* ENABLE_COMPOSITE_CHARS */