1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2011
7 This file is part of XEmacs.
9 XEmacs is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with XEmacs; see the file COPYING. If not, write to
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
26 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
45 /* The various pre-defined charsets. */
47 Lisp_Object Vcharset_ascii;
48 Lisp_Object Vcharset_control_1;
49 Lisp_Object Vcharset_latin_iso8859_1;
50 Lisp_Object Vcharset_latin_iso8859_2;
51 Lisp_Object Vcharset_latin_iso8859_3;
52 Lisp_Object Vcharset_latin_iso8859_4;
53 Lisp_Object Vcharset_thai_tis620;
54 Lisp_Object Vcharset_greek_iso8859_7;
55 Lisp_Object Vcharset_arabic_iso8859_6;
56 Lisp_Object Vcharset_hebrew_iso8859_8;
57 Lisp_Object Vcharset_katakana_jisx0201;
58 Lisp_Object Vcharset_latin_jisx0201;
59 Lisp_Object Vcharset_cyrillic_iso8859_5;
60 Lisp_Object Vcharset_latin_iso8859_9;
61 /* Lisp_Object Vcharset_japanese_jisx0208_1978; */
62 Lisp_Object Vcharset_chinese_gb2312;
63 Lisp_Object Vcharset_chinese_gb12345;
64 Lisp_Object Vcharset_japanese_jisx0208;
65 Lisp_Object Vcharset_japanese_jisx0208_1990;
66 Lisp_Object Vcharset_korean_ksc5601;
67 Lisp_Object Vcharset_japanese_jisx0212;
68 Lisp_Object Vcharset_chinese_cns11643_1;
69 Lisp_Object Vcharset_chinese_cns11643_2;
71 Lisp_Object Vcharset_system_char_id;
72 Lisp_Object Vcharset_ucs;
73 Lisp_Object Vcharset_ucs_bmp;
74 Lisp_Object Vcharset_ucs_smp;
75 Lisp_Object Vcharset_ucs_sip;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ethiopic_ucs;
84 Lisp_Object Vcharset_chinese_big5_1;
85 Lisp_Object Vcharset_chinese_big5_2;
87 #ifdef ENABLE_COMPOSITE_CHARS
88 Lisp_Object Vcharset_composite;
90 /* Hash tables for composite chars. One maps string representing
91 composed chars to their equivalent chars; one goes the
93 Lisp_Object Vcomposite_char_char2string_hash_table;
94 Lisp_Object Vcomposite_char_string2char_hash_table;
96 static int composite_char_row_next;
97 static int composite_char_col_next;
99 #endif /* ENABLE_COMPOSITE_CHARS */
101 struct charset_lookup *chlook;
103 static const struct lrecord_description charset_lookup_description_1[] = {
104 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
106 NUM_LEADING_BYTES+4*128
113 static const struct struct_description charset_lookup_description = {
114 sizeof (struct charset_lookup),
115 charset_lookup_description_1
119 /* Table of number of bytes in the string representation of a character
120 indexed by the first byte of that representation.
122 rep_bytes_by_first_byte(c) is more efficient than the equivalent
123 canonical computation:
125 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
127 const Bytecount rep_bytes_by_first_byte[0xA0] =
128 { /* 0x00 - 0x7f are for straight ASCII */
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 /* 0x80 - 0x8f are for Dimension-1 official charsets */
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 /* 0x90 - 0x9d are for Dimension-2 official charsets */
144 /* 0x9e is for Dimension-1 private charsets */
145 /* 0x9f is for Dimension-2 private charsets */
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
152 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
154 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
158 if (XVECTOR_LENGTH (v) > ccs_len)
161 for (i = 0; i < XVECTOR_LENGTH (v); i++)
163 Lisp_Object c = XVECTOR_DATA(v)[i];
165 if (!NILP (c) && !CHARP (c))
169 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
181 decoding_table_put_char (Lisp_Object ccs,
182 int code_point, Lisp_Object character)
185 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
186 int dim = XCHARSET_DIMENSION (ccs);
189 XCHARSET_DECODING_TABLE (ccs)
190 = put_ccs_octet_table (table1, ccs, code_point, character);
194 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
196 table2 = put_ccs_octet_table (table2, ccs,
197 (unsigned char)code_point, character);
198 XCHARSET_DECODING_TABLE (ccs)
199 = put_ccs_octet_table (table1, ccs,
200 (unsigned char)(code_point >> 8), table2);
205 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
207 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
209 table3 = put_ccs_octet_table (table3, ccs,
210 (unsigned char)code_point, character);
211 table2 = put_ccs_octet_table (table2, ccs,
212 (unsigned char)(code_point >> 8), table3);
213 XCHARSET_DECODING_TABLE (ccs)
214 = put_ccs_octet_table (table1, ccs,
215 (unsigned char)(code_point >> 16), table2);
217 else /* if (dim == 4) */
220 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
222 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
224 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
226 table4 = put_ccs_octet_table (table4, ccs,
227 (unsigned char)code_point, character);
228 table3 = put_ccs_octet_table (table3, ccs,
229 (unsigned char)(code_point >> 8), table4);
230 table2 = put_ccs_octet_table (table2, ccs,
231 (unsigned char)(code_point >> 16), table3);
232 XCHARSET_DECODING_TABLE (ccs)
233 = put_ccs_octet_table (table1, ccs,
234 (unsigned char)(code_point >> 24), table2);
237 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
238 int dim = XCHARSET_DIMENSION (ccs);
239 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
242 int ccs_len = XVECTOR_LENGTH (v);
247 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
248 nv = XVECTOR_DATA(v)[i];
253 if (EQ (nv, character))
256 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
263 XVECTOR_DATA(v)[i] = character;
268 put_char_ccs_code_point (Lisp_Object character,
269 Lisp_Object ccs, Lisp_Object value)
271 if ( !( EQ (XCHARSET_NAME (ccs), Qrep_ucs)
272 && INTP (value) && (XINT (value) < 0xF0000)
273 && XCHAR (character) == XINT (value) )
276 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
280 { /* obsolete representation: value must be a list of bytes */
281 Lisp_Object ret = Fcar (value);
285 signal_simple_error ("Invalid value for coded-charset", value);
286 code_point = XINT (ret);
287 if (XCHARSET_GRAPHIC (ccs) == 1)
295 signal_simple_error ("Invalid value for coded-charset",
299 signal_simple_error ("Invalid value for coded-charset",
302 if (XCHARSET_GRAPHIC (ccs) == 1)
304 code_point = (code_point << 8) | j;
307 value = make_int (code_point);
309 else if (INTP (value))
311 code_point = XINT (value);
312 if (XCHARSET_GRAPHIC (ccs) == 1)
314 code_point &= 0x7F7F7F7F;
315 value = make_int (code_point);
319 signal_simple_error ("Invalid value for coded-charset", value);
323 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
326 decoding_table_remove_char (ccs, XINT (cpos));
329 decoding_table_put_char (ccs, code_point, character);
335 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
337 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
338 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
340 if (VECTORP (decoding_table))
342 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
346 decoding_table_remove_char (ccs, XINT (cpos));
349 if (CHAR_TABLEP (encoding_table))
351 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
359 int leading_code_private_11;
362 Lisp_Object Qcharsetp;
364 /* Qdoc_string, Qdimension, Qchars defined in general.c */
365 Lisp_Object Qregistry, Qfinal, Qgraphic;
366 Lisp_Object Qdirection;
367 Lisp_Object Qreverse_direction_charset;
368 Lisp_Object Qleading_byte;
369 Lisp_Object Qshort_name, Qlong_name;
372 Lisp_Object Qto_iso_ir;
373 Lisp_Object Qpartial;
374 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
375 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
392 /* Qrep_jis_x0208_1978, */
410 Qvietnamese_viscii_lower,
411 Qvietnamese_viscii_upper,
421 Lisp_Object Ql2r, Qr2l;
423 Lisp_Object Vcharset_hash_table;
425 /* Composite characters are characters constructed by overstriking two
426 or more regular characters.
428 1) The old Mule implementation involves storing composite characters
429 in a buffer as a tag followed by all of the actual characters
430 used to make up the composite character. I think this is a bad
431 idea; it greatly complicates code that wants to handle strings
432 one character at a time because it has to deal with the possibility
433 of great big ungainly characters. It's much more reasonable to
434 simply store an index into a table of composite characters.
436 2) The current implementation only allows for 16,384 separate
437 composite characters over the lifetime of the XEmacs process.
438 This could become a potential problem if the user
439 edited lots of different files that use composite characters.
440 Due to FSF bogosity, increasing the number of allowable
441 composite characters under Mule would decrease the number
442 of possible faces that can exist. Mule already has shrunk
443 this to 2048, and further shrinkage would become uncomfortable.
444 No such problems exist in XEmacs.
446 Composite characters could be represented as 0x80 C1 C2 C3,
447 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
448 for slightly under 2^20 (one million) composite characters
449 over the XEmacs process lifetime, and you only need to
450 increase the size of a Mule character from 19 to 21 bits.
451 Or you could use 0x80 C1 C2 C3 C4, allowing for about
452 85 million (slightly over 2^26) composite characters. */
455 /************************************************************************/
456 /* Basic Emchar functions */
457 /************************************************************************/
459 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
460 string in STR. Returns the number of bytes stored.
461 Do not call this directly. Use the macro set_charptr_emchar() instead.
465 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
480 else if ( c <= 0x7ff )
482 *p++ = (c >> 6) | 0xc0;
483 *p++ = (c & 0x3f) | 0x80;
485 else if ( c <= 0xffff )
487 *p++ = (c >> 12) | 0xe0;
488 *p++ = ((c >> 6) & 0x3f) | 0x80;
489 *p++ = (c & 0x3f) | 0x80;
491 else if ( c <= 0x1fffff )
493 *p++ = (c >> 18) | 0xf0;
494 *p++ = ((c >> 12) & 0x3f) | 0x80;
495 *p++ = ((c >> 6) & 0x3f) | 0x80;
496 *p++ = (c & 0x3f) | 0x80;
498 else if ( c <= 0x3ffffff )
500 *p++ = (c >> 24) | 0xf8;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
508 *p++ = (c >> 30) | 0xfc;
509 *p++ = ((c >> 24) & 0x3f) | 0x80;
510 *p++ = ((c >> 18) & 0x3f) | 0x80;
511 *p++ = ((c >> 12) & 0x3f) | 0x80;
512 *p++ = ((c >> 6) & 0x3f) | 0x80;
513 *p++ = (c & 0x3f) | 0x80;
516 BREAKUP_CHAR (c, charset, c1, c2);
517 lb = CHAR_LEADING_BYTE (c);
518 if (LEADING_BYTE_PRIVATE_P (lb))
519 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
521 if (EQ (charset, Vcharset_control_1))
530 /* Return the first character from a Mule-encoded string in STR,
531 assuming it's non-ASCII. Do not call this directly.
532 Use the macro charptr_emchar() instead. */
535 non_ascii_charptr_emchar (const Bufbyte *str)
548 else if ( b >= 0xf8 )
553 else if ( b >= 0xf0 )
558 else if ( b >= 0xe0 )
563 else if ( b >= 0xc0 )
573 for( ; len > 0; len-- )
576 ch = ( ch << 6 ) | ( b & 0x3f );
580 Bufbyte i0 = *str, i1, i2 = 0;
583 if (i0 == LEADING_BYTE_CONTROL_1)
584 return (Emchar) (*++str - 0x20);
586 if (LEADING_BYTE_PREFIX_P (i0))
591 charset = CHARSET_BY_LEADING_BYTE (i0);
592 if (XCHARSET_DIMENSION (charset) == 2)
595 return MAKE_CHAR (charset, i1, i2);
599 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
600 Do not call this directly. Use the macro valid_char_p() instead. */
604 non_ascii_valid_char_p (Emchar ch)
608 /* Must have only lowest 19 bits set */
612 f1 = CHAR_FIELD1 (ch);
613 f2 = CHAR_FIELD2 (ch);
614 f3 = CHAR_FIELD3 (ch);
620 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
621 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
622 f2 > MAX_CHAR_FIELD2_PRIVATE)
627 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
628 f2 <= MAX_CHAR_FIELD2_PRIVATE))
632 NOTE: This takes advantage of the fact that
633 FIELD2_TO_OFFICIAL_LEADING_BYTE and
634 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
636 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
637 if (EQ (charset, Qnil))
639 return (XCHARSET_CHARS (charset) == 96);
645 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
646 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
647 f1 > MAX_CHAR_FIELD1_PRIVATE)
649 if (f2 < 0x20 || f3 < 0x20)
652 #ifdef ENABLE_COMPOSITE_CHARS
653 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
655 if (UNBOUNDP (Fgethash (make_int (ch),
656 Vcomposite_char_char2string_hash_table,
661 #endif /* ENABLE_COMPOSITE_CHARS */
663 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
664 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
667 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
669 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
672 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
674 if (EQ (charset, Qnil))
676 return (XCHARSET_CHARS (charset) == 96);
682 /************************************************************************/
683 /* Basic string functions */
684 /************************************************************************/
686 /* Copy the character pointed to by SRC into DST. Do not call this
687 directly. Use the macro charptr_copy_char() instead.
688 Return the number of bytes copied. */
691 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
693 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
695 for (i = bytes; i; i--, dst++, src++)
701 /************************************************************************/
702 /* streams of Emchars */
703 /************************************************************************/
705 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
706 The functions below are not meant to be called directly; use
707 the macros in insdel.h. */
710 Lstream_get_emchar_1 (Lstream *stream, int ch)
712 Bufbyte str[MAX_EMCHAR_LEN];
713 Bufbyte *strptr = str;
716 str[0] = (Bufbyte) ch;
718 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
720 int c = Lstream_getc (stream);
721 bufpos_checking_assert (c >= 0);
722 *++strptr = (Bufbyte) c;
724 return charptr_emchar (str);
728 Lstream_fput_emchar (Lstream *stream, Emchar ch)
730 Bufbyte str[MAX_EMCHAR_LEN];
731 Bytecount len = set_charptr_emchar (str, ch);
732 return Lstream_write (stream, str, len);
736 Lstream_funget_emchar (Lstream *stream, Emchar ch)
738 Bufbyte str[MAX_EMCHAR_LEN];
739 Bytecount len = set_charptr_emchar (str, ch);
740 Lstream_unread (stream, str, len);
744 /************************************************************************/
746 /************************************************************************/
749 mark_charset (Lisp_Object obj)
751 Lisp_Charset *cs = XCHARSET (obj);
753 mark_object (cs->short_name);
754 mark_object (cs->long_name);
755 mark_object (cs->doc_string);
756 mark_object (cs->registry);
757 mark_object (cs->ccl_program);
759 mark_object (cs->decoding_table);
760 mark_object (cs->mother);
766 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
768 Lisp_Charset *cs = XCHARSET (obj);
772 error ("printing unreadable object #<charset %s 0x%x>",
773 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
776 write_c_string ("#<charset ", printcharfun);
777 print_internal (CHARSET_NAME (cs), printcharfun, 0);
778 write_c_string (" ", printcharfun);
779 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
780 write_c_string (" ", printcharfun);
781 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
782 write_c_string (" ", printcharfun);
783 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
784 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
786 CHARSET_DIMENSION (cs),
787 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
788 CHARSET_COLUMNS (cs),
789 CHARSET_GRAPHIC (cs),
791 write_c_string (buf, printcharfun);
792 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
793 sprintf (buf, " 0x%x>", cs->header.uid);
794 write_c_string (buf, printcharfun);
797 static const struct lrecord_description charset_description[] = {
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
802 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
803 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
806 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
807 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
812 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
813 mark_charset, print_charset, 0, 0, 0,
817 /* Make a new charset. */
818 /* #### SJT Should generic properties be allowed? */
820 make_charset (Charset_ID id, Lisp_Object name,
821 unsigned short chars, unsigned char dimension,
822 unsigned char columns, unsigned char graphic,
823 Bufbyte final, unsigned char direction, Lisp_Object short_name,
824 Lisp_Object long_name, Lisp_Object doc,
827 Lisp_Object decoding_table,
828 Emchar min_code, Emchar max_code,
829 Emchar code_offset, unsigned char byte_offset,
830 Lisp_Object mother, unsigned char conversion,
834 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
838 XSETCHARSET (obj, cs);
840 CHARSET_ID (cs) = id;
841 CHARSET_NAME (cs) = name;
842 CHARSET_SHORT_NAME (cs) = short_name;
843 CHARSET_LONG_NAME (cs) = long_name;
844 CHARSET_CHARS (cs) = chars;
845 CHARSET_DIMENSION (cs) = dimension;
846 CHARSET_DIRECTION (cs) = direction;
847 CHARSET_COLUMNS (cs) = columns;
848 CHARSET_GRAPHIC (cs) = graphic;
849 CHARSET_FINAL (cs) = final;
850 CHARSET_DOC_STRING (cs) = doc;
851 CHARSET_REGISTRY (cs) = reg;
852 CHARSET_CCL_PROGRAM (cs) = Qnil;
853 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
855 CHARSET_ISO_IR (cs) = iso_ir;
856 CHARSET_DECODING_TABLE(cs) = Qunbound;
857 CHARSET_MIN_CODE (cs) = min_code;
858 CHARSET_MAX_CODE (cs) = max_code;
859 CHARSET_CODE_OFFSET (cs) = code_offset;
860 CHARSET_BYTE_OFFSET (cs) = byte_offset;
861 CHARSET_MOTHER (cs) = mother;
862 CHARSET_CONVERSION (cs) = conversion;
866 if (id == LEADING_BYTE_ASCII)
867 CHARSET_REP_BYTES (cs) = 1;
869 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
871 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
876 /* some charsets do not have final characters. This includes
877 ASCII, Control-1, Composite, and the two faux private
879 unsigned char iso2022_type
880 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
882 if ( ( !partial ) && ( code_offset == 0 ) )
884 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
885 chlook->charset_by_attributes[iso2022_type][final] = obj;
889 (chlook->charset_by_attributes[iso2022_type][final][direction]));
890 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
894 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
895 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
897 /* Some charsets are "faux" and don't have names or really exist at
898 all except in the leading-byte table. */
900 Fputhash (name, obj, Vcharset_hash_table);
905 get_unallocated_leading_byte (int dimension)
910 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
913 lb = chlook->next_allocated_leading_byte++;
917 if (chlook->next_allocated_1_byte_leading_byte >
918 MAX_LEADING_BYTE_PRIVATE_1)
921 lb = chlook->next_allocated_1_byte_leading_byte++;
925 /* awfully fragile, but correct */
926 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
927 if (chlook->next_allocated_2_byte_leading_byte == 0)
929 if (chlook->next_allocated_2_byte_leading_byte >
930 MAX_LEADING_BYTE_PRIVATE_2)
934 lb = chlook->next_allocated_2_byte_leading_byte++;
940 ("No more character sets free for this dimension",
941 make_int (dimension));
947 /* Number of Big5 characters which have the same code in 1st byte. */
949 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
952 decode_ccs_conversion (int conv_type, int code_point)
954 if ( conv_type == CONVERSION_IDENTICAL )
958 if ( conv_type == CONVERSION_94x60 )
960 int row = code_point >> 8;
961 int cell = code_point & 255;
965 else if (row < 16 + 32 + 30)
966 return (row - (16 + 32)) * 94 + cell - 33;
967 else if (row < 18 + 32 + 30)
969 else if (row < 18 + 32 + 60)
970 return (row - (18 + 32)) * 94 + cell - 33;
972 else if ( conv_type == CONVERSION_94x94x60 )
974 int plane = code_point >> 16;
975 int row = (code_point >> 8) & 255;
976 int cell = code_point & 255;
980 else if (row < 16 + 32 + 30)
982 (plane - 33) * 94 * 60
983 + (row - (16 + 32)) * 94
985 else if (row < 18 + 32 + 30)
987 else if (row < 18 + 32 + 60)
989 (plane - 33) * 94 * 60
990 + (row - (18 + 32)) * 94
993 else if ( conv_type == CONVERSION_BIG5_1 )
996 = (((code_point >> 8) & 0x7F) - 33) * 94
997 + (( code_point & 0x7F) - 33);
998 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
999 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1001 b2 += b2 < 0x3F ? 0x40 : 0x62;
1002 return (b1 << 8) | b2;
1004 else if ( conv_type == CONVERSION_BIG5_2 )
1007 = (((code_point >> 8) & 0x7F) - 33) * 94
1008 + (( code_point & 0x7F) - 33)
1009 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1010 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1011 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1013 b2 += b2 < 0x3F ? 0x40 : 0x62;
1014 return (b1 << 8) | b2;
1020 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1022 int dim = XCHARSET_DIMENSION (ccs);
1023 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1024 Emchar char_id = -1;
1031 = get_ccs_octet_table (decoding_table, ccs,
1032 (code_point >> (dim * 8)) & 255);
1034 if (CHARP (decoding_table))
1035 return XCHAR (decoding_table);
1037 if (EQ (decoding_table, Qunloaded))
1039 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1041 #endif /* HAVE_CHISE */
1044 else if ( !without_inheritance
1045 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1048 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1052 code += XCHARSET_CODE_OFFSET(ccs);
1053 if ( EQ (mother, Vcharset_ucs) )
1054 return DECODE_CHAR (mother, code, without_inheritance);
1056 return decode_defined_char (mother, code,
1057 without_inheritance);
1064 decode_builtin_char (Lisp_Object charset, int code_point)
1066 Lisp_Object mother = XCHARSET_MOTHER (charset);
1069 if ( XCHARSET_MAX_CODE (charset) > 0 )
1071 if ( CHARSETP (mother) )
1074 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1079 decode_builtin_char (mother,
1080 code + XCHARSET_CODE_OFFSET(charset));
1087 = (XCHARSET_DIMENSION (charset) == 1
1089 code_point - XCHARSET_BYTE_OFFSET (charset)
1091 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1092 * XCHARSET_CHARS (charset)
1093 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1094 + XCHARSET_CODE_OFFSET (charset);
1095 if ((cid < XCHARSET_MIN_CODE (charset))
1096 || (XCHARSET_MAX_CODE (charset) < cid))
1101 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1103 if (XCHARSET_DIMENSION (charset) == 1)
1105 switch (XCHARSET_CHARS (charset))
1109 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1112 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1120 switch (XCHARSET_CHARS (charset))
1123 return MIN_CHAR_94x94
1124 + (final - '0') * 94 * 94
1125 + (((code_point >> 8) & 0x7F) - 33) * 94
1126 + ((code_point & 0x7F) - 33);
1128 return MIN_CHAR_96x96
1129 + (final - '0') * 96 * 96
1130 + (((code_point >> 8) & 0x7F) - 32) * 96
1131 + ((code_point & 0x7F) - 32);
1143 charset_code_point (Lisp_Object charset, Emchar ch, int accepted_mode)
1147 if ( accepted_mode >= 0 )
1149 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1151 if ( CHAR_TABLEP (encoding_table)
1152 && !UNBOUNDP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1160 Lisp_Object mother = XCHARSET_MOTHER (charset);
1161 int min = XCHARSET_MIN_CODE (charset);
1162 int max = XCHARSET_MAX_CODE (charset);
1165 if ( CHARSETP (mother) && ( accepted_mode >= 0)
1166 && ( XCHARSET_FINAL (charset) >= '0' )
1168 code = charset_code_point (mother, ch, CHAR_DEFINED_ONLY);
1169 else if ( CHARSETP (mother)
1170 && ( XCHARSET_FINAL (charset) < '0' )
1172 code = charset_code_point (mother, ch, accepted_mode);
1173 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1175 else if ( ((max == 0) && CHARSETP (mother)
1176 && (XCHARSET_FINAL (charset) == 0))
1177 || ((min <= ch) && (ch <= max)) )
1180 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1181 || ((min <= code) && (code <= max)) )
1183 int d = code - XCHARSET_CODE_OFFSET (charset);
1185 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1187 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1189 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1191 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1194 int cell = d % 94 + 33;
1200 return (row << 8) | cell;
1202 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1204 int B1 = d >> 8, B2 = d & 0xFF;
1206 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1207 - (B2 < 0x7F ? 0x40 : 0x62);
1211 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1214 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1216 int B1 = d >> 8, B2 = d & 0xFF;
1218 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1219 - (B2 < 0x7F ? 0x40 : 0x62);
1223 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1224 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1227 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1228 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1229 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1230 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1231 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1233 int plane = d / (94 * 60) + 33;
1234 int row = (d % (94 * 60)) / 94;
1235 int cell = d % 94 + 33;
1241 return (plane << 16) | (row << 8) | cell;
1243 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1245 ( (d / (94 * 94) + 33) << 16)
1246 | ((d / 94 % 94 + 33) << 8)
1248 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1250 ( (d / (96 * 96) + 32) << 16)
1251 | ((d / 96 % 96 + 32) << 8)
1253 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1255 ( (d / (94 * 94 * 94) + 33) << 24)
1256 | ((d / (94 * 94) % 94 + 33) << 16)
1257 | ((d / 94 % 94 + 33) << 8)
1259 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1261 ( (d / (96 * 96 * 96) + 32) << 24)
1262 | ((d / (96 * 96) % 96 + 32) << 16)
1263 | ((d / 96 % 96 + 32) << 8)
1267 printf ("Unknown CCS-conversion %d is specified!",
1268 XCHARSET_CONVERSION (charset));
1272 else if ( accepted_mode == CHAR_DEFINED_ONLY )
1274 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1275 ( XCHARSET_MIN_CODE (charset) == 0 )
1277 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1278 (XCHARSET_CODE_OFFSET (charset)
1279 == XCHARSET_MIN_CODE (charset))
1284 if (XCHARSET_DIMENSION (charset) == 1)
1286 if (XCHARSET_CHARS (charset) == 94)
1288 if (((d = ch - (MIN_CHAR_94
1289 + (XCHARSET_FINAL (charset) - '0') * 94))
1294 else if (XCHARSET_CHARS (charset) == 96)
1296 if (((d = ch - (MIN_CHAR_96
1297 + (XCHARSET_FINAL (charset) - '0') * 96))
1305 else if (XCHARSET_DIMENSION (charset) == 2)
1307 if (XCHARSET_CHARS (charset) == 94)
1309 if (((d = ch - (MIN_CHAR_94x94
1311 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1314 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1316 else if (XCHARSET_CHARS (charset) == 96)
1318 if (((d = ch - (MIN_CHAR_96x96
1320 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1323 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1334 encode_char_2 (Emchar ch, Lisp_Object* charset)
1336 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
1339 while (!NILP (charsets))
1341 *charset = Ffind_charset (Fcar (charsets));
1342 if ( !NILP (*charset)
1343 && (XCHARSET_DIMENSION (*charset) <= 2) )
1345 code_point = charset_code_point (*charset, ch, 0);
1346 if (code_point >= 0)
1349 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1350 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1352 code_point = encode_char_2_search_children (ch, charset);
1353 if (code_point >= 0)
1357 charsets = Fcdr (charsets);
1360 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1361 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1363 charsets = Vdefault_coded_charset_priority_list;
1364 while (!NILP (charsets))
1366 *charset = Ffind_charset (Fcar (charsets));
1367 if ( !NILP (*charset)
1368 && (XCHARSET_DIMENSION (*charset) <= 2) )
1370 code_point = encode_char_2_search_children (ch, charset);
1371 if (code_point >= 0)
1374 charsets = Fcdr (charsets);
1378 /* otherwise --- maybe for bootstrap */
1379 return encode_builtin_char_1 (ch, charset);
1383 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1385 if (c <= MAX_CHAR_BASIC_LATIN)
1387 *charset = Vcharset_ascii;
1392 *charset = Vcharset_control_1;
1397 *charset = Vcharset_latin_iso8859_1;
1401 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1403 *charset = Vcharset_hebrew_iso8859_8;
1404 return c - MIN_CHAR_HEBREW + 0x20;
1407 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1409 *charset = Vcharset_thai_tis620;
1410 return c - MIN_CHAR_THAI + 0x20;
1413 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1414 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1416 return list2 (Vcharset_katakana_jisx0201,
1417 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1420 else if (c <= MAX_CHAR_BMP)
1422 *charset = Vcharset_ucs_bmp;
1425 else if (c <= MAX_CHAR_SMP)
1427 *charset = Vcharset_ucs_smp;
1428 return c - MIN_CHAR_SMP;
1430 else if (c <= MAX_CHAR_SIP)
1432 *charset = Vcharset_ucs_sip;
1433 return c - MIN_CHAR_SIP;
1435 else if (c < MIN_CHAR_94)
1437 *charset = Vcharset_ucs;
1440 else if (c <= MAX_CHAR_94)
1442 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1443 ((c - MIN_CHAR_94) / 94) + '0',
1444 CHARSET_LEFT_TO_RIGHT);
1445 if (!NILP (*charset))
1446 return ((c - MIN_CHAR_94) % 94) + 33;
1449 *charset = Vcharset_ucs;
1453 else if (c <= MAX_CHAR_96)
1455 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1456 ((c - MIN_CHAR_96) / 96) + '0',
1457 CHARSET_LEFT_TO_RIGHT);
1458 if (!NILP (*charset))
1459 return ((c - MIN_CHAR_96) % 96) + 32;
1462 *charset = Vcharset_ucs;
1466 else if (c <= MAX_CHAR_94x94)
1469 = CHARSET_BY_ATTRIBUTES (94, 2,
1470 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1471 CHARSET_LEFT_TO_RIGHT);
1472 if (!NILP (*charset))
1473 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1474 | (((c - MIN_CHAR_94x94) % 94) + 33);
1477 *charset = Vcharset_ucs;
1481 else if (c <= MAX_CHAR_96x96)
1484 = CHARSET_BY_ATTRIBUTES (96, 2,
1485 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1486 CHARSET_LEFT_TO_RIGHT);
1487 if (!NILP (*charset))
1488 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1489 | (((c - MIN_CHAR_96x96) % 96) + 32);
1492 *charset = Vcharset_ucs;
1498 *charset = Vcharset_ucs;
1503 Lisp_Object Vdefault_coded_charset_priority_list;
1504 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1505 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1509 /************************************************************************/
1510 /* Basic charset Lisp functions */
1511 /************************************************************************/
1513 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1514 Return non-nil if OBJECT is a charset.
1518 return CHARSETP (object) ? Qt : Qnil;
1521 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1522 Retrieve the charset of the given name.
1523 If CHARSET-OR-NAME is a charset object, it is simply returned.
1524 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1525 nil is returned. Otherwise the associated charset object is returned.
1529 if (CHARSETP (charset_or_name))
1530 return charset_or_name;
1532 CHECK_SYMBOL (charset_or_name);
1533 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1536 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1537 Retrieve the charset of the given name.
1538 Same as `find-charset' except an error is signalled if there is no such
1539 charset instead of returning nil.
1543 Lisp_Object charset = Ffind_charset (name);
1546 signal_simple_error ("No such charset", name);
1550 /* We store the charsets in hash tables with the names as the key and the
1551 actual charset object as the value. Occasionally we need to use them
1552 in a list format. These routines provide us with that. */
1553 struct charset_list_closure
1555 Lisp_Object *charset_list;
1559 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1560 void *charset_list_closure)
1562 /* This function can GC */
1563 struct charset_list_closure *chcl =
1564 (struct charset_list_closure*) charset_list_closure;
1565 Lisp_Object *charset_list = chcl->charset_list;
1567 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1571 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1572 Return a list of the names of all defined charsets.
1576 Lisp_Object charset_list = Qnil;
1577 struct gcpro gcpro1;
1578 struct charset_list_closure charset_list_closure;
1580 GCPRO1 (charset_list);
1581 charset_list_closure.charset_list = &charset_list;
1582 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1583 &charset_list_closure);
1586 return charset_list;
1589 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1590 Return the name of charset CHARSET.
1594 return XCHARSET_NAME (Fget_charset (charset));
1597 /* #### SJT Should generic properties be allowed? */
1598 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1599 Define a new character set.
1600 This function is for use with Mule support.
1601 NAME is a symbol, the name by which the character set is normally referred.
1602 DOC-STRING is a string describing the character set.
1603 PROPS is a property list, describing the specific nature of the
1604 character set. Recognized properties are:
1606 'short-name Short version of the charset name (ex: Latin-1)
1607 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1608 'registry A regular expression matching the font registry field for
1610 'dimension Number of octets used to index a character in this charset.
1611 Either 1 or 2. Defaults to 1.
1612 If UTF-2000 feature is enabled, 3 or 4 are also available.
1613 'columns Number of columns used to display a character in this charset.
1614 Only used in TTY mode. (Under X, the actual width of a
1615 character can be derived from the font used to display the
1616 characters.) If unspecified, defaults to the dimension
1617 (this is almost always the correct value).
1618 'chars Number of characters in each dimension (94 or 96).
1619 Defaults to 94. Note that if the dimension is 2, the
1620 character set thus described is 94x94 or 96x96.
1621 If UTF-2000 feature is enabled, 128 or 256 are also available.
1622 'final Final byte of ISO 2022 escape sequence. Must be
1623 supplied. Each combination of (DIMENSION, CHARS) defines a
1624 separate namespace for final bytes. Note that ISO
1625 2022 restricts the final byte to the range
1626 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1627 dimension == 2. Note also that final bytes in the range
1628 0x30 - 0x3F are reserved for user-defined (not official)
1630 'graphic 0 (use left half of font on output) or 1 (use right half
1631 of font on output). Defaults to 0. For example, for
1632 a font whose registry is ISO8859-1, the left half
1633 (octets 0x20 - 0x7F) is the `ascii' character set, while
1634 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1635 character set. With 'graphic set to 0, the octets
1636 will have their high bit cleared; with it set to 1,
1637 the octets will have their high bit set.
1638 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1640 'ccl-program A compiled CCL program used to convert a character in
1641 this charset into an index into the font. This is in
1642 addition to the 'graphic property. The CCL program
1643 is passed the octets of the character, with the high
1644 bit cleared and set depending upon whether the value
1645 of the 'graphic property is 0 or 1.
1646 'iso-ir ISO-IR number (for representative coded-charset).
1647 '=>iso-ir [UTF-2000 only] Corresponding ISO-IR number.
1648 'mother [UTF-2000 only] Base coded-charset.
1649 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1650 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1651 'code-offset [UTF-2000 only] Offset for a code-point of a base
1653 'conversion [UTF-2000 only] Conversion for a code-point of a base
1654 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1655 'partial [UTF-2000 only] If t, specify as a partial coded-charset.
1657 (name, doc_string, props))
1659 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1661 int direction = CHARSET_LEFT_TO_RIGHT;
1662 Lisp_Object registry = Qnil;
1663 Lisp_Object charset;
1664 Lisp_Object ccl_program = Qnil;
1665 Lisp_Object short_name = Qnil, long_name = Qnil;
1666 Lisp_Object mother = Qnil;
1668 int min_code = 0, max_code = 0, code_offset = 0;
1669 int byte_offset = -1;
1672 CHECK_SYMBOL (name);
1673 if (!NILP (doc_string))
1674 CHECK_STRING (doc_string);
1676 charset = Ffind_charset (name);
1677 if (!NILP (charset))
1678 signal_simple_error ("Cannot redefine existing charset", name);
1681 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1683 if (EQ (keyword, Qshort_name))
1685 CHECK_STRING (value);
1689 else if (EQ (keyword, Qlong_name))
1691 CHECK_STRING (value);
1695 else if (EQ (keyword, Qiso_ir))
1699 iso_ir = XINT (value);
1705 else if (EQ (keyword, Qto_iso_ir))
1708 iso_ir = XINT (value);
1712 else if (EQ (keyword, Qdimension))
1715 dimension = XINT (value);
1716 if (dimension < 1 ||
1723 signal_simple_error ("Invalid value for 'dimension", value);
1726 else if (EQ (keyword, Qchars))
1729 chars = XINT (value);
1730 if (chars != 94 && chars != 96
1732 && chars != 128 && chars != 256
1735 signal_simple_error ("Invalid value for 'chars", value);
1738 else if (EQ (keyword, Qcolumns))
1741 columns = XINT (value);
1742 if (columns != 1 && columns != 2)
1743 signal_simple_error ("Invalid value for 'columns", value);
1746 else if (EQ (keyword, Qgraphic))
1749 graphic = XINT (value);
1757 signal_simple_error ("Invalid value for 'graphic", value);
1760 else if (EQ (keyword, Qregistry))
1762 CHECK_STRING (value);
1766 else if (EQ (keyword, Qdirection))
1768 if (EQ (value, Ql2r))
1769 direction = CHARSET_LEFT_TO_RIGHT;
1770 else if (EQ (value, Qr2l))
1771 direction = CHARSET_RIGHT_TO_LEFT;
1773 signal_simple_error ("Invalid value for 'direction", value);
1776 else if (EQ (keyword, Qfinal))
1778 CHECK_CHAR_COERCE_INT (value);
1779 final = XCHAR (value);
1780 if (final < '0' || final > '~')
1781 signal_simple_error ("Invalid value for 'final", value);
1785 else if (EQ (keyword, Qpartial))
1787 partial = !NILP (value);
1790 else if (EQ (keyword, Qmother))
1792 mother = Fget_charset (value);
1795 else if (EQ (keyword, Qmin_code))
1798 min_code = XUINT (value);
1801 else if (EQ (keyword, Qmax_code))
1804 max_code = XUINT (value);
1807 else if (EQ (keyword, Qcode_offset))
1810 code_offset = XUINT (value);
1813 else if (EQ (keyword, Qconversion))
1815 if (EQ (value, Q94x60))
1816 conversion = CONVERSION_94x60;
1817 else if (EQ (value, Q94x94x60))
1818 conversion = CONVERSION_94x94x60;
1819 else if (EQ (value, Qbig5_1))
1820 conversion = CONVERSION_BIG5_1;
1821 else if (EQ (value, Qbig5_2))
1822 conversion = CONVERSION_BIG5_2;
1824 signal_simple_error ("Unrecognized conversion", value);
1828 else if (EQ (keyword, Qccl_program))
1830 struct ccl_program test_ccl;
1832 if (setup_ccl_program (&test_ccl, value) < 0)
1833 signal_simple_error ("Invalid value for 'ccl-program", value);
1834 ccl_program = value;
1838 signal_simple_error ("Unrecognized property", keyword);
1844 error ("'final must be specified");
1846 if (dimension == 2 && final > 0x5F)
1848 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1851 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1852 CHARSET_LEFT_TO_RIGHT)) ||
1853 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1854 CHARSET_RIGHT_TO_LEFT)))
1856 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1859 id = get_unallocated_leading_byte (dimension);
1861 if (NILP (doc_string))
1862 doc_string = build_string ("");
1864 if (NILP (registry))
1865 registry = build_string ("");
1867 if (NILP (short_name))
1868 XSETSTRING (short_name, XSYMBOL (name)->name);
1870 if (NILP (long_name))
1871 long_name = doc_string;
1874 columns = dimension;
1876 if (byte_offset < 0)
1880 else if (chars == 96)
1886 if ( (conversion == 0) && NILP (mother) && (min_code > 0) )
1894 conversion = CONVERSION_94;
1897 conversion = CONVERSION_94x94;
1900 conversion = CONVERSION_94x94x94;
1903 conversion = CONVERSION_94x94x94x94;
1911 conversion = CONVERSION_96;
1914 conversion = CONVERSION_96x96;
1917 conversion = CONVERSION_96x96x96;
1920 conversion = CONVERSION_96x96x96x96;
1927 charset = make_charset (id, name, chars, dimension, columns, graphic,
1928 final, direction, short_name, long_name,
1929 doc_string, registry, iso_ir,
1930 Qnil, min_code, max_code, code_offset, byte_offset,
1931 mother, conversion, partial);
1932 if (!NILP (ccl_program))
1933 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1937 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1939 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1940 NEW-NAME is the name of the new charset. Return the new charset.
1942 (charset, new_name))
1944 Lisp_Object new_charset = Qnil;
1945 int id, chars, dimension, columns, graphic, final;
1947 Lisp_Object registry, doc_string, short_name, long_name;
1950 charset = Fget_charset (charset);
1951 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1952 signal_simple_error ("Charset already has reverse-direction charset",
1955 CHECK_SYMBOL (new_name);
1956 if (!NILP (Ffind_charset (new_name)))
1957 signal_simple_error ("Cannot redefine existing charset", new_name);
1959 cs = XCHARSET (charset);
1961 chars = CHARSET_CHARS (cs);
1962 dimension = CHARSET_DIMENSION (cs);
1963 columns = CHARSET_COLUMNS (cs);
1964 id = get_unallocated_leading_byte (dimension);
1966 graphic = CHARSET_GRAPHIC (cs);
1967 final = CHARSET_FINAL (cs);
1968 direction = CHARSET_RIGHT_TO_LEFT;
1969 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1970 direction = CHARSET_LEFT_TO_RIGHT;
1971 doc_string = CHARSET_DOC_STRING (cs);
1972 short_name = CHARSET_SHORT_NAME (cs);
1973 long_name = CHARSET_LONG_NAME (cs);
1974 registry = CHARSET_REGISTRY (cs);
1976 new_charset = make_charset (id, new_name, chars, dimension, columns,
1977 graphic, final, direction, short_name, long_name,
1978 doc_string, registry,
1981 CHARSET_DECODING_TABLE(cs),
1982 CHARSET_MIN_CODE(cs),
1983 CHARSET_MAX_CODE(cs),
1984 CHARSET_CODE_OFFSET(cs),
1985 CHARSET_BYTE_OFFSET(cs),
1987 CHARSET_CONVERSION (cs)
1989 Qnil, 0, 0, 0, 0, Qnil, 0
1993 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1994 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1999 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2000 Define symbol ALIAS as an alias for CHARSET.
2004 CHECK_SYMBOL (alias);
2005 charset = Fget_charset (charset);
2006 return Fputhash (alias, charset, Vcharset_hash_table);
2009 /* #### Reverse direction charsets not yet implemented. */
2011 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2013 Return the reverse-direction charset parallel to CHARSET, if any.
2014 This is the charset with the same properties (in particular, the same
2015 dimension, number of characters per dimension, and final byte) as
2016 CHARSET but whose characters are displayed in the opposite direction.
2020 charset = Fget_charset (charset);
2021 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2025 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2026 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2027 If DIRECTION is omitted, both directions will be checked (left-to-right
2028 will be returned if character sets exist for both directions).
2030 (dimension, chars, final, direction))
2032 int dm, ch, fi, di = -1;
2033 Lisp_Object obj = Qnil;
2035 CHECK_INT (dimension);
2036 dm = XINT (dimension);
2037 if (dm < 1 || dm > 2)
2038 signal_simple_error ("Invalid value for DIMENSION", dimension);
2042 if (ch != 94 && ch != 96)
2043 signal_simple_error ("Invalid value for CHARS", chars);
2045 CHECK_CHAR_COERCE_INT (final);
2047 if (fi < '0' || fi > '~')
2048 signal_simple_error ("Invalid value for FINAL", final);
2050 if (EQ (direction, Ql2r))
2051 di = CHARSET_LEFT_TO_RIGHT;
2052 else if (EQ (direction, Qr2l))
2053 di = CHARSET_RIGHT_TO_LEFT;
2054 else if (!NILP (direction))
2055 signal_simple_error ("Invalid value for DIRECTION", direction);
2057 if (dm == 2 && fi > 0x5F)
2059 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2063 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2065 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2068 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2071 return XCHARSET_NAME (obj);
2075 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2076 Return short name of CHARSET.
2080 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2083 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2084 Return long name of CHARSET.
2088 return XCHARSET_LONG_NAME (Fget_charset (charset));
2091 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2092 Return description of CHARSET.
2096 return XCHARSET_DOC_STRING (Fget_charset (charset));
2099 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2100 Return dimension of CHARSET.
2104 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2107 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2108 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2109 Recognized properties are those listed in `make-charset', as well as
2110 'name and 'doc-string.
2116 charset = Fget_charset (charset);
2117 cs = XCHARSET (charset);
2119 CHECK_SYMBOL (prop);
2120 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2121 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2122 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2123 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2124 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2125 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2126 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2127 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2128 Qnil : make_char (CHARSET_FINAL (cs));
2129 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2130 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2131 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2132 if (EQ (prop, Qdirection))
2133 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2134 if (EQ (prop, Qreverse_direction_charset))
2136 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2137 /* #### Is this translation OK? If so, error checking sufficient? */
2138 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2141 if (EQ (prop, Qiso_ir)||
2142 EQ (prop, Qto_iso_ir))
2144 if ( CHARSET_ISO_IR (cs) > 0 )
2145 return make_int (CHARSET_ISO_IR (cs));
2149 if (EQ (prop, Qmother))
2150 return CHARSET_MOTHER (cs);
2151 if (EQ (prop, Qmin_code))
2152 return make_int (CHARSET_MIN_CODE (cs));
2153 if (EQ (prop, Qmax_code))
2154 return make_int (CHARSET_MAX_CODE (cs));
2156 signal_simple_error ("Unrecognized charset property name", prop);
2157 return Qnil; /* not reached */
2160 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2161 Return charset identification number of CHARSET.
2165 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2168 /* #### We need to figure out which properties we really want to
2171 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2172 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2174 (charset, ccl_program))
2176 struct ccl_program test_ccl;
2178 charset = Fget_charset (charset);
2179 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2180 signal_simple_error ("Invalid ccl-program", ccl_program);
2181 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2186 invalidate_charset_font_caches (Lisp_Object charset)
2188 /* Invalidate font cache entries for charset on all devices. */
2189 Lisp_Object devcons, concons, hash_table;
2190 DEVICE_LOOP_NO_BREAK (devcons, concons)
2192 struct device *d = XDEVICE (XCAR (devcons));
2193 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2194 if (!UNBOUNDP (hash_table))
2195 Fclrhash (hash_table);
2199 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2200 Set the 'registry property of CHARSET to REGISTRY.
2202 (charset, registry))
2204 charset = Fget_charset (charset);
2205 CHECK_STRING (registry);
2206 XCHARSET_REGISTRY (charset) = registry;
2207 invalidate_charset_font_caches (charset);
2208 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2213 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2214 Return mapping-table of CHARSET.
2218 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2221 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2222 Set mapping-table of CHARSET to TABLE.
2226 struct Lisp_Charset *cs;
2230 charset = Fget_charset (charset);
2231 cs = XCHARSET (charset);
2235 CHARSET_DECODING_TABLE(cs) = Qnil;
2238 else if (VECTORP (table))
2240 int ccs_len = CHARSET_BYTE_SIZE (cs);
2241 int ret = decoding_table_check_elements (table,
2242 CHARSET_DIMENSION (cs),
2247 signal_simple_error ("Too big table", table);
2249 signal_simple_error ("Invalid element is found", table);
2251 signal_simple_error ("Something wrong", table);
2253 CHARSET_DECODING_TABLE(cs) = Qnil;
2256 signal_error (Qwrong_type_argument,
2257 list2 (build_translated_string ("vector-or-nil-p"),
2260 byte_offset = CHARSET_BYTE_OFFSET (cs);
2261 switch (CHARSET_DIMENSION (cs))
2264 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2266 Lisp_Object c = XVECTOR_DATA(table)[i];
2269 Fput_char_attribute (c, XCHARSET_NAME (charset),
2270 make_int (i + byte_offset));
2274 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2276 Lisp_Object v = XVECTOR_DATA(table)[i];
2282 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2284 Lisp_Object c = XVECTOR_DATA(v)[j];
2288 (c, XCHARSET_NAME (charset),
2289 make_int ( ( (i + byte_offset) << 8 )
2295 Fput_char_attribute (v, XCHARSET_NAME (charset),
2296 make_int (i + byte_offset));
2304 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2305 Save mapping-table of CHARSET.
2309 struct Lisp_Charset *cs;
2310 int byte_min, byte_max;
2311 #ifdef HAVE_LIBCHISE
2313 #else /* HAVE_LIBCHISE */
2315 Lisp_Object db_file;
2316 #endif /* not HAVE_LIBCHISE */
2318 charset = Fget_charset (charset);
2319 cs = XCHARSET (charset);
2321 #ifdef HAVE_LIBCHISE
2322 if ( open_chise_data_source_maybe () )
2326 = chise_ds_get_ccs (default_chise_data_source,
2327 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2330 printf ("Can't open decoding-table %s\n",
2331 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2334 #else /* HAVE_LIBCHISE */
2335 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2336 Qsystem_char_id, 1);
2337 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2338 #endif /* not HAVE_LIBCHISE */
2340 byte_min = CHARSET_BYTE_OFFSET (cs);
2341 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2342 switch (CHARSET_DIMENSION (cs))
2346 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2349 for (cell = byte_min; cell < byte_max; cell++)
2351 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2355 #ifdef HAVE_LIBCHISE
2356 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2357 #else /* HAVE_LIBCHISE */
2358 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2359 Fprin1_to_string (c, Qnil),
2361 #endif /* not HAVE_LIBCHISE */
2368 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2371 for (row = byte_min; row < byte_max; row++)
2373 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2376 for (cell = byte_min; cell < byte_max; cell++)
2378 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2382 #ifdef HAVE_LIBCHISE
2383 chise_ccs_set_decoded_char
2385 (row << 8) | cell, XCHAR (c));
2386 #else /* HAVE_LIBCHISE */
2387 Fput_database (Fprin1_to_string (make_int ((row << 8)
2390 Fprin1_to_string (c, Qnil),
2392 #endif /* not HAVE_LIBCHISE */
2400 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2403 for (plane = byte_min; plane < byte_max; plane++)
2406 = get_ccs_octet_table (table_p, charset, plane);
2409 for (row = byte_min; row < byte_max; row++)
2412 = get_ccs_octet_table (table_r, charset, row);
2415 for (cell = byte_min; cell < byte_max; cell++)
2417 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2422 #ifdef HAVE_LIBCHISE
2423 chise_ccs_set_decoded_char
2428 #else /* HAVE_LIBCHISE */
2429 Fput_database (Fprin1_to_string
2430 (make_int ((plane << 16)
2434 Fprin1_to_string (c, Qnil),
2436 #endif /* not HAVE_LIBCHISE */
2445 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2448 for (group = byte_min; group < byte_max; group++)
2451 = get_ccs_octet_table (table_g, charset, group);
2454 for (plane = byte_min; plane < byte_max; plane++)
2457 = get_ccs_octet_table (table_p, charset, plane);
2460 for (row = byte_min; row < byte_max; row++)
2463 = get_ccs_octet_table (table_r, charset, row);
2466 for (cell = byte_min; cell < byte_max; cell++)
2469 = get_ccs_octet_table (table_c, charset, cell);
2473 #ifdef HAVE_LIBCHISE
2474 chise_ccs_set_decoded_char
2480 #else /* HAVE_LIBCHISE */
2481 Fput_database (Fprin1_to_string
2482 (make_int (( group << 24)
2487 Fprin1_to_string (c, Qnil),
2489 #endif /* not HAVE_LIBCHISE */
2497 #ifdef HAVE_LIBCHISE
2498 chise_ccs_sync (dt_ccs);
2500 #else /* HAVE_LIBCHISE */
2501 return Fclose_database (db);
2502 #endif /* not HAVE_LIBCHISE */
2505 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2506 Reset mapping-table of CCS with database file.
2510 #ifdef HAVE_LIBCHISE
2511 CHISE_CCS chise_ccs;
2513 Lisp_Object db_file;
2516 ccs = Fget_charset (ccs);
2518 #ifdef HAVE_LIBCHISE
2519 if ( open_chise_data_source_maybe () )
2522 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2523 XSTRING_DATA (Fsymbol_name
2524 (XCHARSET_NAME(ccs))));
2525 if (chise_ccs == NULL)
2528 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2529 Qsystem_char_id, 0);
2533 #ifdef HAVE_LIBCHISE
2534 chise_ccs_setup_db (chise_ccs, 0) == 0
2536 !NILP (Ffile_exists_p (db_file))
2540 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2547 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2549 #ifdef HAVE_LIBCHISE
2550 CHISE_Char_ID char_id;
2552 if ( open_chise_data_source_maybe () )
2556 = chise_ds_decode_char (default_chise_data_source,
2557 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2560 decoding_table_put_char (ccs, code_point, make_char (char_id));
2562 decoding_table_put_char (ccs, code_point, Qnil);
2564 /* chise_ccst_close (dt_ccs); */
2566 #else /* HAVE_LIBCHISE */
2569 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2572 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2576 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2583 decoding_table_put_char (ccs, code_point, ret);
2584 Fclose_database (db);
2588 decoding_table_put_char (ccs, code_point, Qnil);
2589 Fclose_database (db);
2592 #endif /* not HAVE_LIBCHISE */
2595 #ifdef HAVE_LIBCHISE
2596 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2597 Save properties of CHARSET.
2601 struct Lisp_Charset *cs;
2602 CHISE_Property property;
2604 unsigned char* feature_name;
2606 ccs = Fget_charset (charset);
2607 cs = XCHARSET (ccs);
2609 if ( open_chise_data_source_maybe () )
2612 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2614 property = chise_ds_get_property (default_chise_data_source,
2616 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2617 chise_feature_set_property_value
2618 (chise_ds_get_feature (default_chise_data_source, feature_name),
2619 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2621 chise_property_sync (property);
2623 charset = XCHARSET_NAME (ccs);
2624 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2626 property = chise_ds_get_property (default_chise_data_source,
2628 chise_feature_set_property_value
2629 (chise_ds_get_feature (default_chise_data_source, feature_name),
2630 property, XSTRING_DATA (Fprin1_to_string
2631 (CHARSET_DOC_STRING (cs), Qnil)));
2632 chise_property_sync (property);
2634 property = chise_ds_get_property (default_chise_data_source, "type");
2635 chise_feature_set_property_value
2636 (chise_ds_get_feature (default_chise_data_source, feature_name),
2638 chise_property_sync (property);
2640 property = chise_ds_get_property (default_chise_data_source, "chars");
2641 chise_feature_set_property_value
2642 (chise_ds_get_feature (default_chise_data_source, feature_name),
2643 property, XSTRING_DATA (Fprin1_to_string (make_int
2644 (CHARSET_CHARS (cs)),
2646 chise_property_sync (property);
2648 property = chise_ds_get_property (default_chise_data_source, "dimension");
2649 chise_feature_set_property_value
2650 (chise_ds_get_feature (default_chise_data_source, feature_name),
2651 property, XSTRING_DATA (Fprin1_to_string (make_int
2652 (CHARSET_DIMENSION (cs)),
2654 chise_property_sync (property);
2656 if ( CHARSET_FINAL (cs) != 0 )
2658 property = chise_ds_get_property (default_chise_data_source,
2660 chise_feature_set_property_value
2661 (chise_ds_get_feature (default_chise_data_source, feature_name),
2662 property, XSTRING_DATA (Fprin1_to_string (make_int
2663 (CHARSET_FINAL (cs)),
2665 chise_property_sync (property);
2668 if ( !NILP (CHARSET_MOTHER (cs)) )
2670 Lisp_Object mother = CHARSET_MOTHER (cs);
2672 if ( CHARSETP (mother) )
2673 mother = XCHARSET_NAME (mother);
2675 property = chise_ds_get_property (default_chise_data_source,
2677 chise_feature_set_property_value
2678 (chise_ds_get_feature (default_chise_data_source, feature_name),
2679 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2680 chise_property_sync (property);
2683 if ( CHARSET_MAX_CODE (cs) != 0 )
2687 property = chise_ds_get_property (default_chise_data_source,
2689 if ( CHARSET_MIN_CODE (cs) == 0 )
2690 chise_feature_set_property_value
2691 (chise_ds_get_feature (default_chise_data_source, feature_name),
2695 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2696 chise_feature_set_property_value
2697 (chise_ds_get_feature (default_chise_data_source, feature_name),
2700 chise_property_sync (property);
2702 property = chise_ds_get_property (default_chise_data_source,
2704 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2705 chise_feature_set_property_value
2706 (chise_ds_get_feature (default_chise_data_source, feature_name),
2708 chise_property_sync (property);
2710 property = chise_ds_get_property (default_chise_data_source,
2711 "mother-code-offset");
2712 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2713 chise_feature_set_property_value
2714 (chise_ds_get_feature (default_chise_data_source, feature_name),
2718 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2719 chise_feature_set_property_value
2720 (chise_ds_get_feature (default_chise_data_source, feature_name),
2723 chise_property_sync (property);
2725 property = chise_ds_get_property (default_chise_data_source,
2726 "mother-code-conversion");
2727 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2728 chise_feature_set_property_value
2729 (chise_ds_get_feature (default_chise_data_source, feature_name),
2730 property, "identical");
2733 Lisp_Object sym = Qnil;
2735 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2737 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2739 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2741 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2744 chise_feature_set_property_value
2745 (chise_ds_get_feature (default_chise_data_source, feature_name),
2746 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2748 chise_feature_set_property_value
2749 (chise_ds_get_feature (default_chise_data_source, feature_name),
2750 property, "unknown");
2752 chise_property_sync (property);
2756 #endif /* HAVE_LIBCHISE */
2758 #endif /* HAVE_CHISE */
2759 #endif /* UTF2000 */
2762 /************************************************************************/
2763 /* Lisp primitives for working with characters */
2764 /************************************************************************/
2767 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2768 Make a character from CHARSET and code-point CODE.
2769 If DEFINED_ONLY is non-nil, builtin character is not returned.
2770 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2771 If corresponding character is not found, nil is returned.
2773 (charset, code, defined_only, without_inheritance))
2777 charset = Fget_charset (charset);
2780 if ( (XCHARSET_GRAPHIC (charset) == 0) ||
2781 (XCHARSET_GRAPHIC (charset) == 1) )
2783 if (NILP (defined_only))
2784 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2786 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2787 return c >= 0 ? make_char (c) : Qnil;
2790 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2791 Make a builtin character from CHARSET and code-point CODE.
2798 charset = Fget_charset (charset);
2800 if (EQ (charset, Vcharset_latin_viscii))
2802 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2808 (ret = Fget_char_attribute (chr,
2809 Vcharset_latin_viscii_lower,
2812 charset = Vcharset_latin_viscii_lower;
2816 (ret = Fget_char_attribute (chr,
2817 Vcharset_latin_viscii_upper,
2820 charset = Vcharset_latin_viscii_upper;
2827 if (XCHARSET_GRAPHIC (charset) == 1)
2830 ch = decode_builtin_char (charset, c);
2832 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
2836 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2837 Make a character from CHARSET and octets ARG1 and ARG2.
2838 ARG2 is required only for characters from two-dimensional charsets.
2839 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2840 character s with caron.
2842 (charset, arg1, arg2))
2846 int lowlim, highlim;
2848 charset = Fget_charset (charset);
2849 cs = XCHARSET (charset);
2851 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2852 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2854 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2856 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2857 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2860 /* It is useful (and safe, according to Olivier Galibert) to strip
2861 the 8th bit off ARG1 and ARG2 because it allows programmers to
2862 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2863 Latin 2 code of the character. */
2871 if (a1 < lowlim || a1 > highlim)
2872 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2874 if (CHARSET_DIMENSION (cs) == 1)
2878 ("Charset is of dimension one; second octet must be nil", arg2);
2879 return make_char (MAKE_CHAR (charset, a1, 0));
2888 a2 = XINT (arg2) & 0x7f;
2890 if (a2 < lowlim || a2 > highlim)
2891 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2893 return make_char (MAKE_CHAR (charset, a1, a2));
2896 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2897 Return the character set of CHARACTER.
2901 CHECK_CHAR_COERCE_INT (character);
2903 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2906 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2907 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2908 N defaults to 0 if omitted.
2912 Lisp_Object charset;
2915 CHECK_CHAR_COERCE_INT (character);
2917 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2919 if (NILP (n) || EQ (n, Qzero))
2920 return make_int (octet0);
2921 else if (EQ (n, make_int (1)))
2922 return make_int (octet1);
2924 signal_simple_error ("Octet number must be 0 or 1", n);
2928 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2929 Return code-point of CHARACTER in specified CHARSET.
2931 (character, charset, defined_only))
2935 CHECK_CHAR_COERCE_INT (character);
2936 charset = Fget_charset (charset);
2937 code_point = charset_code_point (charset, XCHAR (character),
2938 !NILP (defined_only));
2939 if (code_point >= 0)
2940 return make_int (code_point);
2946 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2947 Return list of charset and one or two position-codes of CHARACTER.
2951 /* This function can GC */
2952 struct gcpro gcpro1, gcpro2;
2953 Lisp_Object charset = Qnil;
2954 Lisp_Object rc = Qnil;
2962 GCPRO2 (charset, rc);
2963 CHECK_CHAR_COERCE_INT (character);
2966 code_point = ENCODE_CHAR (XCHAR (character), charset);
2967 dimension = XCHARSET_DIMENSION (charset);
2968 while (dimension > 0)
2970 rc = Fcons (make_int (code_point & 255), rc);
2974 rc = Fcons (XCHARSET_NAME (charset), rc);
2976 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2978 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2980 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2984 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2993 #ifdef ENABLE_COMPOSITE_CHARS
2994 /************************************************************************/
2995 /* composite character functions */
2996 /************************************************************************/
2999 lookup_composite_char (Bufbyte *str, int len)
3001 Lisp_Object lispstr = make_string (str, len);
3002 Lisp_Object ch = Fgethash (lispstr,
3003 Vcomposite_char_string2char_hash_table,
3009 if (composite_char_row_next >= 128)
3010 signal_simple_error ("No more composite chars available", lispstr);
3011 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3012 composite_char_col_next);
3013 Fputhash (make_char (emch), lispstr,
3014 Vcomposite_char_char2string_hash_table);
3015 Fputhash (lispstr, make_char (emch),
3016 Vcomposite_char_string2char_hash_table);
3017 composite_char_col_next++;
3018 if (composite_char_col_next >= 128)
3020 composite_char_col_next = 32;
3021 composite_char_row_next++;
3030 composite_char_string (Emchar ch)
3032 Lisp_Object str = Fgethash (make_char (ch),
3033 Vcomposite_char_char2string_hash_table,
3035 assert (!UNBOUNDP (str));
3039 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3040 Convert a string into a single composite character.
3041 The character is the result of overstriking all the characters in
3046 CHECK_STRING (string);
3047 return make_char (lookup_composite_char (XSTRING_DATA (string),
3048 XSTRING_LENGTH (string)));
3051 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3052 Return a string of the characters comprising a composite character.
3060 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3061 signal_simple_error ("Must be composite char", ch);
3062 return composite_char_string (emch);
3064 #endif /* ENABLE_COMPOSITE_CHARS */
3067 /************************************************************************/
3068 /* initialization */
3069 /************************************************************************/
3072 syms_of_mule_charset (void)
3074 INIT_LRECORD_IMPLEMENTATION (charset);
3076 DEFSUBR (Fcharsetp);
3077 DEFSUBR (Ffind_charset);
3078 DEFSUBR (Fget_charset);
3079 DEFSUBR (Fcharset_list);
3080 DEFSUBR (Fcharset_name);
3081 DEFSUBR (Fmake_charset);
3082 DEFSUBR (Fmake_reverse_direction_charset);
3083 /* DEFSUBR (Freverse_direction_charset); */
3084 DEFSUBR (Fdefine_charset_alias);
3085 DEFSUBR (Fcharset_from_attributes);
3086 DEFSUBR (Fcharset_short_name);
3087 DEFSUBR (Fcharset_long_name);
3088 DEFSUBR (Fcharset_description);
3089 DEFSUBR (Fcharset_dimension);
3090 DEFSUBR (Fcharset_property);
3091 DEFSUBR (Fcharset_id);
3092 DEFSUBR (Fset_charset_ccl_program);
3093 DEFSUBR (Fset_charset_registry);
3096 DEFSUBR (Fcharset_mapping_table);
3097 DEFSUBR (Fset_charset_mapping_table);
3099 DEFSUBR (Fsave_charset_mapping_table);
3100 DEFSUBR (Freset_charset_mapping_table);
3101 #ifdef HAVE_LIBCHISE
3102 DEFSUBR (Fsave_charset_properties);
3103 #endif /* HAVE_LIBCHISE */
3104 #endif /* HAVE_CHISE */
3105 DEFSUBR (Fdecode_char);
3106 DEFSUBR (Fdecode_builtin_char);
3107 DEFSUBR (Fencode_char);
3110 DEFSUBR (Fmake_char);
3111 DEFSUBR (Fchar_charset);
3112 DEFSUBR (Fchar_octet);
3113 DEFSUBR (Fsplit_char);
3115 #ifdef ENABLE_COMPOSITE_CHARS
3116 DEFSUBR (Fmake_composite_char);
3117 DEFSUBR (Fcomposite_char_string);
3120 defsymbol (&Qcharsetp, "charsetp");
3121 defsymbol (&Qregistry, "registry");
3122 defsymbol (&Qfinal, "final");
3123 defsymbol (&Qgraphic, "graphic");
3124 defsymbol (&Qdirection, "direction");
3125 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3126 defsymbol (&Qshort_name, "short-name");
3127 defsymbol (&Qlong_name, "long-name");
3128 defsymbol (&Qiso_ir, "iso-ir");
3130 defsymbol (&Qto_iso_ir, "=>iso-ir");
3131 defsymbol (&Qpartial, "partial");
3132 defsymbol (&Qmother, "mother");
3133 defsymbol (&Qmin_code, "min-code");
3134 defsymbol (&Qmax_code, "max-code");
3135 defsymbol (&Qcode_offset, "code-offset");
3136 defsymbol (&Qconversion, "conversion");
3137 defsymbol (&Q94x60, "94x60");
3138 defsymbol (&Q94x94x60, "94x94x60");
3139 defsymbol (&Qbig5_1, "big5-1");
3140 defsymbol (&Qbig5_2, "big5-2");
3143 defsymbol (&Ql2r, "l2r");
3144 defsymbol (&Qr2l, "r2l");
3146 /* Charsets, compatible with FSF 20.3
3147 Naming convention is Script-Charset[-Edition] */
3148 defsymbol (&Qascii, "ascii");
3149 defsymbol (&Qcontrol_1, "control-1");
3150 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3151 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3152 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3153 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3154 defsymbol (&Qthai_tis620, "thai-tis620");
3155 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3156 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3157 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3158 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3159 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3160 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3161 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3162 /* defsymbol (&Qrep_jis_x0208_1978, "=jis-x0208@1978"); */
3163 defsymbol (&Qrep_gb2312, "=gb2312");
3164 defsymbol (&Qrep_gb12345, "=gb12345");
3165 defsymbol (&Qrep_jis_x0208_1983, "=jis-x0208@1983");
3166 defsymbol (&Qrep_ks_x1001, "=ks-x1001");
3167 defsymbol (&Qrep_jis_x0212, "=jis-x0212");
3168 defsymbol (&Qrep_cns11643_1, "=cns11643-1");
3169 defsymbol (&Qrep_cns11643_2, "=cns11643-2");
3171 defsymbol (&Qsystem_char_id, "system-char-id");
3172 defsymbol (&Qrep_ucs, "=ucs");
3173 defsymbol (&Qucs, "ucs");
3174 defsymbol (&Qucs_bmp, "ucs-bmp");
3175 defsymbol (&Qucs_smp, "ucs-smp");
3176 defsymbol (&Qucs_sip, "ucs-sip");
3177 defsymbol (&Qlatin_viscii, "latin-viscii");
3178 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3179 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3180 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3181 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3182 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3183 defsymbol (&Qrep_jis_x0208, "=jis-x0208");
3184 defsymbol (&Qrep_jis_x0208_1990, "=jis-x0208@1990");
3185 defsymbol (&Qrep_big5, "=big5");
3186 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3188 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3189 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3191 defsymbol (&Qcomposite, "composite");
3195 vars_of_mule_charset (void)
3202 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3203 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3205 /* Table of charsets indexed by leading byte. */
3206 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3207 chlook->charset_by_leading_byte[i] = Qnil;
3210 /* Table of charsets indexed by type/final-byte. */
3211 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3212 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3213 chlook->charset_by_attributes[i][j] = Qnil;
3215 /* Table of charsets indexed by type/final-byte/direction. */
3216 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3217 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3218 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3219 chlook->charset_by_attributes[i][j][k] = Qnil;
3223 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3225 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3226 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3230 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3231 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3232 Leading-code of private TYPE9N charset of column-width 1.
3234 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3238 Vdefault_coded_charset_priority_list = Qnil;
3239 DEFVAR_LISP ("default-coded-charset-priority-list",
3240 &Vdefault_coded_charset_priority_list /*
3241 Default order of preferred coded-character-sets.
3243 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3244 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3245 &Vdisplay_coded_charset_priority_use_inheritance /*
3246 If non-nil, use character inheritance.
3248 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3249 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3250 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3251 If non-nil, prefer nearest character in hierarchy order.
3257 complex_vars_of_mule_charset (void)
3259 staticpro (&Vcharset_hash_table);
3260 Vcharset_hash_table =
3261 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3263 /* Predefined character sets. We store them into variables for
3267 staticpro (&Vcharset_system_char_id);
3268 Vcharset_system_char_id =
3269 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3270 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3271 build_string ("SCID"),
3272 build_string ("CHAR-ID"),
3273 build_string ("System char-id"),
3276 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3278 staticpro (&Vcharset_ucs);
3280 make_charset (LEADING_BYTE_UCS, Qrep_ucs, 256, 4,
3281 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3282 build_string ("UCS"),
3283 build_string ("UCS"),
3284 build_string ("ISO/IEC 10646"),
3287 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3289 staticpro (&Vcharset_ucs_bmp);
3291 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3292 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3293 build_string ("BMP"),
3294 build_string ("UCS-BMP"),
3295 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3297 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3298 - LEADING_BYTE_UCS_BMP,
3299 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3301 staticpro (&Vcharset_ucs_smp);
3303 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3304 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3305 build_string ("SMP"),
3306 build_string ("UCS-SMP"),
3307 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3308 build_string ("UCS00-1"),
3310 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3311 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL,
3313 staticpro (&Vcharset_ucs_sip);
3315 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3316 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3317 build_string ("SIP"),
3318 build_string ("UCS-SIP"),
3319 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3320 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3322 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3323 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL,
3326 # define MIN_CHAR_THAI 0
3327 # define MAX_CHAR_THAI 0
3328 /* # define MIN_CHAR_HEBREW 0 */
3329 /* # define MAX_CHAR_HEBREW 0 */
3330 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3331 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3333 staticpro (&Vcharset_ascii);
3335 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3336 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3337 build_string ("ASCII"),
3338 build_string ("ASCII)"),
3339 build_string ("ASCII (ISO646 IRV)"),
3340 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3341 - LEADING_BYTE_ASCII,
3342 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL,
3344 staticpro (&Vcharset_control_1);
3345 Vcharset_control_1 =
3346 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3347 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3348 build_string ("C1"),
3349 build_string ("Control characters"),
3350 build_string ("Control characters 128-191"),
3352 - LEADING_BYTE_CONTROL_1,
3353 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL,
3355 staticpro (&Vcharset_latin_iso8859_1);
3356 Vcharset_latin_iso8859_1 =
3357 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3358 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3359 build_string ("Latin-1"),
3360 build_string ("ISO8859-1 (Latin-1)"),
3361 build_string ("ISO8859-1 (Latin-1)"),
3362 build_string ("iso8859-1"),
3363 - LEADING_BYTE_LATIN_ISO8859_1,
3364 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3366 staticpro (&Vcharset_latin_iso8859_2);
3367 Vcharset_latin_iso8859_2 =
3368 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3369 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3370 build_string ("Latin-2"),
3371 build_string ("ISO8859-2 (Latin-2)"),
3372 build_string ("ISO8859-2 (Latin-2)"),
3373 build_string ("iso8859-2"),
3374 - LEADING_BYTE_LATIN_ISO8859_2,
3375 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3377 staticpro (&Vcharset_latin_iso8859_3);
3378 Vcharset_latin_iso8859_3 =
3379 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3380 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3381 build_string ("Latin-3"),
3382 build_string ("ISO8859-3 (Latin-3)"),
3383 build_string ("ISO8859-3 (Latin-3)"),
3384 build_string ("iso8859-3"),
3385 - LEADING_BYTE_LATIN_ISO8859_3,
3386 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3388 staticpro (&Vcharset_latin_iso8859_4);
3389 Vcharset_latin_iso8859_4 =
3390 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3391 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3392 build_string ("Latin-4"),
3393 build_string ("ISO8859-4 (Latin-4)"),
3394 build_string ("ISO8859-4 (Latin-4)"),
3395 build_string ("iso8859-4"),
3396 - LEADING_BYTE_LATIN_ISO8859_4,
3397 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3399 staticpro (&Vcharset_thai_tis620);
3400 Vcharset_thai_tis620 =
3401 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3402 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3403 build_string ("TIS620"),
3404 build_string ("TIS620 (Thai)"),
3405 build_string ("TIS620.2529 (Thai)"),
3406 build_string ("tis620"),
3407 - LEADING_BYTE_THAI_TIS620,
3408 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3410 staticpro (&Vcharset_greek_iso8859_7);
3411 Vcharset_greek_iso8859_7 =
3412 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3413 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3414 build_string ("ISO8859-7"),
3415 build_string ("ISO8859-7 (Greek)"),
3416 build_string ("ISO8859-7 (Greek)"),
3417 build_string ("iso8859-7"),
3418 - LEADING_BYTE_GREEK_ISO8859_7,
3419 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3421 staticpro (&Vcharset_arabic_iso8859_6);
3422 Vcharset_arabic_iso8859_6 =
3423 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3424 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3425 build_string ("ISO8859-6"),
3426 build_string ("ISO8859-6 (Arabic)"),
3427 build_string ("ISO8859-6 (Arabic)"),
3428 build_string ("iso8859-6"),
3429 - LEADING_BYTE_ARABIC_ISO8859_6,
3430 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3432 staticpro (&Vcharset_hebrew_iso8859_8);
3433 Vcharset_hebrew_iso8859_8 =
3434 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3435 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3436 build_string ("ISO8859-8"),
3437 build_string ("ISO8859-8 (Hebrew)"),
3438 build_string ("ISO8859-8 (Hebrew)"),
3439 build_string ("iso8859-8"),
3440 - LEADING_BYTE_HEBREW_ISO8859_8,
3442 0 /* MIN_CHAR_HEBREW */,
3443 0 /* MAX_CHAR_HEBREW */, 0, 32,
3444 Qnil, CONVERSION_IDENTICAL,
3446 staticpro (&Vcharset_katakana_jisx0201);
3447 Vcharset_katakana_jisx0201 =
3448 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3449 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3450 build_string ("JISX0201 Kana"),
3451 build_string ("JISX0201.1976 (Japanese Kana)"),
3452 build_string ("JISX0201.1976 Japanese Kana"),
3453 build_string ("jisx0201\\.1976"),
3454 - LEADING_BYTE_KATAKANA_JISX0201,
3455 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3457 staticpro (&Vcharset_latin_jisx0201);
3458 Vcharset_latin_jisx0201 =
3459 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3460 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3461 build_string ("JISX0201 Roman"),
3462 build_string ("JISX0201.1976 (Japanese Roman)"),
3463 build_string ("JISX0201.1976 Japanese Roman"),
3464 build_string ("jisx0201\\.1976"),
3465 - LEADING_BYTE_LATIN_JISX0201,
3466 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3468 staticpro (&Vcharset_cyrillic_iso8859_5);
3469 Vcharset_cyrillic_iso8859_5 =
3470 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3471 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3472 build_string ("ISO8859-5"),
3473 build_string ("ISO8859-5 (Cyrillic)"),
3474 build_string ("ISO8859-5 (Cyrillic)"),
3475 build_string ("iso8859-5"),
3476 - LEADING_BYTE_CYRILLIC_ISO8859_5,
3477 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3479 staticpro (&Vcharset_latin_iso8859_9);
3480 Vcharset_latin_iso8859_9 =
3481 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3482 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3483 build_string ("Latin-5"),
3484 build_string ("ISO8859-9 (Latin-5)"),
3485 build_string ("ISO8859-9 (Latin-5)"),
3486 build_string ("iso8859-9"),
3487 - LEADING_BYTE_LATIN_ISO8859_9,
3488 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3491 staticpro (&Vcharset_jis_x0208);
3492 Vcharset_jis_x0208 =
3493 make_charset (LEADING_BYTE_JIS_X0208,
3494 Qrep_jis_x0208, 94, 2,
3495 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3496 build_string ("JIS X0208"),
3497 build_string ("JIS X0208 Common"),
3498 build_string ("JIS X0208 Common part"),
3499 build_string ("jisx0208"),
3500 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3502 MIN_CHAR_JIS_X0208_1990,
3503 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3504 Qnil, CONVERSION_94x94,
3508 staticpro (&Vcharset_japanese_jisx0208_1978);
3509 Vcharset_japanese_jisx0208_1978 =
3510 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3511 Qrep_jis_x0208_1978, 94, 2,
3512 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3513 build_string ("JIS X0208:1978"),
3514 build_string ("JIS X0208:1978 (Japanese)"),
3516 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3517 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3518 - LEADING_BYTE_JAPANESE_JISX0208_1978,
3525 CONVERSION_IDENTICAL,
3528 staticpro (&Vcharset_chinese_gb2312);
3529 Vcharset_chinese_gb2312 =
3530 make_charset (LEADING_BYTE_CHINESE_GB2312, Qrep_gb2312, 94, 2,
3531 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3532 build_string ("GB2312"),
3533 build_string ("GB2312)"),
3534 build_string ("GB2312 Chinese simplified"),
3535 build_string ("gb2312"),
3536 - LEADING_BYTE_CHINESE_GB2312,
3537 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3539 staticpro (&Vcharset_chinese_gb12345);
3540 Vcharset_chinese_gb12345 =
3541 make_charset (LEADING_BYTE_CHINESE_GB12345, Qrep_gb12345, 94, 2,
3542 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3543 build_string ("G1"),
3544 build_string ("GB 12345)"),
3545 build_string ("GB 12345-1990"),
3546 build_string ("GB12345\\(\\.1990\\)?-0"),
3548 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3550 staticpro (&Vcharset_japanese_jisx0208);
3551 Vcharset_japanese_jisx0208 =
3552 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qrep_jis_x0208_1983, 94, 2,
3553 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3554 build_string ("JISX0208"),
3555 build_string ("JIS X0208:1983 (Japanese)"),
3556 build_string ("JIS X0208:1983 Japanese Kanji"),
3557 build_string ("jisx0208\\.1983"),
3558 - LEADING_BYTE_JAPANESE_JISX0208,
3565 CONVERSION_IDENTICAL,
3568 staticpro (&Vcharset_japanese_jisx0208_1990);
3569 Vcharset_japanese_jisx0208_1990 =
3570 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3571 Qrep_jis_x0208_1990, 94, 2,
3572 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3573 build_string ("JISX0208-1990"),
3574 build_string ("JIS X0208:1990 (Japanese)"),
3575 build_string ("JIS X0208:1990 Japanese Kanji"),
3576 build_string ("jisx0208\\.1990"),
3577 - LEADING_BYTE_JAPANESE_JISX0208_1990,
3579 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3580 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3581 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3582 Vcharset_jis_x0208 /* Qnil */,
3583 CONVERSION_IDENTICAL /* CONVERSION_94x94 */,
3586 staticpro (&Vcharset_korean_ksc5601);
3587 Vcharset_korean_ksc5601 =
3588 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qrep_ks_x1001, 94, 2,
3589 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3590 build_string ("KSC5601"),
3591 build_string ("KSC5601 (Korean"),
3592 build_string ("KSC5601 Korean Hangul and Hanja"),
3593 build_string ("ksc5601"),
3594 - LEADING_BYTE_KOREAN_KSC5601,
3595 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3597 staticpro (&Vcharset_japanese_jisx0212);
3598 Vcharset_japanese_jisx0212 =
3599 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qrep_jis_x0212, 94, 2,
3600 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3601 build_string ("JISX0212"),
3602 build_string ("JISX0212 (Japanese)"),
3603 build_string ("JISX0212 Japanese Supplement"),
3604 build_string ("jisx0212"),
3605 - LEADING_BYTE_JAPANESE_JISX0212,
3606 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3609 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3610 staticpro (&Vcharset_chinese_cns11643_1);
3611 Vcharset_chinese_cns11643_1 =
3612 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qrep_cns11643_1, 94, 2,
3613 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3614 build_string ("CNS11643-1"),
3615 build_string ("CNS11643-1 (Chinese traditional)"),
3617 ("CNS 11643 Plane 1 Chinese traditional"),
3618 build_string (CHINESE_CNS_PLANE_RE("1")),
3619 - LEADING_BYTE_CHINESE_CNS11643_1,
3620 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3622 staticpro (&Vcharset_chinese_cns11643_2);
3623 Vcharset_chinese_cns11643_2 =
3624 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qrep_cns11643_2, 94, 2,
3625 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3626 build_string ("CNS11643-2"),
3627 build_string ("CNS11643-2 (Chinese traditional)"),
3629 ("CNS 11643 Plane 2 Chinese traditional"),
3630 build_string (CHINESE_CNS_PLANE_RE("2")),
3631 - LEADING_BYTE_CHINESE_CNS11643_2,
3632 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3635 staticpro (&Vcharset_latin_tcvn5712);
3636 Vcharset_latin_tcvn5712 =
3637 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3638 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3639 build_string ("TCVN 5712"),
3640 build_string ("TCVN 5712 (VSCII-2)"),
3641 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3642 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3643 - LEADING_BYTE_LATIN_TCVN5712,
3644 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3646 staticpro (&Vcharset_latin_viscii_lower);
3647 Vcharset_latin_viscii_lower =
3648 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3649 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3650 build_string ("VISCII lower"),
3651 build_string ("VISCII lower (Vietnamese)"),
3652 build_string ("VISCII lower (Vietnamese)"),
3653 build_string ("MULEVISCII-LOWER"),
3655 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3657 staticpro (&Vcharset_latin_viscii_upper);
3658 Vcharset_latin_viscii_upper =
3659 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3660 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3661 build_string ("VISCII upper"),
3662 build_string ("VISCII upper (Vietnamese)"),
3663 build_string ("VISCII upper (Vietnamese)"),
3664 build_string ("MULEVISCII-UPPER"),
3666 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3668 staticpro (&Vcharset_latin_viscii);
3669 Vcharset_latin_viscii =
3670 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3671 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3672 build_string ("VISCII"),
3673 build_string ("VISCII 1.1 (Vietnamese)"),
3674 build_string ("VISCII 1.1 (Vietnamese)"),
3675 build_string ("VISCII1\\.1"),
3677 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL,
3679 staticpro (&Vcharset_chinese_big5);
3680 Vcharset_chinese_big5 =
3681 make_charset (LEADING_BYTE_CHINESE_BIG5, Qrep_big5, 256, 2,
3682 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3683 build_string ("Big5"),
3684 build_string ("Big5"),
3685 build_string ("Big5 Chinese traditional"),
3686 build_string ("big5-0"),
3689 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3690 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL,
3693 staticpro (&Vcharset_ethiopic_ucs);
3694 Vcharset_ethiopic_ucs =
3695 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3696 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3697 build_string ("Ethiopic (UCS)"),
3698 build_string ("Ethiopic (UCS)"),
3699 build_string ("Ethiopic of UCS"),
3700 build_string ("Ethiopic-Unicode"),
3702 Qnil, 0x1200, 0x137F, 0, 0,
3703 Qnil, CONVERSION_IDENTICAL,
3706 staticpro (&Vcharset_chinese_big5_1);
3707 Vcharset_chinese_big5_1 =
3708 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3709 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3710 build_string ("Big5"),
3711 build_string ("Big5 (Level-1)"),
3713 ("Big5 Level-1 Chinese traditional"),
3714 build_string ("big5"),
3716 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3717 Vcharset_chinese_big5, CONVERSION_BIG5_1,
3719 staticpro (&Vcharset_chinese_big5_2);
3720 Vcharset_chinese_big5_2 =
3721 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3722 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3723 build_string ("Big5"),
3724 build_string ("Big5 (Level-2)"),
3726 ("Big5 Level-2 Chinese traditional"),
3727 build_string ("big5"),
3729 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3730 Vcharset_chinese_big5, CONVERSION_BIG5_2,
3733 #ifdef ENABLE_COMPOSITE_CHARS
3734 /* #### For simplicity, we put composite chars into a 96x96 charset.
3735 This is going to lead to problems because you can run out of
3736 room, esp. as we don't yet recycle numbers. */
3737 staticpro (&Vcharset_composite);
3738 Vcharset_composite =
3739 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3740 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3741 build_string ("Composite"),
3742 build_string ("Composite characters"),
3743 build_string ("Composite characters"),
3746 /* #### not dumped properly */
3747 composite_char_row_next = 32;
3748 composite_char_col_next = 32;
3750 Vcomposite_char_string2char_hash_table =
3751 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3752 Vcomposite_char_char2string_hash_table =
3753 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3754 staticpro (&Vcomposite_char_string2char_hash_table);
3755 staticpro (&Vcomposite_char_char2string_hash_table);
3756 #endif /* ENABLE_COMPOSITE_CHARS */