1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ideograph_daikanwa_2;
82 Lisp_Object Vcharset_ethiopic_ucs;
84 Lisp_Object Vcharset_chinese_big5_1;
85 Lisp_Object Vcharset_chinese_big5_2;
87 #ifdef ENABLE_COMPOSITE_CHARS
88 Lisp_Object Vcharset_composite;
90 /* Hash tables for composite chars. One maps string representing
91 composed chars to their equivalent chars; one goes the
93 Lisp_Object Vcomposite_char_char2string_hash_table;
94 Lisp_Object Vcomposite_char_string2char_hash_table;
96 static int composite_char_row_next;
97 static int composite_char_col_next;
99 #endif /* ENABLE_COMPOSITE_CHARS */
/* Lookup state shared by this file: make_charset() registers each new
   charset in its charset_by_attributes and charset_by_leading_byte
   tables, and get_unallocated_leading_byte() advances its
   next_allocated_* counters. */
101 struct charset_lookup *chlook;
/* Layout description of struct charset_lookup: its
   charset_by_leading_byte member is described as an array of Lisp
   objects. */
103 static const struct lrecord_description charset_lookup_description_1[] = {
104 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
106 NUM_LEADING_BYTES+4*128
113 static const struct struct_description charset_lookup_description = {
114 sizeof (struct charset_lookup),
115 charset_lookup_description_1
119 /* Table of number of bytes in the string representation of a character
120 indexed by the first byte of that representation.
122 rep_bytes_by_first_byte(c) is more efficient than the equivalent
123 canonical computation:
125 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The table has 0xA0 entries, so only first bytes in the range
   0x00 - 0x9f are valid indices. */
127 const Bytecount rep_bytes_by_first_byte[0xA0] =
128 { /* 0x00 - 0x7f are for straight ASCII */
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two alternative rows for 0x80 - 0x8f follow; they differ
   only in the last entry (3 vs. 2).  Presumably a preprocessor
   conditional selects exactly one of them -- confirm. */
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 /* 0x90 - 0x9d are for Dimension-2 official charsets */
144 /* 0x9e is for Dimension-1 private charsets */
145 /* 0x9f is for Dimension-2 private charsets */
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the vector V used as a decoding table.  DIM is the number of
   nesting levels still expected and CCS_LEN is the maximum length a
   vector may have at each level. */
152 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
154 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is tested for first. */
158 if (XVECTOR_LENGTH (v) > ccs_len)
161 for (i = 0; i < XVECTOR_LENGTH (v); i++)
163 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Elements that are nil or characters need no further inspection. */
165 if (!NILP (c) && !CHARP (c))
/* Nested table: check it recursively with one dimension fewer.
   NOTE(review): presumably only reached when C is a vector and DIM
   allows another level -- confirm. */
169 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is normalized to an integer code point: either it already is an
   integer, or it is a list of byte values (obsolete form) that are
   combined most-significant byte first.  Any other shape signals
   "Invalid value for coded-charset".  The decoding table of CCS is then
   updated with decoding_table_put_char().
   NOTE(review): the work appears to be skipped for the `ucs' charset
   when VALUE equals CHARACTER itself -- confirm the full condition. */
181 put_char_ccs_code_point (Lisp_Object character,
182 Lisp_Object ccs, Lisp_Object value)
184 if (!EQ (XCHARSET_NAME (ccs), Qucs)
186 || (XCHAR (character) != XINT (value)))
188 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
192 { /* obsolete representation: value must be a list of bytes */
193 Lisp_Object ret = Fcar (value);
197 signal_simple_error ("Invalid value for coded-charset", value);
198 code_point = XINT (ret);
199 if (XCHARSET_GRAPHIC (ccs) == 1)
207 signal_simple_error ("Invalid value for coded-charset",
211 signal_simple_error ("Invalid value for coded-charset",
214 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J as the new least-significant octet. */
216 code_point = (code_point << 8) | j;
219 value = make_int (code_point);
221 else if (INTP (value))
223 code_point = XINT (value);
224 if (XCHARSET_GRAPHIC (ccs) == 1)
/* For graphic == 1 charsets the high bit of every octet is cleared, and
   VALUE is rebuilt from the masked code point. */
226 code_point &= 0x7F7F7F7F;
227 value = make_int (code_point);
231 signal_simple_error ("Invalid value for coded-charset", value);
/* Fetch the code point CHARACTER currently has in CCS so that the old
   decoding-table entry can be removed before the new one is stored. */
235 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
238 decoding_table_remove_char (ccs, XINT (cpos));
241 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS
   from both of CCS's tables. */
247 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
249 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
250 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
/* Decoding side: only done when the decoding table is a vector.  The
   character's current code point is looked up and that entry removed. */
252 if (VECTORP (decoding_table))
254 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
258 decoding_table_remove_char (ccs, XINT (cpos));
/* Encoding side: only done when the encoding table is a char table.
   The character's entry is reset to Qunbound. */
261 if (CHAR_TABLEP (encoding_table))
263 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
271 int leading_code_private_11;
274 Lisp_Object Qcharsetp;
276 /* Qdoc_string, Qdimension, Qchars defined in general.c */
277 Lisp_Object Qregistry, Qfinal, Qgraphic;
278 Lisp_Object Qdirection;
279 Lisp_Object Qreverse_direction_charset;
280 Lisp_Object Qleading_byte;
281 Lisp_Object Qshort_name, Qlong_name;
283 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
284 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
301 Qjapanese_jisx0208_1978,
305 Qjapanese_jisx0208_1990,
323 Qvietnamese_viscii_lower,
324 Qvietnamese_viscii_upper,
327 Qideograph_daikanwa_2,
334 Lisp_Object Ql2r, Qr2l;
336 Lisp_Object Vcharset_hash_table;
338 /* Composite characters are characters constructed by overstriking two
339 or more regular characters.
341 1) The old Mule implementation involves storing composite characters
342 in a buffer as a tag followed by all of the actual characters
343 used to make up the composite character. I think this is a bad
344 idea; it greatly complicates code that wants to handle strings
345 one character at a time because it has to deal with the possibility
346 of great big ungainly characters. It's much more reasonable to
347 simply store an index into a table of composite characters.
349 2) The current implementation only allows for 16,384 separate
350 composite characters over the lifetime of the XEmacs process.
351 This could become a potential problem if the user
352 edited lots of different files that use composite characters.
353 Due to FSF bogosity, increasing the number of allowable
354 composite characters under Mule would decrease the number
355 of possible faces that can exist. Mule already has shrunk
356 this to 2048, and further shrinkage would become uncomfortable.
357 No such problems exist in XEmacs.
359 Composite characters could be represented as 0x80 C1 C2 C3,
360 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
361 for slightly under 2^20 (one million) composite characters
362 over the XEmacs process lifetime, and you only need to
363 increase the size of a Mule character from 19 to 21 bits.
364 Or you could use 0x80 C1 C2 C3 C4, allowing for about
365 85 million (slightly over 2^26) composite characters. */
368 /************************************************************************/
369 /* Basic Emchar functions */
370 /************************************************************************/
372 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
373 string in STR. Returns the number of bytes stored.
374 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Two encoders appear below.  The first writes C as a UTF-8 style
   sequence whose length depends on the magnitude of C; the second
   splits C with BREAKUP_CHAR and works from its leading byte.
   NOTE(review): presumably a preprocessor conditional selects one of
   the two -- confirm. */
378 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: 0xc0 lead byte, then 6 bits per 0x80 continuation byte. */
393 else if ( c <= 0x7ff )
395 *p++ = (c >> 6) | 0xc0;
396 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: 0xe0 lead byte. */
398 else if ( c <= 0xffff )
400 *p++ = (c >> 12) | 0xe0;
401 *p++ = ((c >> 6) & 0x3f) | 0x80;
402 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: 0xf0 lead byte. */
404 else if ( c <= 0x1fffff )
406 *p++ = (c >> 18) | 0xf0;
407 *p++ = ((c >> 12) & 0x3f) | 0x80;
408 *p++ = ((c >> 6) & 0x3f) | 0x80;
409 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: 0xf8 lead byte. */
411 else if ( c <= 0x3ffffff )
413 *p++ = (c >> 24) | 0xf8;
414 *p++ = ((c >> 18) & 0x3f) | 0x80;
415 *p++ = ((c >> 12) & 0x3f) | 0x80;
416 *p++ = ((c >> 6) & 0x3f) | 0x80;
417 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: 0xfc lead byte, used for every larger value. */
421 *p++ = (c >> 30) | 0xfc;
422 *p++ = ((c >> 24) & 0x3f) | 0x80;
423 *p++ = ((c >> 18) & 0x3f) | 0x80;
424 *p++ = ((c >> 12) & 0x3f) | 0x80;
425 *p++ = ((c >> 6) & 0x3f) | 0x80;
426 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into its charset and position codes. */
429 BREAKUP_CHAR (c, charset, c1, c2);
430 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by an extra prefix byte. */
431 if (LEADING_BYTE_PRIVATE_P (lb))
432 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
434 if (EQ (charset, Vcharset_control_1))
443 /* Return the first character from a Mule-encoded string in STR,
444 assuming it's non-ASCII. Do not call this directly.
445 Use the macro charptr_emchar() instead. */
/* Two decoders appear below.  The first classifies the first byte B by
   the thresholds 0xf8 / 0xf0 / 0xe0 / 0xc0 and then accumulates six
   bits per iteration; the second works from the leading byte of the
   string.  NOTE(review): presumably a preprocessor conditional selects
   one of the two -- confirm. */
448 non_ascii_charptr_emchar (const Bufbyte *str)
461 else if ( b >= 0xf8 )
466 else if ( b >= 0xf0 )
471 else if ( b >= 0xe0 )
476 else if ( b >= 0xc0 )
/* LEN iterations, each shifting the low six bits of B into CH. */
486 for( ; len > 0; len-- )
489 ch = ( ch << 6 ) | ( b & 0x3f );
493 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the byte after the leading byte, minus
   0x20. */
496 if (i0 == LEADING_BYTE_CONTROL_1)
497 return (Emchar) (*++str - 0x20);
499 if (LEADING_BYTE_PREFIX_P (i0))
504 charset = CHARSET_BY_LEADING_BYTE (i0);
/* A second position byte is only relevant for dimension-2 charsets; I2
   otherwise keeps its initial value 0. */
505 if (XCHARSET_DIMENSION (charset) == 2)
508 return MAKE_CHAR (charset, i1, i2);
512 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
513 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields F1, F2 and F3 (CHAR_FIELD1..3).  F2
   (for one kind of character) or F1 (for the other) must lie in the
   official or private range.  Where the code goes on to consult the
   charset, the result is whether that charset has 96 characters per
   dimension; the 0x20 / 0x7F tests precede that lookup.
   NOTE(review): presumably F1 == 0 distinguishes the one-dimensional
   case handled first -- confirm. */
517 non_ascii_valid_char_p (Emchar ch)
521 /* Must have only lowest 19 bits set */
525 f1 = CHAR_FIELD1 (ch);
526 f2 = CHAR_FIELD2 (ch);
527 f3 = CHAR_FIELD3 (ch);
533 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
534 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
535 f2 > MAX_CHAR_FIELD2_PRIVATE)
540 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
541 f2 <= MAX_CHAR_FIELD2_PRIVATE))
545 NOTE: This takes advantage of the fact that
546 FIELD2_TO_OFFICIAL_LEADING_BYTE and
547 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
549 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
550 if (EQ (charset, Qnil))
552 return (XCHARSET_CHARS (charset) == 96);
558 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
559 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
560 f1 > MAX_CHAR_FIELD1_PRIVATE)
562 if (f2 < 0x20 || f3 < 0x20)
565 #ifdef ENABLE_COMPOSITE_CHARS
566 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
568 if (UNBOUNDP (Fgethash (make_int (ch),
569 Vcomposite_char_char2string_hash_table,
574 #endif /* ENABLE_COMPOSITE_CHARS */
576 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
577 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
580 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
582 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
585 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
587 if (EQ (charset, Qnil))
589 return (XCHARSET_CHARS (charset) == 96);
595 /************************************************************************/
596 /* Basic string functions */
597 /************************************************************************/
599 /* Copy the character pointed to by SRC into DST. Do not call this
600 directly. Use the macro charptr_copy_char() instead.
601 Return the number of bytes copied. */
/* The number of bytes to copy is taken from the first byte of SRC via
   REP_BYTES_BY_FIRST_BYTE; the loop then steps SRC and DST together
   once per byte. */
604 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
606 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
608 for (i = bytes; i; i--, dst++, src++)
614 /************************************************************************/
615 /* streams of Emchars */
616 /************************************************************************/
618 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
619 The functions below are not meant to be called directly; use
620 the macros in insdel.h. */
/* Finish reading a multi-byte character whose first byte CH has already
   been obtained.  The remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes
   are fetched from STREAM with Lstream_getc(), and the assembled byte
   sequence is decoded with charptr_emchar(). */
623 Lstream_get_emchar_1 (Lstream *stream, int ch)
625 Bufbyte str[MAX_EMCHAR_LEN];
626 Bufbyte *strptr = str;
629 str[0] = (Bufbyte) ch;
631 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
633 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc() is caught only by this
   assertion; the value is stored in the buffer either way. */
634 bufpos_checking_assert (c >= 0);
635 *++strptr = (Bufbyte) c;
637 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write the
   resulting bytes to STREAM.  Returns whatever Lstream_write() returns. */
641 Lstream_fput_emchar (Lstream *stream, Emchar ch)
643 Bufbyte str[MAX_EMCHAR_LEN];
644 Bytecount len = set_charptr_emchar (str, ch);
645 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and push the
   resulting bytes back onto STREAM with Lstream_unread(). */
649 Lstream_funget_emchar (Lstream *stream, Emchar ch)
651 Bufbyte str[MAX_EMCHAR_LEN];
652 Bytecount len = set_charptr_emchar (str, ch);
653 Lstream_unread (stream, str, len);
657 /************************************************************************/
659 /************************************************************************/
/* Mark method for charset objects: marks the Lisp objects referenced
   from the charset record OBJ.
   NOTE(review): reverse_direction_charset is listed in
   charset_description but is not marked here; confirm it is kept alive
   by other means (e.g. through the charset hash table). */
662 mark_charset (Lisp_Object obj)
664 Lisp_Charset *cs = XCHARSET (obj);
666 mark_object (cs->short_name);
667 mark_object (cs->long_name);
668 mark_object (cs->doc_string);
669 mark_object (cs->registry);
670 mark_object (cs->ccl_program);
672 mark_object (cs->decoding_table);
673 mark_object (cs->mother);
/* Print method for charset objects.  Writes "#<charset NAME SHORT-NAME
   LONG-NAME DOC-STRING", a formatted summary of the numeric attributes
   (dimension, direction, columns, graphic, final), the registry and
   finally the object's uid to PRINTCHARFUN.
   NOTE(review): the error() call presumably applies only when readable
   output was requested -- confirm.
   NOTE(review): both sprintf() calls write into BUF without a length
   limit; confirm BUF is large enough for the longest formatted output. */
679 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
681 Lisp_Charset *cs = XCHARSET (obj);
685 error ("printing unreadable object #<charset %s 0x%x>",
686 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
689 write_c_string ("#<charset ", printcharfun);
690 print_internal (CHARSET_NAME (cs), printcharfun, 0);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
693 write_c_string (" ", printcharfun);
694 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
695 write_c_string (" ", printcharfun);
696 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
697 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
699 CHARSET_DIMENSION (cs),
700 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
701 CHARSET_COLUMNS (cs),
702 CHARSET_GRAPHIC (cs),
704 write_c_string (buf, printcharfun);
705 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
706 sprintf (buf, " 0x%x>", cs->header.uid);
707 write_c_string (buf, printcharfun);
/* Layout description of Lisp_Charset: each entry gives the offset of
   one Lisp_Object member of the record. */
710 static const struct lrecord_description charset_description[] = {
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
717 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
719 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
720 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
725 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
726 mark_charset, print_charset, 0, 0, 0,
730 /* Make a new charset. */
731 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, fill its fields from the arguments
   and register the resulting object OBJ in the lookup tables of chlook
   (by ISO 2022 attributes and by leading byte / id) and, under NAME, in
   Vcharset_hash_table.  The CCL program and the reverse-direction
   charset always start out as nil. */
733 make_charset (Charset_ID id, Lisp_Object name,
734 unsigned short chars, unsigned char dimension,
735 unsigned char columns, unsigned char graphic,
736 Bufbyte final, unsigned char direction, Lisp_Object short_name,
737 Lisp_Object long_name, Lisp_Object doc,
739 Lisp_Object decoding_table,
740 Emchar min_code, Emchar max_code,
741 Emchar code_offset, unsigned char byte_offset,
742 Lisp_Object mother, unsigned char conversion)
745 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
749 XSETCHARSET (obj, cs);
751 CHARSET_ID (cs) = id;
752 CHARSET_NAME (cs) = name;
753 CHARSET_SHORT_NAME (cs) = short_name;
754 CHARSET_LONG_NAME (cs) = long_name;
755 CHARSET_CHARS (cs) = chars;
756 CHARSET_DIMENSION (cs) = dimension;
757 CHARSET_DIRECTION (cs) = direction;
758 CHARSET_COLUMNS (cs) = columns;
759 CHARSET_GRAPHIC (cs) = graphic;
760 CHARSET_FINAL (cs) = final;
761 CHARSET_DOC_STRING (cs) = doc;
762 CHARSET_REGISTRY (cs) = reg;
763 CHARSET_CCL_PROGRAM (cs) = Qnil;
764 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding table is initialized to Qunbound here
   rather than from the DECODING_TABLE argument; confirm the argument is
   intentionally ignored at this point. */
766 CHARSET_DECODING_TABLE(cs) = Qunbound;
767 CHARSET_MIN_CODE (cs) = min_code;
768 CHARSET_MAX_CODE (cs) = max_code;
769 CHARSET_CODE_OFFSET (cs) = code_offset;
770 CHARSET_BYTE_OFFSET (cs) = byte_offset;
771 CHARSET_MOTHER (cs) = mother;
772 CHARSET_CONVERSION (cs) = conversion;
/* Bytes per character in the string representation: 1 for ASCII,
   otherwise the dimension plus 1 or plus 2.
   NOTE(review): presumably the "+ 2" case is for charsets with a
   private leading byte, which need an extra prefix byte -- confirm. */
776 if (id == LEADING_BYTE_ASCII)
777 CHARSET_REP_BYTES (cs) = 1;
779 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
781 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
786 /* some charsets do not have final characters. This includes
787 ASCII, Control-1, Composite, and the two faux private
/* ISO2022_TYPE encodes the charset class in two bits: bit 1 is set for
   dimensions other than 1, bit 0 for character counts other than 94. */
789 unsigned char iso2022_type
790 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
792 if (code_offset == 0)
794 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
795 chlook->charset_by_attributes[iso2022_type][final] = obj;
799 (chlook->charset_by_attributes[iso2022_type][final][direction]));
800 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
/* The leading-byte slot must still be empty before it is claimed. */
804 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
805 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
807 /* Some charsets are "faux" and don't have names or really exist at
808 all except in the leading-byte table. */
810 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Each chlook counter is compared against its maximum
   before being post-incremented to yield the new leading byte.  The
   "No more character sets free for this dimension" error is signalled
   with DIMENSION as its datum.
   NOTE(review): presumably the error is raised when the relevant
   counter has passed its maximum -- confirm. */
815 get_unallocated_leading_byte (int dimension)
820 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
823 lb = chlook->next_allocated_leading_byte++;
827 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
830 lb = chlook->next_allocated_1_byte_leading_byte++;
834 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
837 lb = chlook->next_allocated_2_byte_leading_byte++;
843 ("No more character sets free for this dimension",
844 make_int (dimension));
850 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: (0xFF - 0xA1) = 94 plus (0x7F - 0x40) = 63. */
852 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character explicitly defined for CODE_POINT in the coded
   charset CCS.  The decoding table is descended with
   get_ccs_octet_table(), using one octet of CODE_POINT per step; if a
   character is found there it is returned.  Otherwise, when CCS has a
   mother charset and uses the identical conversion, the lookup is
   delegated to the mother: through DECODE_CHAR when the mother is
   Vcharset_ucs, else by recursion. */
855 decode_defined_char (Lisp_Object ccs, int code_point)
857 int dim = XCHARSET_DIMENSION (ccs);
858 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
866 = get_ccs_octet_table (decoding_table, ccs,
867 (code_point >> (dim * 8)) & 255);
869 if (CHARP (decoding_table))
870 return XCHAR (decoding_table);
873 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
875 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
877 if ( EQ (mother, Vcharset_ucs) )
878 return DECODE_CHAR (mother, code_point);
880 return decode_defined_char (mother, code_point);
/* Compute the character for CODE_POINT in CHARSET arithmetically rather
   than from a decoding table.  Three strategies appear:
   - CHARSET has a positive max-code and a mother charset: CODE_POINT is
     converted to a linear index (with special handling for the 94x60
     and 94x94x60 conversions), the charset's code offset is added and
     the mother charset decodes the result;
   - CHARSET has a positive max-code but no mother: the index is built
     from the octets of CODE_POINT, the byte offset and the number of
     characters per dimension, offset by the code offset, and compared
     against min-code / max-code;
   - otherwise, when the final byte is at least '0': the character is
     placed in the MIN_CHAR_94x94 / MIN_CHAR_96x96 (and presumably
     MIN_CHAR_94 / MIN_CHAR_96) ranges according to the final byte.  */
887 decode_builtin_char (Lisp_Object charset, int code_point)
889 Lisp_Object mother = XCHARSET_MOTHER (charset);
892 if ( XCHARSET_MAX_CODE (charset) > 0 )
894 if ( CHARSETP (mother) )
896 int code = code_point;
898 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
900 int row = code_point >> 8;
901 int cell = code_point & 255;
905 else if (row < 16 + 32 + 30)
906 code = (row - (16 + 32)) * 94 + cell - 33;
907 else if (row < 18 + 32 + 30)
909 else if (row < 18 + 32 + 60)
910 code = (row - (18 + 32)) * 94 + cell - 33;
912 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
914 int plane = code_point >> 16;
915 int row = (code_point >> 8) & 255;
916 int cell = code_point & 255;
920 else if (row < 16 + 32 + 30)
922 = (plane - 33) * 94 * 60
923 + (row - (16 + 32)) * 94
925 else if (row < 18 + 32 + 30)
927 else if (row < 18 + 32 + 60)
929 = (plane - 33) * 94 * 60
930 + (row - (18 + 32)) * 94
934 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
939 = (XCHARSET_DIMENSION (charset) == 1
941 code_point - XCHARSET_BYTE_OFFSET (charset)
943 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
944 * XCHARSET_CHARS (charset)
945 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
946 + XCHARSET_CODE_OFFSET (charset);
947 if ((cid < XCHARSET_MIN_CODE (charset))
948 || (XCHARSET_MAX_CODE (charset) < cid))
953 else if ((final = XCHARSET_FINAL (charset)) >= '0')
955 if (XCHARSET_DIMENSION (charset) == 1)
957 switch (XCHARSET_CHARS (charset))
961 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
964 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
972 switch (XCHARSET_CHARS (charset))
975 return MIN_CHAR_94x94
976 + (final - '0') * 94 * 94
977 + (((code_point >> 8) & 0x7F) - 33) * 94
978 + ((code_point & 0x7F) - 33);
980 return MIN_CHAR_96x96
981 + (final - '0') * 96 * 96
982 + (((code_point >> 8) & 0x7F) - 32) * 96
983 + ((code_point & 0x7F) - 32);
/* Return the code point of character CH in CHARSET.
   The encoding table of CHARSET is consulted first.  Failing that, a
   linear code is obtained from the mother charset (by recursion) or
   from CH itself when it lies within min-code .. max-code.  That code
   has the charset's code offset subtracted and is then laid out
   according to the charset's conversion type.  As a last resort,
   charsets with a final byte of at least '0' are handled by locating CH
   in the MIN_CHAR_94 / 96 / 94x94 / 96x96 ranges.
   When CHARSET has a mother and a final byte of at least '0', the
   recursive call always passes 1 for DEFINED_ONLY; otherwise
   DEFINED_ONLY is passed through unchanged. */
995 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
997 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1000 if ( CHAR_TABLEP (encoding_table)
1001 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1006 Lisp_Object mother = XCHARSET_MOTHER (charset);
1007 int min = XCHARSET_MIN_CODE (charset);
1008 int max = XCHARSET_MAX_CODE (charset);
1011 if ( CHARSETP (mother) )
1013 if (XCHARSET_FINAL (charset) >= '0')
1014 code = charset_code_point (mother, ch, 1);
1016 code = charset_code_point (mother, ch, defined_only);
1018 else if (defined_only)
1020 else if ( ((max == 0) && CHARSETP (mother)
1021 && (XCHARSET_FINAL (charset) == 0))
1022 || ((min <= ch) && (ch <= max)) )
1024 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1025 || ((min <= code) && (code <= max)) )
/* D is the zero-based linear index of the character in this charset. */
1027 int d = code - XCHARSET_CODE_OFFSET (charset);
1029 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1031 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1033 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1035 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1038 int cell = d % 94 + 33;
1044 return (row << 8) | cell;
1046 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1047 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1048 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1049 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1050 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1052 int plane = d / (94 * 60) + 33;
1053 int row = (d % (94 * 60)) / 94;
1054 int cell = d % 94 + 33;
1060 return (plane << 16) | (row << 8) | cell;
1062 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1064 ( (d / (94 * 94) + 33) << 16)
1065 | ((d / 94 % 94 + 33) << 8)
1067 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1069 ( (d / (96 * 96) + 32) << 16)
1070 | ((d / 96 % 96 + 32) << 8)
1072 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1074 ( (d / (94 * 94 * 94) + 33) << 24)
1075 | ((d / (94 * 94) % 94 + 33) << 16)
1076 | ((d / 94 % 94 + 33) << 8)
1078 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1080 ( (d / (96 * 96 * 96) + 32) << 24)
1081 | ((d / (96 * 96) % 96 + 32) << 16)
1082 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): an unknown conversion type is reported with a plain
   printf() to stdout, without a trailing newline; confirm this is
   intended rather than the error signalling used elsewhere in this
   file. */
1086 printf ("Unknown CCS-conversion %d is specified!",
1087 XCHARSET_CONVERSION (charset));
1091 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1092 ( XCHARSET_MIN_CODE (charset) == 0 )
1094 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1095 (XCHARSET_CODE_OFFSET (charset)
1096 == XCHARSET_MIN_CODE (charset))
1101 if (XCHARSET_DIMENSION (charset) == 1)
1103 if (XCHARSET_CHARS (charset) == 94)
1105 if (((d = ch - (MIN_CHAR_94
1106 + (XCHARSET_FINAL (charset) - '0') * 94))
1111 else if (XCHARSET_CHARS (charset) == 96)
1113 if (((d = ch - (MIN_CHAR_96
1114 + (XCHARSET_FINAL (charset) - '0') * 96))
1122 else if (XCHARSET_DIMENSION (charset) == 2)
1124 if (XCHARSET_CHARS (charset) == 94)
1126 if (((d = ch - (MIN_CHAR_94x94
1128 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1131 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1133 else if (XCHARSET_CHARS (charset) == 96)
1135 if (((d = ch - (MIN_CHAR_96x96
1137 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1140 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Pick the builtin charset that covers character C, store it in
   *CHARSET and return the code point of C within that charset.  The
   character ranges are tested in ascending order.  For the 94, 96,
   94x94 and 96x96 ranges the charset is looked up by its ISO 2022
   attributes, with the final byte derived from C's position in the
   range; when no such charset exists (nil), *CHARSET falls back to
   Vcharset_ucs.
   NOTE(review): the halfwidth-katakana branch returns list2 (...)
   whereas every other branch stores into *CHARSET and returns an
   integer; presumably that branch is disabled -- confirm. */
1151 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1153 if (c <= MAX_CHAR_BASIC_LATIN)
1155 *charset = Vcharset_ascii;
1160 *charset = Vcharset_control_1;
1165 *charset = Vcharset_latin_iso8859_1;
1169 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1171 *charset = Vcharset_hebrew_iso8859_8;
1172 return c - MIN_CHAR_HEBREW + 0x20;
1175 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1177 *charset = Vcharset_thai_tis620;
1178 return c - MIN_CHAR_THAI + 0x20;
1181 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1182 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1184 return list2 (Vcharset_katakana_jisx0201,
1185 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1188 else if (c <= MAX_CHAR_BMP)
1190 *charset = Vcharset_ucs_bmp;
1193 else if (c <= MAX_CHAR_SMP)
1195 *charset = Vcharset_ucs_smp;
1196 return c - MIN_CHAR_SMP;
1198 else if (c <= MAX_CHAR_SIP)
1200 *charset = Vcharset_ucs_sip;
1201 return c - MIN_CHAR_SIP;
1203 else if (c < MIN_CHAR_94)
1205 *charset = Vcharset_ucs;
1208 else if (c <= MAX_CHAR_94)
1210 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1211 ((c - MIN_CHAR_94) / 94) + '0',
1212 CHARSET_LEFT_TO_RIGHT);
1213 if (!NILP (*charset))
1214 return ((c - MIN_CHAR_94) % 94) + 33;
1217 *charset = Vcharset_ucs;
1221 else if (c <= MAX_CHAR_96)
1223 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1224 ((c - MIN_CHAR_96) / 96) + '0',
1225 CHARSET_LEFT_TO_RIGHT);
1226 if (!NILP (*charset))
1227 return ((c - MIN_CHAR_96) % 96) + 32;
1230 *charset = Vcharset_ucs;
1234 else if (c <= MAX_CHAR_94x94)
1237 = CHARSET_BY_ATTRIBUTES (94, 2,
1238 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1239 CHARSET_LEFT_TO_RIGHT);
1240 if (!NILP (*charset))
1241 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1242 | (((c - MIN_CHAR_94x94) % 94) + 33);
1245 *charset = Vcharset_ucs;
1249 else if (c <= MAX_CHAR_96x96)
1252 = CHARSET_BY_ATTRIBUTES (96, 2,
1253 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1254 CHARSET_LEFT_TO_RIGHT);
1255 if (!NILP (*charset))
1256 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1257 | (((c - MIN_CHAR_96x96) % 96) + 32);
1260 *charset = Vcharset_ucs;
1266 *charset = Vcharset_ucs;
1271 Lisp_Object Vdefault_coded_charset_priority_list;
1275 /************************************************************************/
1276 /* Basic charset Lisp functions */
1277 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when OBJECT satisfies CHARSETP
   and Qnil otherwise. */
1279 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1280 Return non-nil if OBJECT is a charset.
1284 return CHARSETP (object) ? Qt : Qnil;
/* Lisp function `find-charset'.  A charset object is returned as is;
   anything else must be a symbol (CHECK_SYMBOL) and is looked up in
   Vcharset_hash_table, with nil as the result for unknown names. */
1287 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1288 Retrieve the charset of the given name.
1289 If CHARSET-OR-NAME is a charset object, it is simply returned.
1290 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1291 nil is returned. Otherwise the associated charset object is returned.
1295 if (CHARSETP (charset_or_name))
1296 return charset_or_name;
1298 CHECK_SYMBOL (charset_or_name);
1299 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp function `get-charset'.  Delegates to Ffind_charset() and raises
   the "No such charset" error, with NAME as its datum, for unknown
   names. */
1302 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1303 Retrieve the charset of the given name.
1304 Same as `find-charset' except an error is signalled if there is no such
1305 charset instead of returning nil.
1309 Lisp_Object charset = Ffind_charset (name);
1312 signal_simple_error ("No such charset", name);
1316 /* We store the charsets in hash tables with the names as the key and the
1317 actual charset object as the value. Occasionally we need to use them
1318 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash(): points at the list that is
   being accumulated. */
1319 struct charset_list_closure
1321 Lisp_Object *charset_list;
/* Hash-table mapping callback: pushes KEY (the name under which the
   charset is registered) onto the front of the list referenced by the
   closure.  VALUE, the charset object itself, is not used. */
1325 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1326 void *charset_list_closure)
1328 /* This function can GC */
1329 struct charset_list_closure *chcl =
1330 (struct charset_list_closure*) charset_list_closure;
1331 Lisp_Object *charset_list = chcl->charset_list;
1333 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp function `charset-list'.  Walks Vcharset_hash_table with
   add_charset_to_list_mapper(), collecting every key into a fresh list.
   The list is GC-protected while it is being built because the mapper
   conses. */
1337 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1338 Return a list of the names of all defined charsets.
1342 Lisp_Object charset_list = Qnil;
1343 struct gcpro gcpro1;
1344 struct charset_list_closure charset_list_closure;
1346 GCPRO1 (charset_list);
1347 charset_list_closure.charset_list = &charset_list;
1348 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1349 &charset_list_closure);
1352 return charset_list;
/* Lisp function `charset-name'.  CHARSET is resolved with
   Fget_charset(), so it may be a charset object or a charset name; the
   name stored in the resolved charset is returned. */
1355 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1356 Return the name of charset CHARSET.
1360 return XCHARSET_NAME (Fget_charset (charset));
1363 /* #### SJT Should generic properties be allowed? */
1364 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1365 Define a new character set.
1366 This function is for use with Mule support.
1367 NAME is a symbol, the name by which the character set is normally referred.
1368 DOC-STRING is a string describing the character set.
1369 PROPS is a property list, describing the specific nature of the
1370 character set. Recognized properties are:
1372 'short-name Short version of the charset name (ex: Latin-1)
1373 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1374 'registry A regular expression matching the font registry field for
1376 'dimension Number of octets used to index a character in this charset.
1377 Either 1 or 2. Defaults to 1.
1378 If UTF-2000 feature is enabled, 3 or 4 are also available.
1379 'columns Number of columns used to display a character in this charset.
1380 Only used in TTY mode. (Under X, the actual width of a
1381 character can be derived from the font used to display the
1382 characters.) If unspecified, defaults to the dimension
1383 (this is almost always the correct value).
1384 'chars Number of characters in each dimension (94 or 96).
1385 Defaults to 94. Note that if the dimension is 2, the
1386 character set thus described is 94x94 or 96x96.
1387 If UTF-2000 feature is enabled, 128 or 256 are also available.
1388 'final Final byte of ISO 2022 escape sequence. Must be
1389 supplied. Each combination of (DIMENSION, CHARS) defines a
1390 separate namespace for final bytes. Note that ISO
1391 2022 restricts the final byte to the range
1392 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1393 dimension == 2. Note also that final bytes in the range
1394 0x30 - 0x3F are reserved for user-defined (not official)
1396 'graphic 0 (use left half of font on output) or 1 (use right half
1397 of font on output). Defaults to 0. For example, for
1398 a font whose registry is ISO8859-1, the left half
1399 (octets 0x20 - 0x7F) is the `ascii' character set, while
1400 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1401 character set. With 'graphic set to 0, the octets
1402 will have their high bit cleared; with it set to 1,
1403 the octets will have their high bit set.
1404 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1406 'ccl-program A compiled CCL program used to convert a character in
1407 this charset into an index into the font. This is in
1408 addition to the 'graphic property. The CCL program
1409 is passed the octets of the character, with the high
1410 bit cleared and set depending upon whether the value
1411 of the 'graphic property is 0 or 1.
1412 'mother [UTF-2000 only] Base coded-charset.
1413 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1414 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1415 'code-offset [UTF-2000 only] Offset for a code-point of a base
1417 'conversion [UTF-2000 only] Conversion for a code-point of a base
1418 coded-charset (94x60 or 94x94x60).
1420 (name, doc_string, props))
1422 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1423 int direction = CHARSET_LEFT_TO_RIGHT;
1424 Lisp_Object registry = Qnil;
1425 Lisp_Object charset;
1426 Lisp_Object ccl_program = Qnil;
1427 Lisp_Object short_name = Qnil, long_name = Qnil;
1428 Lisp_Object mother = Qnil;
1429 int min_code = 0, max_code = 0, code_offset = 0;
1430 int byte_offset = -1;
1433 CHECK_SYMBOL (name);
1434 if (!NILP (doc_string))
1435 CHECK_STRING (doc_string);
1437 charset = Ffind_charset (name);
1438 if (!NILP (charset))
1439 signal_simple_error ("Cannot redefine existing charset", name);
1442 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1444 if (EQ (keyword, Qshort_name))
1446 CHECK_STRING (value);
1450 if (EQ (keyword, Qlong_name))
1452 CHECK_STRING (value);
1456 else if (EQ (keyword, Qdimension))
1459 dimension = XINT (value);
1460 if (dimension < 1 ||
1467 signal_simple_error ("Invalid value for 'dimension", value);
1470 else if (EQ (keyword, Qchars))
1473 chars = XINT (value);
1474 if (chars != 94 && chars != 96
1476 && chars != 128 && chars != 256
1479 signal_simple_error ("Invalid value for 'chars", value);
1482 else if (EQ (keyword, Qcolumns))
1485 columns = XINT (value);
1486 if (columns != 1 && columns != 2)
1487 signal_simple_error ("Invalid value for 'columns", value);
1490 else if (EQ (keyword, Qgraphic))
1493 graphic = XINT (value);
1501 signal_simple_error ("Invalid value for 'graphic", value);
1504 else if (EQ (keyword, Qregistry))
1506 CHECK_STRING (value);
1510 else if (EQ (keyword, Qdirection))
1512 if (EQ (value, Ql2r))
1513 direction = CHARSET_LEFT_TO_RIGHT;
1514 else if (EQ (value, Qr2l))
1515 direction = CHARSET_RIGHT_TO_LEFT;
1517 signal_simple_error ("Invalid value for 'direction", value);
1520 else if (EQ (keyword, Qfinal))
1522 CHECK_CHAR_COERCE_INT (value);
1523 final = XCHAR (value);
1524 if (final < '0' || final > '~')
1525 signal_simple_error ("Invalid value for 'final", value);
1529 else if (EQ (keyword, Qmother))
1531 mother = Fget_charset (value);
1534 else if (EQ (keyword, Qmin_code))
1537 min_code = XUINT (value);
1540 else if (EQ (keyword, Qmax_code))
1543 max_code = XUINT (value);
1546 else if (EQ (keyword, Qcode_offset))
1549 code_offset = XUINT (value);
1552 else if (EQ (keyword, Qconversion))
1554 if (EQ (value, Q94x60))
1555 conversion = CONVERSION_94x60;
1556 else if (EQ (value, Q94x94x60))
1557 conversion = CONVERSION_94x94x60;
1559 signal_simple_error ("Unrecognized conversion", value);
1563 else if (EQ (keyword, Qccl_program))
1565 struct ccl_program test_ccl;
1567 if (setup_ccl_program (&test_ccl, value) < 0)
1568 signal_simple_error ("Invalid value for 'ccl-program", value);
1569 ccl_program = value;
1573 signal_simple_error ("Unrecognized property", keyword);
1579 error ("'final must be specified");
1581 if (dimension == 2 && final > 0x5F)
1583 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1586 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1587 CHARSET_LEFT_TO_RIGHT)) ||
1588 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1589 CHARSET_RIGHT_TO_LEFT)))
1591 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1593 id = get_unallocated_leading_byte (dimension);
1595 if (NILP (doc_string))
1596 doc_string = build_string ("");
1598 if (NILP (registry))
1599 registry = build_string ("");
1601 if (NILP (short_name))
1602 XSETSTRING (short_name, XSYMBOL (name)->name);
1604 if (NILP (long_name))
1605 long_name = doc_string;
1608 columns = dimension;
1610 if (byte_offset < 0)
1614 else if (chars == 96)
1620 charset = make_charset (id, name, chars, dimension, columns, graphic,
1621 final, direction, short_name, long_name,
1622 doc_string, registry,
1623 Qnil, min_code, max_code, code_offset, byte_offset,
1624 mother, conversion);
1625 if (!NILP (ccl_program))
1626 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset, signals an error if it already has
   a reverse-direction charset or if NEW-NAME already names a charset
   (Ffind_charset returns non-nil), and then calls make_charset with
   CHARSET's chars, dimension, columns, graphic, final, names, doc string
   and registry, a leading byte obtained from
   get_unallocated_leading_byte, and the opposite direction.  Finally the
   two charsets are linked to each other through their
   reverse-direction-charset slots.

   NOTE(review): this listing appears to omit some lines (argument
   counts, braces, a few declarations, the return statement); confirm
   details against the complete file.  */
1630 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1632 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1633 NEW-NAME is the name of the new charset. Return the new charset.
1635 (charset, new_name))
1637 Lisp_Object new_charset = Qnil;
1638 int id, chars, dimension, columns, graphic, final;
1640 Lisp_Object registry, doc_string, short_name, long_name;
1643 charset = Fget_charset (charset);
1644 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1645 signal_simple_error ("Charset already has reverse-direction charset",
1648 CHECK_SYMBOL (new_name);
1649 if (!NILP (Ffind_charset (new_name)))
1650 signal_simple_error ("Cannot redefine existing charset", new_name);
1652 cs = XCHARSET (charset);
1654 chars = CHARSET_CHARS (cs);
1655 dimension = CHARSET_DIMENSION (cs);
1656 columns = CHARSET_COLUMNS (cs);
1657 id = get_unallocated_leading_byte (dimension);
1659 graphic = CHARSET_GRAPHIC (cs);
1660 final = CHARSET_FINAL (cs);
/* Flip the direction: start from right-to-left and switch to
   left-to-right when the source charset is itself right-to-left.  */
1661 direction = CHARSET_RIGHT_TO_LEFT;
1662 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1663 direction = CHARSET_LEFT_TO_RIGHT;
1664 doc_string = CHARSET_DOC_STRING (cs);
1665 short_name = CHARSET_SHORT_NAME (cs);
1666 long_name = CHARSET_LONG_NAME (cs);
1667 registry = CHARSET_REGISTRY (cs);
1669 new_charset = make_charset (id, new_name, chars, dimension, columns,
1670 graphic, final, direction, short_name, long_name,
1671 doc_string, registry,
/* NOTE(review): the CHARSET_* arguments and the "Qnil, 0, ..." line
   below look like two alternative argument tails, presumably selected
   by a preprocessor conditional that is not visible here -- confirm.  */
1673 CHARSET_DECODING_TABLE(cs),
1674 CHARSET_MIN_CODE(cs),
1675 CHARSET_MAX_CODE(cs),
1676 CHARSET_CODE_OFFSET(cs),
1677 CHARSET_BYTE_OFFSET(cs),
1679 CHARSET_CONVERSION (cs)
1681 Qnil, 0, 0, 0, 0, Qnil, 0
/* Cross-link the original charset and the new one.  */
1685 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1686 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.

   Checks that ALIAS is a symbol, resolves CHARSET with Fget_charset, and
   stores the resulting charset under ALIAS in Vcharset_hash_table via
   Fputhash, returning whatever Fputhash returns.  */
1691 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1692 Define symbol ALIAS as an alias for CHARSET.
1696 CHECK_SYMBOL (alias);
1697 charset = Fget_charset (charset);
1698 return Fputhash (alias, charset, Vcharset_hash_table);
1701 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset and returns the contents of its
   reverse-direction-charset slot.

   NOTE(review): syms_of_mule_charset shows no DEFSUBR for this function
   (only a commented-out `Freverse_direction_charset' entry), so it may
   not be compiled in or exported to Lisp -- confirm.  */
1703 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1705 Return the reverse-direction charset parallel to CHARSET, if any.
1706 This is the charset with the same properties (in particular, the same
1707 dimension, number of characters per dimension, and final byte) as
1708 CHARSET but whose characters are displayed in the opposite direction.
1712 charset = Fget_charset (charset);
1713 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Argument validation visible in this listing:
     - DIMENSION must be an integer in the range 1..2;
     - CHARS must be 94 or 96;
     - FINAL must lie between '0' and '~', and must not exceed 0x5F when
       DIMENSION is 2;
     - DIRECTION must be the symbol l2r, the symbol r2l, or nil.
   Violations are reported through signal_simple_error.

   The lookup itself uses CHARSET_BY_ATTRIBUTES; a left-to-right lookup,
   a right-to-left lookup and a lookup with the requested direction all
   appear below.  The visible return statement yields the charset's name
   (XCHARSET_NAME), not the charset object.

   NOTE(review): the conditions that choose between the three lookups,
   the extraction of CHARS and FINAL into `ch' and `fi', and the handling
   of a failed lookup are on lines missing from this listing -- confirm
   against the complete file.  */
1717 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1718 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1719 If DIRECTION is omitted, both directions will be checked (left-to-right
1720 will be returned if character sets exist for both directions).
1722 (dimension, chars, final, direction))
1724 int dm, ch, fi, di = -1;
1725 Lisp_Object obj = Qnil;
1727 CHECK_INT (dimension);
1728 dm = XINT (dimension);
1729 if (dm < 1 || dm > 2)
1730 signal_simple_error ("Invalid value for DIMENSION", dimension);
1734 if (ch != 94 && ch != 96)
1735 signal_simple_error ("Invalid value for CHARS", chars);
1737 CHECK_CHAR_COERCE_INT (final);
1739 if (fi < '0' || fi > '~')
1740 signal_simple_error ("Invalid value for FINAL", final);
1742 if (EQ (direction, Ql2r))
1743 di = CHARSET_LEFT_TO_RIGHT;
1744 else if (EQ (direction, Qr2l))
1745 di = CHARSET_RIGHT_TO_LEFT;
1746 else if (!NILP (direction))
1747 signal_simple_error ("Invalid value for DIRECTION", direction);
1749 if (dm == 2 && fi > 0x5F)
1751 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1755 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1757 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1760 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1763 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its short-name slot (XCHARSET_SHORT_NAME).  */
1767 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1768 Return short name of CHARSET.
1772 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its long-name slot (XCHARSET_LONG_NAME).  */
1775 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1776 Return long name of CHARSET.
1780 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its doc-string slot (XCHARSET_DOC_STRING).  */
1783 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1784 Return description of CHARSET.
1788 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension (XCHARSET_DIMENSION) boxed as a
   Lisp integer with make_int.  */
1791 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1792 Return dimension of CHARSET.
1796 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   Resolves CHARSET with Fget_charset, checks that PROP is a symbol, and
   dispatches on PROP through a chain of EQ tests:
     - name, short-name, long-name, doc-string, registry, ccl-program and
       mother return the corresponding slot as stored;
     - dimension, columns, graphic, chars, min-code and max-code are
       boxed with make_int;
     - final is returned as a character, or as nil when the stored final
       byte is 0;
     - direction is returned as the symbol l2r or r2l;
     - reverse-direction-charset returns the linked charset's name when
       the slot holds a charset, and the raw slot value otherwise.
   Any other PROP signals "Unrecognized charset property name".

   NOTE(review): no branch for the code-offset or conversion properties
   is visible here, although both symbols are defined in
   syms_of_mule_charset; some branches may also be conditionally
   compiled (preprocessor lines are missing from this listing) --
   confirm.  */
1799 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1800 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1801 Recognized properties are those listed in `make-charset', as well as
1802 'name and 'doc-string.
1808 charset = Fget_charset (charset);
1809 cs = XCHARSET (charset);
1811 CHECK_SYMBOL (prop);
1812 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1813 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1814 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1815 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1816 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1817 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1818 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1819 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1820 Qnil : make_char (CHARSET_FINAL (cs));
1821 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1822 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1823 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1824 if (EQ (prop, Qdirection))
1825 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1826 if (EQ (prop, Qreverse_direction_charset))
1828 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1829 /* #### Is this translation OK? If so, error checking sufficient? */
1830 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1833 if (EQ (prop, Qmother))
1834 return CHARSET_MOTHER (cs);
1835 if (EQ (prop, Qmin_code))
1836 return make_int (CHARSET_MIN_CODE (cs));
1837 if (EQ (prop, Qmax_code))
1838 return make_int (CHARSET_MAX_CODE (cs));
1840 signal_simple_error ("Unrecognized charset property name", prop);
1841 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its leading byte (XCHARSET_LEADING_BYTE) boxed as a Lisp
   integer with make_int.  */
1844 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1845 Return charset identification number of CHARSET.
1849 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1852 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.

   Resolves CHARSET with Fget_charset and validates CCL-PROGRAM by
   running setup_ccl_program on a scratch struct ccl_program; a negative
   result signals "Invalid ccl-program".  On success CCL-PROGRAM is
   stored in the charset's ccl-program slot.

   NOTE(review): whatever follows the store (e.g. the return statement)
   is on lines missing from this listing.  */
1855 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1856 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1858 (charset, ccl_program))
1860 struct ccl_program test_ccl;
1862 charset = Fget_charset (charset);
1863 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1864 signal_simple_error ("Invalid ccl-program", ccl_program);
1865 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look CHARSET up in
   that device's charset_font_cache and, when an entry exists, empty it
   with Fclrhash.  The cache entry itself stays in place; only its
   contents are cleared.  */
1870 invalidate_charset_font_caches (Lisp_Object charset)
1872 /* Invalidate font cache entries for charset on all devices. */
1873 Lisp_Object devcons, concons, hash_table;
1874 DEVICE_LOOP_NO_BREAK (devcons, concons)
1876 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is passed as the default so that a missing entry can be
   told apart from any stored value.  */
1877 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1878 if (!UNBOUNDP (hash_table))
1879 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.

   Resolves CHARSET with Fget_charset, checks that REGISTRY is a string,
   stores it in the charset's registry slot, calls
   invalidate_charset_font_caches for the charset, and reports a change
   of the default face's font property (Vdefault_face, Qfont, Qglobal)
   through face_property_was_changed.

   NOTE(review): the return statement is on a line missing from this
   listing.  */
1883 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1884 Set the 'registry property of CHARSET to REGISTRY.
1886 (charset, registry))
1888 charset = Fget_charset (charset);
1889 CHECK_STRING (registry);
1890 XCHARSET_REGISTRY (charset) = registry;
1891 invalidate_charset_font_caches (charset);
1892 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its decoding-table slot
   (XCHARSET_DECODING_TABLE).  */
1897 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1898 Return mapping-table of CHARSET.
1902 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   As far as this listing shows:
     - CHARSET is resolved with Fget_charset;
     - the charset's decoding table is assigned Qnil in two places;
     - a vector TABLE is checked with decoding_table_check_elements, and
       failures are reported as "Too big table", "Invalid element is
       found" or "Something wrong";
     - a TABLE of any other unaccepted type signals Qwrong_type_argument
       mentioning "vector-or-nil-p";
     - the table is then walked according to CHARSET_DIMENSION.  Elements
       (and, for nested vectors, the elements of each inner vector) are
       passed to Fput_char_attribute under the charset's name, with a
       code point built from the index plus CHARSET_BYTE_OFFSET; in the
       nested case the outer index is shifted left by 8 bits.

   NOTE(review): many lines of this function (conditions, case labels,
   braces, part of the code-point expression, the return) are missing
   from this listing, so the control flow described above is partly
   inferred -- confirm against the complete file.  */
1905 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1906 Set mapping-table of CHARSET to TABLE.
1910 struct Lisp_Charset *cs;
1914 charset = Fget_charset (charset);
1915 cs = XCHARSET (charset);
1919 CHARSET_DECODING_TABLE(cs) = Qnil;
1922 else if (VECTORP (table))
1924 int ccs_len = CHARSET_BYTE_SIZE (cs);
1925 int ret = decoding_table_check_elements (table,
1926 CHARSET_DIMENSION (cs),
1931 signal_simple_error ("Too big table", table);
1933 signal_simple_error ("Invalid element is found", table);
1935 signal_simple_error ("Something wrong", table);
1937 CHARSET_DECODING_TABLE(cs) = Qnil;
1940 signal_error (Qwrong_type_argument,
1941 list2 (build_translated_string ("vector-or-nil-p"),
1944 byte_offset = CHARSET_BYTE_OFFSET (cs);
1945 switch (CHARSET_DIMENSION (cs))
1948 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1950 Lisp_Object c = XVECTOR_DATA(table)[i];
1953 Fput_char_attribute (c, XCHARSET_NAME (charset),
1954 make_int (i + byte_offset));
1958 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1960 Lisp_Object v = XVECTOR_DATA(table)[i];
1966 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1968 Lisp_Object c = XVECTOR_DATA(v)[j];
1972 (c, XCHARSET_NAME (charset),
1973 make_int ( ( (i + byte_offset) << 8 )
1979 Fput_char_attribute (v, XCHARSET_NAME (charset),
1980 make_int (i + byte_offset));
1989 /************************************************************************/
1990 /* Lisp primitives for working with characters */
1991 /************************************************************************/
/* Lisp primitive `decode-char'.

   Resolves CHARSET with Fget_charset and converts the code point held
   in `c' with DECODE_CHAR when DEFINED_ONLY is nil, or with
   decode_defined_char otherwise.  A non-negative result is returned as
   a character (make_char); a negative result yields nil.

   NOTE(review): the extraction of CODE into `c' and the adjustment made
   when the charset's graphic value is 1 are on lines missing from this
   listing -- confirm.  */
1994 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1995 Make a character from CHARSET and code-point CODE.
1996 If DEFINED_ONLY is non-nil, builtin character is not returned.
1997 If corresponding character is not found, nil is returned.
1999 (charset, code, defined_only))
2003 charset = Fget_charset (charset);
2006 if (XCHARSET_GRAPHIC (charset) == 1)
2008 if (NILP (defined_only))
2009 c = DECODE_CHAR (charset, c);
2011 c = decode_defined_char (charset, c);
2012 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.

   Resolves CHARSET with Fget_charset.  When the charset is
   Vcharset_latin_viscii, the code is first decoded with Fdecode_char and
   the resulting character is queried with Fget_char_attribute under
   Vcharset_latin_viscii_lower and Vcharset_latin_viscii_upper; the
   working charset is replaced by one of those two.  The code is then
   passed to decode_builtin_char: a non-negative result is returned as a
   character, and otherwise the function falls back to
   Fdecode_char (charset, code, Qnil).

   NOTE(review): the tests that select the lower/upper charset, the
   extraction of CODE into `c', and the adjustment made when the graphic
   value is 1 are on lines missing from this listing -- confirm.  */
2015 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2016 Make a builtin character from CHARSET and code-point CODE.
2022 charset = Fget_charset (charset);
2024 if (EQ (charset, Vcharset_latin_viscii))
2026 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2032 (ret = Fget_char_attribute (chr,
2033 Vcharset_latin_viscii_lower,
2036 charset = Vcharset_latin_viscii_lower;
2040 (ret = Fget_char_attribute (chr,
2041 Vcharset_latin_viscii_upper,
2044 charset = Vcharset_latin_viscii_upper;
2051 if (XCHARSET_GRAPHIC (charset) == 1)
2054 c = decode_builtin_char (charset, c);
2055 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.

   Resolves CHARSET with Fget_charset and picks the inclusive octet range
   [lowlim, highlim]: 0..127 for the ascii charset, 0..31 for control-1,
   0..255 for 256-character charsets, 33..126 for 94-character charsets,
   and 32..127 otherwise.  An octet outside that range is reported with
   args_out_of_range_3.

   For a one-dimensional charset the result is MAKE_CHAR (charset, a1, 0),
   and the "second octet must be nil" error guards ARG2 (presumably when
   it is non-nil -- the test itself is not visible).  Otherwise ARG2 is
   masked to its low seven bits, range-checked, and the result is
   MAKE_CHAR (charset, a1, a2).

   NOTE(review): the lines that extract ARG1 into `a1' are missing from
   this listing, and the 256-character case may be conditionally
   compiled -- confirm.  */
2059 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2060 Make a character from CHARSET and octets ARG1 and ARG2.
2061 ARG2 is required only for characters from two-dimensional charsets.
2062 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2063 character s with caron.
2065 (charset, arg1, arg2))
2069 int lowlim, highlim;
2071 charset = Fget_charset (charset);
2072 cs = XCHARSET (charset);
2074 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2075 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2077 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2079 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2080 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2083 /* It is useful (and safe, according to Olivier Galibert) to strip
2084 the 8th bit off ARG1 and ARG2 because it allows programmers to
2085 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2086 Latin 2 code of the character. */
2094 if (a1 < lowlim || a1 > highlim)
2095 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2097 if (CHARSET_DIMENSION (cs) == 1)
2101 ("Charset is of dimension one; second octet must be nil", arg2);
2102 return make_char (MAKE_CHAR (charset, a1, 0));
2111 a2 = XINT (arg2) & 0x7f;
2113 if (a2 < lowlim || a2 > highlim)
2114 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2116 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': validates CHARACTER with
   CHECK_CHAR_COERCE_INT and returns the name (XCHARSET_NAME) of the
   charset that CHAR_CHARSET reports for it -- i.e. the charset's name,
   not the charset object.  */
2119 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2120 Return the character set of CHARACTER.
2124 CHECK_CHAR_COERCE_INT (character);
2126 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.

   Validates CHARACTER with CHECK_CHAR_COERCE_INT and splits it with
   BREAKUP_CHAR into a charset and two octets.  Returns the first octet
   when N is nil or zero, the second octet when N is 1, and signals
   "Octet number must be 0 or 1" for any other N.  */
2129 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2130 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2131 N defaults to 0 if omitted.
2135 Lisp_Object charset;
2138 CHECK_CHAR_COERCE_INT (character);
2140 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2142 if (NILP (n) || EQ (n, Qzero))
2143 return make_int (octet0);
2144 else if (EQ (n, make_int (1)))
2145 return make_int (octet1);
2147 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.

   Validates CHARACTER with CHECK_CHAR_COERCE_INT, resolves CHARSET with
   Fget_charset, and asks charset_code_point for the code point, passing
   a flag that is true when DEFINED_ONLY is non-nil.  A non-negative
   code point is returned as a Lisp integer.

   NOTE(review): the value returned for a negative code point is on a
   line missing from this listing -- confirm.  */
2151 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2152 Return code-point of CHARACTER in specified CHARSET.
2154 (character, charset, defined_only))
2158 CHECK_CHAR_COERCE_INT (character);
2159 charset = Fget_charset (charset);
2160 code_point = charset_code_point (charset, XCHAR (character),
2161 !NILP (defined_only));
2162 if (code_point >= 0)
2163 return make_int (code_point);
/* Lisp primitive `split-char'.

   Two ways of building the result list are visible below:
     - one obtains a code point with ENCODE_CHAR and conses its low
       8 bits onto the list inside a loop that runs while `dimension' is
       positive, then conses the charset's name on the front;
     - the other splits the character with BREAKUP_CHAR and builds
       (NAME C1 C2) for two-dimensional charsets or (NAME C1) otherwise.

   NOTE(review): these look like alternatives selected by a preprocessor
   conditional; the conditional, the loop's update of `code_point' and
   `dimension', the UNGCPRO and the return are on lines missing from
   this listing -- confirm against the complete file.  */
2169 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2170 Return list of charset and one or two position-codes of CHARACTER.
2174 /* This function can GC */
2175 struct gcpro gcpro1, gcpro2;
2176 Lisp_Object charset = Qnil;
2177 Lisp_Object rc = Qnil;
2185 GCPRO2 (charset, rc);
2186 CHECK_CHAR_COERCE_INT (character);
/* First visible variant: peel the code point apart 8 bits at a time.  */
2189 code_point = ENCODE_CHAR (XCHAR (character), charset);
2190 dimension = XCHARSET_DIMENSION (charset);
2191 while (dimension > 0)
2193 rc = Fcons (make_int (code_point & 255), rc);
2197 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Second visible variant: split into a charset and at most two octets.  */
2199 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2201 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2203 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2207 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2216 #ifdef ENABLE_COMPOSITE_CHARS
2217 /************************************************************************/
2218 /* composite character functions */
2219 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below builds a new character in Vcharset_composite at position
   (composite_char_row_next, composite_char_col_next), records it in
   both hash tables (char->string and string->char), and advances the
   column counter, wrapping to column 32 of the next row once the column
   reaches 128.  If composite_char_row_next is already >= 128 the
   function signals "No more composite chars available".

   NOTE(review): the test on the lookup result, the path taken for an
   existing entry, and the return statement are on lines missing from
   this listing -- confirm.  */
2222 lookup_composite_char (Bufbyte *str, int len)
2224 Lisp_Object lispstr = make_string (str, len);
2225 Lisp_Object ch = Fgethash (lispstr,
2226 Vcomposite_char_string2char_hash_table,
2232 if (composite_char_row_next >= 128)
2233 signal_simple_error ("No more composite chars available", lispstr);
2234 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2235 composite_char_col_next);
/* Register the mapping in both directions.  */
2236 Fputhash (make_char (emch), lispstr,
2237 Vcomposite_char_char2string_hash_table);
2238 Fputhash (lispstr, make_char (emch),
2239 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell, moving to the next row at column 128.  */
2240 composite_char_col_next++;
2241 if (composite_char_col_next >= 128)
2243 composite_char_col_next = 32;
2244 composite_char_row_next++;
/* Look the composite character CH up in
   Vcomposite_char_char2string_hash_table and assert that an entry
   exists (the lookup result must not be unbound).
   NOTE(review): the lookup default and the return statement are on
   lines missing from this listing -- presumably the string found is
   returned; confirm.  */
2253 composite_char_string (Emchar ch)
2255 Lisp_Object str = Fgethash (make_char (ch),
2256 Vcomposite_char_char2string_hash_table,
2258 assert (!UNBOUNDP (str));
/* Lisp primitive `make-composite-char' (declared with xxDEFUN).

   Checks that STRING is a string and returns, as a character, the
   result of lookup_composite_char applied to the string's data and
   length.  */
2262 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2263 Convert a string into a single composite character.
2264 The character is the result of overstriking all the characters in
2269 CHECK_STRING (string);
2270 return make_char (lookup_composite_char (XSTRING_DATA (string),
2271 XSTRING_LENGTH (string)));
/* Lisp primitive `composite-char-string' (declared with xxDEFUN).

   Signals "Must be composite char" unless the leading byte of the
   character (CHAR_LEADING_BYTE) is LEADING_BYTE_COMPOSITE, and otherwise
   returns composite_char_string for it.

   NOTE(review): the validation of CH and its conversion into `emch' are
   on lines missing from this listing -- confirm.  */
2274 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2275 Return a string of the characters comprising a composite character.
2283 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2284 signal_simple_error ("Must be composite char", ch);
2285 return composite_char_string (emch);
2287 #endif /* ENABLE_COMPOSITE_CHARS */
2290 /************************************************************************/
2291 /* initialization */
2292 /************************************************************************/
/* Symbol-level initialisation for this file.

   Initialises the lrecord implementation for `charset', lists the
   primitives of this file with DEFSUBR, and binds the C variables Q...
   to their Lisp symbol names with defsymbol: property names, direction
   and conversion values, and the names of the predefined charsets.

   NOTE(review): preprocessor lines are missing from this listing, so
   which DEFSUBR/defsymbol groups are conditionally compiled (e.g. the
   composite-char and UCS-related ones) cannot be seen here -- confirm.  */
2295 syms_of_mule_charset (void)
2297 INIT_LRECORD_IMPLEMENTATION (charset);
/* Primitives defined in this file.  */
2299 DEFSUBR (Fcharsetp);
2300 DEFSUBR (Ffind_charset);
2301 DEFSUBR (Fget_charset);
2302 DEFSUBR (Fcharset_list);
2303 DEFSUBR (Fcharset_name);
2304 DEFSUBR (Fmake_charset);
2305 DEFSUBR (Fmake_reverse_direction_charset);
2306 /* DEFSUBR (Freverse_direction_charset); */
2307 DEFSUBR (Fdefine_charset_alias);
2308 DEFSUBR (Fcharset_from_attributes);
2309 DEFSUBR (Fcharset_short_name);
2310 DEFSUBR (Fcharset_long_name);
2311 DEFSUBR (Fcharset_description);
2312 DEFSUBR (Fcharset_dimension);
2313 DEFSUBR (Fcharset_property);
2314 DEFSUBR (Fcharset_id);
2315 DEFSUBR (Fset_charset_ccl_program);
2316 DEFSUBR (Fset_charset_registry);
2318 DEFSUBR (Fcharset_mapping_table);
2319 DEFSUBR (Fset_charset_mapping_table);
2323 DEFSUBR (Fdecode_char);
2324 DEFSUBR (Fdecode_builtin_char);
2325 DEFSUBR (Fencode_char);
2327 DEFSUBR (Fmake_char);
2328 DEFSUBR (Fchar_charset);
2329 DEFSUBR (Fchar_octet);
2330 DEFSUBR (Fsplit_char);
2332 #ifdef ENABLE_COMPOSITE_CHARS
2333 DEFSUBR (Fmake_composite_char);
2334 DEFSUBR (Fcomposite_char_string);
/* Symbols used as property names and property values.  */
2337 defsymbol (&Qcharsetp, "charsetp");
2338 defsymbol (&Qregistry, "registry");
2339 defsymbol (&Qfinal, "final");
2340 defsymbol (&Qgraphic, "graphic");
2341 defsymbol (&Qdirection, "direction");
2342 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2343 defsymbol (&Qshort_name, "short-name");
2344 defsymbol (&Qlong_name, "long-name");
2346 defsymbol (&Qmother, "mother");
2347 defsymbol (&Qmin_code, "min-code");
2348 defsymbol (&Qmax_code, "max-code");
2349 defsymbol (&Qcode_offset, "code-offset");
2350 defsymbol (&Qconversion, "conversion");
2351 defsymbol (&Q94x60, "94x60");
2352 defsymbol (&Q94x94x60, "94x94x60");
2355 defsymbol (&Ql2r, "l2r");
2356 defsymbol (&Qr2l, "r2l");
2358 /* Charsets, compatible with FSF 20.3
2359 Naming convention is Script-Charset[-Edition] */
2360 defsymbol (&Qascii, "ascii");
2361 defsymbol (&Qcontrol_1, "control-1");
2362 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2363 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2364 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2365 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2366 defsymbol (&Qthai_tis620, "thai-tis620");
2367 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2368 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2369 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2370 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2371 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2372 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2373 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2374 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2375 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2376 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2377 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2378 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2379 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2380 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2381 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2382 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* NOTE(review): the remaining charset-name symbols presumably sit under
   preprocessor conditionals that are not visible here -- confirm.  */
2384 defsymbol (&Qucs, "ucs");
2385 defsymbol (&Qucs_bmp, "ucs-bmp");
2386 defsymbol (&Qucs_smp, "ucs-smp");
2387 defsymbol (&Qucs_sip, "ucs-sip");
2388 defsymbol (&Qucs_gb, "ucs-gb");
2389 defsymbol (&Qucs_cns, "ucs-cns");
2390 defsymbol (&Qucs_jis, "ucs-jis");
2391 defsymbol (&Qucs_ks, "ucs-ks");
2392 defsymbol (&Qlatin_viscii, "latin-viscii");
2393 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2394 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2395 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2396 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2397 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2398 defsymbol (&Qjis_x0208, "=jis-x0208");
2399 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2400 defsymbol (&Qchinese_big5, "chinese-big5");
2401 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2403 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2404 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2406 defsymbol (&Qcomposite, "composite");
/* Variable-level initialisation for this file.

   Allocates a zeroed struct charset_lookup in `chlook' and registers it
   with dump_add_root_struct_ptr, fills the leading-byte and attribute
   lookup tables with Qnil, seeds the next-allocated leading-byte
   counters, and declares the Lisp variables `leading-code-private-11'
   (DEFVAR_INT) and `default-coded-charset-priority-list' (DEFVAR_LISP).

   NOTE(review): local declarations, braces and preprocessor lines are
   missing from this listing -- confirm details against the complete
   file.  */
2410 vars_of_mule_charset (void)
2417 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2418 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2420 /* Table of charsets indexed by leading byte. */
2421 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2422 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): the next two loop nests both initialise
   charset_by_attributes, once with two index levels and once with
   three; presumably they are alternatives under a preprocessor
   conditional that is not visible here -- confirm.  */
2425 /* Table of charsets indexed by type/final-byte. */
2426 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2427 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2428 chlook->charset_by_attributes[i][j] = Qnil;
2430 /* Table of charsets indexed by type/final-byte/direction. */
2431 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2432 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2433 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2434 chlook->charset_by_attributes[i][j][k] = Qnil;
/* NOTE(review): a single counter versus separate 1-byte/2-byte counters
   again looks like two conditionally compiled alternatives -- confirm.  */
2438 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2440 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2441 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2445 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2446 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2447 Leading-code of private TYPE9N charset of column-width 1.
2449 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2453 Vdefault_coded_charset_priority_list = Qnil;
2454 DEFVAR_LISP ("default-coded-charset-priority-list",
2455 &Vdefault_coded_charset_priority_list /*
2456 Default order of preferred coded-character-sets.
2462 complex_vars_of_mule_charset (void)
2464 staticpro (&Vcharset_hash_table);
2465 Vcharset_hash_table =
2466 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2468 /* Predefined character sets. We store them into variables for
2472 staticpro (&Vcharset_ucs);
2474 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2475 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2476 build_string ("UCS"),
2477 build_string ("UCS"),
2478 build_string ("ISO/IEC 10646"),
2480 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2481 staticpro (&Vcharset_ucs_bmp);
2483 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2484 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2485 build_string ("BMP"),
2486 build_string ("UCS-BMP"),
2487 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2489 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2490 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2491 staticpro (&Vcharset_ucs_smp);
2493 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2494 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2495 build_string ("SMP"),
2496 build_string ("UCS-SMP"),
2497 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2498 build_string ("UCS00-1"),
2499 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2500 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2501 staticpro (&Vcharset_ucs_sip);
2503 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2504 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2505 build_string ("SIP"),
2506 build_string ("UCS-SIP"),
2507 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2508 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2509 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2510 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2511 staticpro (&Vcharset_ucs_gb);
2513 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2514 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2515 build_string ("UCS for GB"),
2516 build_string ("UCS for GB"),
2517 build_string ("ISO/IEC 10646 for GB"),
2519 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2520 staticpro (&Vcharset_ucs_cns);
2522 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2523 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2524 build_string ("UCS for CNS"),
2525 build_string ("UCS for CNS 11643"),
2526 build_string ("ISO/IEC 10646 for CNS 11643"),
2528 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2529 staticpro (&Vcharset_ucs_jis);
2531 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2532 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2533 build_string ("UCS for JIS"),
2534 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2536 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2538 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2539 staticpro (&Vcharset_ucs_ks);
2541 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2542 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2543 build_string ("UCS for KS"),
2544 build_string ("UCS for CCS defined by KS"),
2545 build_string ("ISO/IEC 10646 for Korean Standards"),
2547 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2549 # define MIN_CHAR_THAI 0
2550 # define MAX_CHAR_THAI 0
2551 /* # define MIN_CHAR_HEBREW 0 */
2552 /* # define MAX_CHAR_HEBREW 0 */
2553 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2554 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2556 staticpro (&Vcharset_ascii);
2558 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2559 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2560 build_string ("ASCII"),
2561 build_string ("ASCII)"),
2562 build_string ("ASCII (ISO646 IRV)"),
2563 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2564 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2565 staticpro (&Vcharset_control_1);
2566 Vcharset_control_1 =
2567 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2568 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("C1"),
2570 build_string ("Control characters"),
2571 build_string ("Control characters 128-191"),
2573 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2574 staticpro (&Vcharset_latin_iso8859_1);
2575 Vcharset_latin_iso8859_1 =
2576 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2577 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2578 build_string ("Latin-1"),
2579 build_string ("ISO8859-1 (Latin-1)"),
2580 build_string ("ISO8859-1 (Latin-1)"),
2581 build_string ("iso8859-1"),
2582 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2583 staticpro (&Vcharset_latin_iso8859_2);
2584 Vcharset_latin_iso8859_2 =
2585 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2586 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2587 build_string ("Latin-2"),
2588 build_string ("ISO8859-2 (Latin-2)"),
2589 build_string ("ISO8859-2 (Latin-2)"),
2590 build_string ("iso8859-2"),
2591 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2592 staticpro (&Vcharset_latin_iso8859_3);
2593 Vcharset_latin_iso8859_3 =
2594 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2595 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2596 build_string ("Latin-3"),
2597 build_string ("ISO8859-3 (Latin-3)"),
2598 build_string ("ISO8859-3 (Latin-3)"),
2599 build_string ("iso8859-3"),
2600 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2601 staticpro (&Vcharset_latin_iso8859_4);
2602 Vcharset_latin_iso8859_4 =
2603 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2604 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2605 build_string ("Latin-4"),
2606 build_string ("ISO8859-4 (Latin-4)"),
2607 build_string ("ISO8859-4 (Latin-4)"),
2608 build_string ("iso8859-4"),
2609 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2610 staticpro (&Vcharset_thai_tis620);
2611 Vcharset_thai_tis620 =
2612 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2613 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2614 build_string ("TIS620"),
2615 build_string ("TIS620 (Thai)"),
2616 build_string ("TIS620.2529 (Thai)"),
2617 build_string ("tis620"),
2618 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2619 staticpro (&Vcharset_greek_iso8859_7);
2620 Vcharset_greek_iso8859_7 =
2621 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2622 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2623 build_string ("ISO8859-7"),
2624 build_string ("ISO8859-7 (Greek)"),
2625 build_string ("ISO8859-7 (Greek)"),
2626 build_string ("iso8859-7"),
2627 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2628 staticpro (&Vcharset_arabic_iso8859_6);
2629 Vcharset_arabic_iso8859_6 =
2630 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2631 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2632 build_string ("ISO8859-6"),
2633 build_string ("ISO8859-6 (Arabic)"),
2634 build_string ("ISO8859-6 (Arabic)"),
2635 build_string ("iso8859-6"),
2636 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2637 staticpro (&Vcharset_hebrew_iso8859_8);
2638 Vcharset_hebrew_iso8859_8 =
2639 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2640 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2641 build_string ("ISO8859-8"),
2642 build_string ("ISO8859-8 (Hebrew)"),
2643 build_string ("ISO8859-8 (Hebrew)"),
2644 build_string ("iso8859-8"),
2646 0 /* MIN_CHAR_HEBREW */,
2647 0 /* MAX_CHAR_HEBREW */, 0, 32,
2648 Qnil, CONVERSION_IDENTICAL);
2649 staticpro (&Vcharset_katakana_jisx0201);
2650 Vcharset_katakana_jisx0201 =
2651 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2652 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2653 build_string ("JISX0201 Kana"),
2654 build_string ("JISX0201.1976 (Japanese Kana)"),
2655 build_string ("JISX0201.1976 Japanese Kana"),
2656 build_string ("jisx0201\\.1976"),
2657 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_latin_jisx0201);
2659 Vcharset_latin_jisx0201 =
2660 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2661 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("JISX0201 Roman"),
2663 build_string ("JISX0201.1976 (Japanese Roman)"),
2664 build_string ("JISX0201.1976 Japanese Roman"),
2665 build_string ("jisx0201\\.1976"),
2666 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_cyrillic_iso8859_5);
2668 Vcharset_cyrillic_iso8859_5 =
2669 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2670 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2671 build_string ("ISO8859-5"),
2672 build_string ("ISO8859-5 (Cyrillic)"),
2673 build_string ("ISO8859-5 (Cyrillic)"),
2674 build_string ("iso8859-5"),
2675 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_9);
2677 Vcharset_latin_iso8859_9 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2679 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-5"),
2681 build_string ("ISO8859-9 (Latin-5)"),
2682 build_string ("ISO8859-9 (Latin-5)"),
2683 build_string ("iso8859-9"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2686 staticpro (&Vcharset_jis_x0208);
2687 Vcharset_jis_x0208 =
2688 make_charset (LEADING_BYTE_JIS_X0208,
2690 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2691 build_string ("JIS X0208"),
2692 build_string ("JIS X0208 Common"),
2693 build_string ("JIS X0208 Common part"),
2694 build_string ("jisx0208\\.1990"),
2696 MIN_CHAR_JIS_X0208_1990,
2697 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2698 Qnil, CONVERSION_94x94);
2700 staticpro (&Vcharset_japanese_jisx0208_1978);
2701 Vcharset_japanese_jisx0208_1978 =
2702 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2703 Qjapanese_jisx0208_1978, 94, 2,
2704 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2705 build_string ("JIS X0208:1978"),
2706 build_string ("JIS X0208:1978 (Japanese)"),
2708 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2709 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2716 CONVERSION_IDENTICAL);
/* Set up the GB2312 (simplified Chinese) charset and register its
   variable with staticpro.  NOTE(review): the four strings look like
   short name, long name, description and registry pattern, in that
   order; confirm against make_charset, which is defined outside this
   chunk. */
2717 staticpro (&Vcharset_chinese_gb2312);
2718 Vcharset_chinese_gb2312 =
2719 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2720 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2721 build_string ("GB2312"),
2722 build_string ("GB2312"),
2723 build_string ("GB2312 Chinese simplified"),
2724 build_string ("gb2312"),
2725 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Set up the GB 12345-1990 charset and register its variable with
   staticpro.  NOTE(review): the four strings look like short name,
   long name, description and registry pattern, in that order; confirm
   against make_charset, which is defined outside this chunk. */
2726 staticpro (&Vcharset_chinese_gb12345);
2727 Vcharset_chinese_gb12345 =
2728 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2729 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2730 build_string ("G1"),
2731 build_string ("GB 12345"),
2732 build_string ("GB 12345-1990"),
2733 build_string ("GB12345\\(\\.1990\\)?-0"),
2734 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2735 staticpro (&Vcharset_japanese_jisx0208);
2736 Vcharset_japanese_jisx0208 =
2737 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2738 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2739 build_string ("JISX0208"),
2740 build_string ("JIS X0208:1983 (Japanese)"),
2741 build_string ("JIS X0208:1983 Japanese Kanji"),
2742 build_string ("jisx0208\\.1983"),
2749 CONVERSION_IDENTICAL);
2751 staticpro (&Vcharset_japanese_jisx0208_1990);
2752 Vcharset_japanese_jisx0208_1990 =
2753 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2754 Qjapanese_jisx0208_1990, 94, 2,
2755 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JISX0208-1990"),
2757 build_string ("JIS X0208:1990 (Japanese)"),
2758 build_string ("JIS X0208:1990 Japanese Kanji"),
2759 build_string ("jisx0208\\.1990"),
2761 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2762 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2763 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2764 Vcharset_jis_x0208 /* Qnil */,
2765 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* Set up the KSC5601 (Korean Hangul and Hanja) charset and register
   its variable with staticpro.  NOTE(review): the four strings look
   like short name, long name, description and registry pattern, in
   that order; confirm against make_charset, which is defined outside
   this chunk. */
2767 staticpro (&Vcharset_korean_ksc5601);
2768 Vcharset_korean_ksc5601 =
2769 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2770 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2771 build_string ("KSC5601"),
2772 build_string ("KSC5601 (Korean)"),
2773 build_string ("KSC5601 Korean Hangul and Hanja"),
2774 build_string ("ksc5601"),
2775 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2776 staticpro (&Vcharset_japanese_jisx0212);
2777 Vcharset_japanese_jisx0212 =
2778 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2779 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2780 build_string ("JISX0212"),
2781 build_string ("JISX0212 (Japanese)"),
2782 build_string ("JISX0212 Japanese Supplement"),
2783 build_string ("jisx0212"),
2784 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2786 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2787 staticpro (&Vcharset_chinese_cns11643_1);
2788 Vcharset_chinese_cns11643_1 =
2789 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2790 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2791 build_string ("CNS11643-1"),
2792 build_string ("CNS11643-1 (Chinese traditional)"),
2794 ("CNS 11643 Plane 1 Chinese traditional"),
2795 build_string (CHINESE_CNS_PLANE_RE("1")),
2796 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2797 staticpro (&Vcharset_chinese_cns11643_2);
2798 Vcharset_chinese_cns11643_2 =
2799 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2800 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2801 build_string ("CNS11643-2"),
2802 build_string ("CNS11643-2 (Chinese traditional)"),
2804 ("CNS 11643 Plane 2 Chinese traditional"),
2805 build_string (CHINESE_CNS_PLANE_RE("2")),
2806 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Set up the Vietnamese TCVN 5712 (VSCII-2) charset and register its
   variable with staticpro.  The year in the description string agrees
   with the ".1993" suffix accepted by the registry pattern below.
   NOTE(review): the four strings look like short name, long name,
   description and registry pattern, in that order; confirm against
   make_charset, which is defined outside this chunk. */
2808 staticpro (&Vcharset_latin_tcvn5712);
2809 Vcharset_latin_tcvn5712 =
2810 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2811 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2812 build_string ("TCVN 5712"),
2813 build_string ("TCVN 5712 (VSCII-2)"),
2814 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
2815 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2816 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_latin_viscii_lower);
2818 Vcharset_latin_viscii_lower =
2819 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2820 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2821 build_string ("VISCII lower"),
2822 build_string ("VISCII lower (Vietnamese)"),
2823 build_string ("VISCII lower (Vietnamese)"),
2824 build_string ("MULEVISCII-LOWER"),
2825 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_latin_viscii_upper);
2827 Vcharset_latin_viscii_upper =
2828 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2829 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("VISCII upper"),
2831 build_string ("VISCII upper (Vietnamese)"),
2832 build_string ("VISCII upper (Vietnamese)"),
2833 build_string ("MULEVISCII-UPPER"),
2834 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2835 staticpro (&Vcharset_latin_viscii);
2836 Vcharset_latin_viscii =
2837 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2838 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2839 build_string ("VISCII"),
2840 build_string ("VISCII 1.1 (Vietnamese)"),
2841 build_string ("VISCII 1.1 (Vietnamese)"),
2842 build_string ("VISCII1\\.1"),
2843 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2844 staticpro (&Vcharset_chinese_big5);
2845 Vcharset_chinese_big5 =
2846 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2847 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2848 build_string ("Big5"),
2849 build_string ("Big5"),
2850 build_string ("Big5 Chinese traditional"),
2851 build_string ("big5-0"),
2853 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2854 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2856 staticpro (&Vcharset_ideograph_daikanwa_2);
2857 Vcharset_ideograph_daikanwa_2 =
2858 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
2859 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2860 build_string ("Daikanwa Rev."),
2861 build_string ("Morohashi's Daikanwa Rev."),
2863 ("Daikanwa dictionary (revised version)"),
2864 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
2865 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2867 staticpro (&Vcharset_ethiopic_ucs);
2868 Vcharset_ethiopic_ucs =
2869 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2870 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2871 build_string ("Ethiopic (UCS)"),
2872 build_string ("Ethiopic (UCS)"),
2873 build_string ("Ethiopic of UCS"),
2874 build_string ("Ethiopic-Unicode"),
2875 Qnil, 0x1200, 0x137F, 0, 0,
2876 Qnil, CONVERSION_IDENTICAL);
2878 staticpro (&Vcharset_chinese_big5_1);
2879 Vcharset_chinese_big5_1 =
2880 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2881 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2882 build_string ("Big5"),
2883 build_string ("Big5 (Level-1)"),
2885 ("Big5 Level-1 Chinese traditional"),
2886 build_string ("big5"),
2887 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2888 staticpro (&Vcharset_chinese_big5_2);
2889 Vcharset_chinese_big5_2 =
2890 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2891 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2892 build_string ("Big5"),
2893 build_string ("Big5 (Level-2)"),
2895 ("Big5 Level-2 Chinese traditional"),
2896 build_string ("big5"),
2897 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2899 #ifdef ENABLE_COMPOSITE_CHARS
2900 /* #### For simplicity, we put composite chars into a 96x96 charset.
2901 This is going to lead to problems because you can run out of
2902 room, esp. as we don't yet recycle numbers. */
2903 staticpro (&Vcharset_composite);
2904 Vcharset_composite =
2905 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2906 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2907 build_string ("Composite"),
2908 build_string ("Composite characters"),
2909 build_string ("Composite characters"),
2912 /* #### not dumped properly */
2913 composite_char_row_next = 32;
2914 composite_char_col_next = 32;
2916 Vcomposite_char_string2char_hash_table =
2917 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2918 Vcomposite_char_char2string_hash_table =
2919 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2920 staticpro (&Vcomposite_char_string2char_hash_table);
2921 staticpro (&Vcomposite_char_char2string_hash_table);
2922 #endif /* ENABLE_COMPOSITE_CHARS */