1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_jis_x0208;
77 Lisp_Object Vcharset_chinese_big5;
78 Lisp_Object Vcharset_ethiopic_ucs;
80 Lisp_Object Vcharset_chinese_big5_1;
81 Lisp_Object Vcharset_chinese_big5_2;
83 #ifdef ENABLE_COMPOSITE_CHARS
84 Lisp_Object Vcharset_composite;
86 /* Hash tables for composite chars. One maps string representing
87 composed chars to their equivalent chars; one goes the
89 Lisp_Object Vcomposite_char_char2string_hash_table;
90 Lisp_Object Vcomposite_char_string2char_hash_table;
92 static int composite_char_row_next;
93 static int composite_char_col_next;
95 #endif /* ENABLE_COMPOSITE_CHARS */
97 struct charset_lookup *chlook;
99 static const struct lrecord_description charset_lookup_description_1[] = {
100 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
102 NUM_LEADING_BYTES+4*128
109 static const struct struct_description charset_lookup_description = {
110 sizeof (struct charset_lookup),
111 charset_lookup_description_1
115 /* Table of number of bytes in the string representation of a character
116 indexed by the first byte of that representation.
118 rep_bytes_by_first_byte(c) is more efficient than the equivalent
119 canonical computation:
121 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
123 const Bytecount rep_bytes_by_first_byte[0xA0] =
124 { /* 0x00 - 0x7f are for straight ASCII */
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 /* 0x80 - 0x8f are for Dimension-1 official charsets */
135 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 /* 0x90 - 0x9d are for Dimension-2 official charsets */
140 /* 0x9e is for Dimension-1 private charsets */
141 /* 0x9f is for Dimension-2 private charsets */
142 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
148 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
150 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
154 if (XVECTOR_LENGTH (v) > ccs_len)
157 for (i = 0; i < XVECTOR_LENGTH (v); i++)
159 Lisp_Object c = XVECTOR_DATA(v)[i];
161 if (!NILP (c) && !CHARP (c))
165 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
177 put_char_ccs_code_point (Lisp_Object character,
178 Lisp_Object ccs, Lisp_Object value)
180 if (!EQ (XCHARSET_NAME (ccs), Qucs)
182 || (XCHAR (character) != XINT (value)))
184 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
188 { /* obsolete representation: value must be a list of bytes */
189 Lisp_Object ret = Fcar (value);
193 signal_simple_error ("Invalid value for coded-charset", value);
194 code_point = XINT (ret);
195 if (XCHARSET_GRAPHIC (ccs) == 1)
203 signal_simple_error ("Invalid value for coded-charset",
207 signal_simple_error ("Invalid value for coded-charset",
210 if (XCHARSET_GRAPHIC (ccs) == 1)
212 code_point = (code_point << 8) | j;
215 value = make_int (code_point);
217 else if (INTP (value))
219 code_point = XINT (value);
220 if (XCHARSET_GRAPHIC (ccs) == 1)
222 code_point &= 0x7F7F7F7F;
223 value = make_int (code_point);
227 signal_simple_error ("Invalid value for coded-charset", value);
231 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
234 decoding_table_remove_char (ccs, XINT (cpos));
237 decoding_table_put_char (ccs, code_point, character);
243 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
245 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
246 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
248 if (VECTORP (decoding_table))
250 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
254 decoding_table_remove_char (ccs, XINT (cpos));
257 if (CHAR_TABLEP (encoding_table))
259 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
267 int leading_code_private_11;
270 Lisp_Object Qcharsetp;
272 /* Qdoc_string, Qdimension, Qchars defined in general.c */
273 Lisp_Object Qregistry, Qfinal, Qgraphic;
274 Lisp_Object Qdirection;
275 Lisp_Object Qreverse_direction_charset;
276 Lisp_Object Qleading_byte;
277 Lisp_Object Qshort_name, Qlong_name;
279 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
280 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
297 Qjapanese_jisx0208_1978,
301 Qjapanese_jisx0208_1990,
316 Qvietnamese_viscii_lower,
317 Qvietnamese_viscii_upper,
326 Lisp_Object Ql2r, Qr2l;
328 Lisp_Object Vcharset_hash_table;
330 /* Composite characters are characters constructed by overstriking two
331 or more regular characters.
333 1) The old Mule implementation involves storing composite characters
334 in a buffer as a tag followed by all of the actual characters
335 used to make up the composite character. I think this is a bad
336 idea; it greatly complicates code that wants to handle strings
337 one character at a time because it has to deal with the possibility
338 of great big ungainly characters. It's much more reasonable to
339 simply store an index into a table of composite characters.
341 2) The current implementation only allows for 16,384 separate
342 composite characters over the lifetime of the XEmacs process.
343 This could become a potential problem if the user
344 edited lots of different files that use composite characters.
345 Due to FSF bogosity, increasing the number of allowable
346 composite characters under Mule would decrease the number
347 of possible faces that can exist. Mule already has shrunk
348 this to 2048, and further shrinkage would become uncomfortable.
349 No such problems exist in XEmacs.
351 Composite characters could be represented as 0x80 C1 C2 C3,
352 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
353 for slightly under 2^20 (one million) composite characters
354 over the XEmacs process lifetime, and you only need to
355 increase the size of a Mule character from 19 to 21 bits.
356 Or you could use 0x80 C1 C2 C3 C4, allowing for about
357 85 million (slightly over 2^26) composite characters. */
360 /************************************************************************/
361 /* Basic Emchar functions */
362 /************************************************************************/
364 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
365 string in STR. Returns the number of bytes stored.
366 Do not call this directly. Use the macro set_charptr_emchar() instead.
370 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
385 else if ( c <= 0x7ff )
387 *p++ = (c >> 6) | 0xc0;
388 *p++ = (c & 0x3f) | 0x80;
390 else if ( c <= 0xffff )
392 *p++ = (c >> 12) | 0xe0;
393 *p++ = ((c >> 6) & 0x3f) | 0x80;
394 *p++ = (c & 0x3f) | 0x80;
396 else if ( c <= 0x1fffff )
398 *p++ = (c >> 18) | 0xf0;
399 *p++ = ((c >> 12) & 0x3f) | 0x80;
400 *p++ = ((c >> 6) & 0x3f) | 0x80;
401 *p++ = (c & 0x3f) | 0x80;
403 else if ( c <= 0x3ffffff )
405 *p++ = (c >> 24) | 0xf8;
406 *p++ = ((c >> 18) & 0x3f) | 0x80;
407 *p++ = ((c >> 12) & 0x3f) | 0x80;
408 *p++ = ((c >> 6) & 0x3f) | 0x80;
409 *p++ = (c & 0x3f) | 0x80;
413 *p++ = (c >> 30) | 0xfc;
414 *p++ = ((c >> 24) & 0x3f) | 0x80;
415 *p++ = ((c >> 18) & 0x3f) | 0x80;
416 *p++ = ((c >> 12) & 0x3f) | 0x80;
417 *p++ = ((c >> 6) & 0x3f) | 0x80;
418 *p++ = (c & 0x3f) | 0x80;
421 BREAKUP_CHAR (c, charset, c1, c2);
422 lb = CHAR_LEADING_BYTE (c);
423 if (LEADING_BYTE_PRIVATE_P (lb))
424 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
426 if (EQ (charset, Vcharset_control_1))
435 /* Return the first character from a Mule-encoded string in STR,
436 assuming it's non-ASCII. Do not call this directly.
437 Use the macro charptr_emchar() instead. */
440 non_ascii_charptr_emchar (const Bufbyte *str)
453 else if ( b >= 0xf8 )
458 else if ( b >= 0xf0 )
463 else if ( b >= 0xe0 )
468 else if ( b >= 0xc0 )
478 for( ; len > 0; len-- )
481 ch = ( ch << 6 ) | ( b & 0x3f );
485 Bufbyte i0 = *str, i1, i2 = 0;
488 if (i0 == LEADING_BYTE_CONTROL_1)
489 return (Emchar) (*++str - 0x20);
491 if (LEADING_BYTE_PREFIX_P (i0))
496 charset = CHARSET_BY_LEADING_BYTE (i0);
497 if (XCHARSET_DIMENSION (charset) == 2)
500 return MAKE_CHAR (charset, i1, i2);
504 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
505 Do not call this directly. Use the macro valid_char_p() instead. */
509 non_ascii_valid_char_p (Emchar ch)
513 /* Must have only lowest 19 bits set */
517 f1 = CHAR_FIELD1 (ch);
518 f2 = CHAR_FIELD2 (ch);
519 f3 = CHAR_FIELD3 (ch);
525 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
526 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
527 f2 > MAX_CHAR_FIELD2_PRIVATE)
532 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
533 f2 <= MAX_CHAR_FIELD2_PRIVATE))
537 NOTE: This takes advantage of the fact that
538 FIELD2_TO_OFFICIAL_LEADING_BYTE and
539 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
541 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
542 if (EQ (charset, Qnil))
544 return (XCHARSET_CHARS (charset) == 96);
550 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
551 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
552 f1 > MAX_CHAR_FIELD1_PRIVATE)
554 if (f2 < 0x20 || f3 < 0x20)
557 #ifdef ENABLE_COMPOSITE_CHARS
558 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
560 if (UNBOUNDP (Fgethash (make_int (ch),
561 Vcomposite_char_char2string_hash_table,
566 #endif /* ENABLE_COMPOSITE_CHARS */
568 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
569 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
572 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
574 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
577 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
579 if (EQ (charset, Qnil))
581 return (XCHARSET_CHARS (charset) == 96);
587 /************************************************************************/
588 /* Basic string functions */
589 /************************************************************************/
591 /* Copy the character pointed to by SRC into DST. Do not call this
592 directly. Use the macro charptr_copy_char() instead.
593 Return the number of bytes copied. */
596 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
598 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
600 for (i = bytes; i; i--, dst++, src++)
606 /************************************************************************/
607 /* streams of Emchars */
608 /************************************************************************/
610 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
611 The functions below are not meant to be called directly; use
612 the macros in insdel.h. */
615 Lstream_get_emchar_1 (Lstream *stream, int ch)
617 Bufbyte str[MAX_EMCHAR_LEN];
618 Bufbyte *strptr = str;
621 str[0] = (Bufbyte) ch;
623 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
625 int c = Lstream_getc (stream);
626 bufpos_checking_assert (c >= 0);
627 *++strptr = (Bufbyte) c;
629 return charptr_emchar (str);
633 Lstream_fput_emchar (Lstream *stream, Emchar ch)
635 Bufbyte str[MAX_EMCHAR_LEN];
636 Bytecount len = set_charptr_emchar (str, ch);
637 return Lstream_write (stream, str, len);
641 Lstream_funget_emchar (Lstream *stream, Emchar ch)
643 Bufbyte str[MAX_EMCHAR_LEN];
644 Bytecount len = set_charptr_emchar (str, ch);
645 Lstream_unread (stream, str, len);
649 /************************************************************************/
651 /************************************************************************/
654 mark_charset (Lisp_Object obj)
656 Lisp_Charset *cs = XCHARSET (obj);
658 mark_object (cs->short_name);
659 mark_object (cs->long_name);
660 mark_object (cs->doc_string);
661 mark_object (cs->registry);
662 mark_object (cs->ccl_program);
664 mark_object (cs->decoding_table);
665 mark_object (cs->mother);
671 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
673 Lisp_Charset *cs = XCHARSET (obj);
677 error ("printing unreadable object #<charset %s 0x%x>",
678 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
681 write_c_string ("#<charset ", printcharfun);
682 print_internal (CHARSET_NAME (cs), printcharfun, 0);
683 write_c_string (" ", printcharfun);
684 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
689 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
691 CHARSET_DIMENSION (cs),
692 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
693 CHARSET_COLUMNS (cs),
694 CHARSET_GRAPHIC (cs),
696 write_c_string (buf, printcharfun);
697 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
698 sprintf (buf, " 0x%x>", cs->header.uid);
699 write_c_string (buf, printcharfun);
702 static const struct lrecord_description charset_description[] = {
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
717 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
718 mark_charset, print_charset, 0, 0, 0,
722 /* Make a new charset. */
723 /* #### SJT Should generic properties be allowed? */
725 make_charset (Charset_ID id, Lisp_Object name,
726 unsigned short chars, unsigned char dimension,
727 unsigned char columns, unsigned char graphic,
728 Bufbyte final, unsigned char direction, Lisp_Object short_name,
729 Lisp_Object long_name, Lisp_Object doc,
731 Lisp_Object decoding_table,
732 Emchar min_code, Emchar max_code,
733 Emchar code_offset, unsigned char byte_offset,
734 Lisp_Object mother, unsigned char conversion)
737 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
741 XSETCHARSET (obj, cs);
743 CHARSET_ID (cs) = id;
744 CHARSET_NAME (cs) = name;
745 CHARSET_SHORT_NAME (cs) = short_name;
746 CHARSET_LONG_NAME (cs) = long_name;
747 CHARSET_CHARS (cs) = chars;
748 CHARSET_DIMENSION (cs) = dimension;
749 CHARSET_DIRECTION (cs) = direction;
750 CHARSET_COLUMNS (cs) = columns;
751 CHARSET_GRAPHIC (cs) = graphic;
752 CHARSET_FINAL (cs) = final;
753 CHARSET_DOC_STRING (cs) = doc;
754 CHARSET_REGISTRY (cs) = reg;
755 CHARSET_CCL_PROGRAM (cs) = Qnil;
756 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
758 CHARSET_DECODING_TABLE(cs) = Qunbound;
759 CHARSET_MIN_CODE (cs) = min_code;
760 CHARSET_MAX_CODE (cs) = max_code;
761 CHARSET_CODE_OFFSET (cs) = code_offset;
762 CHARSET_BYTE_OFFSET (cs) = byte_offset;
763 CHARSET_MOTHER (cs) = mother;
764 CHARSET_CONVERSION (cs) = conversion;
768 if (id == LEADING_BYTE_ASCII)
769 CHARSET_REP_BYTES (cs) = 1;
771 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
778 /* some charsets do not have final characters. This includes
779 ASCII, Control-1, Composite, and the two faux private
781 unsigned char iso2022_type
782 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
784 if (code_offset == 0)
786 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
787 chlook->charset_by_attributes[iso2022_type][final] = obj;
791 (chlook->charset_by_attributes[iso2022_type][final][direction]));
792 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
796 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
797 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
799 /* Some charsets are "faux" and don't have names or really exist at
800 all except in the leading-byte table. */
802 Fputhash (name, obj, Vcharset_hash_table);
807 get_unallocated_leading_byte (int dimension)
812 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
815 lb = chlook->next_allocated_leading_byte++;
819 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
822 lb = chlook->next_allocated_1_byte_leading_byte++;
826 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
829 lb = chlook->next_allocated_2_byte_leading_byte++;
835 ("No more character sets free for this dimension",
836 make_int (dimension));
842 /* Number of Big5 characters which have the same code in 1st byte. */
844 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
847 decode_defined_char (Lisp_Object ccs, int code_point)
849 int dim = XCHARSET_DIMENSION (ccs);
850 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
858 = get_ccs_octet_table (decoding_table, ccs,
859 (code_point >> (dim * 8)) & 255);
861 if (CHARP (decoding_table))
862 return XCHAR (decoding_table);
863 #ifdef HAVE_CHISE_CLIENT
864 if (EQ (decoding_table, Qunloaded))
866 char_id = load_char_decoding_entry_maybe (ccs, code_point);
871 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
873 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
875 if ( EQ (mother, Vcharset_ucs) )
876 return DECODE_CHAR (mother, code_point);
878 return decode_defined_char (mother, code_point);
880 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
883 = (((code_point >> 8) & 0x7F) - 33) * 94
884 + (( code_point & 0x7F) - 33);
885 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
886 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
888 b2 += b2 < 0x3F ? 0x40 : 0x62;
889 return decode_defined_char (mother, (b1 << 8) | b2);
891 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
894 = (((code_point >> 8) & 0x7F) - 33) * 94
895 + (( code_point & 0x7F) - 33)
896 + BIG5_SAME_ROW * (0xC9 - 0xA1);
897 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
898 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
900 b2 += b2 < 0x3F ? 0x40 : 0x62;
901 return decode_defined_char (mother, (b1 << 8) | b2);
908 decode_builtin_char (Lisp_Object charset, int code_point)
910 Lisp_Object mother = XCHARSET_MOTHER (charset);
913 if ( XCHARSET_MAX_CODE (charset) > 0 )
915 if ( CHARSETP (mother) )
917 int code = code_point;
919 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
921 int row = code_point >> 8;
922 int cell = code_point & 255;
926 else if (row < 16 + 32 + 30)
927 code = (row - (16 + 32)) * 94 + cell - 33;
928 else if (row < 18 + 32 + 30)
930 else if (row < 18 + 32 + 60)
931 code = (row - (18 + 32)) * 94 + cell - 33;
933 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
935 int plane = code_point >> 16;
936 int row = (code_point >> 8) & 255;
937 int cell = code_point & 255;
941 else if (row < 16 + 32 + 30)
943 = (plane - 33) * 94 * 60
944 + (row - (16 + 32)) * 94
946 else if (row < 18 + 32 + 30)
948 else if (row < 18 + 32 + 60)
950 = (plane - 33) * 94 * 60
951 + (row - (18 + 32)) * 94
954 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
957 = (((code_point >> 8) & 0x7F) - 33) * 94
958 + (( code_point & 0x7F) - 33);
959 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
960 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
962 b2 += b2 < 0x3F ? 0x40 : 0x62;
963 code = (b1 << 8) | b2;
965 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
968 = (((code_point >> 8) & 0x7F) - 33) * 94
969 + (( code_point & 0x7F) - 33)
970 + BIG5_SAME_ROW * (0xC9 - 0xA1);
971 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
972 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
974 b2 += b2 < 0x3F ? 0x40 : 0x62;
975 code = (b1 << 8) | b2;
978 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
983 = (XCHARSET_DIMENSION (charset) == 1
985 code_point - XCHARSET_BYTE_OFFSET (charset)
987 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
988 * XCHARSET_CHARS (charset)
989 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
990 + XCHARSET_CODE_OFFSET (charset);
991 if ((cid < XCHARSET_MIN_CODE (charset))
992 || (XCHARSET_MAX_CODE (charset) < cid))
997 else if ((final = XCHARSET_FINAL (charset)) >= '0')
999 if (XCHARSET_DIMENSION (charset) == 1)
1001 switch (XCHARSET_CHARS (charset))
1005 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1008 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1016 switch (XCHARSET_CHARS (charset))
1019 return MIN_CHAR_94x94
1020 + (final - '0') * 94 * 94
1021 + (((code_point >> 8) & 0x7F) - 33) * 94
1022 + ((code_point & 0x7F) - 33);
1024 return MIN_CHAR_96x96
1025 + (final - '0') * 96 * 96
1026 + (((code_point >> 8) & 0x7F) - 32) * 96
1027 + ((code_point & 0x7F) - 32);
1039 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1041 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1044 if ( CHAR_TABLEP (encoding_table)
1045 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1050 Lisp_Object mother = XCHARSET_MOTHER (charset);
1051 int min = XCHARSET_MIN_CODE (charset);
1052 int max = XCHARSET_MAX_CODE (charset);
1055 if ( CHARSETP (mother) )
1057 if (XCHARSET_FINAL (charset) >= '0')
1058 code = charset_code_point (mother, ch, 1);
1060 code = charset_code_point (mother, ch, defined_only);
1062 else if (defined_only)
1064 else if ( ((max == 0) && CHARSETP (mother)
1065 && (XCHARSET_FINAL (charset) == 0))
1066 || ((min <= ch) && (ch <= max)) )
1068 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1069 || ((min <= code) && (code <= max)) )
1071 int d = code - XCHARSET_CODE_OFFSET (charset);
1073 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1075 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1082 int cell = d % 94 + 33;
1088 return (row << 8) | cell;
1090 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1092 int B1 = d >> 8, B2 = d & 0xFF;
1094 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1095 - (B2 < 0x7F ? 0x40 : 0x62);
1099 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1102 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1104 int B1 = d >> 8, B2 = d & 0xFF;
1106 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1107 - (B2 < 0x7F ? 0x40 : 0x62);
1111 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1112 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1115 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1116 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1117 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1118 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1121 int plane = d / (94 * 60) + 33;
1122 int row = (d % (94 * 60)) / 94;
1123 int cell = d % 94 + 33;
1129 return (plane << 16) | (row << 8) | cell;
1131 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1133 ( (d / (94 * 94) + 33) << 16)
1134 | ((d / 94 % 94 + 33) << 8)
1136 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1138 ( (d / (96 * 96) + 32) << 16)
1139 | ((d / 96 % 96 + 32) << 8)
1141 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1143 ( (d / (94 * 94 * 94) + 33) << 24)
1144 | ((d / (94 * 94) % 94 + 33) << 16)
1145 | ((d / 94 % 94 + 33) << 8)
1147 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1149 ( (d / (96 * 96 * 96) + 32) << 24)
1150 | ((d / (96 * 96) % 96 + 32) << 16)
1151 | ((d / 96 % 96 + 32) << 8)
1155 printf ("Unknown CCS-conversion %d is specified!",
1156 XCHARSET_CONVERSION (charset));
1160 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1161 ( XCHARSET_MIN_CODE (charset) == 0 )
1163 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1164 (XCHARSET_CODE_OFFSET (charset)
1165 == XCHARSET_MIN_CODE (charset))
1170 if (XCHARSET_DIMENSION (charset) == 1)
1172 if (XCHARSET_CHARS (charset) == 94)
1174 if (((d = ch - (MIN_CHAR_94
1175 + (XCHARSET_FINAL (charset) - '0') * 94))
1180 else if (XCHARSET_CHARS (charset) == 96)
1182 if (((d = ch - (MIN_CHAR_96
1183 + (XCHARSET_FINAL (charset) - '0') * 96))
1191 else if (XCHARSET_DIMENSION (charset) == 2)
1193 if (XCHARSET_CHARS (charset) == 94)
1195 if (((d = ch - (MIN_CHAR_94x94
1197 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1200 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1202 else if (XCHARSET_CHARS (charset) == 96)
1204 if (((d = ch - (MIN_CHAR_96x96
1206 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1209 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1220 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1222 if (c <= MAX_CHAR_BASIC_LATIN)
1224 *charset = Vcharset_ascii;
1229 *charset = Vcharset_control_1;
1234 *charset = Vcharset_latin_iso8859_1;
1238 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1240 *charset = Vcharset_hebrew_iso8859_8;
1241 return c - MIN_CHAR_HEBREW + 0x20;
1244 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1246 *charset = Vcharset_thai_tis620;
1247 return c - MIN_CHAR_THAI + 0x20;
1250 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1251 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1253 return list2 (Vcharset_katakana_jisx0201,
1254 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1257 else if (c <= MAX_CHAR_BMP)
1259 *charset = Vcharset_ucs_bmp;
1262 else if (c <= MAX_CHAR_SMP)
1264 *charset = Vcharset_ucs_smp;
1265 return c - MIN_CHAR_SMP;
1267 else if (c <= MAX_CHAR_SIP)
1269 *charset = Vcharset_ucs_sip;
1270 return c - MIN_CHAR_SIP;
1272 else if (c < MIN_CHAR_94)
1274 *charset = Vcharset_ucs;
1277 else if (c <= MAX_CHAR_94)
1279 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1280 ((c - MIN_CHAR_94) / 94) + '0',
1281 CHARSET_LEFT_TO_RIGHT);
1282 if (!NILP (*charset))
1283 return ((c - MIN_CHAR_94) % 94) + 33;
1286 *charset = Vcharset_ucs;
1290 else if (c <= MAX_CHAR_96)
1292 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1293 ((c - MIN_CHAR_96) / 96) + '0',
1294 CHARSET_LEFT_TO_RIGHT);
1295 if (!NILP (*charset))
1296 return ((c - MIN_CHAR_96) % 96) + 32;
1299 *charset = Vcharset_ucs;
1303 else if (c <= MAX_CHAR_94x94)
1306 = CHARSET_BY_ATTRIBUTES (94, 2,
1307 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1308 CHARSET_LEFT_TO_RIGHT);
1309 if (!NILP (*charset))
1310 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1311 | (((c - MIN_CHAR_94x94) % 94) + 33);
1314 *charset = Vcharset_ucs;
1318 else if (c <= MAX_CHAR_96x96)
1321 = CHARSET_BY_ATTRIBUTES (96, 2,
1322 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1323 CHARSET_LEFT_TO_RIGHT);
1324 if (!NILP (*charset))
1325 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1326 | (((c - MIN_CHAR_96x96) % 96) + 32);
1329 *charset = Vcharset_ucs;
1335 *charset = Vcharset_ucs;
1340 Lisp_Object Vdefault_coded_charset_priority_list;
1344 /************************************************************************/
1345 /* Basic charset Lisp functions */
1346 /************************************************************************/
1348 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1349 Return non-nil if OBJECT is a charset.
1353 return CHARSETP (object) ? Qt : Qnil;
1356 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1357 Retrieve the charset of the given name.
1358 If CHARSET-OR-NAME is a charset object, it is simply returned.
1359 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1360 nil is returned. Otherwise the associated charset object is returned.
1364 if (CHARSETP (charset_or_name))
1365 return charset_or_name;
1367 CHECK_SYMBOL (charset_or_name);
1368 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1371 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1372 Retrieve the charset of the given name.
1373 Same as `find-charset' except an error is signalled if there is no such
1374 charset instead of returning nil.
1378 Lisp_Object charset = Ffind_charset (name);
1381 signal_simple_error ("No such charset", name);
1385 /* We store the charsets in hash tables with the names as the key and the
1386 actual charset object as the value. Occasionally we need to use them
1387 in a list format. These routines provide us with that. */
1388 struct charset_list_closure
1390 Lisp_Object *charset_list;
1394 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1395 void *charset_list_closure)
1397 /* This function can GC */
1398 struct charset_list_closure *chcl =
1399 (struct charset_list_closure*) charset_list_closure;
1400 Lisp_Object *charset_list = chcl->charset_list;
1402 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1406 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1407 Return a list of the names of all defined charsets.
1411 Lisp_Object charset_list = Qnil;
1412 struct gcpro gcpro1;
1413 struct charset_list_closure charset_list_closure;
1415 GCPRO1 (charset_list);
1416 charset_list_closure.charset_list = &charset_list;
1417 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1418 &charset_list_closure);
1421 return charset_list;
1424 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1425 Return the name of charset CHARSET.
1429 return XCHARSET_NAME (Fget_charset (charset));
1432 /* #### SJT Should generic properties be allowed? */
1433 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1434 Define a new character set.
1435 This function is for use with Mule support.
1436 NAME is a symbol, the name by which the character set is normally referred.
1437 DOC-STRING is a string describing the character set.
1438 PROPS is a property list, describing the specific nature of the
1439 character set. Recognized properties are:
1441 'short-name Short version of the charset name (ex: Latin-1)
1442 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1443 'registry A regular expression matching the font registry field for
1445 'dimension Number of octets used to index a character in this charset.
1446 Either 1 or 2. Defaults to 1.
1447 If UTF-2000 feature is enabled, 3 or 4 are also available.
1448 'columns Number of columns used to display a character in this charset.
1449 Only used in TTY mode. (Under X, the actual width of a
1450 character can be derived from the font used to display the
1451 characters.) If unspecified, defaults to the dimension
1452 (this is almost always the correct value).
1453 'chars Number of characters in each dimension (94 or 96).
1454 Defaults to 94. Note that if the dimension is 2, the
1455 character set thus described is 94x94 or 96x96.
1456 If UTF-2000 feature is enabled, 128 or 256 are also available.
1457 'final Final byte of ISO 2022 escape sequence. Must be
1458 supplied. Each combination of (DIMENSION, CHARS) defines a
1459 separate namespace for final bytes. Note that ISO
1460 2022 restricts the final byte to the range
1461 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1462 dimension == 2. Note also that final bytes in the range
1463 0x30 - 0x3F are reserved for user-defined (not official)
1465 'graphic 0 (use left half of font on output) or 1 (use right half
1466 of font on output). Defaults to 0. For example, for
1467 a font whose registry is ISO8859-1, the left half
1468 (octets 0x20 - 0x7F) is the `ascii' character set, while
1469 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1470 character set. With 'graphic set to 0, the octets
1471 will have their high bit cleared; with it set to 1,
1472 the octets will have their high bit set.
1473 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1475 'ccl-program A compiled CCL program used to convert a character in
1476 this charset into an index into the font. This is in
1477 addition to the 'graphic property. The CCL program
1478 is passed the octets of the character, with the high
1479 bit cleared and set depending upon whether the value
1480 of the 'graphic property is 0 or 1.
1481 'mother [UTF-2000 only] Base coded-charset.
1482 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1483 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1484 'code-offset [UTF-2000 only] Offset for a code-point of a base
1486 'conversion [UTF-2000 only] Conversion for a code-point of a base
1487 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1489 (name, doc_string, props))
1491 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1492 int direction = CHARSET_LEFT_TO_RIGHT;
1493 Lisp_Object registry = Qnil;
1494 Lisp_Object charset;
1495 Lisp_Object ccl_program = Qnil;
1496 Lisp_Object short_name = Qnil, long_name = Qnil;
1497 Lisp_Object mother = Qnil;
1498 int min_code = 0, max_code = 0, code_offset = 0;
1499 int byte_offset = -1;
1502 CHECK_SYMBOL (name);
1503 if (!NILP (doc_string))
1504 CHECK_STRING (doc_string);
1506 charset = Ffind_charset (name);
1507 if (!NILP (charset))
1508 signal_simple_error ("Cannot redefine existing charset", name);
1511 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1513 if (EQ (keyword, Qshort_name))
1515 CHECK_STRING (value);
1519 else if (EQ (keyword, Qlong_name))
1521 CHECK_STRING (value);
1525 else if (EQ (keyword, Qdimension))
1528 dimension = XINT (value);
1529 if (dimension < 1 ||
1536 signal_simple_error ("Invalid value for 'dimension", value);
1539 else if (EQ (keyword, Qchars))
1542 chars = XINT (value);
1543 if (chars != 94 && chars != 96
1545 && chars != 128 && chars != 256
1548 signal_simple_error ("Invalid value for 'chars", value);
1551 else if (EQ (keyword, Qcolumns))
1554 columns = XINT (value);
1555 if (columns != 1 && columns != 2)
1556 signal_simple_error ("Invalid value for 'columns", value);
1559 else if (EQ (keyword, Qgraphic))
1562 graphic = XINT (value);
1570 signal_simple_error ("Invalid value for 'graphic", value);
1573 else if (EQ (keyword, Qregistry))
1575 CHECK_STRING (value);
1579 else if (EQ (keyword, Qdirection))
1581 if (EQ (value, Ql2r))
1582 direction = CHARSET_LEFT_TO_RIGHT;
1583 else if (EQ (value, Qr2l))
1584 direction = CHARSET_RIGHT_TO_LEFT;
1586 signal_simple_error ("Invalid value for 'direction", value);
1589 else if (EQ (keyword, Qfinal))
1591 CHECK_CHAR_COERCE_INT (value);
1592 final = XCHAR (value);
1593 if (final < '0' || final > '~')
1594 signal_simple_error ("Invalid value for 'final", value);
1598 else if (EQ (keyword, Qmother))
1600 mother = Fget_charset (value);
1603 else if (EQ (keyword, Qmin_code))
1606 min_code = XUINT (value);
1609 else if (EQ (keyword, Qmax_code))
1612 max_code = XUINT (value);
1615 else if (EQ (keyword, Qcode_offset))
1618 code_offset = XUINT (value);
1621 else if (EQ (keyword, Qconversion))
1623 if (EQ (value, Q94x60))
1624 conversion = CONVERSION_94x60;
1625 else if (EQ (value, Q94x94x60))
1626 conversion = CONVERSION_94x94x60;
1627 else if (EQ (value, Qbig5_1))
1628 conversion = CONVERSION_BIG5_1;
1629 else if (EQ (value, Qbig5_2))
1630 conversion = CONVERSION_BIG5_2;
1632 signal_simple_error ("Unrecognized conversion", value);
1636 else if (EQ (keyword, Qccl_program))
1638 struct ccl_program test_ccl;
1640 if (setup_ccl_program (&test_ccl, value) < 0)
1641 signal_simple_error ("Invalid value for 'ccl-program", value);
1642 ccl_program = value;
1646 signal_simple_error ("Unrecognized property", keyword);
1652 error ("'final must be specified");
1654 if (dimension == 2 && final > 0x5F)
1656 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1659 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1660 CHARSET_LEFT_TO_RIGHT)) ||
1661 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1662 CHARSET_RIGHT_TO_LEFT)))
1664 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1666 id = get_unallocated_leading_byte (dimension);
1668 if (NILP (doc_string))
1669 doc_string = build_string ("");
1671 if (NILP (registry))
1672 registry = build_string ("");
1674 if (NILP (short_name))
1675 XSETSTRING (short_name, XSYMBOL (name)->name);
1677 if (NILP (long_name))
1678 long_name = doc_string;
1681 columns = dimension;
1683 if (byte_offset < 0)
1687 else if (chars == 96)
1693 charset = make_charset (id, name, chars, dimension, columns, graphic,
1694 final, direction, short_name, long_name,
1695 doc_string, registry,
1696 Qnil, min_code, max_code, code_offset, byte_offset,
1697 mother, conversion);
1698 if (!NILP (ccl_program))
1699 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1703 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1705 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1706 NEW-NAME is the name of the new charset. Return the new charset.
1708 (charset, new_name))
1710 Lisp_Object new_charset = Qnil;
1711 int id, chars, dimension, columns, graphic, final;
1713 Lisp_Object registry, doc_string, short_name, long_name;
1716 charset = Fget_charset (charset);
1717 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1718 signal_simple_error ("Charset already has reverse-direction charset",
1721 CHECK_SYMBOL (new_name);
1722 if (!NILP (Ffind_charset (new_name)))
1723 signal_simple_error ("Cannot redefine existing charset", new_name);
1725 cs = XCHARSET (charset);
1727 chars = CHARSET_CHARS (cs);
1728 dimension = CHARSET_DIMENSION (cs);
1729 columns = CHARSET_COLUMNS (cs);
1730 id = get_unallocated_leading_byte (dimension);
1732 graphic = CHARSET_GRAPHIC (cs);
1733 final = CHARSET_FINAL (cs);
1734 direction = CHARSET_RIGHT_TO_LEFT;
1735 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1736 direction = CHARSET_LEFT_TO_RIGHT;
1737 doc_string = CHARSET_DOC_STRING (cs);
1738 short_name = CHARSET_SHORT_NAME (cs);
1739 long_name = CHARSET_LONG_NAME (cs);
1740 registry = CHARSET_REGISTRY (cs);
1742 new_charset = make_charset (id, new_name, chars, dimension, columns,
1743 graphic, final, direction, short_name, long_name,
1744 doc_string, registry,
1746 CHARSET_DECODING_TABLE(cs),
1747 CHARSET_MIN_CODE(cs),
1748 CHARSET_MAX_CODE(cs),
1749 CHARSET_CODE_OFFSET(cs),
1750 CHARSET_BYTE_OFFSET(cs),
1752 CHARSET_CONVERSION (cs)
1754 Qnil, 0, 0, 0, 0, Qnil, 0
1758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1759 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1764 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1765 Define symbol ALIAS as an alias for CHARSET.
1769 CHECK_SYMBOL (alias);
1770 charset = Fget_charset (charset);
1771 return Fputhash (alias, charset, Vcharset_hash_table);
1774 /* #### Reverse direction charsets not yet implemented. */
1776 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1778 Return the reverse-direction charset parallel to CHARSET, if any.
1779 This is the charset with the same properties (in particular, the same
1780 dimension, number of characters per dimension, and final byte) as
1781 CHARSET but whose characters are displayed in the opposite direction.
1785 charset = Fget_charset (charset);
1786 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1790 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1791 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1792 If DIRECTION is omitted, both directions will be checked (left-to-right
1793 will be returned if character sets exist for both directions).
1795 (dimension, chars, final, direction))
1797 int dm, ch, fi, di = -1;
1798 Lisp_Object obj = Qnil;
1800 CHECK_INT (dimension);
1801 dm = XINT (dimension);
1802 if (dm < 1 || dm > 2)
1803 signal_simple_error ("Invalid value for DIMENSION", dimension);
1807 if (ch != 94 && ch != 96)
1808 signal_simple_error ("Invalid value for CHARS", chars);
1810 CHECK_CHAR_COERCE_INT (final);
1812 if (fi < '0' || fi > '~')
1813 signal_simple_error ("Invalid value for FINAL", final);
1815 if (EQ (direction, Ql2r))
1816 di = CHARSET_LEFT_TO_RIGHT;
1817 else if (EQ (direction, Qr2l))
1818 di = CHARSET_RIGHT_TO_LEFT;
1819 else if (!NILP (direction))
1820 signal_simple_error ("Invalid value for DIRECTION", direction);
1822 if (dm == 2 && fi > 0x5F)
1824 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1828 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1830 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1833 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1836 return XCHARSET_NAME (obj);
1840 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1841 Return short name of CHARSET.
1845 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1848 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1849 Return long name of CHARSET.
1853 return XCHARSET_LONG_NAME (Fget_charset (charset));
1856 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1857 Return description of CHARSET.
1861 return XCHARSET_DOC_STRING (Fget_charset (charset));
1864 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1865 Return dimension of CHARSET.
1869 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1872 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1873 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1874 Recognized properties are those listed in `make-charset', as well as
1875 'name and 'doc-string.
1881 charset = Fget_charset (charset);
1882 cs = XCHARSET (charset);
1884 CHECK_SYMBOL (prop);
1885 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1886 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1887 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1888 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1889 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1890 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1891 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1892 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1893 Qnil : make_char (CHARSET_FINAL (cs));
1894 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1895 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1896 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1897 if (EQ (prop, Qdirection))
1898 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1899 if (EQ (prop, Qreverse_direction_charset))
1901 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1902 /* #### Is this translation OK? If so, error checking sufficient? */
1903 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1906 if (EQ (prop, Qmother))
1907 return CHARSET_MOTHER (cs);
1908 if (EQ (prop, Qmin_code))
1909 return make_int (CHARSET_MIN_CODE (cs));
1910 if (EQ (prop, Qmax_code))
1911 return make_int (CHARSET_MAX_CODE (cs));
1913 signal_simple_error ("Unrecognized charset property name", prop);
1914 return Qnil; /* not reached */
1917 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1918 Return charset identification number of CHARSET.
1922 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1925 /* #### We need to figure out which properties we really want to
1928 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1929 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1931 (charset, ccl_program))
1933 struct ccl_program test_ccl;
1935 charset = Fget_charset (charset);
1936 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1937 signal_simple_error ("Invalid ccl-program", ccl_program);
1938 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1943 invalidate_charset_font_caches (Lisp_Object charset)
1945 /* Invalidate font cache entries for charset on all devices. */
1946 Lisp_Object devcons, concons, hash_table;
1947 DEVICE_LOOP_NO_BREAK (devcons, concons)
1949 struct device *d = XDEVICE (XCAR (devcons));
1950 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1951 if (!UNBOUNDP (hash_table))
1952 Fclrhash (hash_table);
1956 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1957 Set the 'registry property of CHARSET to REGISTRY.
1959 (charset, registry))
1961 charset = Fget_charset (charset);
1962 CHECK_STRING (registry);
1963 XCHARSET_REGISTRY (charset) = registry;
1964 invalidate_charset_font_caches (charset);
1965 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1970 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1971 Return mapping-table of CHARSET.
1975 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1978 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1979 Set mapping-table of CHARSET to TABLE.
1983 struct Lisp_Charset *cs;
1987 charset = Fget_charset (charset);
1988 cs = XCHARSET (charset);
1992 CHARSET_DECODING_TABLE(cs) = Qnil;
1995 else if (VECTORP (table))
1997 int ccs_len = CHARSET_BYTE_SIZE (cs);
1998 int ret = decoding_table_check_elements (table,
1999 CHARSET_DIMENSION (cs),
2004 signal_simple_error ("Too big table", table);
2006 signal_simple_error ("Invalid element is found", table);
2008 signal_simple_error ("Something wrong", table);
2010 CHARSET_DECODING_TABLE(cs) = Qnil;
2013 signal_error (Qwrong_type_argument,
2014 list2 (build_translated_string ("vector-or-nil-p"),
2017 byte_offset = CHARSET_BYTE_OFFSET (cs);
2018 switch (CHARSET_DIMENSION (cs))
2021 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2023 Lisp_Object c = XVECTOR_DATA(table)[i];
2026 Fput_char_attribute (c, XCHARSET_NAME (charset),
2027 make_int (i + byte_offset));
2031 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2033 Lisp_Object v = XVECTOR_DATA(table)[i];
2039 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2041 Lisp_Object c = XVECTOR_DATA(v)[j];
2045 (c, XCHARSET_NAME (charset),
2046 make_int ( ( (i + byte_offset) << 8 )
2052 Fput_char_attribute (v, XCHARSET_NAME (charset),
2053 make_int (i + byte_offset));
2060 #ifdef HAVE_CHISE_CLIENT
2061 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2062 Save mapping-table of CHARSET.
2066 struct Lisp_Charset *cs;
2067 int byte_min, byte_max;
2069 Lisp_Object db_file;
2071 charset = Fget_charset (charset);
2072 cs = XCHARSET (charset);
2074 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2075 Qsystem_char_id, 1);
2076 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2078 byte_min = CHARSET_BYTE_OFFSET (cs);
2079 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2080 switch (CHARSET_DIMENSION (cs))
2084 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2087 for (cell = byte_min; cell < byte_max; cell++)
2089 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2092 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2093 Fprin1_to_string (c, Qnil),
2100 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2103 for (row = byte_min; row < byte_max; row++)
2105 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2108 for (cell = byte_min; cell < byte_max; cell++)
2110 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2113 Fput_database (Fprin1_to_string (make_int ((row << 8)
2116 Fprin1_to_string (c, Qnil),
2124 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2127 for (plane = byte_min; plane < byte_max; plane++)
2130 = get_ccs_octet_table (table_p, charset, plane);
2133 for (row = byte_min; row < byte_max; row++)
2136 = get_ccs_octet_table (table_r, charset, row);
2139 for (cell = byte_min; cell < byte_max; cell++)
2141 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2145 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2149 Fprin1_to_string (c, Qnil),
2158 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2161 for (group = byte_min; group < byte_max; group++)
2164 = get_ccs_octet_table (table_g, charset, group);
2167 for (plane = byte_min; plane < byte_max; plane++)
2170 = get_ccs_octet_table (table_p, charset, plane);
2173 for (row = byte_min; row < byte_max; row++)
2176 = get_ccs_octet_table (table_r, charset, row);
2179 for (cell = byte_min; cell < byte_max; cell++)
2182 = get_ccs_octet_table (table_c, charset, cell);
2185 Fput_database (Fprin1_to_string
2186 (make_int (( group << 24)
2191 Fprin1_to_string (c, Qnil),
2199 return Fclose_database (db);
2202 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2203 Reset mapping-table of CCS with database file.
2207 Lisp_Object db_file;
2209 ccs = Fget_charset (ccs);
2210 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2211 Qsystem_char_id, 0);
2213 if (!NILP (Ffile_exists_p (db_file)))
2215 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2222 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2226 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2229 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2233 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2240 decoding_table_put_char (ccs, code_point, ret);
2241 Fclose_database (db);
2245 decoding_table_put_char (ccs, code_point, Qnil);
2246 Fclose_database (db);
2250 #endif /* HAVE_CHISE_CLIENT */
2251 #endif /* UTF2000 */
2254 /************************************************************************/
2255 /* Lisp primitives for working with characters */
2256 /************************************************************************/
2259 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2260 Make a character from CHARSET and code-point CODE.
2261 If DEFINED_ONLY is non-nil, builtin character is not returned.
2262 If corresponding character is not found, nil is returned.
2264 (charset, code, defined_only))
2268 charset = Fget_charset (charset);
2271 if (XCHARSET_GRAPHIC (charset) == 1)
2273 if (NILP (defined_only))
2274 c = DECODE_CHAR (charset, c);
2276 c = decode_defined_char (charset, c);
2277 return c >= 0 ? make_char (c) : Qnil;
2280 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2281 Make a builtin character from CHARSET and code-point CODE.
2287 charset = Fget_charset (charset);
2289 if (EQ (charset, Vcharset_latin_viscii))
2291 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2297 (ret = Fget_char_attribute (chr,
2298 Vcharset_latin_viscii_lower,
2301 charset = Vcharset_latin_viscii_lower;
2305 (ret = Fget_char_attribute (chr,
2306 Vcharset_latin_viscii_upper,
2309 charset = Vcharset_latin_viscii_upper;
2316 if (XCHARSET_GRAPHIC (charset) == 1)
2319 c = decode_builtin_char (charset, c);
2320 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2324 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2325 Make a character from CHARSET and octets ARG1 and ARG2.
2326 ARG2 is required only for characters from two-dimensional charsets.
2327 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2328 character s with caron.
2330 (charset, arg1, arg2))
2334 int lowlim, highlim;
2336 charset = Fget_charset (charset);
2337 cs = XCHARSET (charset);
2339 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2340 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2342 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2344 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2345 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2348 /* It is useful (and safe, according to Olivier Galibert) to strip
2349 the 8th bit off ARG1 and ARG2 because it allows programmers to
2350 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2351 Latin 2 code of the character. */
2359 if (a1 < lowlim || a1 > highlim)
2360 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2362 if (CHARSET_DIMENSION (cs) == 1)
2366 ("Charset is of dimension one; second octet must be nil", arg2);
2367 return make_char (MAKE_CHAR (charset, a1, 0));
2376 a2 = XINT (arg2) & 0x7f;
2378 if (a2 < lowlim || a2 > highlim)
2379 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2381 return make_char (MAKE_CHAR (charset, a1, a2));
2384 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2385 Return the character set of CHARACTER.
2389 CHECK_CHAR_COERCE_INT (character);
2391 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2394 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2395 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2396 N defaults to 0 if omitted.
2400 Lisp_Object charset;
2403 CHECK_CHAR_COERCE_INT (character);
2405 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2407 if (NILP (n) || EQ (n, Qzero))
2408 return make_int (octet0);
2409 else if (EQ (n, make_int (1)))
2410 return make_int (octet1);
2412 signal_simple_error ("Octet number must be 0 or 1", n);
2416 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2417 Return code-point of CHARACTER in specified CHARSET.
2419 (character, charset, defined_only))
2423 CHECK_CHAR_COERCE_INT (character);
2424 charset = Fget_charset (charset);
2425 code_point = charset_code_point (charset, XCHAR (character),
2426 !NILP (defined_only));
2427 if (code_point >= 0)
2428 return make_int (code_point);
2434 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2435 Return list of charset and one or two position-codes of CHARACTER.
2439 /* This function can GC */
2440 struct gcpro gcpro1, gcpro2;
2441 Lisp_Object charset = Qnil;
2442 Lisp_Object rc = Qnil;
2450 GCPRO2 (charset, rc);
2451 CHECK_CHAR_COERCE_INT (character);
2454 code_point = ENCODE_CHAR (XCHAR (character), charset);
2455 dimension = XCHARSET_DIMENSION (charset);
2456 while (dimension > 0)
2458 rc = Fcons (make_int (code_point & 255), rc);
2462 rc = Fcons (XCHARSET_NAME (charset), rc);
2464 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2466 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2468 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2472 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2481 #ifdef ENABLE_COMPOSITE_CHARS
2482 /************************************************************************/
2483 /* composite character functions */
2484 /************************************************************************/
2487 lookup_composite_char (Bufbyte *str, int len)
2489 Lisp_Object lispstr = make_string (str, len);
2490 Lisp_Object ch = Fgethash (lispstr,
2491 Vcomposite_char_string2char_hash_table,
2497 if (composite_char_row_next >= 128)
2498 signal_simple_error ("No more composite chars available", lispstr);
2499 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2500 composite_char_col_next);
2501 Fputhash (make_char (emch), lispstr,
2502 Vcomposite_char_char2string_hash_table);
2503 Fputhash (lispstr, make_char (emch),
2504 Vcomposite_char_string2char_hash_table);
2505 composite_char_col_next++;
2506 if (composite_char_col_next >= 128)
2508 composite_char_col_next = 32;
2509 composite_char_row_next++;
2518 composite_char_string (Emchar ch)
2520 Lisp_Object str = Fgethash (make_char (ch),
2521 Vcomposite_char_char2string_hash_table,
2523 assert (!UNBOUNDP (str));
2527 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2528 Convert a string into a single composite character.
2529 The character is the result of overstriking all the characters in
2534 CHECK_STRING (string);
2535 return make_char (lookup_composite_char (XSTRING_DATA (string),
2536 XSTRING_LENGTH (string)));
2539 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2540 Return a string of the characters comprising a composite character.
2548 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2549 signal_simple_error ("Must be composite char", ch);
2550 return composite_char_string (emch);
2552 #endif /* ENABLE_COMPOSITE_CHARS */
2555 /************************************************************************/
2556 /* initialization */
2557 /************************************************************************/
2560 syms_of_mule_charset (void)
2562 INIT_LRECORD_IMPLEMENTATION (charset);
2564 DEFSUBR (Fcharsetp);
2565 DEFSUBR (Ffind_charset);
2566 DEFSUBR (Fget_charset);
2567 DEFSUBR (Fcharset_list);
2568 DEFSUBR (Fcharset_name);
2569 DEFSUBR (Fmake_charset);
2570 DEFSUBR (Fmake_reverse_direction_charset);
2571 /* DEFSUBR (Freverse_direction_charset); */
2572 DEFSUBR (Fdefine_charset_alias);
2573 DEFSUBR (Fcharset_from_attributes);
2574 DEFSUBR (Fcharset_short_name);
2575 DEFSUBR (Fcharset_long_name);
2576 DEFSUBR (Fcharset_description);
2577 DEFSUBR (Fcharset_dimension);
2578 DEFSUBR (Fcharset_property);
2579 DEFSUBR (Fcharset_id);
2580 DEFSUBR (Fset_charset_ccl_program);
2581 DEFSUBR (Fset_charset_registry);
2583 DEFSUBR (Fcharset_mapping_table);
2584 DEFSUBR (Fset_charset_mapping_table);
2585 #ifdef HAVE_CHISE_CLIENT
2586 DEFSUBR (Fsave_charset_mapping_table);
2587 DEFSUBR (Freset_charset_mapping_table);
2590 DEFSUBR (Fdecode_char);
2591 DEFSUBR (Fdecode_builtin_char);
2592 DEFSUBR (Fencode_char);
2594 DEFSUBR (Fmake_char);
2595 DEFSUBR (Fchar_charset);
2596 DEFSUBR (Fchar_octet);
2597 DEFSUBR (Fsplit_char);
2599 #ifdef ENABLE_COMPOSITE_CHARS
2600 DEFSUBR (Fmake_composite_char);
2601 DEFSUBR (Fcomposite_char_string);
2604 defsymbol (&Qcharsetp, "charsetp");
2605 defsymbol (&Qregistry, "registry");
2606 defsymbol (&Qfinal, "final");
2607 defsymbol (&Qgraphic, "graphic");
2608 defsymbol (&Qdirection, "direction");
2609 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2610 defsymbol (&Qshort_name, "short-name");
2611 defsymbol (&Qlong_name, "long-name");
2613 defsymbol (&Qmother, "mother");
2614 defsymbol (&Qmin_code, "min-code");
2615 defsymbol (&Qmax_code, "max-code");
2616 defsymbol (&Qcode_offset, "code-offset");
2617 defsymbol (&Qconversion, "conversion");
2618 defsymbol (&Q94x60, "94x60");
2619 defsymbol (&Q94x94x60, "94x94x60");
2620 defsymbol (&Qbig5_1, "big5-1");
2621 defsymbol (&Qbig5_2, "big5-2");
2624 defsymbol (&Ql2r, "l2r");
2625 defsymbol (&Qr2l, "r2l");
2627 /* Charsets, compatible with FSF 20.3
2628 Naming convention is Script-Charset[-Edition] */
2629 defsymbol (&Qascii, "ascii");
2630 defsymbol (&Qcontrol_1, "control-1");
2631 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2632 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2633 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2634 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2635 defsymbol (&Qthai_tis620, "thai-tis620");
2636 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2637 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2638 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2639 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2640 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2641 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2642 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2643 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2644 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2645 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2646 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2647 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2648 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2649 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2650 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2651 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2653 defsymbol (&Qucs, "ucs");
2654 defsymbol (&Qucs_bmp, "ucs-bmp");
2655 defsymbol (&Qucs_smp, "ucs-smp");
2656 defsymbol (&Qucs_sip, "ucs-sip");
2657 defsymbol (&Qucs_gb, "ucs-gb");
2658 defsymbol (&Qlatin_viscii, "latin-viscii");
2659 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2660 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2661 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2662 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2663 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2664 defsymbol (&Qjis_x0208, "=jis-x0208");
2665 defsymbol (&Qchinese_big5, "chinese-big5");
2666 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2668 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2669 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2671 defsymbol (&Qcomposite, "composite");
2675 vars_of_mule_charset (void)
2682 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2683 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2685 /* Table of charsets indexed by leading byte. */
2686 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2687 chlook->charset_by_leading_byte[i] = Qnil;
2690 /* Table of charsets indexed by type/final-byte. */
2691 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2692 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2693 chlook->charset_by_attributes[i][j] = Qnil;
2695 /* Table of charsets indexed by type/final-byte/direction. */
2696 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2697 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2698 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2699 chlook->charset_by_attributes[i][j][k] = Qnil;
2703 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2705 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2706 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2710 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2711 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2712 Leading-code of private TYPE9N charset of column-width 1.
2714 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2718 Vdefault_coded_charset_priority_list = Qnil;
2719 DEFVAR_LISP ("default-coded-charset-priority-list",
2720 &Vdefault_coded_charset_priority_list /*
2721 Default order of preferred coded-character-sets.
2727 complex_vars_of_mule_charset (void)
2729 staticpro (&Vcharset_hash_table);
2730 Vcharset_hash_table =
2731 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2733 /* Predefined character sets. We store them into variables for
2737 staticpro (&Vcharset_ucs);
2739 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2740 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2741 build_string ("UCS"),
2742 build_string ("UCS"),
2743 build_string ("ISO/IEC 10646"),
2745 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2746 staticpro (&Vcharset_ucs_bmp);
2748 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2749 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2750 build_string ("BMP"),
2751 build_string ("UCS-BMP"),
2752 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2754 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2755 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2756 staticpro (&Vcharset_ucs_smp);
2758 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2759 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2760 build_string ("SMP"),
2761 build_string ("UCS-SMP"),
2762 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2763 build_string ("UCS00-1"),
2764 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2765 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2766 staticpro (&Vcharset_ucs_sip);
2768 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2769 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2770 build_string ("SIP"),
2771 build_string ("UCS-SIP"),
2772 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2773 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2774 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2775 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2776 staticpro (&Vcharset_ucs_gb);
2778 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2779 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2780 build_string ("UCS for GB"),
2781 build_string ("UCS for GB"),
2782 build_string ("ISO/IEC 10646 for GB"),
2784 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2786 # define MIN_CHAR_THAI 0
2787 # define MAX_CHAR_THAI 0
2788 /* # define MIN_CHAR_HEBREW 0 */
2789 /* # define MAX_CHAR_HEBREW 0 */
2790 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2791 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2793 staticpro (&Vcharset_ascii);
2795 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2796 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2797 build_string ("ASCII"),
2798 build_string ("ASCII)"),
2799 build_string ("ASCII (ISO646 IRV)"),
2800 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2801 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2802 staticpro (&Vcharset_control_1);
2803 Vcharset_control_1 =
2804 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2805 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2806 build_string ("C1"),
2807 build_string ("Control characters"),
2808 build_string ("Control characters 128-191"),
2810 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2811 staticpro (&Vcharset_latin_iso8859_1);
2812 Vcharset_latin_iso8859_1 =
2813 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2814 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2815 build_string ("Latin-1"),
2816 build_string ("ISO8859-1 (Latin-1)"),
2817 build_string ("ISO8859-1 (Latin-1)"),
2818 build_string ("iso8859-1"),
2819 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2820 staticpro (&Vcharset_latin_iso8859_2);
2821 Vcharset_latin_iso8859_2 =
2822 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2823 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2824 build_string ("Latin-2"),
2825 build_string ("ISO8859-2 (Latin-2)"),
2826 build_string ("ISO8859-2 (Latin-2)"),
2827 build_string ("iso8859-2"),
2828 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2829 staticpro (&Vcharset_latin_iso8859_3);
2830 Vcharset_latin_iso8859_3 =
2831 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2832 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2833 build_string ("Latin-3"),
2834 build_string ("ISO8859-3 (Latin-3)"),
2835 build_string ("ISO8859-3 (Latin-3)"),
2836 build_string ("iso8859-3"),
2837 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2838 staticpro (&Vcharset_latin_iso8859_4);
2839 Vcharset_latin_iso8859_4 =
2840 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2841 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2842 build_string ("Latin-4"),
2843 build_string ("ISO8859-4 (Latin-4)"),
2844 build_string ("ISO8859-4 (Latin-4)"),
2845 build_string ("iso8859-4"),
2846 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2847 staticpro (&Vcharset_thai_tis620);
2848 Vcharset_thai_tis620 =
2849 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2850 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2851 build_string ("TIS620"),
2852 build_string ("TIS620 (Thai)"),
2853 build_string ("TIS620.2529 (Thai)"),
2854 build_string ("tis620"),
2855 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2856 staticpro (&Vcharset_greek_iso8859_7);
2857 Vcharset_greek_iso8859_7 =
2858 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2859 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2860 build_string ("ISO8859-7"),
2861 build_string ("ISO8859-7 (Greek)"),
2862 build_string ("ISO8859-7 (Greek)"),
2863 build_string ("iso8859-7"),
2864 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2865 staticpro (&Vcharset_arabic_iso8859_6);
2866 Vcharset_arabic_iso8859_6 =
2867 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2868 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2869 build_string ("ISO8859-6"),
2870 build_string ("ISO8859-6 (Arabic)"),
2871 build_string ("ISO8859-6 (Arabic)"),
2872 build_string ("iso8859-6"),
2873 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2874 staticpro (&Vcharset_hebrew_iso8859_8);
2875 Vcharset_hebrew_iso8859_8 =
2876 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2877 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2878 build_string ("ISO8859-8"),
2879 build_string ("ISO8859-8 (Hebrew)"),
2880 build_string ("ISO8859-8 (Hebrew)"),
2881 build_string ("iso8859-8"),
2883 0 /* MIN_CHAR_HEBREW */,
2884 0 /* MAX_CHAR_HEBREW */, 0, 32,
2885 Qnil, CONVERSION_IDENTICAL);
2886 staticpro (&Vcharset_katakana_jisx0201);
2887 Vcharset_katakana_jisx0201 =
2888 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2889 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2890 build_string ("JISX0201 Kana"),
2891 build_string ("JISX0201.1976 (Japanese Kana)"),
2892 build_string ("JISX0201.1976 Japanese Kana"),
2893 build_string ("jisx0201\\.1976"),
2894 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2895 staticpro (&Vcharset_latin_jisx0201);
2896 Vcharset_latin_jisx0201 =
2897 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2898 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2899 build_string ("JISX0201 Roman"),
2900 build_string ("JISX0201.1976 (Japanese Roman)"),
2901 build_string ("JISX0201.1976 Japanese Roman"),
2902 build_string ("jisx0201\\.1976"),
2903 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2904 staticpro (&Vcharset_cyrillic_iso8859_5);
2905 Vcharset_cyrillic_iso8859_5 =
2906 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2907 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2908 build_string ("ISO8859-5"),
2909 build_string ("ISO8859-5 (Cyrillic)"),
2910 build_string ("ISO8859-5 (Cyrillic)"),
2911 build_string ("iso8859-5"),
2912 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2913 staticpro (&Vcharset_latin_iso8859_9);
2914 Vcharset_latin_iso8859_9 =
2915 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2916 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2917 build_string ("Latin-5"),
2918 build_string ("ISO8859-9 (Latin-5)"),
2919 build_string ("ISO8859-9 (Latin-5)"),
2920 build_string ("iso8859-9"),
2921 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2923 staticpro (&Vcharset_jis_x0208);
2924 Vcharset_jis_x0208 =
2925 make_charset (LEADING_BYTE_JIS_X0208,
2927 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2928 build_string ("JIS X0208"),
2929 build_string ("JIS X0208 Common"),
2930 build_string ("JIS X0208 Common part"),
2931 build_string ("jisx0208\\.1990"),
2933 MIN_CHAR_JIS_X0208_1990,
2934 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2935 Qnil, CONVERSION_94x94);
2937 staticpro (&Vcharset_japanese_jisx0208_1978);
2938 Vcharset_japanese_jisx0208_1978 =
2939 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2940 Qjapanese_jisx0208_1978, 94, 2,
2941 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2942 build_string ("JIS X0208:1978"),
2943 build_string ("JIS X0208:1978 (Japanese)"),
2945 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2946 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2953 CONVERSION_IDENTICAL);
2954 staticpro (&Vcharset_chinese_gb2312);
2955 Vcharset_chinese_gb2312 =
2956 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2957 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2958 build_string ("GB2312"),
2959 build_string ("GB2312)"),
2960 build_string ("GB2312 Chinese simplified"),
2961 build_string ("gb2312"),
2962 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2963 staticpro (&Vcharset_chinese_gb12345);
2964 Vcharset_chinese_gb12345 =
2965 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2966 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2967 build_string ("G1"),
2968 build_string ("GB 12345)"),
2969 build_string ("GB 12345-1990"),
2970 build_string ("GB12345\\(\\.1990\\)?-0"),
2971 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2972 staticpro (&Vcharset_japanese_jisx0208);
2973 Vcharset_japanese_jisx0208 =
2974 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2975 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2976 build_string ("JISX0208"),
2977 build_string ("JIS X0208:1983 (Japanese)"),
2978 build_string ("JIS X0208:1983 Japanese Kanji"),
2979 build_string ("jisx0208\\.1983"),
2986 CONVERSION_IDENTICAL);
2988 staticpro (&Vcharset_japanese_jisx0208_1990);
2989 Vcharset_japanese_jisx0208_1990 =
2990 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2991 Qjapanese_jisx0208_1990, 94, 2,
2992 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2993 build_string ("JISX0208-1990"),
2994 build_string ("JIS X0208:1990 (Japanese)"),
2995 build_string ("JIS X0208:1990 Japanese Kanji"),
2996 build_string ("jisx0208\\.1990"),
2998 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2999 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3000 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3001 Vcharset_jis_x0208 /* Qnil */,
3002 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3004 staticpro (&Vcharset_korean_ksc5601);
3005 Vcharset_korean_ksc5601 =
3006 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3007 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3008 build_string ("KSC5601"),
3009 build_string ("KSC5601 (Korean"),
3010 build_string ("KSC5601 Korean Hangul and Hanja"),
3011 build_string ("ksc5601"),
3012 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3013 staticpro (&Vcharset_japanese_jisx0212);
3014 Vcharset_japanese_jisx0212 =
3015 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3016 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3017 build_string ("JISX0212"),
3018 build_string ("JISX0212 (Japanese)"),
3019 build_string ("JISX0212 Japanese Supplement"),
3020 build_string ("jisx0212"),
3021 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3023 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3024 staticpro (&Vcharset_chinese_cns11643_1);
3025 Vcharset_chinese_cns11643_1 =
3026 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3027 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3028 build_string ("CNS11643-1"),
3029 build_string ("CNS11643-1 (Chinese traditional)"),
3031 ("CNS 11643 Plane 1 Chinese traditional"),
3032 build_string (CHINESE_CNS_PLANE_RE("1")),
3033 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_chinese_cns11643_2);
3035 Vcharset_chinese_cns11643_2 =
3036 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3037 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("CNS11643-2"),
3039 build_string ("CNS11643-2 (Chinese traditional)"),
3041 ("CNS 11643 Plane 2 Chinese traditional"),
3042 build_string (CHINESE_CNS_PLANE_RE("2")),
3043 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3045 staticpro (&Vcharset_latin_tcvn5712);
3046 Vcharset_latin_tcvn5712 =
3047 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3048 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3049 build_string ("TCVN 5712"),
3050 build_string ("TCVN 5712 (VSCII-2)"),
3051 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3052 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3053 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3054 staticpro (&Vcharset_latin_viscii_lower);
3055 Vcharset_latin_viscii_lower =
3056 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3057 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3058 build_string ("VISCII lower"),
3059 build_string ("VISCII lower (Vietnamese)"),
3060 build_string ("VISCII lower (Vietnamese)"),
3061 build_string ("MULEVISCII-LOWER"),
3062 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3063 staticpro (&Vcharset_latin_viscii_upper);
3064 Vcharset_latin_viscii_upper =
3065 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3066 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3067 build_string ("VISCII upper"),
3068 build_string ("VISCII upper (Vietnamese)"),
3069 build_string ("VISCII upper (Vietnamese)"),
3070 build_string ("MULEVISCII-UPPER"),
3071 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3072 staticpro (&Vcharset_latin_viscii);
3073 Vcharset_latin_viscii =
3074 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3075 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3076 build_string ("VISCII"),
3077 build_string ("VISCII 1.1 (Vietnamese)"),
3078 build_string ("VISCII 1.1 (Vietnamese)"),
3079 build_string ("VISCII1\\.1"),
3080 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3081 staticpro (&Vcharset_chinese_big5);
3082 Vcharset_chinese_big5 =
3083 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3084 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3085 build_string ("Big5"),
3086 build_string ("Big5"),
3087 build_string ("Big5 Chinese traditional"),
3088 build_string ("big5-0"),
3090 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3091 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3093 staticpro (&Vcharset_ethiopic_ucs);
3094 Vcharset_ethiopic_ucs =
3095 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3096 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3097 build_string ("Ethiopic (UCS)"),
3098 build_string ("Ethiopic (UCS)"),
3099 build_string ("Ethiopic of UCS"),
3100 build_string ("Ethiopic-Unicode"),
3101 Qnil, 0x1200, 0x137F, 0, 0,
3102 Qnil, CONVERSION_IDENTICAL);
3104 staticpro (&Vcharset_chinese_big5_1);
3105 Vcharset_chinese_big5_1 =
3106 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3107 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3108 build_string ("Big5"),
3109 build_string ("Big5 (Level-1)"),
3111 ("Big5 Level-1 Chinese traditional"),
3112 build_string ("big5"),
3113 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3114 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3115 staticpro (&Vcharset_chinese_big5_2);
3116 Vcharset_chinese_big5_2 =
3117 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3118 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Big5"),
3120 build_string ("Big5 (Level-2)"),
3122 ("Big5 Level-2 Chinese traditional"),
3123 build_string ("big5"),
3124 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3125 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3127 #ifdef ENABLE_COMPOSITE_CHARS
3128 /* #### For simplicity, we put composite chars into a 96x96 charset.
3129 This is going to lead to problems because you can run out of
3130 room, esp. as we don't yet recycle numbers. */
3131 staticpro (&Vcharset_composite);
3132 Vcharset_composite =
3133 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3134 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3135 build_string ("Composite"),
3136 build_string ("Composite characters"),
3137 build_string ("Composite characters"),
3140 /* #### not dumped properly */
3141 composite_char_row_next = 32;
3142 composite_char_col_next = 32;
3144 Vcomposite_char_string2char_hash_table =
3145 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3146 Vcomposite_char_char2string_hash_table =
3147 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3148 staticpro (&Vcomposite_char_string2char_hash_table);
3149 staticpro (&Vcomposite_char_char2string_hash_table);
3150 #endif /* ENABLE_COMPOSITE_CHARS */