1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
163 if (!NILP (c) && !CHARP (c))
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
182 if (!EQ (XCHARSET_NAME (ccs), Qucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
214 code_point = (code_point << 8) | j;
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
281 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
282 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
299 Qjapanese_jisx0208_1978,
303 Qjapanese_jisx0208_1990,
320 Qvietnamese_viscii_lower,
321 Qvietnamese_viscii_upper,
330 Lisp_Object Ql2r, Qr2l;
332 Lisp_Object Vcharset_hash_table;
334 /* Composite characters are characters constructed by overstriking two
335 or more regular characters.
337 1) The old Mule implementation involves storing composite characters
338 in a buffer as a tag followed by all of the actual characters
339 used to make up the composite character. I think this is a bad
340 idea; it greatly complicates code that wants to handle strings
341 one character at a time because it has to deal with the possibility
342 of great big ungainly characters. It's much more reasonable to
343 simply store an index into a table of composite characters.
345 2) The current implementation only allows for 16,384 separate
346 composite characters over the lifetime of the XEmacs process.
347 This could become a potential problem if the user
348 edited lots of different files that use composite characters.
349 Due to FSF bogosity, increasing the number of allowable
350 composite characters under Mule would decrease the number
351 of possible faces that can exist. Mule already has shrunk
352 this to 2048, and further shrinkage would become uncomfortable.
353 No such problems exist in XEmacs.
355 Composite characters could be represented as 0x80 C1 C2 C3,
356 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
357 for slightly under 2^20 (one million) composite characters
358 over the XEmacs process lifetime, and you only need to
359 increase the size of a Mule character from 19 to 21 bits.
360 Or you could use 0x80 C1 C2 C3 C4, allowing for about
361 85 million (slightly over 2^26) composite characters. */
364 /************************************************************************/
365 /* Basic Emchar functions */
366 /************************************************************************/
368 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
369 string in STR. Returns the number of bytes stored.
370 Do not call this directly. Use the macro set_charptr_emchar() instead.
374 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
389 else if ( c <= 0x7ff )
391 *p++ = (c >> 6) | 0xc0;
392 *p++ = (c & 0x3f) | 0x80;
394 else if ( c <= 0xffff )
396 *p++ = (c >> 12) | 0xe0;
397 *p++ = ((c >> 6) & 0x3f) | 0x80;
398 *p++ = (c & 0x3f) | 0x80;
400 else if ( c <= 0x1fffff )
402 *p++ = (c >> 18) | 0xf0;
403 *p++ = ((c >> 12) & 0x3f) | 0x80;
404 *p++ = ((c >> 6) & 0x3f) | 0x80;
405 *p++ = (c & 0x3f) | 0x80;
407 else if ( c <= 0x3ffffff )
409 *p++ = (c >> 24) | 0xf8;
410 *p++ = ((c >> 18) & 0x3f) | 0x80;
411 *p++ = ((c >> 12) & 0x3f) | 0x80;
412 *p++ = ((c >> 6) & 0x3f) | 0x80;
413 *p++ = (c & 0x3f) | 0x80;
417 *p++ = (c >> 30) | 0xfc;
418 *p++ = ((c >> 24) & 0x3f) | 0x80;
419 *p++ = ((c >> 18) & 0x3f) | 0x80;
420 *p++ = ((c >> 12) & 0x3f) | 0x80;
421 *p++ = ((c >> 6) & 0x3f) | 0x80;
422 *p++ = (c & 0x3f) | 0x80;
425 BREAKUP_CHAR (c, charset, c1, c2);
426 lb = CHAR_LEADING_BYTE (c);
427 if (LEADING_BYTE_PRIVATE_P (lb))
428 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
430 if (EQ (charset, Vcharset_control_1))
439 /* Return the first character from a Mule-encoded string in STR,
440 assuming it's non-ASCII. Do not call this directly.
441 Use the macro charptr_emchar() instead. */
444 non_ascii_charptr_emchar (const Bufbyte *str)
457 else if ( b >= 0xf8 )
462 else if ( b >= 0xf0 )
467 else if ( b >= 0xe0 )
472 else if ( b >= 0xc0 )
482 for( ; len > 0; len-- )
485 ch = ( ch << 6 ) | ( b & 0x3f );
489 Bufbyte i0 = *str, i1, i2 = 0;
492 if (i0 == LEADING_BYTE_CONTROL_1)
493 return (Emchar) (*++str - 0x20);
495 if (LEADING_BYTE_PREFIX_P (i0))
500 charset = CHARSET_BY_LEADING_BYTE (i0);
501 if (XCHARSET_DIMENSION (charset) == 2)
504 return MAKE_CHAR (charset, i1, i2);
508 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
509 Do not call this directly. Use the macro valid_char_p() instead. */
513 non_ascii_valid_char_p (Emchar ch)
517 /* Must have only lowest 19 bits set */
521 f1 = CHAR_FIELD1 (ch);
522 f2 = CHAR_FIELD2 (ch);
523 f3 = CHAR_FIELD3 (ch);
529 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
530 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
531 f2 > MAX_CHAR_FIELD2_PRIVATE)
536 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
537 f2 <= MAX_CHAR_FIELD2_PRIVATE))
541 NOTE: This takes advantage of the fact that
542 FIELD2_TO_OFFICIAL_LEADING_BYTE and
543 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
545 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
546 if (EQ (charset, Qnil))
548 return (XCHARSET_CHARS (charset) == 96);
554 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
555 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
556 f1 > MAX_CHAR_FIELD1_PRIVATE)
558 if (f2 < 0x20 || f3 < 0x20)
561 #ifdef ENABLE_COMPOSITE_CHARS
562 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
564 if (UNBOUNDP (Fgethash (make_int (ch),
565 Vcomposite_char_char2string_hash_table,
570 #endif /* ENABLE_COMPOSITE_CHARS */
572 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
573 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
576 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
578 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
581 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
583 if (EQ (charset, Qnil))
585 return (XCHARSET_CHARS (charset) == 96);
591 /************************************************************************/
592 /* Basic string functions */
593 /************************************************************************/
595 /* Copy the character pointed to by SRC into DST. Do not call this
596 directly. Use the macro charptr_copy_char() instead.
597 Return the number of bytes copied. */
600 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
602 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
604 for (i = bytes; i; i--, dst++, src++)
610 /************************************************************************/
611 /* streams of Emchars */
612 /************************************************************************/
614 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
615 The functions below are not meant to be called directly; use
616 the macros in insdel.h. */
619 Lstream_get_emchar_1 (Lstream *stream, int ch)
621 Bufbyte str[MAX_EMCHAR_LEN];
622 Bufbyte *strptr = str;
625 str[0] = (Bufbyte) ch;
627 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
629 int c = Lstream_getc (stream);
630 bufpos_checking_assert (c >= 0);
631 *++strptr = (Bufbyte) c;
633 return charptr_emchar (str);
637 Lstream_fput_emchar (Lstream *stream, Emchar ch)
639 Bufbyte str[MAX_EMCHAR_LEN];
640 Bytecount len = set_charptr_emchar (str, ch);
641 return Lstream_write (stream, str, len);
645 Lstream_funget_emchar (Lstream *stream, Emchar ch)
647 Bufbyte str[MAX_EMCHAR_LEN];
648 Bytecount len = set_charptr_emchar (str, ch);
649 Lstream_unread (stream, str, len);
653 /************************************************************************/
655 /************************************************************************/
658 mark_charset (Lisp_Object obj)
660 Lisp_Charset *cs = XCHARSET (obj);
662 mark_object (cs->short_name);
663 mark_object (cs->long_name);
664 mark_object (cs->doc_string);
665 mark_object (cs->registry);
666 mark_object (cs->ccl_program);
668 mark_object (cs->decoding_table);
669 mark_object (cs->mother);
675 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
677 Lisp_Charset *cs = XCHARSET (obj);
681 error ("printing unreadable object #<charset %s 0x%x>",
682 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
685 write_c_string ("#<charset ", printcharfun);
686 print_internal (CHARSET_NAME (cs), printcharfun, 0);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
693 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
695 CHARSET_DIMENSION (cs),
696 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
697 CHARSET_COLUMNS (cs),
698 CHARSET_GRAPHIC (cs),
700 write_c_string (buf, printcharfun);
701 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
702 sprintf (buf, " 0x%x>", cs->header.uid);
703 write_c_string (buf, printcharfun);
706 static const struct lrecord_description charset_description[] = {
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
721 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
722 mark_charset, print_charset, 0, 0, 0,
726 /* Make a new charset. */
727 /* #### SJT Should generic properties be allowed? */
729 make_charset (Charset_ID id, Lisp_Object name,
730 unsigned short chars, unsigned char dimension,
731 unsigned char columns, unsigned char graphic,
732 Bufbyte final, unsigned char direction, Lisp_Object short_name,
733 Lisp_Object long_name, Lisp_Object doc,
735 Lisp_Object decoding_table,
736 Emchar min_code, Emchar max_code,
737 Emchar code_offset, unsigned char byte_offset,
738 Lisp_Object mother, unsigned char conversion)
741 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
745 XSETCHARSET (obj, cs);
747 CHARSET_ID (cs) = id;
748 CHARSET_NAME (cs) = name;
749 CHARSET_SHORT_NAME (cs) = short_name;
750 CHARSET_LONG_NAME (cs) = long_name;
751 CHARSET_CHARS (cs) = chars;
752 CHARSET_DIMENSION (cs) = dimension;
753 CHARSET_DIRECTION (cs) = direction;
754 CHARSET_COLUMNS (cs) = columns;
755 CHARSET_GRAPHIC (cs) = graphic;
756 CHARSET_FINAL (cs) = final;
757 CHARSET_DOC_STRING (cs) = doc;
758 CHARSET_REGISTRY (cs) = reg;
759 CHARSET_CCL_PROGRAM (cs) = Qnil;
760 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
762 CHARSET_DECODING_TABLE(cs) = Qunbound;
763 CHARSET_MIN_CODE (cs) = min_code;
764 CHARSET_MAX_CODE (cs) = max_code;
765 CHARSET_CODE_OFFSET (cs) = code_offset;
766 CHARSET_BYTE_OFFSET (cs) = byte_offset;
767 CHARSET_MOTHER (cs) = mother;
768 CHARSET_CONVERSION (cs) = conversion;
772 if (id == LEADING_BYTE_ASCII)
773 CHARSET_REP_BYTES (cs) = 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
777 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
782 /* some charsets do not have final characters. This includes
783 ASCII, Control-1, Composite, and the two faux private
785 unsigned char iso2022_type
786 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
788 if (code_offset == 0)
790 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
791 chlook->charset_by_attributes[iso2022_type][final] = obj;
795 (chlook->charset_by_attributes[iso2022_type][final][direction]));
796 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
800 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
801 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
803 /* Some charsets are "faux" and don't have names or really exist at
804 all except in the leading-byte table. */
806 Fputhash (name, obj, Vcharset_hash_table);
811 get_unallocated_leading_byte (int dimension)
816 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
819 lb = chlook->next_allocated_leading_byte++;
823 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
826 lb = chlook->next_allocated_1_byte_leading_byte++;
830 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
833 lb = chlook->next_allocated_2_byte_leading_byte++;
839 ("No more character sets free for this dimension",
840 make_int (dimension));
846 /* Number of Big5 characters which have the same code in 1st byte. */
848 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
851 decode_defined_char (Lisp_Object ccs, int code_point)
853 int dim = XCHARSET_DIMENSION (ccs);
854 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
862 = get_ccs_octet_table (decoding_table, ccs,
863 (code_point >> (dim * 8)) & 255);
865 if (CHARP (decoding_table))
866 return XCHAR (decoding_table);
867 #ifdef HAVE_CHISE_CLIENT
868 if (EQ (decoding_table, Qunloaded))
870 char_id = load_char_decoding_entry_maybe (ccs, code_point);
875 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
877 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
879 if ( EQ (mother, Vcharset_ucs) )
880 return DECODE_CHAR (mother, code_point);
882 return decode_defined_char (mother, code_point);
884 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
887 = (((code_point >> 8) & 0x7F) - 33) * 94
888 + (( code_point & 0x7F) - 33);
889 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
890 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
892 b2 += b2 < 0x3F ? 0x40 : 0x62;
893 return decode_defined_char (mother, (b1 << 8) | b2);
895 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
898 = (((code_point >> 8) & 0x7F) - 33) * 94
899 + (( code_point & 0x7F) - 33)
900 + BIG5_SAME_ROW * (0xC9 - 0xA1);
901 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
902 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
904 b2 += b2 < 0x3F ? 0x40 : 0x62;
905 return decode_defined_char (mother, (b1 << 8) | b2);
912 decode_builtin_char (Lisp_Object charset, int code_point)
914 Lisp_Object mother = XCHARSET_MOTHER (charset);
917 if ( XCHARSET_MAX_CODE (charset) > 0 )
919 if ( CHARSETP (mother) )
921 int code = code_point;
923 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
925 int row = code_point >> 8;
926 int cell = code_point & 255;
930 else if (row < 16 + 32 + 30)
931 code = (row - (16 + 32)) * 94 + cell - 33;
932 else if (row < 18 + 32 + 30)
934 else if (row < 18 + 32 + 60)
935 code = (row - (18 + 32)) * 94 + cell - 33;
937 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
939 int plane = code_point >> 16;
940 int row = (code_point >> 8) & 255;
941 int cell = code_point & 255;
945 else if (row < 16 + 32 + 30)
947 = (plane - 33) * 94 * 60
948 + (row - (16 + 32)) * 94
950 else if (row < 18 + 32 + 30)
952 else if (row < 18 + 32 + 60)
954 = (plane - 33) * 94 * 60
955 + (row - (18 + 32)) * 94
958 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
961 = (((code_point >> 8) & 0x7F) - 33) * 94
962 + (( code_point & 0x7F) - 33);
963 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
964 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
966 b2 += b2 < 0x3F ? 0x40 : 0x62;
967 code = (b1 << 8) | b2;
969 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
972 = (((code_point >> 8) & 0x7F) - 33) * 94
973 + (( code_point & 0x7F) - 33)
974 + BIG5_SAME_ROW * (0xC9 - 0xA1);
975 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
976 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
978 b2 += b2 < 0x3F ? 0x40 : 0x62;
979 code = (b1 << 8) | b2;
982 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
987 = (XCHARSET_DIMENSION (charset) == 1
989 code_point - XCHARSET_BYTE_OFFSET (charset)
991 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
992 * XCHARSET_CHARS (charset)
993 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
994 + XCHARSET_CODE_OFFSET (charset);
995 if ((cid < XCHARSET_MIN_CODE (charset))
996 || (XCHARSET_MAX_CODE (charset) < cid))
1001 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1003 if (XCHARSET_DIMENSION (charset) == 1)
1005 switch (XCHARSET_CHARS (charset))
1009 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1012 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1020 switch (XCHARSET_CHARS (charset))
1023 return MIN_CHAR_94x94
1024 + (final - '0') * 94 * 94
1025 + (((code_point >> 8) & 0x7F) - 33) * 94
1026 + ((code_point & 0x7F) - 33);
1028 return MIN_CHAR_96x96
1029 + (final - '0') * 96 * 96
1030 + (((code_point >> 8) & 0x7F) - 32) * 96
1031 + ((code_point & 0x7F) - 32);
1043 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1045 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1048 if ( CHAR_TABLEP (encoding_table)
1049 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1054 Lisp_Object mother = XCHARSET_MOTHER (charset);
1055 int min = XCHARSET_MIN_CODE (charset);
1056 int max = XCHARSET_MAX_CODE (charset);
1059 if ( CHARSETP (mother) )
1061 if (XCHARSET_FINAL (charset) >= '0')
1062 code = charset_code_point (mother, ch, 1);
1064 code = charset_code_point (mother, ch, defined_only);
1066 else if (defined_only)
1068 else if ( ((max == 0) && CHARSETP (mother)
1069 && (XCHARSET_FINAL (charset) == 0))
1070 || ((min <= ch) && (ch <= max)) )
1072 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1073 || ((min <= code) && (code <= max)) )
1075 int d = code - XCHARSET_CODE_OFFSET (charset);
1077 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1083 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1086 int cell = d % 94 + 33;
1092 return (row << 8) | cell;
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1096 int B1 = d >> 8, B2 = d & 0xFF;
1098 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1099 - (B2 < 0x7F ? 0x40 : 0x62);
1103 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1106 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1108 int B1 = d >> 8, B2 = d & 0xFF;
1110 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1111 - (B2 < 0x7F ? 0x40 : 0x62);
1115 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1116 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1120 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1122 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1125 int plane = d / (94 * 60) + 33;
1126 int row = (d % (94 * 60)) / 94;
1127 int cell = d % 94 + 33;
1133 return (plane << 16) | (row << 8) | cell;
1135 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1137 ( (d / (94 * 94) + 33) << 16)
1138 | ((d / 94 % 94 + 33) << 8)
1140 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1142 ( (d / (96 * 96) + 32) << 16)
1143 | ((d / 96 % 96 + 32) << 8)
1145 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1147 ( (d / (94 * 94 * 94) + 33) << 24)
1148 | ((d / (94 * 94) % 94 + 33) << 16)
1149 | ((d / 94 % 94 + 33) << 8)
1151 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1153 ( (d / (96 * 96 * 96) + 32) << 24)
1154 | ((d / (96 * 96) % 96 + 32) << 16)
1155 | ((d / 96 % 96 + 32) << 8)
1159 printf ("Unknown CCS-conversion %d is specified!",
1160 XCHARSET_CONVERSION (charset));
1164 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1165 ( XCHARSET_MIN_CODE (charset) == 0 )
1167 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1168 (XCHARSET_CODE_OFFSET (charset)
1169 == XCHARSET_MIN_CODE (charset))
1174 if (XCHARSET_DIMENSION (charset) == 1)
1176 if (XCHARSET_CHARS (charset) == 94)
1178 if (((d = ch - (MIN_CHAR_94
1179 + (XCHARSET_FINAL (charset) - '0') * 94))
1184 else if (XCHARSET_CHARS (charset) == 96)
1186 if (((d = ch - (MIN_CHAR_96
1187 + (XCHARSET_FINAL (charset) - '0') * 96))
1195 else if (XCHARSET_DIMENSION (charset) == 2)
1197 if (XCHARSET_CHARS (charset) == 94)
1199 if (((d = ch - (MIN_CHAR_94x94
1201 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1204 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1206 else if (XCHARSET_CHARS (charset) == 96)
1208 if (((d = ch - (MIN_CHAR_96x96
1210 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1213 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1224 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1226 if (c <= MAX_CHAR_BASIC_LATIN)
1228 *charset = Vcharset_ascii;
1233 *charset = Vcharset_control_1;
1238 *charset = Vcharset_latin_iso8859_1;
1242 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1244 *charset = Vcharset_hebrew_iso8859_8;
1245 return c - MIN_CHAR_HEBREW + 0x20;
1248 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1250 *charset = Vcharset_thai_tis620;
1251 return c - MIN_CHAR_THAI + 0x20;
1254 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1255 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1257 return list2 (Vcharset_katakana_jisx0201,
1258 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1261 else if (c <= MAX_CHAR_BMP)
1263 *charset = Vcharset_ucs_bmp;
1266 else if (c <= MAX_CHAR_SMP)
1268 *charset = Vcharset_ucs_smp;
1269 return c - MIN_CHAR_SMP;
1271 else if (c <= MAX_CHAR_SIP)
1273 *charset = Vcharset_ucs_sip;
1274 return c - MIN_CHAR_SIP;
1276 else if (c < MIN_CHAR_94)
1278 *charset = Vcharset_ucs;
1281 else if (c <= MAX_CHAR_94)
1283 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1284 ((c - MIN_CHAR_94) / 94) + '0',
1285 CHARSET_LEFT_TO_RIGHT);
1286 if (!NILP (*charset))
1287 return ((c - MIN_CHAR_94) % 94) + 33;
1290 *charset = Vcharset_ucs;
1294 else if (c <= MAX_CHAR_96)
1296 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1297 ((c - MIN_CHAR_96) / 96) + '0',
1298 CHARSET_LEFT_TO_RIGHT);
1299 if (!NILP (*charset))
1300 return ((c - MIN_CHAR_96) % 96) + 32;
1303 *charset = Vcharset_ucs;
1307 else if (c <= MAX_CHAR_94x94)
1310 = CHARSET_BY_ATTRIBUTES (94, 2,
1311 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1312 CHARSET_LEFT_TO_RIGHT);
1313 if (!NILP (*charset))
1314 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1315 | (((c - MIN_CHAR_94x94) % 94) + 33);
1318 *charset = Vcharset_ucs;
1322 else if (c <= MAX_CHAR_96x96)
1325 = CHARSET_BY_ATTRIBUTES (96, 2,
1326 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1327 CHARSET_LEFT_TO_RIGHT);
1328 if (!NILP (*charset))
1329 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1330 | (((c - MIN_CHAR_96x96) % 96) + 32);
1333 *charset = Vcharset_ucs;
1339 *charset = Vcharset_ucs;
1344 Lisp_Object Vdefault_coded_charset_priority_list;
1348 /************************************************************************/
1349 /* Basic charset Lisp functions */
1350 /************************************************************************/
1352 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1353 Return non-nil if OBJECT is a charset.
1357 return CHARSETP (object) ? Qt : Qnil;
1360 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1361 Retrieve the charset of the given name.
1362 If CHARSET-OR-NAME is a charset object, it is simply returned.
1363 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1364 nil is returned. Otherwise the associated charset object is returned.
1368 if (CHARSETP (charset_or_name))
1369 return charset_or_name;
1371 CHECK_SYMBOL (charset_or_name);
1372 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1375 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1376 Retrieve the charset of the given name.
1377 Same as `find-charset' except an error is signalled if there is no such
1378 charset instead of returning nil.
1382 Lisp_Object charset = Ffind_charset (name);
1385 signal_simple_error ("No such charset", name);
1389 /* We store the charsets in hash tables with the names as the key and the
1390 actual charset object as the value. Occasionally we need to use them
1391 in a list format. These routines provide us with that. */
1392 struct charset_list_closure
1394 Lisp_Object *charset_list;
1398 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1399 void *charset_list_closure)
1401 /* This function can GC */
1402 struct charset_list_closure *chcl =
1403 (struct charset_list_closure*) charset_list_closure;
1404 Lisp_Object *charset_list = chcl->charset_list;
1406 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1410 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1411 Return a list of the names of all defined charsets.
1415 Lisp_Object charset_list = Qnil;
1416 struct gcpro gcpro1;
1417 struct charset_list_closure charset_list_closure;
1419 GCPRO1 (charset_list);
1420 charset_list_closure.charset_list = &charset_list;
1421 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1422 &charset_list_closure);
1425 return charset_list;
1428 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1429 Return the name of charset CHARSET.
1433 return XCHARSET_NAME (Fget_charset (charset));
1436 /* #### SJT Should generic properties be allowed? */
1437 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1438 Define a new character set.
1439 This function is for use with Mule support.
1440 NAME is a symbol, the name by which the character set is normally referred.
1441 DOC-STRING is a string describing the character set.
1442 PROPS is a property list, describing the specific nature of the
1443 character set. Recognized properties are:
1445 'short-name Short version of the charset name (ex: Latin-1)
1446 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1447 'registry A regular expression matching the font registry field for
1449 'dimension Number of octets used to index a character in this charset.
1450 Either 1 or 2. Defaults to 1.
1451 If UTF-2000 feature is enabled, 3 or 4 are also available.
1452 'columns Number of columns used to display a character in this charset.
1453 Only used in TTY mode. (Under X, the actual width of a
1454 character can be derived from the font used to display the
1455 characters.) If unspecified, defaults to the dimension
1456 (this is almost always the correct value).
1457 'chars Number of characters in each dimension (94 or 96).
1458 Defaults to 94. Note that if the dimension is 2, the
1459 character set thus described is 94x94 or 96x96.
1460 If UTF-2000 feature is enabled, 128 or 256 are also available.
1461 'final Final byte of ISO 2022 escape sequence. Must be
1462 supplied. Each combination of (DIMENSION, CHARS) defines a
1463 separate namespace for final bytes. Note that ISO
1464 2022 restricts the final byte to the range
1465 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1466 dimension == 2. Note also that final bytes in the range
1467 0x30 - 0x3F are reserved for user-defined (not official)
1469 'graphic 0 (use left half of font on output) or 1 (use right half
1470 of font on output). Defaults to 0. For example, for
1471 a font whose registry is ISO8859-1, the left half
1472 (octets 0x20 - 0x7F) is the `ascii' character set, while
1473 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1474 character set. With 'graphic set to 0, the octets
1475 will have their high bit cleared; with it set to 1,
1476 the octets will have their high bit set.
1477 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1479 'ccl-program A compiled CCL program used to convert a character in
1480 this charset into an index into the font. This is in
1481 addition to the 'graphic property. The CCL program
1482 is passed the octets of the character, with the high
1483 bit cleared and set depending upon whether the value
1484 of the 'graphic property is 0 or 1.
1485 'mother [UTF-2000 only] Base coded-charset.
1486 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1487 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1488 'code-offset [UTF-2000 only] Offset for a code-point of a base
1490 'conversion [UTF-2000 only] Conversion for a code-point of a base
1491 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1493 (name, doc_string, props))
1495 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1496 int direction = CHARSET_LEFT_TO_RIGHT;
1497 Lisp_Object registry = Qnil;
1498 Lisp_Object charset;
1499 Lisp_Object ccl_program = Qnil;
1500 Lisp_Object short_name = Qnil, long_name = Qnil;
1501 Lisp_Object mother = Qnil;
1502 int min_code = 0, max_code = 0, code_offset = 0;
1503 int byte_offset = -1;
1506 CHECK_SYMBOL (name);
1507 if (!NILP (doc_string))
1508 CHECK_STRING (doc_string);
1510 charset = Ffind_charset (name);
1511 if (!NILP (charset))
1512 signal_simple_error ("Cannot redefine existing charset", name);
1515 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1517 if (EQ (keyword, Qshort_name))
1519 CHECK_STRING (value);
1523 else if (EQ (keyword, Qlong_name))
1525 CHECK_STRING (value);
1529 else if (EQ (keyword, Qdimension))
1532 dimension = XINT (value);
1533 if (dimension < 1 ||
1540 signal_simple_error ("Invalid value for 'dimension", value);
1543 else if (EQ (keyword, Qchars))
1546 chars = XINT (value);
1547 if (chars != 94 && chars != 96
1549 && chars != 128 && chars != 256
1552 signal_simple_error ("Invalid value for 'chars", value);
1555 else if (EQ (keyword, Qcolumns))
1558 columns = XINT (value);
1559 if (columns != 1 && columns != 2)
1560 signal_simple_error ("Invalid value for 'columns", value);
1563 else if (EQ (keyword, Qgraphic))
1566 graphic = XINT (value);
1574 signal_simple_error ("Invalid value for 'graphic", value);
1577 else if (EQ (keyword, Qregistry))
1579 CHECK_STRING (value);
1583 else if (EQ (keyword, Qdirection))
1585 if (EQ (value, Ql2r))
1586 direction = CHARSET_LEFT_TO_RIGHT;
1587 else if (EQ (value, Qr2l))
1588 direction = CHARSET_RIGHT_TO_LEFT;
1590 signal_simple_error ("Invalid value for 'direction", value);
1593 else if (EQ (keyword, Qfinal))
1595 CHECK_CHAR_COERCE_INT (value);
1596 final = XCHAR (value);
1597 if (final < '0' || final > '~')
1598 signal_simple_error ("Invalid value for 'final", value);
1602 else if (EQ (keyword, Qmother))
1604 mother = Fget_charset (value);
1607 else if (EQ (keyword, Qmin_code))
1610 min_code = XUINT (value);
1613 else if (EQ (keyword, Qmax_code))
1616 max_code = XUINT (value);
1619 else if (EQ (keyword, Qcode_offset))
1622 code_offset = XUINT (value);
1625 else if (EQ (keyword, Qconversion))
1627 if (EQ (value, Q94x60))
1628 conversion = CONVERSION_94x60;
1629 else if (EQ (value, Q94x94x60))
1630 conversion = CONVERSION_94x94x60;
1631 else if (EQ (value, Qbig5_1))
1632 conversion = CONVERSION_BIG5_1;
1633 else if (EQ (value, Qbig5_2))
1634 conversion = CONVERSION_BIG5_2;
1636 signal_simple_error ("Unrecognized conversion", value);
1640 else if (EQ (keyword, Qccl_program))
1642 struct ccl_program test_ccl;
1644 if (setup_ccl_program (&test_ccl, value) < 0)
1645 signal_simple_error ("Invalid value for 'ccl-program", value);
1646 ccl_program = value;
1650 signal_simple_error ("Unrecognized property", keyword);
1656 error ("'final must be specified");
1658 if (dimension == 2 && final > 0x5F)
1660 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1663 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1664 CHARSET_LEFT_TO_RIGHT)) ||
1665 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1666 CHARSET_RIGHT_TO_LEFT)))
1668 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1670 id = get_unallocated_leading_byte (dimension);
1672 if (NILP (doc_string))
1673 doc_string = build_string ("");
1675 if (NILP (registry))
1676 registry = build_string ("");
1678 if (NILP (short_name))
1679 XSETSTRING (short_name, XSYMBOL (name)->name);
1681 if (NILP (long_name))
1682 long_name = doc_string;
1685 columns = dimension;
1687 if (byte_offset < 0)
1691 else if (chars == 96)
1697 charset = make_charset (id, name, chars, dimension, columns, graphic,
1698 final, direction, short_name, long_name,
1699 doc_string, registry,
1700 Qnil, min_code, max_code, code_offset, byte_offset,
1701 mother, conversion);
1702 if (!NILP (ccl_program))
1703 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1707 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1709 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1710 NEW-NAME is the name of the new charset. Return the new charset.
1712 (charset, new_name))
1714 Lisp_Object new_charset = Qnil;
1715 int id, chars, dimension, columns, graphic, final;
1717 Lisp_Object registry, doc_string, short_name, long_name;
1720 charset = Fget_charset (charset);
1721 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1722 signal_simple_error ("Charset already has reverse-direction charset",
1725 CHECK_SYMBOL (new_name);
1726 if (!NILP (Ffind_charset (new_name)))
1727 signal_simple_error ("Cannot redefine existing charset", new_name);
1729 cs = XCHARSET (charset);
1731 chars = CHARSET_CHARS (cs);
1732 dimension = CHARSET_DIMENSION (cs);
1733 columns = CHARSET_COLUMNS (cs);
1734 id = get_unallocated_leading_byte (dimension);
1736 graphic = CHARSET_GRAPHIC (cs);
1737 final = CHARSET_FINAL (cs);
1738 direction = CHARSET_RIGHT_TO_LEFT;
1739 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1740 direction = CHARSET_LEFT_TO_RIGHT;
1741 doc_string = CHARSET_DOC_STRING (cs);
1742 short_name = CHARSET_SHORT_NAME (cs);
1743 long_name = CHARSET_LONG_NAME (cs);
1744 registry = CHARSET_REGISTRY (cs);
1746 new_charset = make_charset (id, new_name, chars, dimension, columns,
1747 graphic, final, direction, short_name, long_name,
1748 doc_string, registry,
1750 CHARSET_DECODING_TABLE(cs),
1751 CHARSET_MIN_CODE(cs),
1752 CHARSET_MAX_CODE(cs),
1753 CHARSET_CODE_OFFSET(cs),
1754 CHARSET_BYTE_OFFSET(cs),
1756 CHARSET_CONVERSION (cs)
1758 Qnil, 0, 0, 0, 0, Qnil, 0
1762 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1763 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1768 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1769 Define symbol ALIAS as an alias for CHARSET.
1773 CHECK_SYMBOL (alias);
1774 charset = Fget_charset (charset);
1775 return Fputhash (alias, charset, Vcharset_hash_table);
1778 /* #### Reverse direction charsets not yet implemented. */
1780 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1782 Return the reverse-direction charset parallel to CHARSET, if any.
1783 This is the charset with the same properties (in particular, the same
1784 dimension, number of characters per dimension, and final byte) as
1785 CHARSET but whose characters are displayed in the opposite direction.
1789 charset = Fget_charset (charset);
1790 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1794 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1795 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1796 If DIRECTION is omitted, both directions will be checked (left-to-right
1797 will be returned if character sets exist for both directions).
1799 (dimension, chars, final, direction))
1801 int dm, ch, fi, di = -1;
1802 Lisp_Object obj = Qnil;
1804 CHECK_INT (dimension);
1805 dm = XINT (dimension);
1806 if (dm < 1 || dm > 2)
1807 signal_simple_error ("Invalid value for DIMENSION", dimension);
1811 if (ch != 94 && ch != 96)
1812 signal_simple_error ("Invalid value for CHARS", chars);
1814 CHECK_CHAR_COERCE_INT (final);
1816 if (fi < '0' || fi > '~')
1817 signal_simple_error ("Invalid value for FINAL", final);
1819 if (EQ (direction, Ql2r))
1820 di = CHARSET_LEFT_TO_RIGHT;
1821 else if (EQ (direction, Qr2l))
1822 di = CHARSET_RIGHT_TO_LEFT;
1823 else if (!NILP (direction))
1824 signal_simple_error ("Invalid value for DIRECTION", direction);
1826 if (dm == 2 && fi > 0x5F)
1828 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1832 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1834 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1837 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1840 return XCHARSET_NAME (obj);
1844 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1845 Return short name of CHARSET.
1849 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1852 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1853 Return long name of CHARSET.
1857 return XCHARSET_LONG_NAME (Fget_charset (charset));
1860 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1861 Return description of CHARSET.
1865 return XCHARSET_DOC_STRING (Fget_charset (charset));
1868 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1869 Return dimension of CHARSET.
1873 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1876 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1877 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1878 Recognized properties are those listed in `make-charset', as well as
1879 'name and 'doc-string.
1885 charset = Fget_charset (charset);
1886 cs = XCHARSET (charset);
1888 CHECK_SYMBOL (prop);
1889 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1890 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1891 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1892 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1893 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1894 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1895 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1896 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1897 Qnil : make_char (CHARSET_FINAL (cs));
1898 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1899 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1900 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1901 if (EQ (prop, Qdirection))
1902 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1903 if (EQ (prop, Qreverse_direction_charset))
1905 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1906 /* #### Is this translation OK? If so, error checking sufficient? */
1907 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1910 if (EQ (prop, Qmother))
1911 return CHARSET_MOTHER (cs);
1912 if (EQ (prop, Qmin_code))
1913 return make_int (CHARSET_MIN_CODE (cs));
1914 if (EQ (prop, Qmax_code))
1915 return make_int (CHARSET_MAX_CODE (cs));
1917 signal_simple_error ("Unrecognized charset property name", prop);
1918 return Qnil; /* not reached */
1921 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1922 Return charset identification number of CHARSET.
1926 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1929 /* #### We need to figure out which properties we really want to
1932 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1933 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1935 (charset, ccl_program))
1937 struct ccl_program test_ccl;
1939 charset = Fget_charset (charset);
1940 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1941 signal_simple_error ("Invalid ccl-program", ccl_program);
1942 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1947 invalidate_charset_font_caches (Lisp_Object charset)
1949 /* Invalidate font cache entries for charset on all devices. */
1950 Lisp_Object devcons, concons, hash_table;
1951 DEVICE_LOOP_NO_BREAK (devcons, concons)
1953 struct device *d = XDEVICE (XCAR (devcons));
1954 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1955 if (!UNBOUNDP (hash_table))
1956 Fclrhash (hash_table);
1960 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1961 Set the 'registry property of CHARSET to REGISTRY.
1963 (charset, registry))
1965 charset = Fget_charset (charset);
1966 CHECK_STRING (registry);
1967 XCHARSET_REGISTRY (charset) = registry;
1968 invalidate_charset_font_caches (charset);
1969 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1974 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1975 Return mapping-table of CHARSET.
1979 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1982 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1983 Set mapping-table of CHARSET to TABLE.
1987 struct Lisp_Charset *cs;
1991 charset = Fget_charset (charset);
1992 cs = XCHARSET (charset);
1996 CHARSET_DECODING_TABLE(cs) = Qnil;
1999 else if (VECTORP (table))
2001 int ccs_len = CHARSET_BYTE_SIZE (cs);
2002 int ret = decoding_table_check_elements (table,
2003 CHARSET_DIMENSION (cs),
2008 signal_simple_error ("Too big table", table);
2010 signal_simple_error ("Invalid element is found", table);
2012 signal_simple_error ("Something wrong", table);
2014 CHARSET_DECODING_TABLE(cs) = Qnil;
2017 signal_error (Qwrong_type_argument,
2018 list2 (build_translated_string ("vector-or-nil-p"),
2021 byte_offset = CHARSET_BYTE_OFFSET (cs);
2022 switch (CHARSET_DIMENSION (cs))
2025 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2027 Lisp_Object c = XVECTOR_DATA(table)[i];
2030 Fput_char_attribute (c, XCHARSET_NAME (charset),
2031 make_int (i + byte_offset));
2035 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2037 Lisp_Object v = XVECTOR_DATA(table)[i];
2043 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2045 Lisp_Object c = XVECTOR_DATA(v)[j];
2049 (c, XCHARSET_NAME (charset),
2050 make_int ( ( (i + byte_offset) << 8 )
2056 Fput_char_attribute (v, XCHARSET_NAME (charset),
2057 make_int (i + byte_offset));
2064 #ifdef HAVE_CHISE_CLIENT
2065 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2066 Save mapping-table of CHARSET.
2070 struct Lisp_Charset *cs;
2071 int byte_min, byte_max;
2073 Lisp_Object db_file;
2075 charset = Fget_charset (charset);
2076 cs = XCHARSET (charset);
2078 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2079 Qsystem_char_id, 1);
2080 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2082 byte_min = CHARSET_BYTE_OFFSET (cs);
2083 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2084 switch (CHARSET_DIMENSION (cs))
2088 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2091 for (cell = byte_min; cell < byte_max; cell++)
2093 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2096 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2097 Fprin1_to_string (c, Qnil),
2104 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2107 for (row = byte_min; row < byte_max; row++)
2109 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2112 for (cell = byte_min; cell < byte_max; cell++)
2114 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2117 Fput_database (Fprin1_to_string (make_int ((row << 8)
2120 Fprin1_to_string (c, Qnil),
2128 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2131 for (plane = byte_min; plane < byte_max; plane++)
2134 = get_ccs_octet_table (table_p, charset, plane);
2137 for (row = byte_min; row < byte_max; row++)
2140 = get_ccs_octet_table (table_r, charset, row);
2143 for (cell = byte_min; cell < byte_max; cell++)
2145 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2149 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2153 Fprin1_to_string (c, Qnil),
2162 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2165 for (group = byte_min; group < byte_max; group++)
2168 = get_ccs_octet_table (table_g, charset, group);
2171 for (plane = byte_min; plane < byte_max; plane++)
2174 = get_ccs_octet_table (table_p, charset, plane);
2177 for (row = byte_min; row < byte_max; row++)
2180 = get_ccs_octet_table (table_r, charset, row);
2183 for (cell = byte_min; cell < byte_max; cell++)
2186 = get_ccs_octet_table (table_c, charset, cell);
2189 Fput_database (Fprin1_to_string
2190 (make_int (( group << 24)
2195 Fprin1_to_string (c, Qnil),
2203 return Fclose_database (db);
2206 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2207 Reset mapping-table of CCS with database file.
2211 Lisp_Object db_file;
2213 ccs = Fget_charset (ccs);
2214 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2215 Qsystem_char_id, 0);
2217 if (!NILP (Ffile_exists_p (db_file)))
2219 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2226 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2230 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2233 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2237 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2244 decoding_table_put_char (ccs, code_point, ret);
2245 Fclose_database (db);
2249 decoding_table_put_char (ccs, code_point, Qnil);
2250 Fclose_database (db);
2254 #endif /* HAVE_CHISE_CLIENT */
2255 #endif /* UTF2000 */
2258 /************************************************************************/
2259 /* Lisp primitives for working with characters */
2260 /************************************************************************/
2263 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2264 Make a character from CHARSET and code-point CODE.
2265 If DEFINED_ONLY is non-nil, builtin character is not returned.
2266 If corresponding character is not found, nil is returned.
2268 (charset, code, defined_only))
2272 charset = Fget_charset (charset);
2275 if (XCHARSET_GRAPHIC (charset) == 1)
2277 if (NILP (defined_only))
2278 c = DECODE_CHAR (charset, c);
2280 c = decode_defined_char (charset, c);
2281 return c >= 0 ? make_char (c) : Qnil;
2284 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2285 Make a builtin character from CHARSET and code-point CODE.
2291 charset = Fget_charset (charset);
2293 if (EQ (charset, Vcharset_latin_viscii))
2295 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2301 (ret = Fget_char_attribute (chr,
2302 Vcharset_latin_viscii_lower,
2305 charset = Vcharset_latin_viscii_lower;
2309 (ret = Fget_char_attribute (chr,
2310 Vcharset_latin_viscii_upper,
2313 charset = Vcharset_latin_viscii_upper;
2320 if (XCHARSET_GRAPHIC (charset) == 1)
2323 c = decode_builtin_char (charset, c);
2324 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2328 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2329 Make a character from CHARSET and octets ARG1 and ARG2.
2330 ARG2 is required only for characters from two-dimensional charsets.
2331 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2332 character s with caron.
2334 (charset, arg1, arg2))
2338 int lowlim, highlim;
2340 charset = Fget_charset (charset);
2341 cs = XCHARSET (charset);
2343 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2344 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2346 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2348 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2349 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2352 /* It is useful (and safe, according to Olivier Galibert) to strip
2353 the 8th bit off ARG1 and ARG2 because it allows programmers to
2354 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2355 Latin 2 code of the character. */
2363 if (a1 < lowlim || a1 > highlim)
2364 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2366 if (CHARSET_DIMENSION (cs) == 1)
2370 ("Charset is of dimension one; second octet must be nil", arg2);
2371 return make_char (MAKE_CHAR (charset, a1, 0));
2380 a2 = XINT (arg2) & 0x7f;
2382 if (a2 < lowlim || a2 > highlim)
2383 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2385 return make_char (MAKE_CHAR (charset, a1, a2));
2388 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2389 Return the character set of CHARACTER.
2393 CHECK_CHAR_COERCE_INT (character);
2395 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2398 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2399 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2400 N defaults to 0 if omitted.
2404 Lisp_Object charset;
2407 CHECK_CHAR_COERCE_INT (character);
2409 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2411 if (NILP (n) || EQ (n, Qzero))
2412 return make_int (octet0);
2413 else if (EQ (n, make_int (1)))
2414 return make_int (octet1);
2416 signal_simple_error ("Octet number must be 0 or 1", n);
2420 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2421 Return code-point of CHARACTER in specified CHARSET.
2423 (character, charset, defined_only))
2427 CHECK_CHAR_COERCE_INT (character);
2428 charset = Fget_charset (charset);
2429 code_point = charset_code_point (charset, XCHAR (character),
2430 !NILP (defined_only));
2431 if (code_point >= 0)
2432 return make_int (code_point);
2438 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2439 Return list of charset and one or two position-codes of CHARACTER.
2443 /* This function can GC */
2444 struct gcpro gcpro1, gcpro2;
2445 Lisp_Object charset = Qnil;
2446 Lisp_Object rc = Qnil;
2454 GCPRO2 (charset, rc);
2455 CHECK_CHAR_COERCE_INT (character);
2458 code_point = ENCODE_CHAR (XCHAR (character), charset);
2459 dimension = XCHARSET_DIMENSION (charset);
2460 while (dimension > 0)
2462 rc = Fcons (make_int (code_point & 255), rc);
2466 rc = Fcons (XCHARSET_NAME (charset), rc);
2468 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2470 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2472 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2476 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2485 #ifdef ENABLE_COMPOSITE_CHARS
2486 /************************************************************************/
2487 /* composite character functions */
2488 /************************************************************************/
2491 lookup_composite_char (Bufbyte *str, int len)
2493 Lisp_Object lispstr = make_string (str, len);
2494 Lisp_Object ch = Fgethash (lispstr,
2495 Vcomposite_char_string2char_hash_table,
2501 if (composite_char_row_next >= 128)
2502 signal_simple_error ("No more composite chars available", lispstr);
2503 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2504 composite_char_col_next);
2505 Fputhash (make_char (emch), lispstr,
2506 Vcomposite_char_char2string_hash_table);
2507 Fputhash (lispstr, make_char (emch),
2508 Vcomposite_char_string2char_hash_table);
2509 composite_char_col_next++;
2510 if (composite_char_col_next >= 128)
2512 composite_char_col_next = 32;
2513 composite_char_row_next++;
2522 composite_char_string (Emchar ch)
2524 Lisp_Object str = Fgethash (make_char (ch),
2525 Vcomposite_char_char2string_hash_table,
2527 assert (!UNBOUNDP (str));
2531 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2532 Convert a string into a single composite character.
2533 The character is the result of overstriking all the characters in
2538 CHECK_STRING (string);
2539 return make_char (lookup_composite_char (XSTRING_DATA (string),
2540 XSTRING_LENGTH (string)));
2543 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2544 Return a string of the characters comprising a composite character.
2552 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2553 signal_simple_error ("Must be composite char", ch);
2554 return composite_char_string (emch);
2556 #endif /* ENABLE_COMPOSITE_CHARS */
2559 /************************************************************************/
2560 /* initialization */
2561 /************************************************************************/
2564 syms_of_mule_charset (void)
2566 INIT_LRECORD_IMPLEMENTATION (charset);
2568 DEFSUBR (Fcharsetp);
2569 DEFSUBR (Ffind_charset);
2570 DEFSUBR (Fget_charset);
2571 DEFSUBR (Fcharset_list);
2572 DEFSUBR (Fcharset_name);
2573 DEFSUBR (Fmake_charset);
2574 DEFSUBR (Fmake_reverse_direction_charset);
2575 /* DEFSUBR (Freverse_direction_charset); */
2576 DEFSUBR (Fdefine_charset_alias);
2577 DEFSUBR (Fcharset_from_attributes);
2578 DEFSUBR (Fcharset_short_name);
2579 DEFSUBR (Fcharset_long_name);
2580 DEFSUBR (Fcharset_description);
2581 DEFSUBR (Fcharset_dimension);
2582 DEFSUBR (Fcharset_property);
2583 DEFSUBR (Fcharset_id);
2584 DEFSUBR (Fset_charset_ccl_program);
2585 DEFSUBR (Fset_charset_registry);
2587 DEFSUBR (Fcharset_mapping_table);
2588 DEFSUBR (Fset_charset_mapping_table);
2589 #ifdef HAVE_CHISE_CLIENT
2590 DEFSUBR (Fsave_charset_mapping_table);
2591 DEFSUBR (Freset_charset_mapping_table);
2594 DEFSUBR (Fdecode_char);
2595 DEFSUBR (Fdecode_builtin_char);
2596 DEFSUBR (Fencode_char);
2598 DEFSUBR (Fmake_char);
2599 DEFSUBR (Fchar_charset);
2600 DEFSUBR (Fchar_octet);
2601 DEFSUBR (Fsplit_char);
2603 #ifdef ENABLE_COMPOSITE_CHARS
2604 DEFSUBR (Fmake_composite_char);
2605 DEFSUBR (Fcomposite_char_string);
2608 defsymbol (&Qcharsetp, "charsetp");
2609 defsymbol (&Qregistry, "registry");
2610 defsymbol (&Qfinal, "final");
2611 defsymbol (&Qgraphic, "graphic");
2612 defsymbol (&Qdirection, "direction");
2613 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2614 defsymbol (&Qshort_name, "short-name");
2615 defsymbol (&Qlong_name, "long-name");
2617 defsymbol (&Qmother, "mother");
2618 defsymbol (&Qmin_code, "min-code");
2619 defsymbol (&Qmax_code, "max-code");
2620 defsymbol (&Qcode_offset, "code-offset");
2621 defsymbol (&Qconversion, "conversion");
2622 defsymbol (&Q94x60, "94x60");
2623 defsymbol (&Q94x94x60, "94x94x60");
2624 defsymbol (&Qbig5_1, "big5-1");
2625 defsymbol (&Qbig5_2, "big5-2");
2628 defsymbol (&Ql2r, "l2r");
2629 defsymbol (&Qr2l, "r2l");
2631 /* Charsets, compatible with FSF 20.3
2632 Naming convention is Script-Charset[-Edition] */
2633 defsymbol (&Qascii, "ascii");
2634 defsymbol (&Qcontrol_1, "control-1");
2635 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2636 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2637 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2638 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2639 defsymbol (&Qthai_tis620, "thai-tis620");
2640 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2641 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2642 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2643 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2644 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2645 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2646 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2647 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2648 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2649 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2650 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2651 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2652 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2653 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2654 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2655 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2657 defsymbol (&Qucs, "ucs");
2658 defsymbol (&Qucs_bmp, "ucs-bmp");
2659 defsymbol (&Qucs_smp, "ucs-smp");
2660 defsymbol (&Qucs_sip, "ucs-sip");
2661 defsymbol (&Qucs_gb, "ucs-gb");
2662 defsymbol (&Qucs_cns, "ucs-cns");
2663 defsymbol (&Qucs_jis, "ucs-jis");
2664 defsymbol (&Qlatin_viscii, "latin-viscii");
2665 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2666 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2667 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2668 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2669 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2670 defsymbol (&Qjis_x0208, "=jis-x0208");
2671 defsymbol (&Qchinese_big5, "chinese-big5");
2672 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2674 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2675 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2677 defsymbol (&Qcomposite, "composite");
2681 vars_of_mule_charset (void)
2688 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2689 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2691 /* Table of charsets indexed by leading byte. */
2692 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2693 chlook->charset_by_leading_byte[i] = Qnil;
2696 /* Table of charsets indexed by type/final-byte. */
2697 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2698 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2699 chlook->charset_by_attributes[i][j] = Qnil;
2701 /* Table of charsets indexed by type/final-byte/direction. */
2702 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2703 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2704 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2705 chlook->charset_by_attributes[i][j][k] = Qnil;
2709 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2711 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2712 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2716 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2717 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2718 Leading-code of private TYPE9N charset of column-width 1.
2720 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2724 Vdefault_coded_charset_priority_list = Qnil;
2725 DEFVAR_LISP ("default-coded-charset-priority-list",
2726 &Vdefault_coded_charset_priority_list /*
2727 Default order of preferred coded-character-sets.
2733 complex_vars_of_mule_charset (void)
2735 staticpro (&Vcharset_hash_table);
2736 Vcharset_hash_table =
2737 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2739 /* Predefined character sets. We store them into variables for
2743 staticpro (&Vcharset_ucs);
2745 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2746 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2747 build_string ("UCS"),
2748 build_string ("UCS"),
2749 build_string ("ISO/IEC 10646"),
2751 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2752 staticpro (&Vcharset_ucs_bmp);
2754 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2755 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2756 build_string ("BMP"),
2757 build_string ("UCS-BMP"),
2758 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2760 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2761 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2762 staticpro (&Vcharset_ucs_smp);
2764 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2765 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2766 build_string ("SMP"),
2767 build_string ("UCS-SMP"),
2768 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2769 build_string ("UCS00-1"),
2770 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2771 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2772 staticpro (&Vcharset_ucs_sip);
2774 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2775 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2776 build_string ("SIP"),
2777 build_string ("UCS-SIP"),
2778 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2779 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2780 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2781 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2782 staticpro (&Vcharset_ucs_gb);
2784 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2785 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2786 build_string ("UCS for GB"),
2787 build_string ("UCS for GB"),
2788 build_string ("ISO/IEC 10646 for GB"),
2790 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2791 staticpro (&Vcharset_ucs_cns);
2793 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2794 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2795 build_string ("UCS for CNS"),
2796 build_string ("UCS for CNS 11643"),
2797 build_string ("ISO/IEC 10646 for CNS 11643"),
2799 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2800 staticpro (&Vcharset_ucs_jis);
2802 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2803 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2804 build_string ("UCS for JIS"),
2805 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2807 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2809 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2811 # define MIN_CHAR_THAI 0
2812 # define MAX_CHAR_THAI 0
2813 /* # define MIN_CHAR_HEBREW 0 */
2814 /* # define MAX_CHAR_HEBREW 0 */
2815 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2816 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2818 staticpro (&Vcharset_ascii);
2820 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2821 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("ASCII"),
2823 build_string ("ASCII)"),
2824 build_string ("ASCII (ISO646 IRV)"),
2825 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2826 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_control_1);
2828 Vcharset_control_1 =
2829 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2830 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2831 build_string ("C1"),
2832 build_string ("Control characters"),
2833 build_string ("Control characters 128-191"),
2835 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2836 staticpro (&Vcharset_latin_iso8859_1);
2837 Vcharset_latin_iso8859_1 =
2838 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2839 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2840 build_string ("Latin-1"),
2841 build_string ("ISO8859-1 (Latin-1)"),
2842 build_string ("ISO8859-1 (Latin-1)"),
2843 build_string ("iso8859-1"),
2844 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2845 staticpro (&Vcharset_latin_iso8859_2);
2846 Vcharset_latin_iso8859_2 =
2847 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2848 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2849 build_string ("Latin-2"),
2850 build_string ("ISO8859-2 (Latin-2)"),
2851 build_string ("ISO8859-2 (Latin-2)"),
2852 build_string ("iso8859-2"),
2853 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2854 staticpro (&Vcharset_latin_iso8859_3);
2855 Vcharset_latin_iso8859_3 =
2856 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2857 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2858 build_string ("Latin-3"),
2859 build_string ("ISO8859-3 (Latin-3)"),
2860 build_string ("ISO8859-3 (Latin-3)"),
2861 build_string ("iso8859-3"),
2862 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2863 staticpro (&Vcharset_latin_iso8859_4);
2864 Vcharset_latin_iso8859_4 =
2865 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2866 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2867 build_string ("Latin-4"),
2868 build_string ("ISO8859-4 (Latin-4)"),
2869 build_string ("ISO8859-4 (Latin-4)"),
2870 build_string ("iso8859-4"),
2871 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_thai_tis620);
2873 Vcharset_thai_tis620 =
2874 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2875 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2876 build_string ("TIS620"),
2877 build_string ("TIS620 (Thai)"),
2878 build_string ("TIS620.2529 (Thai)"),
2879 build_string ("tis620"),
2880 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_greek_iso8859_7);
2882 Vcharset_greek_iso8859_7 =
2883 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2884 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("ISO8859-7"),
2886 build_string ("ISO8859-7 (Greek)"),
2887 build_string ("ISO8859-7 (Greek)"),
2888 build_string ("iso8859-7"),
2889 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2890 staticpro (&Vcharset_arabic_iso8859_6);
2891 Vcharset_arabic_iso8859_6 =
2892 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2893 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2894 build_string ("ISO8859-6"),
2895 build_string ("ISO8859-6 (Arabic)"),
2896 build_string ("ISO8859-6 (Arabic)"),
2897 build_string ("iso8859-6"),
2898 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2899 staticpro (&Vcharset_hebrew_iso8859_8);
2900 Vcharset_hebrew_iso8859_8 =
2901 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2902 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2903 build_string ("ISO8859-8"),
2904 build_string ("ISO8859-8 (Hebrew)"),
2905 build_string ("ISO8859-8 (Hebrew)"),
2906 build_string ("iso8859-8"),
2908 0 /* MIN_CHAR_HEBREW */,
2909 0 /* MAX_CHAR_HEBREW */, 0, 32,
2910 Qnil, CONVERSION_IDENTICAL);
2911 staticpro (&Vcharset_katakana_jisx0201);
2912 Vcharset_katakana_jisx0201 =
2913 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2914 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2915 build_string ("JISX0201 Kana"),
2916 build_string ("JISX0201.1976 (Japanese Kana)"),
2917 build_string ("JISX0201.1976 Japanese Kana"),
2918 build_string ("jisx0201\\.1976"),
2919 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2920 staticpro (&Vcharset_latin_jisx0201);
2921 Vcharset_latin_jisx0201 =
2922 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2923 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2924 build_string ("JISX0201 Roman"),
2925 build_string ("JISX0201.1976 (Japanese Roman)"),
2926 build_string ("JISX0201.1976 Japanese Roman"),
2927 build_string ("jisx0201\\.1976"),
2928 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2929 staticpro (&Vcharset_cyrillic_iso8859_5);
2930 Vcharset_cyrillic_iso8859_5 =
2931 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2932 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2933 build_string ("ISO8859-5"),
2934 build_string ("ISO8859-5 (Cyrillic)"),
2935 build_string ("ISO8859-5 (Cyrillic)"),
2936 build_string ("iso8859-5"),
2937 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2938 staticpro (&Vcharset_latin_iso8859_9);
2939 Vcharset_latin_iso8859_9 =
2940 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2941 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Latin-5"),
2943 build_string ("ISO8859-9 (Latin-5)"),
2944 build_string ("ISO8859-9 (Latin-5)"),
2945 build_string ("iso8859-9"),
2946 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2948 staticpro (&Vcharset_jis_x0208);
2949 Vcharset_jis_x0208 =
2950 make_charset (LEADING_BYTE_JIS_X0208,
2952 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2953 build_string ("JIS X0208"),
2954 build_string ("JIS X0208 Common"),
2955 build_string ("JIS X0208 Common part"),
2956 build_string ("jisx0208\\.1990"),
2958 MIN_CHAR_JIS_X0208_1990,
2959 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2960 Qnil, CONVERSION_94x94);
2962 staticpro (&Vcharset_japanese_jisx0208_1978);
2963 Vcharset_japanese_jisx0208_1978 =
2964 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2965 Qjapanese_jisx0208_1978, 94, 2,
2966 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2967 build_string ("JIS X0208:1978"),
2968 build_string ("JIS X0208:1978 (Japanese)"),
2970 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2971 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2978 CONVERSION_IDENTICAL);
2979 staticpro (&Vcharset_chinese_gb2312);
2980 Vcharset_chinese_gb2312 =
2981 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2982 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2983 build_string ("GB2312"),
2984 build_string ("GB2312)"),
2985 build_string ("GB2312 Chinese simplified"),
2986 build_string ("gb2312"),
2987 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2988 staticpro (&Vcharset_chinese_gb12345);
2989 Vcharset_chinese_gb12345 =
2990 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2991 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2992 build_string ("G1"),
2993 build_string ("GB 12345)"),
2994 build_string ("GB 12345-1990"),
2995 build_string ("GB12345\\(\\.1990\\)?-0"),
2996 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2997 staticpro (&Vcharset_japanese_jisx0208);
2998 Vcharset_japanese_jisx0208 =
2999 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3000 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3001 build_string ("JISX0208"),
3002 build_string ("JIS X0208:1983 (Japanese)"),
3003 build_string ("JIS X0208:1983 Japanese Kanji"),
3004 build_string ("jisx0208\\.1983"),
3011 CONVERSION_IDENTICAL);
3013 staticpro (&Vcharset_japanese_jisx0208_1990);
3014 Vcharset_japanese_jisx0208_1990 =
3015 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3016 Qjapanese_jisx0208_1990, 94, 2,
3017 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3018 build_string ("JISX0208-1990"),
3019 build_string ("JIS X0208:1990 (Japanese)"),
3020 build_string ("JIS X0208:1990 Japanese Kanji"),
3021 build_string ("jisx0208\\.1990"),
3023 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3024 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3025 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3026 Vcharset_jis_x0208 /* Qnil */,
3027 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3029 staticpro (&Vcharset_korean_ksc5601);
3030 Vcharset_korean_ksc5601 =
3031 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3032 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3033 build_string ("KSC5601"),
3034 build_string ("KSC5601 (Korean"),
3035 build_string ("KSC5601 Korean Hangul and Hanja"),
3036 build_string ("ksc5601"),
3037 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3038 staticpro (&Vcharset_japanese_jisx0212);
3039 Vcharset_japanese_jisx0212 =
3040 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3041 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3042 build_string ("JISX0212"),
3043 build_string ("JISX0212 (Japanese)"),
3044 build_string ("JISX0212 Japanese Supplement"),
3045 build_string ("jisx0212"),
3046 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3048 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3049 staticpro (&Vcharset_chinese_cns11643_1);
3050 Vcharset_chinese_cns11643_1 =
3051 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3052 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3053 build_string ("CNS11643-1"),
3054 build_string ("CNS11643-1 (Chinese traditional)"),
3056 ("CNS 11643 Plane 1 Chinese traditional"),
3057 build_string (CHINESE_CNS_PLANE_RE("1")),
3058 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3059 staticpro (&Vcharset_chinese_cns11643_2);
3060 Vcharset_chinese_cns11643_2 =
3061 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3062 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3063 build_string ("CNS11643-2"),
3064 build_string ("CNS11643-2 (Chinese traditional)"),
3066 ("CNS 11643 Plane 2 Chinese traditional"),
3067 build_string (CHINESE_CNS_PLANE_RE("2")),
3068 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3070 staticpro (&Vcharset_latin_tcvn5712);
3071 Vcharset_latin_tcvn5712 =
3072 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3073 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3074 build_string ("TCVN 5712"),
3075 build_string ("TCVN 5712 (VSCII-2)"),
3076 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3077 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3078 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3079 staticpro (&Vcharset_latin_viscii_lower);
3080 Vcharset_latin_viscii_lower =
3081 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3082 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3083 build_string ("VISCII lower"),
3084 build_string ("VISCII lower (Vietnamese)"),
3085 build_string ("VISCII lower (Vietnamese)"),
3086 build_string ("MULEVISCII-LOWER"),
3087 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3088 staticpro (&Vcharset_latin_viscii_upper);
3089 Vcharset_latin_viscii_upper =
3090 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3091 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3092 build_string ("VISCII upper"),
3093 build_string ("VISCII upper (Vietnamese)"),
3094 build_string ("VISCII upper (Vietnamese)"),
3095 build_string ("MULEVISCII-UPPER"),
3096 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3097 staticpro (&Vcharset_latin_viscii);
3098 Vcharset_latin_viscii =
3099 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3100 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3101 build_string ("VISCII"),
3102 build_string ("VISCII 1.1 (Vietnamese)"),
3103 build_string ("VISCII 1.1 (Vietnamese)"),
3104 build_string ("VISCII1\\.1"),
3105 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3106 staticpro (&Vcharset_chinese_big5);
3107 Vcharset_chinese_big5 =
3108 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3109 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3110 build_string ("Big5"),
3111 build_string ("Big5"),
3112 build_string ("Big5 Chinese traditional"),
3113 build_string ("big5-0"),
3115 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3116 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3118 staticpro (&Vcharset_ethiopic_ucs);
3119 Vcharset_ethiopic_ucs =
3120 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3121 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3122 build_string ("Ethiopic (UCS)"),
3123 build_string ("Ethiopic (UCS)"),
3124 build_string ("Ethiopic of UCS"),
3125 build_string ("Ethiopic-Unicode"),
3126 Qnil, 0x1200, 0x137F, 0, 0,
3127 Qnil, CONVERSION_IDENTICAL);
3129 staticpro (&Vcharset_chinese_big5_1);
3130 Vcharset_chinese_big5_1 =
3131 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3132 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3133 build_string ("Big5"),
3134 build_string ("Big5 (Level-1)"),
3136 ("Big5 Level-1 Chinese traditional"),
3137 build_string ("big5"),
3138 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3139 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3140 staticpro (&Vcharset_chinese_big5_2);
3141 Vcharset_chinese_big5_2 =
3142 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3143 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3144 build_string ("Big5"),
3145 build_string ("Big5 (Level-2)"),
3147 ("Big5 Level-2 Chinese traditional"),
3148 build_string ("big5"),
3149 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3150 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3152 #ifdef ENABLE_COMPOSITE_CHARS
3153 /* #### For simplicity, we put composite chars into a 96x96 charset.
3154 This is going to lead to problems because you can run out of
3155 room, esp. as we don't yet recycle numbers. */
3156 staticpro (&Vcharset_composite);
3157 Vcharset_composite =
3158 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3159 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3160 build_string ("Composite"),
3161 build_string ("Composite characters"),
3162 build_string ("Composite characters"),
3165 /* #### not dumped properly */
3166 composite_char_row_next = 32;
3167 composite_char_col_next = 32;
3169 Vcomposite_char_string2char_hash_table =
3170 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3171 Vcomposite_char_char2string_hash_table =
3172 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3173 staticpro (&Vcomposite_char_string2char_hash_table);
3174 staticpro (&Vcomposite_char_char2string_hash_table);
3175 #endif /* ENABLE_COMPOSITE_CHARS */