1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_ucs;
71 Lisp_Object Vcharset_ucs_bmp;
72 Lisp_Object Vcharset_ucs_smp;
73 Lisp_Object Vcharset_ucs_sip;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Encoded length, in bytes, of a character, looked up by the first byte
   (0x00 - 0x9f) of its string representation.
   NOTE(review): the array is declared with 0xA0 (160) entries, yet eleven
   rows of sixteen values are visible below.  The two rows that follow the
   "0x80 - 0x8f" comment differ only in their last entry (3 vs. 2) and look
   like alternatives selected by a preprocessor conditional that is not
   visible in this view -- confirm.  The closing brace of the initializer
   is not visible here either. */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Prototype for the recursive decoding-table check defined just below. */
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
/* Check the vector V (one level of a decoding table) and recurse into
   nested levels with DIM - 1.  Visible checks: the length of V is
   compared against CCS_LEN, and each element is tested for being nil or
   a character.
   NOTE(review): the declaration of `i', the braces, whatever guards the
   recursive call and every return statement are not visible in this
   view, so the meaning of the int result cannot be confirmed here. */
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Elements that are neither nil nor a character need further handling. */
163 if (!NILP (c) && !CHARP (c))
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is handled either as an integer or, in the obsolete form, as a
   list whose elements are folded, eight bits at a time, into a single
   integer code point; invalid input signals "Invalid value for
   coded-charset".  When XCHARSET_GRAPHIC (ccs) is 1 the integer form is
   masked with 0x7F7F7F7F.  The position that Fget_char_attribute reports
   for CHARACTER is removed from the decoding table (presumably only when
   one exists -- the guard is not visible) before CHARACTER is stored at
   the new code point.
   NOTE(review): several lines of this function (declarations, braces,
   the list-walking loop and the return) are not visible in this view. */
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
/* The guarded body is entered unless CCS is named Qmap_ucs and VALUE is
   numerically equal to CHARACTER. */
182 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J below the bytes accumulated so far. */
214 code_point = (code_point << 8) | j;
/* From here on VALUE holds the normalized integer code point. */
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS.
   When CCS has a vector decoding table, the position looked up through
   Fget_char_attribute is removed from it; when CCS has a char-table
   encoding table, the entry for CHARACTER there is set to Qunbound.
   NOTE(review): the guard around the decoding-table removal and the
   return statement are not visible in this view. */
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
317 Qvietnamese_viscii_lower,
318 Qvietnamese_viscii_upper,
328 Lisp_Object Ql2r, Qr2l;
330 Lisp_Object Vcharset_hash_table;
332 /* Composite characters are characters constructed by overstriking two
333 or more regular characters.
335 1) The old Mule implementation involves storing composite characters
336 in a buffer as a tag followed by all of the actual characters
337 used to make up the composite character. I think this is a bad
338 idea; it greatly complicates code that wants to handle strings
339 one character at a time because it has to deal with the possibility
340 of great big ungainly characters. It's much more reasonable to
341 simply store an index into a table of composite characters.
343 2) The current implementation only allows for 16,384 separate
344 composite characters over the lifetime of the XEmacs process.
345 This could become a potential problem if the user
346 edited lots of different files that use composite characters.
347 Due to FSF bogosity, increasing the number of allowable
348 composite characters under Mule would decrease the number
349 of possible faces that can exist. Mule already has shrunk
350 this to 2048, and further shrinkage would become uncomfortable.
351 No such problems exist in XEmacs.
353 Composite characters could be represented as 0x80 C1 C2 C3,
354 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
355 for slightly under 2^20 (one million) composite characters
356 over the XEmacs process lifetime, and you only need to
357 increase the size of a Mule character from 19 to 21 bits.
358 Or you could use 0x80 C1 C2 C3 C4, allowing for about
359 85 million (slightly over 2^26) composite characters. */
362 /************************************************************************/
363 /* Basic Emchar functions */
364 /************************************************************************/
/* NOTE(review): the function below shows two encoders.  The first writes
   C as a UTF-8 style sequence: a lead byte tagged 0xc0, 0xe0, 0xf0, 0xf8
   or 0xfc (for values up to 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, and
   anything larger), followed by continuation bytes that each carry six
   bits of C and are tagged 0x80.  The second splits C with BREAKUP_CHAR
   and emits PRIVATE_LEADING_BYTE_PREFIX when LEADING_BYTE_PRIVATE_P
   holds.  Presumably a preprocessor conditional that is not visible in
   this view selects between them -- confirm.  The terminator of the
   original header comment, the first branch and the return are not
   visible here either. */
366 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
367 string in STR. Returns the number of bytes stored.
368 Do not call this directly. Use the macro set_charptr_emchar() instead.
372 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
387 else if ( c <= 0x7ff )
389 *p++ = (c >> 6) | 0xc0;
390 *p++ = (c & 0x3f) | 0x80;
392 else if ( c <= 0xffff )
394 *p++ = (c >> 12) | 0xe0;
395 *p++ = ((c >> 6) & 0x3f) | 0x80;
396 *p++ = (c & 0x3f) | 0x80;
398 else if ( c <= 0x1fffff )
400 *p++ = (c >> 18) | 0xf0;
401 *p++ = ((c >> 12) & 0x3f) | 0x80;
402 *p++ = ((c >> 6) & 0x3f) | 0x80;
403 *p++ = (c & 0x3f) | 0x80;
405 else if ( c <= 0x3ffffff )
407 *p++ = (c >> 24) | 0xf8;
408 *p++ = ((c >> 18) & 0x3f) | 0x80;
409 *p++ = ((c >> 12) & 0x3f) | 0x80;
410 *p++ = ((c >> 6) & 0x3f) | 0x80;
411 *p++ = (c & 0x3f) | 0x80;
415 *p++ = (c >> 30) | 0xfc;
416 *p++ = ((c >> 24) & 0x3f) | 0x80;
417 *p++ = ((c >> 18) & 0x3f) | 0x80;
418 *p++ = ((c >> 12) & 0x3f) | 0x80;
419 *p++ = ((c >> 6) & 0x3f) | 0x80;
420 *p++ = (c & 0x3f) | 0x80;
423 BREAKUP_CHAR (c, charset, c1, c2);
424 lb = CHAR_LEADING_BYTE (c);
425 if (LEADING_BYTE_PRIVATE_P (lb))
426 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
428 if (EQ (charset, Vcharset_control_1))
437 /* Return the first character from a Mule-encoded string in STR,
438 assuming it's non-ASCII. Do not call this directly.
439 Use the macro charptr_emchar() instead. */
/* NOTE(review): two decoders are visible below.  The first classifies
   the lead byte B against 0xf8, 0xf0, 0xe0 and 0xc0 and then, once per
   remaining byte, shifts CH left by six and ORs in the low six bits of
   B.  The second treats the first byte as a leading byte: for
   LEADING_BYTE_CONTROL_1 it returns the next byte minus 0x20; otherwise
   the charset is found with CHARSET_BY_LEADING_BYTE and the result is
   built with MAKE_CHAR.  Presumably a preprocessor conditional that is
   not visible in this view selects between them -- confirm.  Many lines
   (the per-class length and mask assignments, the reads of I1 and I2)
   are not visible here. */
442 non_ascii_charptr_emchar (const Bufbyte *str)
455 else if ( b >= 0xf8 )
460 else if ( b >= 0xf0 )
465 else if ( b >= 0xe0 )
470 else if ( b >= 0xc0 )
480 for( ; len > 0; len-- )
483 ch = ( ch << 6 ) | ( b & 0x3f );
/* I2 starts at 0; the visible code tests XCHARSET_DIMENSION == 2 before
   MAKE_CHAR is called with both position bytes. */
487 Bufbyte i0 = *str, i1, i2 = 0;
490 if (i0 == LEADING_BYTE_CONTROL_1)
491 return (Emchar) (*++str - 0x20);
493 if (LEADING_BYTE_PREFIX_P (i0))
498 charset = CHARSET_BY_LEADING_BYTE (i0);
499 if (XCHARSET_DIMENSION (charset) == 2)
502 return MAKE_CHAR (charset, i1, i2);
506 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
507 Do not call this directly. Use the macro valid_char_p() instead. */
/* The check splits CH into three fields (CHAR_FIELD1/2/3) and validates
   them against the official and private field ranges, then looks the
   charset up by leading byte.
   NOTE(review): many lines of this function are not visible in this
   view, including the branch on F1 that separates the two halves, the
   delimiters of the "NOTE:" comment text below, and every return except
   the two `== 96' tests.  The comments added here describe only the
   visible conditions. */
511 non_ascii_valid_char_p (Emchar ch)
515 /* Must have only lowest 19 bits set */
519 f1 = CHAR_FIELD1 (ch);
520 f2 = CHAR_FIELD2 (ch);
521 f3 = CHAR_FIELD3 (ch);
/* True when F2 lies outside both the official and the private field-2
   ranges. */
527 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
528 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
529 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* True when F3 is neither 0x20 nor 0x7F and F2 is not in the private
   range. */
534 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
535 f2 <= MAX_CHAR_FIELD2_PRIVATE))
539 NOTE: This takes advantage of the fact that
540 FIELD2_TO_OFFICIAL_LEADING_BYTE and
541 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
543 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
544 if (EQ (charset, Qnil))
/* This path reports validity exactly when the charset has 96 characters
   per dimension. */
546 return (XCHARSET_CHARS (charset) == 96);
/* Same range test as above, applied to F1. */
552 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
553 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
554 f1 > MAX_CHAR_FIELD1_PRIVATE)
556 if (f2 < 0x20 || f3 < 0x20)
559 #ifdef ENABLE_COMPOSITE_CHARS
560 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
562 if (UNBOUNDP (Fgethash (make_int (ch),
563 Vcomposite_char_char2string_hash_table,
568 #endif /* ENABLE_COMPOSITE_CHARS */
/* True when neither position field is 0x20 or 0x7F and F1 is not in the
   private range. */
570 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
571 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private F1 values use different offsets to reach the
   leading byte. */
574 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
581 if (EQ (charset, Qnil))
583 return (XCHARSET_CHARS (charset) == 96);
589 /************************************************************************/
590 /* Basic string functions */
591 /************************************************************************/
593 /* Copy the character pointed to by SRC into DST. Do not call this
594 directly. Use the macro charptr_copy_char() instead.
595 Return the number of bytes copied. */
/* The byte count comes from REP_BYTES_BY_FIRST_BYTE applied to the first
   source byte; the loop then advances both pointers once per byte.
   NOTE(review): the declaration of `i', the loop body and the return are
   not visible in this view. */
598 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
600 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
602 for (i = bytes; i; i--, dst++, src++)
608 /************************************************************************/
609 /* streams of Emchars */
610 /************************************************************************/
612 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
613 The functions below are not meant to be called directly; use
614 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is used as the
   already-known first byte: it is stored in a local buffer, the number
   of remaining bytes is REP_BYTES_BY_FIRST_BYTE (ch) - 1, and each of
   them is fetched with Lstream_getc.  The assembled bytes are decoded
   with charptr_emchar and that value is returned.
   NOTE(review): the declaration of `bytes' is not visible in this view. */
617 Lstream_get_emchar_1 (Lstream *stream, int ch)
619 Bufbyte str[MAX_EMCHAR_LEN];
620 Bufbyte *strptr = str;
623 str[0] = (Bufbyte) ch;
625 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
627 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc is only caught by this assertion. */
628 bufpos_checking_assert (c >= 0);
629 *++strptr = (Bufbyte) c;
631 return charptr_emchar (str);
/* Encode CH into a local buffer of MAX_EMCHAR_LEN bytes with
   set_charptr_emchar and write the resulting bytes to STREAM.  Returns
   whatever Lstream_write returns. */
635 Lstream_fput_emchar (Lstream *stream, Emchar ch)
637 Bufbyte str[MAX_EMCHAR_LEN];
638 Bytecount len = set_charptr_emchar (str, ch);
639 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer of MAX_EMCHAR_LEN bytes with
   set_charptr_emchar and hand the resulting bytes to Lstream_unread on
   STREAM. */
643 Lstream_funget_emchar (Lstream *stream, Emchar ch)
645 Bufbyte str[MAX_EMCHAR_LEN];
646 Bytecount len = set_charptr_emchar (str, ch);
647 Lstream_unread (stream, str, len);
651 /************************************************************************/
653 /************************************************************************/
/* Mark function for charset objects: calls mark_object on the Lisp
   objects held in the Lisp_Charset behind OBJ (short name, long name,
   doc string, registry, CCL program, decoding table and mother).
   NOTE(review): the return statement, and any fields marked on lines not
   visible in this view, cannot be confirmed here. */
656 mark_charset (Lisp_Object obj)
658 Lisp_Charset *cs = XCHARSET (obj);
660 mark_object (cs->short_name);
661 mark_object (cs->long_name);
662 mark_object (cs->doc_string);
663 mark_object (cs->registry);
664 mark_object (cs->ccl_program);
666 mark_object (cs->decoding_table);
667 mark_object (cs->mother);
/* Print function for charset objects.  Writes, to PRINTCHARFUN,
   "#<charset " followed by the name, short name, long name and doc
   string, then a formatted summary (dimension, direction as "l2r" or
   "r2l", columns, graphic, final byte), the registry, and finally the
   record uid in hex.  An error "printing unreadable object ..." is also
   visible; its guarding condition is not.
   NOTE(review): the declaration of `buf' is not visible in this view, so
   its size relative to the sprintf output cannot be confirmed here. */
673 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
675 Lisp_Charset *cs = XCHARSET (obj);
679 error ("printing unreadable object #<charset %s 0x%x>",
680 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
683 write_c_string ("#<charset ", printcharfun);
/* The name is printed with a last argument of 0, the three strings that
   follow with 1. */
684 print_internal (CHARSET_NAME (cs), printcharfun, 0);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
691 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
693 CHARSET_DIMENSION (cs),
694 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
695 CHARSET_COLUMNS (cs),
696 CHARSET_GRAPHIC (cs),
698 write_c_string (buf, printcharfun);
699 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
700 sprintf (buf, " 0x%x>", cs->header.uid);
701 write_c_string (buf, printcharfun);
/* Field description for Lisp_Charset: one XD_LISP_OBJECT entry, located
   by offsetof, for each Lisp_Object member listed.
   NOTE(review): the terminating entry and closing brace of the array, and
   the remaining arguments of DEFINE_LRECORD_IMPLEMENTATION below, are not
   visible in this view. */
704 static const struct lrecord_description charset_description[] = {
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Registers the "charset" record type with mark_charset and
   print_charset as its mark and print methods. */
719 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
720 mark_charset, print_charset, 0, 0, 0,
724 /* Make a new charset. */
725 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset record, fills in its fields from the
   arguments, and registers the new object in the lookup tables reachable
   from `chlook' (by attributes and by leading byte) and, through
   Fputhash, in Vcharset_hash_table under NAME.
   The representation byte count is 1 for LEADING_BYTE_ASCII and
   otherwise DIMENSION + 1 or DIMENSION + 2; the conditions that choose
   between the latter two are not visible in this view.
   NOTE(review): CHARSET_DECODING_TABLE is set to Qunbound below, and no
   use of the DECODING_TABLE argument is visible in this view.  The `reg'
   value stored into CHARSET_REGISTRY is presumably a parameter declared
   on a line that is not visible here -- confirm.  Two indexing forms of
   charset_by_attributes appear (with and without a [direction] index);
   presumably they belong to alternative build configurations.  The
   return statement is not visible either. */
727 make_charset (Charset_ID id, Lisp_Object name,
728 unsigned short chars, unsigned char dimension,
729 unsigned char columns, unsigned char graphic,
730 Bufbyte final, unsigned char direction, Lisp_Object short_name,
731 Lisp_Object long_name, Lisp_Object doc,
733 Lisp_Object decoding_table,
734 Emchar min_code, Emchar max_code,
735 Emchar code_offset, unsigned char byte_offset,
736 Lisp_Object mother, unsigned char conversion)
739 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
743 XSETCHARSET (obj, cs);
/* Straight copies of the arguments into the record. */
745 CHARSET_ID (cs) = id;
746 CHARSET_NAME (cs) = name;
747 CHARSET_SHORT_NAME (cs) = short_name;
748 CHARSET_LONG_NAME (cs) = long_name;
749 CHARSET_CHARS (cs) = chars;
750 CHARSET_DIMENSION (cs) = dimension;
751 CHARSET_DIRECTION (cs) = direction;
752 CHARSET_COLUMNS (cs) = columns;
753 CHARSET_GRAPHIC (cs) = graphic;
754 CHARSET_FINAL (cs) = final;
755 CHARSET_DOC_STRING (cs) = doc;
756 CHARSET_REGISTRY (cs) = reg;
/* These fields start out empty regardless of the arguments. */
757 CHARSET_CCL_PROGRAM (cs) = Qnil;
758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
760 CHARSET_DECODING_TABLE(cs) = Qunbound;
761 CHARSET_MIN_CODE (cs) = min_code;
762 CHARSET_MAX_CODE (cs) = max_code;
763 CHARSET_CODE_OFFSET (cs) = code_offset;
764 CHARSET_BYTE_OFFSET (cs) = byte_offset;
765 CHARSET_MOTHER (cs) = mother;
766 CHARSET_CONVERSION (cs) = conversion;
770 if (id == LEADING_BYTE_ASCII)
771 CHARSET_REP_BYTES (cs) = 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
780 /* some charsets do not have final characters. This includes
781 ASCII, Control-1, Composite, and the two faux private
783 unsigned char iso2022_type
784 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
786 if (code_offset == 0)
788 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
789 chlook->charset_by_attributes[iso2022_type][final] = obj;
793 (chlook->charset_by_attributes[iso2022_type][final][direction]));
794 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
798 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
799 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
801 /* Some charsets are "faux" and don't have names or really exist at
802 all except in the leading-byte table. */
804 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte.  Three counters in
   `chlook' are visible: a single one checked against
   MAX_LEADING_BYTE_PRIVATE, and separate 1-byte and 2-byte ones checked
   against MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.
   Each is post-incremented when a value is taken from it.  The message
   "No more character sets free for this dimension" is signalled with
   DIMENSION as its datum.
   NOTE(review): the conditions that select a counter (presumably the
   build configuration and DIMENSION), the exhaustion handling after each
   limit test, and the return are not visible in this view. */
809 get_unallocated_leading_byte (int dimension)
814 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
817 lb = chlook->next_allocated_leading_byte++;
821 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
824 lb = chlook->next_allocated_1_byte_leading_byte++;
828 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
831 lb = chlook->next_allocated_2_byte_leading_byte++;
837 ("No more character sets free for this dimension",
838 make_int (dimension));
844 /* Number of Big5 characters which have the same code in 1st byte. */
/* Numerically (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157. */
846 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Convert CODE_POINT according to CONV_TYPE; callers in this file then
   look the result up in the mother charset.
   - CONVERSION_94x60 and CONVERSION_94x94x60 split CODE_POINT into
     (plane,) row and cell bytes and turn two bands of rows (below
     16 + 32 + 30 and below 18 + 32 + 60) into a linear index with 94
     cells per row and 94 * 60 cells per plane.
   - CONVERSION_BIG5_1 and CONVERSION_BIG5_2 build a linear index I from
     the two 7-bit bytes (each minus 33, 94 per row), BIG5_2 adding
     BIG5_SAME_ROW * (0xC9 - 0xA1); I is then split into a first byte
     I / 157 + 0xA1 and a second byte I % 157, which is moved to 0x40...
     or 0xA1... by adding 0x40 or 0x62.
   NOTE(review): the results for CONVERSION_IDENTICAL, for rows outside
   the visible bands, and the final fallback return are on lines that are
   not visible in this view. */
849 decode_ccs_conversion (int conv_type, int code_point)
851 if ( conv_type == CONVERSION_IDENTICAL )
855 if ( conv_type == CONVERSION_94x60 )
857 int row = code_point >> 8;
858 int cell = code_point & 255;
862 else if (row < 16 + 32 + 30)
863 return (row - (16 + 32)) * 94 + cell - 33;
864 else if (row < 18 + 32 + 30)
866 else if (row < 18 + 32 + 60)
867 return (row - (18 + 32)) * 94 + cell - 33;
869 else if ( conv_type == CONVERSION_94x94x60 )
871 int plane = code_point >> 16;
872 int row = (code_point >> 8) & 255;
873 int cell = code_point & 255;
877 else if (row < 16 + 32 + 30)
879 (plane - 33) * 94 * 60
880 + (row - (16 + 32)) * 94
882 else if (row < 18 + 32 + 30)
884 else if (row < 18 + 32 + 60)
886 (plane - 33) * 94 * 60
887 + (row - (18 + 32)) * 94
890 else if ( conv_type == CONVERSION_BIG5_1 )
893 = (((code_point >> 8) & 0x7F) - 33) * 94
894 + (( code_point & 0x7F) - 33);
/* The divisor below is the same expression as BIG5_SAME_ROW. */
895 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
896 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets 0..62 land on 0x40..0x7E; offsets from 63 land on 0xA1 up. */
898 b2 += b2 < 0x3F ? 0x40 : 0x62;
899 return (b1 << 8) | b2;
901 else if ( conv_type == CONVERSION_BIG5_2 )
904 = (((code_point >> 8) & 0x7F) - 33) * 94
905 + (( code_point & 0x7F) - 33)
906 + BIG5_SAME_ROW * (0xC9 - 0xA1);
907 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
908 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
910 b2 += b2 < 0x3F ? 0x40 : 0x62;
911 return (b1 << 8) | b2;
/* Look up the character that the decoding table of CCS defines for
   CODE_POINT.  The table is descended with get_ccs_octet_table using one
   octet of CODE_POINT at a time; if a character is found it is returned.
   An entry equal to Qunloaded is resolved with
   load_char_decoding_entry_maybe.  In a later branch, when
   WITHOUT_INHERITANCE is zero and CCS has a mother charset, CODE_POINT is
   converted with decode_ccs_conversion, XCHARSET_CODE_OFFSET (ccs) is
   added, and the lookup continues in the mother: through DECODE_CHAR when
   the mother is Vcharset_ucs, otherwise by recursion.
   NOTE(review): the loop around the octet lookup, the conditional that
   pairs with the visible `#endif', and the fallback return are not
   visible in this view. */
917 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
919 int dim = XCHARSET_DIMENSION (ccs);
920 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
928 = get_ccs_octet_table (decoding_table, ccs,
929 (code_point >> (dim * 8)) & 255);
931 if (CHARP (decoding_table))
932 return XCHAR (decoding_table);
934 if (EQ (decoding_table, Qunloaded))
936 char_id = load_char_decoding_entry_maybe (ccs, code_point);
938 #endif /* HAVE_CHISE */
941 else if ( !without_inheritance
942 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
945 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
949 code += XCHARSET_CODE_OFFSET(ccs);
950 if ( EQ (mother, Vcharset_ucs) )
951 return DECODE_CHAR (mother, code, without_inheritance);
953 return decode_defined_char (mother, code,
954 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET arithmetically.
   Visible cases:
   - XCHARSET_MAX_CODE > 0: with a mother charset, CODE_POINT is converted
     with decode_ccs_conversion and the lookup recurses into the mother
     after adding XCHARSET_CODE_OFFSET; otherwise a character id is built
     from the position bytes (minus XCHARSET_BYTE_OFFSET, scaled by
     XCHARSET_CHARS for dimension other than 1) plus XCHARSET_CODE_OFFSET
     and compared against the charset's MIN_CODE..MAX_CODE range.
   - final byte >= '0': the result is a base plus
     (final - '0') * block size plus the position computed from the 7-bit
     bytes (minus 33 for 94-character sets, minus 32 for 96-character
     sets).
   NOTE(review): the results returned on failure, the `case' labels, and
   the bases used in the dimension-1 returns are on lines that are not
   visible in this view. */
961 decode_builtin_char (Lisp_Object charset, int code_point)
963 Lisp_Object mother = XCHARSET_MOTHER (charset);
966 if ( XCHARSET_MAX_CODE (charset) > 0 )
968 if ( CHARSETP (mother) )
971 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
976 decode_builtin_char (mother,
977 code + XCHARSET_CODE_OFFSET(charset));
984 = (XCHARSET_DIMENSION (charset) == 1
986 code_point - XCHARSET_BYTE_OFFSET (charset)
988 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
989 * XCHARSET_CHARS (charset)
990 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
991 + XCHARSET_CODE_OFFSET (charset);
/* Range check of the computed id against the charset's code range. */
992 if ((cid < XCHARSET_MIN_CODE (charset))
993 || (XCHARSET_MAX_CODE (charset) < cid))
998 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1000 if (XCHARSET_DIMENSION (charset) == 1)
1002 switch (XCHARSET_CHARS (charset))
1006 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1009 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1017 switch (XCHARSET_CHARS (charset))
1020 return MIN_CHAR_94x94
1021 + (final - '0') * 94 * 94
1022 + (((code_point >> 8) & 0x7F) - 33) * 94
1023 + ((code_point & 0x7F) - 33);
1025 return MIN_CHAR_96x96
1026 + (final - '0') * 96 * 96
1027 + (((code_point >> 8) & 0x7F) - 32) * 96
1028 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.
   Visible steps:
   1. If the charset has a char-table encoding table, it is consulted
      first through get_char_id_table and tested for an integer result.
   2. Otherwise a candidate `code' is obtained: from the mother charset
      by recursion (forcing defined_only to 1 when the charset has a
      final byte >= '0'), or by a range test of CH against the MIN_CODE
      and MAX_CODE of the charset.
   3. If `code' is acceptable, d = code - XCHARSET_CODE_OFFSET is
      re-expressed in the byte layout named by XCHARSET_CONVERSION
      (94/96 per position for the multi-byte layouts, the 60-row banded
      forms, or the Big5 forms using BIG5_SAME_ROW).  An unknown
      conversion prints "Unknown CCS-conversion %d is specified!" with
      printf.
   4. A final group handles charsets with a final byte >= '0' and
      MIN_CODE of 0 by subtracting the corresponding MIN_CHAR_* base.
   NOTE(review): many lines are not visible in this view, including the
   declarations of `ret', `code' and `d', the results returned in the
   defined_only branches, the range checks on d and I, the row
   adjustments for the 60-row forms, and the final return. */
1040 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1042 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1045 if ( CHAR_TABLEP (encoding_table)
1046 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1051 Lisp_Object mother = XCHARSET_MOTHER (charset);
1052 int min = XCHARSET_MIN_CODE (charset);
1053 int max = XCHARSET_MAX_CODE (charset);
1056 if ( CHARSETP (mother) )
1058 if (XCHARSET_FINAL (charset) >= '0')
1059 code = charset_code_point (mother, ch, 1);
1061 code = charset_code_point (mother, ch, defined_only);
1063 else if (defined_only)
1065 else if ( ((max == 0) && CHARSETP (mother)
1066 && (XCHARSET_FINAL (charset) == 0))
1067 || ((min <= ch) && (ch <= max)) )
1069 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1070 || ((min <= code) && (code <= max)) )
/* D is the offset of `code' from the charset's code offset; every layout
   below is derived from it. */
1072 int d = code - XCHARSET_CODE_OFFSET (charset);
1074 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1076 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1078 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1080 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1083 int cell = d % 94 + 33;
1089 return (row << 8) | cell;
1091 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1093 int B1 = d >> 8, B2 = d & 0xFF;
1095 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1096 - (B2 < 0x7F ? 0x40 : 0x62);
1100 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1103 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1105 int B1 = d >> 8, B2 = d & 0xFF;
1107 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1108 - (B2 < 0x7F ? 0x40 : 0x62);
/* Rebase the index so that it starts at first byte 0xC9. */
1112 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1113 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1116 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1117 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1119 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1120 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1122 int plane = d / (94 * 60) + 33;
1123 int row = (d % (94 * 60)) / 94;
1124 int cell = d % 94 + 33;
1130 return (plane << 16) | (row << 8) | cell;
1132 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1134 ( (d / (94 * 94) + 33) << 16)
1135 | ((d / 94 % 94 + 33) << 8)
1137 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1139 ( (d / (96 * 96) + 32) << 16)
1140 | ((d / 96 % 96 + 32) << 8)
1142 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1144 ( (d / (94 * 94 * 94) + 33) << 24)
1145 | ((d / (94 * 94) % 94 + 33) << 16)
1146 | ((d / 94 % 94 + 33) << 8)
1148 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1150 ( (d / (96 * 96 * 96) + 32) << 24)
1151 | ((d / (96 * 96) % 96 + 32) << 16)
1152 | ((d / 96 % 96 + 32) << 8)
1156 printf ("Unknown CCS-conversion %d is specified!",
1157 XCHARSET_CONVERSION (charset));
1161 else if (defined_only)
1163 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1164 ( XCHARSET_MIN_CODE (charset) == 0 )
1166 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1167 (XCHARSET_CODE_OFFSET (charset)
1168 == XCHARSET_MIN_CODE (charset))
1173 if (XCHARSET_DIMENSION (charset) == 1)
1175 if (XCHARSET_CHARS (charset) == 94)
1177 if (((d = ch - (MIN_CHAR_94
1178 + (XCHARSET_FINAL (charset) - '0') * 94))
1183 else if (XCHARSET_CHARS (charset) == 96)
1185 if (((d = ch - (MIN_CHAR_96
1186 + (XCHARSET_FINAL (charset) - '0') * 96))
1194 else if (XCHARSET_DIMENSION (charset) == 2)
1196 if (XCHARSET_CHARS (charset) == 94)
1198 if (((d = ch - (MIN_CHAR_94x94
1200 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1203 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1205 else if (XCHARSET_CHARS (charset) == 96)
1207 if (((d = ch - (MIN_CHAR_96x96
1209 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1212 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C, store it through CHARSET,
   and return the code point of C in that charset.  The visible branches
   test C against successive ranges: basic Latin (ASCII), control-1,
   Latin-1, Hebrew, Thai, the UCS BMP, SMP and SIP planes, and then the
   MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks.  In those blocks the charset
   is looked up with CHARSET_BY_ATTRIBUTES from a final byte computed as
   block index + '0'; when that lookup yields nil the charset falls back
   to Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns a list2 Lisp
   object whereas the other visible branches return integer codes;
   presumably it is disabled by comment delimiters or a conditional on
   lines that are not visible in this view -- confirm.  The conditions
   for the control-1 and Latin-1 branches and several returns (including
   the values returned with the Vcharset_ucs fallback) are not visible
   either. */
1223 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1225 if (c <= MAX_CHAR_BASIC_LATIN)
1227 *charset = Vcharset_ascii;
1232 *charset = Vcharset_control_1;
1237 *charset = Vcharset_latin_iso8859_1;
1241 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1243 *charset = Vcharset_hebrew_iso8859_8;
1244 return c - MIN_CHAR_HEBREW + 0x20;
1247 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1249 *charset = Vcharset_thai_tis620;
1250 return c - MIN_CHAR_THAI + 0x20;
1253 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1254 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1256 return list2 (Vcharset_katakana_jisx0201,
1257 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1260 else if (c <= MAX_CHAR_BMP)
1262 *charset = Vcharset_ucs_bmp;
1265 else if (c <= MAX_CHAR_SMP)
1267 *charset = Vcharset_ucs_smp;
1268 return c - MIN_CHAR_SMP;
1270 else if (c <= MAX_CHAR_SIP)
1272 *charset = Vcharset_ucs_sip;
1273 return c - MIN_CHAR_SIP;
1275 else if (c < MIN_CHAR_94)
1277 *charset = Vcharset_ucs;
1280 else if (c <= MAX_CHAR_94)
1282 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1283 ((c - MIN_CHAR_94) / 94) + '0',
1284 CHARSET_LEFT_TO_RIGHT);
1285 if (!NILP (*charset))
1286 return ((c - MIN_CHAR_94) % 94) + 33;
1289 *charset = Vcharset_ucs;
1293 else if (c <= MAX_CHAR_96)
1295 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1296 ((c - MIN_CHAR_96) / 96) + '0',
1297 CHARSET_LEFT_TO_RIGHT);
1298 if (!NILP (*charset))
1299 return ((c - MIN_CHAR_96) % 96) + 32;
1302 *charset = Vcharset_ucs;
1306 else if (c <= MAX_CHAR_94x94)
1309 = CHARSET_BY_ATTRIBUTES (94, 2,
1310 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1311 CHARSET_LEFT_TO_RIGHT);
1312 if (!NILP (*charset))
1313 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1314 | (((c - MIN_CHAR_94x94) % 94) + 33);
1317 *charset = Vcharset_ucs;
1321 else if (c <= MAX_CHAR_96x96)
1324 = CHARSET_BY_ATTRIBUTES (96, 2,
1325 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1326 CHARSET_LEFT_TO_RIGHT);
1327 if (!NILP (*charset))
1328 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1329 | (((c - MIN_CHAR_96x96) % 96) + 32);
1332 *charset = Vcharset_ucs;
1338 *charset = Vcharset_ucs;
1343 Lisp_Object Vdefault_coded_charset_priority_list;
1347 /************************************************************************/
1348 /* Basic charset Lisp functions */
1349 /************************************************************************/
/* Lisp predicate `charsetp': the visible return yields Qt when OBJECT
   satisfies CHARSETP and Qnil otherwise.
   NOTE(review): the end of the doc string and the argument list are on
   lines that are not visible in this view. */
1351 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1352 Return non-nil if OBJECT is a charset.
1356 return CHARSETP (object) ? Qt : Qnil;
1359 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1360 Retrieve the charset of the given name.
1361 If CHARSET-OR-NAME is a charset object, it is simply returned.
1362 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1363 nil is returned. Otherwise the associated charset object is returned.
1367 if (CHARSETP (charset_or_name))
1368 return charset_or_name;
1370 CHECK_SYMBOL (charset_or_name);
1371 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1374 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1375 Retrieve the charset of the given name.
1376 Same as `find-charset' except an error is signalled if there is no such
1377 charset instead of returning nil.
1381 Lisp_Object charset = Ffind_charset (name);
1384 signal_simple_error ("No such charset", name);
1388 /* We store the charsets in hash tables with the names as the key and the
1389 actual charset object as the value. Occasionally we need to use them
1390 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: it carries a pointer to the
   Lisp list that the mapper below extends. */
1391 struct charset_list_closure
1393 Lisp_Object *charset_list;
/* Hash-table mapper: conses KEY (the name the charset is stored under)
   onto the front of the list referenced by the closure.  VALUE is not
   used by the visible code.
   NOTE(review): the return statement is not visible in this view. */
1397 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1398 void *charset_list_closure)
1400 /* This function can GC */
1401 struct charset_list_closure *chcl =
1402 (struct charset_list_closure*) charset_list_closure;
1403 Lisp_Object *charset_list = chcl->charset_list;
1405 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp function `charset-list': starts from an empty, GC-protected list
   and lets elisp_maphash call add_charset_to_list_mapper for every entry
   of Vcharset_hash_table; the accumulated list is returned.
   NOTE(review): the end of the doc string and the matching release of
   the GC protection are on lines that are not visible in this view. */
1409 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1410 Return a list of the names of all defined charsets.
1414 Lisp_Object charset_list = Qnil;
1415 struct gcpro gcpro1;
1416 struct charset_list_closure charset_list_closure;
1418 GCPRO1 (charset_list);
1419 charset_list_closure.charset_list = &charset_list;
1420 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1421 &charset_list_closure);
1424 return charset_list;
1427 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1428 Return the name of charset CHARSET.
1432 return XCHARSET_NAME (Fget_charset (charset));
1435 /* #### SJT Should generic properties be allowed? */
/* make-charset: defines a new charset from NAME, DOC-STRING and PROPS.
   Visible flow:
   - NAME must be a symbol and DOC-STRING, when non-nil, a string; a name
     that Ffind_charset already knows is rejected ("Cannot redefine existing
     charset").
   - PROPS is walked with EXTERNAL_PROPERTY_LIST_LOOP_3.  Each recognized
     keyword is validated ('columns must be 1 or 2, 'final must lie in
     '0'..'~', 'direction must be l2r or r2l, 'mother must name a charset,
     'ccl-program must pass setup_ccl_program, and so on); any other keyword
     signals "Unrecognized property".  'iso-ir stores the negated value in
     `id'.
   - After the loop, 'final is required, dimension 2 limits it to 0x5F, and
     CHARSET_BY_ATTRIBUTES is consulted for both directions to reject a
     duplicate DIMENSION/CHARS/FINAL combination.
   - Defaults: empty strings for DOC-STRING and registry, the symbol's name
     for short-name, DOC-STRING for long-name, DIMENSION for columns.
   - The charset is created by make_charset; a ccl program, if given, is
     stored on it afterwards.
   NOTE(review): several guarding conditions, and the declaration of
   `conversion', are not among the lines shown here -- confirm against the
   full source before relying on this summary. */
1436 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1437 Define a new character set.
1438 This function is for use with Mule support.
1439 NAME is a symbol, the name by which the character set is normally referred.
1440 DOC-STRING is a string describing the character set.
1441 PROPS is a property list, describing the specific nature of the
1442 character set. Recognized properties are:
1444 'short-name Short version of the charset name (ex: Latin-1)
1445 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1446 'registry A regular expression matching the font registry field for
1448 'dimension Number of octets used to index a character in this charset.
1449 Either 1 or 2. Defaults to 1.
1450 If UTF-2000 feature is enabled, 3 or 4 are also available.
1451 'columns Number of columns used to display a character in this charset.
1452 Only used in TTY mode. (Under X, the actual width of a
1453 character can be derived from the font used to display the
1454 characters.) If unspecified, defaults to the dimension
1455 (this is almost always the correct value).
1456 'chars Number of characters in each dimension (94 or 96).
1457 Defaults to 94. Note that if the dimension is 2, the
1458 character set thus described is 94x94 or 96x96.
1459 If UTF-2000 feature is enabled, 128 or 256 are also available.
1460 'final Final byte of ISO 2022 escape sequence. Must be
1461 supplied. Each combination of (DIMENSION, CHARS) defines a
1462 separate namespace for final bytes. Note that ISO
1463 2022 restricts the final byte to the range
1464 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1465 dimension == 2. Note also that final bytes in the range
1466 0x30 - 0x3F are reserved for user-defined (not official)
1468 'graphic 0 (use left half of font on output) or 1 (use right half
1469 of font on output). Defaults to 0. For example, for
1470 a font whose registry is ISO8859-1, the left half
1471 (octets 0x20 - 0x7F) is the `ascii' character set, while
1472 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1473 character set. With 'graphic set to 0, the octets
1474 will have their high bit cleared; with it set to 1,
1475 the octets will have their high bit set.
1476 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1478 'ccl-program A compiled CCL program used to convert a character in
1479 this charset into an index into the font. This is in
1480 addition to the 'graphic property. The CCL program
1481 is passed the octets of the character, with the high
1482 bit cleared and set depending upon whether the value
1483 of the 'graphic property is 0 or 1.
1484 'mother [UTF-2000 only] Base coded-charset.
1485 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1486 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1487 'code-offset [UTF-2000 only] Offset for a code-point of a base
1489 'conversion [UTF-2000 only] Conversion for a code-point of a base
1490 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1492 (name, doc_string, props))
1494 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1495 int direction = CHARSET_LEFT_TO_RIGHT;
1496 Lisp_Object registry = Qnil;
1497 Lisp_Object charset;
1498 Lisp_Object ccl_program = Qnil;
1499 Lisp_Object short_name = Qnil, long_name = Qnil;
1500 Lisp_Object mother = Qnil;
1501 int min_code = 0, max_code = 0, code_offset = 0;
1502 int byte_offset = -1;
1505 CHECK_SYMBOL (name);
1506 if (!NILP (doc_string))
1507 CHECK_STRING (doc_string);
1509 charset = Ffind_charset (name);
1510 if (!NILP (charset))
1511 signal_simple_error ("Cannot redefine existing charset", name);
1514 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1516 if (EQ (keyword, Qshort_name))
1518 CHECK_STRING (value);
1522 else if (EQ (keyword, Qlong_name))
1524 CHECK_STRING (value);
1528 else if (EQ (keyword, Qiso_ir))
1532 id = - XINT (value);
1536 else if (EQ (keyword, Qdimension))
1539 dimension = XINT (value);
1540 if (dimension < 1 ||
1547 signal_simple_error ("Invalid value for 'dimension", value);
1550 else if (EQ (keyword, Qchars))
1553 chars = XINT (value);
1554 if (chars != 94 && chars != 96
1556 && chars != 128 && chars != 256
1559 signal_simple_error ("Invalid value for 'chars", value);
1562 else if (EQ (keyword, Qcolumns))
1565 columns = XINT (value);
1566 if (columns != 1 && columns != 2)
1567 signal_simple_error ("Invalid value for 'columns", value);
1570 else if (EQ (keyword, Qgraphic))
1573 graphic = XINT (value);
1581 signal_simple_error ("Invalid value for 'graphic", value);
1584 else if (EQ (keyword, Qregistry))
1586 CHECK_STRING (value);
1590 else if (EQ (keyword, Qdirection))
1592 if (EQ (value, Ql2r))
1593 direction = CHARSET_LEFT_TO_RIGHT;
1594 else if (EQ (value, Qr2l))
1595 direction = CHARSET_RIGHT_TO_LEFT;
1597 signal_simple_error ("Invalid value for 'direction", value);
1600 else if (EQ (keyword, Qfinal))
1602 CHECK_CHAR_COERCE_INT (value);
1603 final = XCHAR (value);
1604 if (final < '0' || final > '~')
1605 signal_simple_error ("Invalid value for 'final", value);
1609 else if (EQ (keyword, Qmother))
1611 mother = Fget_charset (value);
1614 else if (EQ (keyword, Qmin_code))
1617 min_code = XUINT (value);
1620 else if (EQ (keyword, Qmax_code))
1623 max_code = XUINT (value);
1626 else if (EQ (keyword, Qcode_offset))
1629 code_offset = XUINT (value);
1632 else if (EQ (keyword, Qconversion))
1634 if (EQ (value, Q94x60))
1635 conversion = CONVERSION_94x60;
1636 else if (EQ (value, Q94x94x60))
1637 conversion = CONVERSION_94x94x60;
1638 else if (EQ (value, Qbig5_1))
1639 conversion = CONVERSION_BIG5_1;
1640 else if (EQ (value, Qbig5_2))
1641 conversion = CONVERSION_BIG5_2;
1643 signal_simple_error ("Unrecognized conversion", value);
1647 else if (EQ (keyword, Qccl_program))
1649 struct ccl_program test_ccl;
1651 if (setup_ccl_program (&test_ccl, value) < 0)
1652 signal_simple_error ("Invalid value for 'ccl-program", value);
1653 ccl_program = value;
1657 signal_simple_error ("Unrecognized property", keyword);
1663 error ("'final must be specified");
1665 if (dimension == 2 && final > 0x5F)
1667 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1670 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1671 CHARSET_LEFT_TO_RIGHT)) ||
1672 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1673 CHARSET_RIGHT_TO_LEFT)))
1675 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1678 id = get_unallocated_leading_byte (dimension);
1680 if (NILP (doc_string))
1681 doc_string = build_string ("");
1683 if (NILP (registry))
1684 registry = build_string ("");
1686 if (NILP (short_name))
1687 XSETSTRING (short_name, XSYMBOL (name)->name);
1689 if (NILP (long_name))
1690 long_name = doc_string;
1693 columns = dimension;
1695 if (byte_offset < 0)
1699 else if (chars == 96)
1705 charset = make_charset (id, name, chars, dimension, columns, graphic,
1706 final, direction, short_name, long_name,
1707 doc_string, registry,
1708 Qnil, min_code, max_code, code_offset, byte_offset,
1709 mother, conversion);
1710 if (!NILP (ccl_program))
1711 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* make-reverse-direction-charset: creates a charset named NEW-NAME that
   copies chars, dimension, columns, graphic, final, doc string, short name,
   long name and registry from CHARSET, with the direction flipped.
   Signals an error when CHARSET already has a reverse-direction charset or
   when NEW-NAME already names a charset.  A fresh id is taken from
   get_unallocated_leading_byte.  After creation the two charsets are linked
   to each other through their reverse-direction-charset slots.
   NOTE(review): two alternative tails for the make_charset argument list
   are visible (values copied from CHARSET vs. Qnil/0 placeholders);
   presumably they sit in different preprocessor branches -- confirm. */
1715 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1717 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1718 NEW-NAME is the name of the new charset. Return the new charset.
1720 (charset, new_name))
1722 Lisp_Object new_charset = Qnil;
1723 int id, chars, dimension, columns, graphic, final;
1725 Lisp_Object registry, doc_string, short_name, long_name;
1728 charset = Fget_charset (charset);
1729 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1730 signal_simple_error ("Charset already has reverse-direction charset",
1733 CHECK_SYMBOL (new_name);
1734 if (!NILP (Ffind_charset (new_name)))
1735 signal_simple_error ("Cannot redefine existing charset", new_name);
1737 cs = XCHARSET (charset);
1739 chars = CHARSET_CHARS (cs);
1740 dimension = CHARSET_DIMENSION (cs);
1741 columns = CHARSET_COLUMNS (cs);
1742 id = get_unallocated_leading_byte (dimension);
1744 graphic = CHARSET_GRAPHIC (cs);
1745 final = CHARSET_FINAL (cs);
1746 direction = CHARSET_RIGHT_TO_LEFT;
1747 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1748 direction = CHARSET_LEFT_TO_RIGHT;
1749 doc_string = CHARSET_DOC_STRING (cs);
1750 short_name = CHARSET_SHORT_NAME (cs);
1751 long_name = CHARSET_LONG_NAME (cs);
1752 registry = CHARSET_REGISTRY (cs);
1754 new_charset = make_charset (id, new_name, chars, dimension, columns,
1755 graphic, final, direction, short_name, long_name,
1756 doc_string, registry,
1758 CHARSET_DECODING_TABLE(cs),
1759 CHARSET_MIN_CODE(cs),
1760 CHARSET_MAX_CODE(cs),
1761 CHARSET_CODE_OFFSET(cs),
1762 CHARSET_BYTE_OFFSET(cs),
1764 CHARSET_CONVERSION (cs)
1766 Qnil, 0, 0, 0, 0, Qnil, 0
1770 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1771 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* define-charset-alias: checks that ALIAS is a symbol, resolves CHARSET with
   Fget_charset, and stores ALIAS -> charset in Vcharset_hash_table.  The
   return value is whatever Fputhash returns. */
1776 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1777 Define symbol ALIAS as an alias for CHARSET.
1781 CHECK_SYMBOL (alias);
1782 charset = Fget_charset (charset);
1783 return Fputhash (alias, charset, Vcharset_hash_table);
1786 /* #### Reverse direction charsets not yet implemented. */
/* charset-reverse-direction-charset: resolves CHARSET with Fget_charset and
   returns the value of its reverse-direction-charset slot as stored. */
1788 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1790 Return the reverse-direction charset parallel to CHARSET, if any.
1791 This is the charset with the same properties (in particular, the same
1792 dimension, number of characters per dimension, and final byte) as
1793 CHARSET but whose characters are displayed in the opposite direction.
1797 charset = Fget_charset (charset);
1798 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* charset-from-attributes: looks a charset up by its ISO 2022 attributes.
   Validation visible here: DIMENSION must be 1 or 2, CHARS must be 94 or 96,
   FINAL must lie in '0'..'~' (and not exceed 0x5F when DIMENSION is 2),
   DIRECTION must be l2r, r2l or nil.  `di' stays -1 when DIRECTION is nil;
   lookups for left-to-right, right-to-left and the explicit direction all go
   through CHARSET_BY_ATTRIBUTES.  The result is returned as XCHARSET_NAME.
   NOTE(review): whether `obj' is tested for being a charset before
   XCHARSET_NAME is applied is not visible in these lines -- confirm. */
1802 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1803 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1804 If DIRECTION is omitted, both directions will be checked (left-to-right
1805 will be returned if character sets exist for both directions).
1807 (dimension, chars, final, direction))
1809 int dm, ch, fi, di = -1;
1810 Lisp_Object obj = Qnil;
1812 CHECK_INT (dimension);
1813 dm = XINT (dimension);
1814 if (dm < 1 || dm > 2)
1815 signal_simple_error ("Invalid value for DIMENSION", dimension);
1819 if (ch != 94 && ch != 96)
1820 signal_simple_error ("Invalid value for CHARS", chars);
1822 CHECK_CHAR_COERCE_INT (final);
1824 if (fi < '0' || fi > '~')
1825 signal_simple_error ("Invalid value for FINAL", final);
1827 if (EQ (direction, Ql2r))
1828 di = CHARSET_LEFT_TO_RIGHT;
1829 else if (EQ (direction, Qr2l))
1830 di = CHARSET_RIGHT_TO_LEFT;
1831 else if (!NILP (direction))
1832 signal_simple_error ("Invalid value for DIRECTION", direction);
1834 if (dm == 2 && fi > 0x5F)
1836 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1840 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1842 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1845 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1848 return XCHARSET_NAME (obj);
/* charset-short-name: resolves CHARSET with Fget_charset and returns
   XCHARSET_SHORT_NAME of the result. */
1852 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1853 Return short name of CHARSET.
1857 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* charset-long-name: resolves CHARSET with Fget_charset and returns
   XCHARSET_LONG_NAME of the result. */
1860 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1861 Return long name of CHARSET.
1865 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* charset-description: resolves CHARSET with Fget_charset and returns
   XCHARSET_DOC_STRING of the result, i.e. the description is the charset's
   doc string. */
1868 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1869 Return description of CHARSET.
1873 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* charset-dimension: resolves CHARSET with Fget_charset and returns
   XCHARSET_DIMENSION of the result wrapped as a Lisp integer. */
1876 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1877 Return dimension of CHARSET.
1881 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* charset-property: resolves CHARSET with Fget_charset, checks that PROP is
   a symbol, then compares PROP against each known property symbol in turn
   and returns the matching slot.  Integer slots are wrapped with make_int.
   Special cases visible here:
   - 'final yields nil when CHARSET_FINAL is 0, otherwise a character;
   - 'direction yields the symbol l2r or r2l;
   - 'reverse-direction-charset yields the linked charset's name when the
     slot holds a charset, otherwise the slot value unchanged.
   A PROP that matches nothing signals "Unrecognized charset property name";
   the trailing return only exists to satisfy the compiler. */
1884 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1885 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1886 Recognized properties are those listed in `make-charset', as well as
1887 'name and 'doc-string.
1893 charset = Fget_charset (charset);
1894 cs = XCHARSET (charset);
1896 CHECK_SYMBOL (prop);
1897 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1898 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1899 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1900 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1901 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1902 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1903 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1904 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1905 Qnil : make_char (CHARSET_FINAL (cs));
1906 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1907 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1908 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1909 if (EQ (prop, Qdirection))
1910 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1911 if (EQ (prop, Qreverse_direction_charset))
1913 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1914 /* #### Is this translation OK? If so, error checking sufficient? */
1915 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1918 if (EQ (prop, Qmother))
1919 return CHARSET_MOTHER (cs);
1920 if (EQ (prop, Qmin_code))
1921 return make_int (CHARSET_MIN_CODE (cs));
1922 if (EQ (prop, Qmax_code))
1923 return make_int (CHARSET_MAX_CODE (cs));
1925 signal_simple_error ("Unrecognized charset property name", prop);
1926 return Qnil; /* not reached */
/* charset-id: resolves CHARSET with Fget_charset and returns
   XCHARSET_LEADING_BYTE of the result as a Lisp integer, i.e. the id
   reported to Lisp is the charset's leading byte. */
1929 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1930 Return charset identification number of CHARSET.
1934 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1937 /* #### We need to figure out which properties we really want to
/* set-charset-ccl-program: resolves CHARSET with Fget_charset, validates
   CCL-PROGRAM by running setup_ccl_program on a scratch struct ccl_program
   (a negative result signals "Invalid ccl-program"), and only then stores
   CCL-PROGRAM in the charset's ccl-program slot. */
1940 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1941 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1943 (charset, ccl_program))
1945 struct ccl_program test_ccl;
1947 charset = Fget_charset (charset);
1948 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1949 signal_simple_error ("Invalid ccl-program", ccl_program);
1950 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Walks every device with DEVICE_LOOP_NO_BREAK and, for each one, looks
   CHARSET up in that device's charset_font_cache.  When an entry exists
   (the lookup does not return Qunbound) the entry, itself a hash table, is
   emptied with Fclrhash rather than removed. */
1955 invalidate_charset_font_caches (Lisp_Object charset)
1957 /* Invalidate font cache entries for charset on all devices. */
1958 Lisp_Object devcons, concons, hash_table;
1959 DEVICE_LOOP_NO_BREAK (devcons, concons)
1961 struct device *d = XDEVICE (XCAR (devcons));
1962 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1963 if (!UNBOUNDP (hash_table))
1964 Fclrhash (hash_table);
/* set-charset-registry: resolves CHARSET with Fget_charset, requires
   REGISTRY to be a string, stores it in the charset's registry slot, then
   clears the per-device font caches for this charset and reports a change
   of the default face's font property with the global locale. */
1968 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1969 Set the 'registry property of CHARSET to REGISTRY.
1971 (charset, registry))
1973 charset = Fget_charset (charset);
1974 CHECK_STRING (registry);
1975 XCHARSET_REGISTRY (charset) = registry;
1976 invalidate_charset_font_caches (charset);
1977 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* charset-mapping-table: resolves CHARSET with Fget_charset and returns
   XCHARSET_DECODING_TABLE of the result, i.e. the mapping table exposed to
   Lisp is the charset's decoding table. */
1982 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1983 Return mapping-table of CHARSET.
1987 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* set-charset-mapping-table: resolves CHARSET with Fget_charset and installs
   TABLE as its mapping.  Visible flow:
   - one branch resets the charset's decoding table to Qnil;
   - a vector TABLE is checked by decoding_table_check_elements, whose
     failures are reported as "Too big table", "Invalid element is found" or
     "Something wrong", after which the decoding table is again set to Qnil;
   - any other TABLE type signals wrong-type-argument ("vector-or-nil-p");
   - finally the vector is walked and each character gets a char attribute
     keyed by the charset name via Fput_char_attribute.  The code point is
     index + byte offset for dimension 1, and for dimension 2 the row index
     plus byte offset is shifted left by 8 before being combined with the
     cell part.
   NOTE(review): the conditions selecting these branches and the tests on the
   individual elements are not among the lines shown -- confirm. */
1990 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1991 Set mapping-table of CHARSET to TABLE.
1995 struct Lisp_Charset *cs;
1999 charset = Fget_charset (charset);
2000 cs = XCHARSET (charset);
2004 CHARSET_DECODING_TABLE(cs) = Qnil;
2007 else if (VECTORP (table))
2009 int ccs_len = CHARSET_BYTE_SIZE (cs);
2010 int ret = decoding_table_check_elements (table,
2011 CHARSET_DIMENSION (cs),
2016 signal_simple_error ("Too big table", table);
2018 signal_simple_error ("Invalid element is found", table);
2020 signal_simple_error ("Something wrong", table);
2022 CHARSET_DECODING_TABLE(cs) = Qnil;
2025 signal_error (Qwrong_type_argument,
2026 list2 (build_translated_string ("vector-or-nil-p"),
2029 byte_offset = CHARSET_BYTE_OFFSET (cs);
2030 switch (CHARSET_DIMENSION (cs))
2033 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2035 Lisp_Object c = XVECTOR_DATA(table)[i];
2038 Fput_char_attribute (c, XCHARSET_NAME (charset),
2039 make_int (i + byte_offset));
2043 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2045 Lisp_Object v = XVECTOR_DATA(table)[i];
2051 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2053 Lisp_Object c = XVECTOR_DATA(v)[j];
2057 (c, XCHARSET_NAME (charset),
2058 make_int ( ( (i + byte_offset) << 8 )
2064 Fput_char_attribute (v, XCHARSET_NAME (charset),
2065 make_int (i + byte_offset));
/* save-charset-mapping-table: writes CHARSET's decoding table to persistent
   storage.  Two back ends are selected by HAVE_LIBCHISE:
   - with libchise, a CCS handle is obtained from default_chise_data_source
     using the charset name's string, entries are stored with
     chise_ccs_set_decoded_char, and chise_ccs_sync is called at the end;
   - otherwise a database file named by char_attribute_system_db_file is
     opened in "w+" mode, entries are stored with Fput_database using the
     printed code point as key and the printed character as value, and the
     function returns the result of Fclose_database.
   Every octet ranges over [byte_min, byte_max), where byte_min is the
   charset's byte offset and byte_max adds its byte size.  The switch on
   the dimension nests one to four such loops (cell, row, plane, group) and
   fetches each level with get_ccs_octet_table; multi-octet code points are
   built by shifting the higher octets left by 8, 16 and 24 bits.
   NOTE(review): the failure path for the libchise lookup only shows a printf
   diagnostic here; what follows it is not visible -- confirm. */
2073 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2074 Save mapping-table of CHARSET.
2078 struct Lisp_Charset *cs;
2079 int byte_min, byte_max;
2080 #ifdef HAVE_LIBCHISE
2082 #else /* HAVE_LIBCHISE */
2084 Lisp_Object db_file;
2085 #endif /* not HAVE_LIBCHISE */
2087 charset = Fget_charset (charset);
2088 cs = XCHARSET (charset);
2090 #ifdef HAVE_LIBCHISE
2091 if ( open_chise_data_source_maybe () )
2095 = chise_ds_get_ccs (default_chise_data_source,
2096 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2099 printf ("Can't open decoding-table %s\n",
2100 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2103 #else /* HAVE_LIBCHISE */
2104 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2105 Qsystem_char_id, 1);
2106 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2107 #endif /* not HAVE_LIBCHISE */
2109 byte_min = CHARSET_BYTE_OFFSET (cs);
2110 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2111 switch (CHARSET_DIMENSION (cs))
2115 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2118 for (cell = byte_min; cell < byte_max; cell++)
2120 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2124 #ifdef HAVE_LIBCHISE
2125 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2126 #else /* HAVE_LIBCHISE */
2127 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2128 Fprin1_to_string (c, Qnil),
2130 #endif /* not HAVE_LIBCHISE */
2137 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2140 for (row = byte_min; row < byte_max; row++)
2142 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2145 for (cell = byte_min; cell < byte_max; cell++)
2147 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2151 #ifdef HAVE_LIBCHISE
2152 chise_ccs_set_decoded_char
2154 (row << 8) | cell, XCHAR (c));
2155 #else /* HAVE_LIBCHISE */
2156 Fput_database (Fprin1_to_string (make_int ((row << 8)
2159 Fprin1_to_string (c, Qnil),
2161 #endif /* not HAVE_LIBCHISE */
2169 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2172 for (plane = byte_min; plane < byte_max; plane++)
2175 = get_ccs_octet_table (table_p, charset, plane);
2178 for (row = byte_min; row < byte_max; row++)
2181 = get_ccs_octet_table (table_r, charset, row);
2184 for (cell = byte_min; cell < byte_max; cell++)
2186 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2191 #ifdef HAVE_LIBCHISE
2192 chise_ccs_set_decoded_char
2197 #else /* HAVE_LIBCHISE */
2198 Fput_database (Fprin1_to_string
2199 (make_int ((plane << 16)
2203 Fprin1_to_string (c, Qnil),
2205 #endif /* not HAVE_LIBCHISE */
2214 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2217 for (group = byte_min; group < byte_max; group++)
2220 = get_ccs_octet_table (table_g, charset, group);
2223 for (plane = byte_min; plane < byte_max; plane++)
2226 = get_ccs_octet_table (table_p, charset, plane);
2229 for (row = byte_min; row < byte_max; row++)
2232 = get_ccs_octet_table (table_r, charset, row);
2235 for (cell = byte_min; cell < byte_max; cell++)
2238 = get_ccs_octet_table (table_c, charset, cell);
2242 #ifdef HAVE_LIBCHISE
2243 chise_ccs_set_decoded_char
2249 #else /* HAVE_LIBCHISE */
2250 Fput_database (Fprin1_to_string
2251 (make_int (( group << 24)
2256 Fprin1_to_string (c, Qnil),
2258 #endif /* not HAVE_LIBCHISE */
2266 #ifdef HAVE_LIBCHISE
2267 chise_ccs_sync (dt_ccs);
2269 #else /* HAVE_LIBCHISE */
2270 return Fclose_database (db);
2271 #endif /* not HAVE_LIBCHISE */
/* reset-charset-mapping-table: resolves CCS with Fget_charset and marks its
   decoding table as Qunloaded.  The visible test before that assignment is
   chise_ccs_setup_db returning 0 in the libchise build, or Ffile_exists_p
   on the file named by char_attribute_system_db_file otherwise.  In the
   libchise build the CCS handle is looked up by the charset name's string
   and compared against NULL first.
   NOTE(review): the return values of the success and failure paths are not
   among the lines shown -- confirm. */
2274 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2275 Reset mapping-table of CCS with database file.
2279 #ifdef HAVE_LIBCHISE
2280 CHISE_CCS chise_ccs;
2282 Lisp_Object db_file;
2285 ccs = Fget_charset (ccs);
2287 #ifdef HAVE_LIBCHISE
2288 if ( open_chise_data_source_maybe () )
2291 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2292 XSTRING_DATA (Fsymbol_name
2293 (XCHARSET_NAME(ccs))));
2294 if (chise_ccs == NULL)
2297 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2298 Qsystem_char_id, 0);
2302 #ifdef HAVE_LIBCHISE
2303 chise_ccs_setup_db (chise_ccs, 0) == 0
2305 !NILP (Ffile_exists_p (db_file))
2309 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetches the character stored for CODE_POINT of charset CCS from persistent
   storage and records the outcome in the in-memory decoding table with
   decoding_table_put_char.
   - libchise build: the character id comes from chise_ds_decode_char keyed
     by the charset name's string; it is stored either as a character
     (make_char) or as Qnil.
   - otherwise: the database file named by char_attribute_system_db_file is
     opened in "r" mode and queried with the printed code point as key; the
     result or Qnil is stored and the database is closed on both paths.
   NOTE(review): the conditions choosing between the "found" and Qnil stores,
   and the function's return values, are not among the lines shown. */
2316 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2318 #ifdef HAVE_LIBCHISE
2319 CHISE_Char_ID char_id;
2321 if ( open_chise_data_source_maybe () )
2325 = chise_ds_decode_char (default_chise_data_source,
2326 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2329 decoding_table_put_char (ccs, code_point, make_char (char_id));
2331 decoding_table_put_char (ccs, code_point, Qnil);
2333 /* chise_ccst_close (dt_ccs); */
2335 #else /* HAVE_LIBCHISE */
2338 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2341 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2345 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2352 decoding_table_put_char (ccs, code_point, ret);
2353 Fclose_database (db);
2357 decoding_table_put_char (ccs, code_point, Qnil);
2358 Fclose_database (db);
2361 #endif /* not HAVE_LIBCHISE */
2363 #endif /* HAVE_CHISE */
2364 #endif /* UTF2000 */
2367 /************************************************************************/
2368 /* Lisp primitives for working with characters */
2369 /************************************************************************/
/* decode-char: resolves CHARSET with Fget_charset and maps the code point to
   a character.  With DEFINED_ONLY nil the lookup goes through DECODE_CHAR,
   otherwise through decode_defined_char; both receive WITHOUT_INHERITANCE
   as a C boolean.  A non-negative result is returned as a character, a
   negative one as nil.
   NOTE(review): the adjustment applied when the charset's graphic value is
   1 is not among the lines shown -- confirm. */
2372 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2373 Make a character from CHARSET and code-point CODE.
2374 If DEFINED_ONLY is non-nil, builtin character is not returned.
2375 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2376 If corresponding character is not found, nil is returned.
2378 (charset, code, defined_only, without_inheritance))
2382 charset = Fget_charset (charset);
2385 if (XCHARSET_GRAPHIC (charset) == 1)
2387 if (NILP (defined_only))
2388 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2390 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2391 return c >= 0 ? make_char (c) : Qnil;
/* decode-builtin-char: resolves CHARSET with Fget_charset and decodes CODE
   with decode_builtin_char; when that yields a negative value the call falls
   back to Fdecode_char with both optional flags nil.
   latin-viscii is special-cased: the code is first decoded with
   Fdecode_char, the resulting character is probed for attributes under
   latin-viscii-lower and latin-viscii-upper, and CHARSET is switched to
   whichever of those applies.
   NOTE(review): the tests deciding between the lower and upper variants and
   the graphic == 1 adjustment are not among the lines shown. */
2394 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2395 Make a builtin character from CHARSET and code-point CODE.
2401 charset = Fget_charset (charset);
2403 if (EQ (charset, Vcharset_latin_viscii))
2405 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2411 (ret = Fget_char_attribute (chr,
2412 Vcharset_latin_viscii_lower,
2415 charset = Vcharset_latin_viscii_lower;
2419 (ret = Fget_char_attribute (chr,
2420 Vcharset_latin_viscii_upper,
2423 charset = Vcharset_latin_viscii_upper;
2430 if (XCHARSET_GRAPHIC (charset) == 1)
2433 c = decode_builtin_char (charset, c);
2435 c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
/* make-char: builds a character from CHARSET and one or two octets.
   The permitted octet range depends on the charset: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  ARG1 outside that range signals
   args-out-of-range.  For a dimension-1 charset the character is built from
   ARG1 alone, and the message "Charset is of dimension one; second octet
   must be nil" belongs to that path.  Otherwise ARG2 is masked with 0x7f,
   range-checked the same way, and both octets go to MAKE_CHAR.
   NOTE(review): how ARG1 is converted and masked is not among the lines
   shown -- confirm. */
2439 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2440 Make a character from CHARSET and octets ARG1 and ARG2.
2441 ARG2 is required only for characters from two-dimensional charsets.
2442 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2443 character s with caron.
2445 (charset, arg1, arg2))
2449 int lowlim, highlim;
2451 charset = Fget_charset (charset);
2452 cs = XCHARSET (charset);
2454 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2455 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2457 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2459 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2460 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2463 /* It is useful (and safe, according to Olivier Galibert) to strip
2464 the 8th bit off ARG1 and ARG2 because it allows programmers to
2465 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2466 Latin 2 code of the character. */
2474 if (a1 < lowlim || a1 > highlim)
2475 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2477 if (CHARSET_DIMENSION (cs) == 1)
2481 ("Charset is of dimension one; second octet must be nil", arg2);
2482 return make_char (MAKE_CHAR (charset, a1, 0));
2491 a2 = XINT (arg2) & 0x7f;
2493 if (a2 < lowlim || a2 > highlim)
2494 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2496 return make_char (MAKE_CHAR (charset, a1, a2));
/* char-charset: coerces CHARACTER with CHECK_CHAR_COERCE_INT, finds its
   charset with CHAR_CHARSET and returns that charset's name (not the
   charset object itself). */
2499 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2500 Return the character set of CHARACTER.
2504 CHECK_CHAR_COERCE_INT (character);
2506 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* char-octet: splits CHARACTER into charset and two octets with
   BREAKUP_CHAR, then returns octet 0 when N is nil or 0, octet 1 when N is
   1, and signals "Octet number must be 0 or 1" for any other N. */
2509 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2510 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2511 N defaults to 0 if omitted.
2515 Lisp_Object charset;
2518 CHECK_CHAR_COERCE_INT (character);
2520 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2522 if (NILP (n) || EQ (n, Qzero))
2523 return make_int (octet0);
2524 else if (EQ (n, make_int (1)))
2525 return make_int (octet1);
2527 signal_simple_error ("Octet number must be 0 or 1", n);
/* encode-char: coerces CHARACTER, resolves CHARSET with Fget_charset and
   asks charset_code_point for the code point, passing DEFINED_ONLY as a C
   boolean.  A non-negative code point is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is not among
   the lines shown -- confirm. */
2531 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2532 Return code-point of CHARACTER in specified CHARSET.
2534 (character, charset, defined_only))
2538 CHECK_CHAR_COERCE_INT (character);
2539 charset = Fget_charset (charset);
2540 code_point = charset_code_point (charset, XCHAR (character),
2541 !NILP (defined_only));
2542 if (code_point >= 0)
2543 return make_int (code_point);
/* split-char: returns a list whose head is the name of CHARACTER's charset
   and whose tail is its position codes.  Two variants are visible:
   - one encodes the character with ENCODE_CHAR and, once per dimension,
     conses the low 8 bits of the code point onto the result before putting
     the charset name in front;
   - the other splits the character with BREAKUP_CHAR and builds a
     three-element list for dimension 2 or a two-element list otherwise.
   `charset' and `rc' are GC-protected with GCPRO2 before CHARACTER is
   coerced.
   NOTE(review): presumably the two variants sit in different preprocessor
   branches, and the loop's shift/decrement lines are not shown -- confirm. */
2549 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2550 Return list of charset and one or two position-codes of CHARACTER.
2554 /* This function can GC */
2555 struct gcpro gcpro1, gcpro2;
2556 Lisp_Object charset = Qnil;
2557 Lisp_Object rc = Qnil;
2565 GCPRO2 (charset, rc);
2566 CHECK_CHAR_COERCE_INT (character);
2569 code_point = ENCODE_CHAR (XCHAR (character), charset);
2570 dimension = XCHARSET_DIMENSION (charset);
2571 while (dimension > 0)
2573 rc = Fcons (make_int (code_point & 255), rc);
2577 rc = Fcons (XCHARSET_NAME (charset), rc);
2579 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2581 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2583 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2587 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2596 #ifdef ENABLE_COMPOSITE_CHARS
2597 /************************************************************************/
2598 /* composite character functions */
2599 /************************************************************************/
/* Looks up the composite character registered for the LEN bytes at STR in
   Vcomposite_char_string2char_hash_table, using a freshly made Lisp string
   as the key.  The remaining visible lines allocate a new character:
   "No more composite chars available" is signalled once
   composite_char_row_next reaches 128; otherwise a character is built in
   Vcharset_composite from the next row and column, recorded in both hash
   tables (char -> string and string -> char), and the column counter is
   advanced, wrapping to column 32 of the next row when it reaches 128.
   NOTE(review): presumably the allocation only runs when the lookup misses;
   that test and the return statement are not among the lines shown. */
2602 lookup_composite_char (Bufbyte *str, int len)
2604 Lisp_Object lispstr = make_string (str, len);
2605 Lisp_Object ch = Fgethash (lispstr,
2606 Vcomposite_char_string2char_hash_table,
2612 if (composite_char_row_next >= 128)
2613 signal_simple_error ("No more composite chars available", lispstr);
2614 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2615 composite_char_col_next);
2616 Fputhash (make_char (emch), lispstr,
2617 Vcomposite_char_char2string_hash_table);
2618 Fputhash (lispstr, make_char (emch),
2619 Vcomposite_char_string2char_hash_table);
2620 composite_char_col_next++;
2621 if (composite_char_col_next >= 128)
2623 composite_char_col_next = 32;
2624 composite_char_row_next++;
/* Reverse lookup for a composite character: fetches the string recorded for
   CH in Vcomposite_char_char2string_hash_table and asserts that an entry
   exists, so calling this with an unregistered character is treated as a
   programming error. */
2633 composite_char_string (Emchar ch)
2635 Lisp_Object str = Fgethash (make_char (ch),
2636 Vcomposite_char_char2string_hash_table,
2638 assert (!UNBOUNDP (str));
/* make-composite-char: requires STRING to be a string and returns the
   character that lookup_composite_char yields for its data and length.
   Declared with xxDEFUN rather than DEFUN. */
2642 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2643 Convert a string into a single composite character.
2644 The character is the result of overstriking all the characters in
2649 CHECK_STRING (string);
2650 return make_char (lookup_composite_char (XSTRING_DATA (string),
2651 XSTRING_LENGTH (string)));
/* composite-char-string: returns the string recorded for a composite
   character via composite_char_string.  A character whose leading byte is
   not LEADING_BYTE_COMPOSITE is rejected with "Must be composite char".
   Declared with xxDEFUN rather than DEFUN. */
2654 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2655 Return a string of the characters comprising a composite character.
2663 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2664 signal_simple_error ("Must be composite char", ch);
2665 return composite_char_string (emch);
2667 #endif /* ENABLE_COMPOSITE_CHARS */
2670 /************************************************************************/
2671 /* initialization */
2672 /************************************************************************/
/* Registers this file's Lisp-visible pieces: the charset lrecord
   implementation, every primitive through DEFSUBR, and every symbol through
   defsymbol (property keywords, direction symbols and charset names).
   Several charset-name variables are bound to symbols whose printed names
   start with "=" (for example Qmap_gb2312 is "=gb2312").
   NOTE(review): some groups are bracketed by preprocessor conditionals
   (HAVE_CHISE, ENABLE_COMPOSITE_CHARS) whose opening or closing lines are
   only partly visible here -- confirm the exact guards. */
2675 syms_of_mule_charset (void)
2677 INIT_LRECORD_IMPLEMENTATION (charset);
2679 DEFSUBR (Fcharsetp);
2680 DEFSUBR (Ffind_charset);
2681 DEFSUBR (Fget_charset);
2682 DEFSUBR (Fcharset_list);
2683 DEFSUBR (Fcharset_name);
2684 DEFSUBR (Fmake_charset);
2685 DEFSUBR (Fmake_reverse_direction_charset);
2686 /* DEFSUBR (Freverse_direction_charset); */
2687 DEFSUBR (Fdefine_charset_alias);
2688 DEFSUBR (Fcharset_from_attributes);
2689 DEFSUBR (Fcharset_short_name);
2690 DEFSUBR (Fcharset_long_name);
2691 DEFSUBR (Fcharset_description);
2692 DEFSUBR (Fcharset_dimension);
2693 DEFSUBR (Fcharset_property);
2694 DEFSUBR (Fcharset_id);
2695 DEFSUBR (Fset_charset_ccl_program);
2696 DEFSUBR (Fset_charset_registry);
2699 DEFSUBR (Fcharset_mapping_table);
2700 DEFSUBR (Fset_charset_mapping_table);
2702 DEFSUBR (Fsave_charset_mapping_table);
2703 DEFSUBR (Freset_charset_mapping_table);
2704 #endif /* HAVE_CHISE */
2705 DEFSUBR (Fdecode_char);
2706 DEFSUBR (Fdecode_builtin_char);
2707 DEFSUBR (Fencode_char);
2710 DEFSUBR (Fmake_char);
2711 DEFSUBR (Fchar_charset);
2712 DEFSUBR (Fchar_octet);
2713 DEFSUBR (Fsplit_char);
2715 #ifdef ENABLE_COMPOSITE_CHARS
2716 DEFSUBR (Fmake_composite_char);
2717 DEFSUBR (Fcomposite_char_string);
2720 defsymbol (&Qcharsetp, "charsetp");
2721 defsymbol (&Qregistry, "registry");
2722 defsymbol (&Qfinal, "final");
2723 defsymbol (&Qgraphic, "graphic");
2724 defsymbol (&Qdirection, "direction");
2725 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2726 defsymbol (&Qshort_name, "short-name");
2727 defsymbol (&Qlong_name, "long-name");
2728 defsymbol (&Qiso_ir, "iso-ir");
2730 defsymbol (&Qmother, "mother");
2731 defsymbol (&Qmin_code, "min-code");
2732 defsymbol (&Qmax_code, "max-code");
2733 defsymbol (&Qcode_offset, "code-offset");
2734 defsymbol (&Qconversion, "conversion");
2735 defsymbol (&Q94x60, "94x60");
2736 defsymbol (&Q94x94x60, "94x94x60");
2737 defsymbol (&Qbig5_1, "big5-1");
2738 defsymbol (&Qbig5_2, "big5-2");
2741 defsymbol (&Ql2r, "l2r");
2742 defsymbol (&Qr2l, "r2l");
2744 /* Charsets, compatible with FSF 20.3
2745 Naming convention is Script-Charset[-Edition] */
2746 defsymbol (&Qascii, "ascii");
2747 defsymbol (&Qcontrol_1, "control-1");
2748 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2749 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2750 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2751 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2752 defsymbol (&Qthai_tis620, "thai-tis620");
2753 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2754 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2755 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2756 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2757 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2758 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2759 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2760 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2761 defsymbol (&Qmap_gb2312, "=gb2312");
2762 defsymbol (&Qmap_gb12345, "=gb12345");
2763 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2764 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2765 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2766 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2767 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2769 defsymbol (&Qmap_ucs, "=ucs");
2770 defsymbol (&Qucs, "ucs");
2771 defsymbol (&Qucs_bmp, "ucs-bmp");
2772 defsymbol (&Qucs_smp, "ucs-smp");
2773 defsymbol (&Qucs_sip, "ucs-sip");
2774 defsymbol (&Qlatin_viscii, "latin-viscii");
2775 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2776 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2777 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2778 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2779 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2780 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2781 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2782 defsymbol (&Qmap_big5, "=big5");
2783 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2785 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2786 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2788 defsymbol (&Qcomposite, "composite");
/* Sets up the charset lookup state and this file's Lisp variables.
   - Allocates a zeroed struct charset_lookup into chlook and registers it
     as a dump root.
   - Fills charset_by_leading_byte and charset_by_attributes with Qnil.
   - Seeds the next-allocated leading-byte counters from the
     MIN_LEADING_BYTE_PRIVATE* constants.
   - Defines the Lisp variables leading-code-private-11 and
     default-coded-charset-priority-list.
   leading_code_private_11 is assigned the same constant both before and
   after its DEFVAR_INT.
   NOTE(review): two clearing loops for charset_by_attributes are visible
   (two-level and three-level indexing), as are two styles of leading-byte
   counter; presumably each pair sits in alternative preprocessor branches
   that are not shown -- confirm. */
2792 vars_of_mule_charset (void)
2799 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2800 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2802 /* Table of charsets indexed by leading byte. */
2803 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2804 chlook->charset_by_leading_byte[i] = Qnil;
2807 /* Table of charsets indexed by type/final-byte. */
2808 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2809 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2810 chlook->charset_by_attributes[i][j] = Qnil;
2812 /* Table of charsets indexed by type/final-byte/direction. */
2813 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2814 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2815 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2816 chlook->charset_by_attributes[i][j][k] = Qnil;
2820 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2822 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2823 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2827 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2828 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2829 Leading-code of private TYPE9N charset of column-width 1.
2831 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2835 Vdefault_coded_charset_priority_list = Qnil;
2836 DEFVAR_LISP ("default-coded-charset-priority-list",
2837 &Vdefault_coded_charset_priority_list /*
2838 Default order of preferred coded-character-sets.
2844 complex_vars_of_mule_charset (void)
2846 staticpro (&Vcharset_hash_table);
2847 Vcharset_hash_table =
2848 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2850 /* Predefined character sets. We store them into variables for
2854 staticpro (&Vcharset_ucs);
2856 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2857 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2858 build_string ("UCS"),
2859 build_string ("UCS"),
2860 build_string ("ISO/IEC 10646"),
2862 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2863 staticpro (&Vcharset_ucs_bmp);
2865 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2866 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2867 build_string ("BMP"),
2868 build_string ("UCS-BMP"),
2869 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2871 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2872 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2873 staticpro (&Vcharset_ucs_smp);
2875 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2876 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2877 build_string ("SMP"),
2878 build_string ("UCS-SMP"),
2879 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2880 build_string ("UCS00-1"),
2881 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2882 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2883 staticpro (&Vcharset_ucs_sip);
2885 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2886 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2887 build_string ("SIP"),
2888 build_string ("UCS-SIP"),
2889 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2890 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2891 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2892 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2894 # define MIN_CHAR_THAI 0
2895 # define MAX_CHAR_THAI 0
2896 /* # define MIN_CHAR_HEBREW 0 */
2897 /* # define MAX_CHAR_HEBREW 0 */
2898 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2899 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2901 staticpro (&Vcharset_ascii);
2903 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2904 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2905 build_string ("ASCII"),
2906 build_string ("ASCII)"),
2907 build_string ("ASCII (ISO646 IRV)"),
2908 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2909 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2910 staticpro (&Vcharset_control_1);
2911 Vcharset_control_1 =
2912 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2913 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2914 build_string ("C1"),
2915 build_string ("Control characters"),
2916 build_string ("Control characters 128-191"),
2918 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2919 staticpro (&Vcharset_latin_iso8859_1);
2920 Vcharset_latin_iso8859_1 =
2921 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2922 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2923 build_string ("Latin-1"),
2924 build_string ("ISO8859-1 (Latin-1)"),
2925 build_string ("ISO8859-1 (Latin-1)"),
2926 build_string ("iso8859-1"),
2927 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2928 staticpro (&Vcharset_latin_iso8859_2);
2929 Vcharset_latin_iso8859_2 =
2930 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2931 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2932 build_string ("Latin-2"),
2933 build_string ("ISO8859-2 (Latin-2)"),
2934 build_string ("ISO8859-2 (Latin-2)"),
2935 build_string ("iso8859-2"),
2936 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2937 staticpro (&Vcharset_latin_iso8859_3);
2938 Vcharset_latin_iso8859_3 =
2939 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2940 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2941 build_string ("Latin-3"),
2942 build_string ("ISO8859-3 (Latin-3)"),
2943 build_string ("ISO8859-3 (Latin-3)"),
2944 build_string ("iso8859-3"),
2945 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2946 staticpro (&Vcharset_latin_iso8859_4);
2947 Vcharset_latin_iso8859_4 =
2948 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2949 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2950 build_string ("Latin-4"),
2951 build_string ("ISO8859-4 (Latin-4)"),
2952 build_string ("ISO8859-4 (Latin-4)"),
2953 build_string ("iso8859-4"),
2954 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2955 staticpro (&Vcharset_thai_tis620);
2956 Vcharset_thai_tis620 =
2957 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2958 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2959 build_string ("TIS620"),
2960 build_string ("TIS620 (Thai)"),
2961 build_string ("TIS620.2529 (Thai)"),
2962 build_string ("tis620"),
2963 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2964 staticpro (&Vcharset_greek_iso8859_7);
2965 Vcharset_greek_iso8859_7 =
2966 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2967 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2968 build_string ("ISO8859-7"),
2969 build_string ("ISO8859-7 (Greek)"),
2970 build_string ("ISO8859-7 (Greek)"),
2971 build_string ("iso8859-7"),
2972 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2973 staticpro (&Vcharset_arabic_iso8859_6);
2974 Vcharset_arabic_iso8859_6 =
2975 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2976 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2977 build_string ("ISO8859-6"),
2978 build_string ("ISO8859-6 (Arabic)"),
2979 build_string ("ISO8859-6 (Arabic)"),
2980 build_string ("iso8859-6"),
2981 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2982 staticpro (&Vcharset_hebrew_iso8859_8);
2983 Vcharset_hebrew_iso8859_8 =
2984 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2985 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2986 build_string ("ISO8859-8"),
2987 build_string ("ISO8859-8 (Hebrew)"),
2988 build_string ("ISO8859-8 (Hebrew)"),
2989 build_string ("iso8859-8"),
2991 0 /* MIN_CHAR_HEBREW */,
2992 0 /* MAX_CHAR_HEBREW */, 0, 32,
2993 Qnil, CONVERSION_IDENTICAL);
2994 staticpro (&Vcharset_katakana_jisx0201);
2995 Vcharset_katakana_jisx0201 =
2996 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2997 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2998 build_string ("JISX0201 Kana"),
2999 build_string ("JISX0201.1976 (Japanese Kana)"),
3000 build_string ("JISX0201.1976 Japanese Kana"),
3001 build_string ("jisx0201\\.1976"),
3002 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3003 staticpro (&Vcharset_latin_jisx0201);
3004 Vcharset_latin_jisx0201 =
3005 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3006 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3007 build_string ("JISX0201 Roman"),
3008 build_string ("JISX0201.1976 (Japanese Roman)"),
3009 build_string ("JISX0201.1976 Japanese Roman"),
3010 build_string ("jisx0201\\.1976"),
3011 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3012 staticpro (&Vcharset_cyrillic_iso8859_5);
3013 Vcharset_cyrillic_iso8859_5 =
3014 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3015 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("ISO8859-5"),
3017 build_string ("ISO8859-5 (Cyrillic)"),
3018 build_string ("ISO8859-5 (Cyrillic)"),
3019 build_string ("iso8859-5"),
3020 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3021 staticpro (&Vcharset_latin_iso8859_9);
3022 Vcharset_latin_iso8859_9 =
3023 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3024 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3025 build_string ("Latin-5"),
3026 build_string ("ISO8859-9 (Latin-5)"),
3027 build_string ("ISO8859-9 (Latin-5)"),
3028 build_string ("iso8859-9"),
3029 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3031 staticpro (&Vcharset_jis_x0208);
3032 Vcharset_jis_x0208 =
3033 make_charset (LEADING_BYTE_JIS_X0208,
3034 Qmap_jis_x0208, 94, 2,
3035 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3036 build_string ("JIS X0208"),
3037 build_string ("JIS X0208 Common"),
3038 build_string ("JIS X0208 Common part"),
3039 build_string ("jisx0208\\.1990"),
3041 MIN_CHAR_JIS_X0208_1990,
3042 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3043 Qnil, CONVERSION_94x94);
3045 staticpro (&Vcharset_japanese_jisx0208_1978);
3046 Vcharset_japanese_jisx0208_1978 =
3047 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3048 Qmap_jis_x0208_1978, 94, 2,
3049 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3050 build_string ("JIS X0208:1978"),
3051 build_string ("JIS X0208:1978 (Japanese)"),
3053 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3054 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3061 CONVERSION_IDENTICAL);
3062 staticpro (&Vcharset_chinese_gb2312);
3063 Vcharset_chinese_gb2312 =
3064 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3065 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3066 build_string ("GB2312"),
3067 build_string ("GB2312)"),
3068 build_string ("GB2312 Chinese simplified"),
3069 build_string ("gb2312"),
3070 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_chinese_gb12345);
3072 Vcharset_chinese_gb12345 =
3073 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3074 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3075 build_string ("G1"),
3076 build_string ("GB 12345)"),
3077 build_string ("GB 12345-1990"),
3078 build_string ("GB12345\\(\\.1990\\)?-0"),
3079 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3080 staticpro (&Vcharset_japanese_jisx0208);
3081 Vcharset_japanese_jisx0208 =
3082 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3083 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3084 build_string ("JISX0208"),
3085 build_string ("JIS X0208:1983 (Japanese)"),
3086 build_string ("JIS X0208:1983 Japanese Kanji"),
3087 build_string ("jisx0208\\.1983"),
3094 CONVERSION_IDENTICAL);
3096 staticpro (&Vcharset_japanese_jisx0208_1990);
3097 Vcharset_japanese_jisx0208_1990 =
3098 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3099 Qmap_jis_x0208_1990, 94, 2,
3100 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3101 build_string ("JISX0208-1990"),
3102 build_string ("JIS X0208:1990 (Japanese)"),
3103 build_string ("JIS X0208:1990 Japanese Kanji"),
3104 build_string ("jisx0208\\.1990"),
3106 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3107 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3108 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3109 Vcharset_jis_x0208 /* Qnil */,
3110 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3112 staticpro (&Vcharset_korean_ksc5601);
3113 Vcharset_korean_ksc5601 =
3114 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3115 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3116 build_string ("KSC5601"),
3117 build_string ("KSC5601 (Korean"),
3118 build_string ("KSC5601 Korean Hangul and Hanja"),
3119 build_string ("ksc5601"),
3120 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3121 staticpro (&Vcharset_japanese_jisx0212);
3122 Vcharset_japanese_jisx0212 =
3123 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3124 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3125 build_string ("JISX0212"),
3126 build_string ("JISX0212 (Japanese)"),
3127 build_string ("JISX0212 Japanese Supplement"),
3128 build_string ("jisx0212"),
3129 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3131 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3132 staticpro (&Vcharset_chinese_cns11643_1);
3133 Vcharset_chinese_cns11643_1 =
3134 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3135 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3136 build_string ("CNS11643-1"),
3137 build_string ("CNS11643-1 (Chinese traditional)"),
3139 ("CNS 11643 Plane 1 Chinese traditional"),
3140 build_string (CHINESE_CNS_PLANE_RE("1")),
3141 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3142 staticpro (&Vcharset_chinese_cns11643_2);
3143 Vcharset_chinese_cns11643_2 =
3144 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3145 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3146 build_string ("CNS11643-2"),
3147 build_string ("CNS11643-2 (Chinese traditional)"),
3149 ("CNS 11643 Plane 2 Chinese traditional"),
3150 build_string (CHINESE_CNS_PLANE_RE("2")),
3151 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3153 staticpro (&Vcharset_latin_tcvn5712);
3154 Vcharset_latin_tcvn5712 =
3155 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3156 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3157 build_string ("TCVN 5712"),
3158 build_string ("TCVN 5712 (VSCII-2)"),
3159 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3160 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3161 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3162 staticpro (&Vcharset_latin_viscii_lower);
3163 Vcharset_latin_viscii_lower =
3164 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3165 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3166 build_string ("VISCII lower"),
3167 build_string ("VISCII lower (Vietnamese)"),
3168 build_string ("VISCII lower (Vietnamese)"),
3169 build_string ("MULEVISCII-LOWER"),
3170 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3171 staticpro (&Vcharset_latin_viscii_upper);
3172 Vcharset_latin_viscii_upper =
3173 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3174 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3175 build_string ("VISCII upper"),
3176 build_string ("VISCII upper (Vietnamese)"),
3177 build_string ("VISCII upper (Vietnamese)"),
3178 build_string ("MULEVISCII-UPPER"),
3179 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3180 staticpro (&Vcharset_latin_viscii);
3181 Vcharset_latin_viscii =
3182 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3183 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3184 build_string ("VISCII"),
3185 build_string ("VISCII 1.1 (Vietnamese)"),
3186 build_string ("VISCII 1.1 (Vietnamese)"),
3187 build_string ("VISCII1\\.1"),
3188 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3189 staticpro (&Vcharset_chinese_big5);
3190 Vcharset_chinese_big5 =
3191 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3192 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3193 build_string ("Big5"),
3194 build_string ("Big5"),
3195 build_string ("Big5 Chinese traditional"),
3196 build_string ("big5-0"),
3198 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3199 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3201 staticpro (&Vcharset_ethiopic_ucs);
3202 Vcharset_ethiopic_ucs =
3203 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3204 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3205 build_string ("Ethiopic (UCS)"),
3206 build_string ("Ethiopic (UCS)"),
3207 build_string ("Ethiopic of UCS"),
3208 build_string ("Ethiopic-Unicode"),
3209 Qnil, 0x1200, 0x137F, 0, 0,
3210 Qnil, CONVERSION_IDENTICAL);
3212 staticpro (&Vcharset_chinese_big5_1);
3213 Vcharset_chinese_big5_1 =
3214 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3215 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3216 build_string ("Big5"),
3217 build_string ("Big5 (Level-1)"),
3219 ("Big5 Level-1 Chinese traditional"),
3220 build_string ("big5"),
3221 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3222 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3223 staticpro (&Vcharset_chinese_big5_2);
3224 Vcharset_chinese_big5_2 =
3225 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3226 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3227 build_string ("Big5"),
3228 build_string ("Big5 (Level-2)"),
3230 ("Big5 Level-2 Chinese traditional"),
3231 build_string ("big5"),
3232 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3233 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3235 #ifdef ENABLE_COMPOSITE_CHARS
3236 /* #### For simplicity, we put composite chars into a 96x96 charset.
3237 This is going to lead to problems because you can run out of
3238 room, esp. as we don't yet recycle numbers. */
3239 staticpro (&Vcharset_composite);
3240 Vcharset_composite =
3241 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3242 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3243 build_string ("Composite"),
3244 build_string ("Composite characters"),
3245 build_string ("Composite characters"),
3248 /* #### not dumped properly */
3249 composite_char_row_next = 32;
3250 composite_char_col_next = 32;
3252 Vcomposite_char_string2char_hash_table =
3253 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3254 Vcomposite_char_char2string_hash_table =
3255 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3256 staticpro (&Vcomposite_char_string2char_hash_table);
3257 staticpro (&Vcomposite_char_char2string_hash_table);
3258 #endif /* ENABLE_COMPOSITE_CHARS */