1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_ucs;
71 Lisp_Object Vcharset_ucs_bmp;
72 Lisp_Object Vcharset_ucs_smp;
73 Lisp_Object Vcharset_ucs_sip;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps strings representing
89 composed chars to their equivalent chars; one goes the other way. */
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table declared with 0xA0 entries: entry N is the byte count
   recorded for a representation whose first byte is N.
   NOTE(review): more initializer rows are visible below than 0xA0
   entries allow (two rows follow the 0x80 - 0x8f comment); presumably
   they are alternatives selected by preprocessor conditionals that are
   not visible in this listing -- confirm against the full file. */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* Variant row whose last entry (first byte 0x8f) is 3. */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
/* Variant row in which every entry is 2. */
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of vector V, apparently as a candidate decoding
   table of dimension DIM whose per-level length limit is CCS_LEN.
   Visible logic: the length of V is compared against CCS_LEN, every
   element is tested with NILP / CHARP, and the function calls itself
   on an element with DIM - 1.  The return statements sit on lines that
   are not visible in this listing, so the meaning of the result codes
   is not documented here. */
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is singled out. */
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Elements that are neither nil nor a character get further handling. */
163 if (!NILP (c) && !CHARP (c))
/* Recurse one dimension down on the element. */
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record that CHARACTER has code point VALUE in the coded charset CCS.
   Visible logic:
   - VALUE is accepted either as a list of bytes (the "obsolete
     representation") or as an integer; anything else signals
     "Invalid value for coded-charset".
   - In the list form the bytes are folded into code_point with
     (code_point << 8) | j and VALUE is replaced by the resulting
     integer.
   - In the integer form, when XCHARSET_GRAPHIC (ccs) == 1 the code
     point is masked with 0x7F7F7F7F.
   - Any code point already attached to CHARACTER (looked up with
     Fget_char_attribute) is removed from the decoding table before the
     new one is stored with decoding_table_put_char.
   The return statement and several conditions are on lines not
   visible in this listing. */
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
/* NOTE(review): part of this condition is on a line that is not
   visible; it involves the charset name being Qmap_ucs and CHARACTER
   differing from VALUE. */
182 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J to the accumulated code point. */
214 code_point = (code_point << 8) | j;
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the high bit of each of the four octets. */
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the previous mapping of CHARACTER, then install the new one. */
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS.
   Visible logic: when the decoding table of CCS is a vector, the
   character's current code point (from Fget_char_attribute) is removed
   with decoding_table_remove_char; when the encoding table is a char
   table, the entry for CHARACTER is set to Qunbound.  The guard around
   the XINT (cpos) use and the return statement are not visible in this
   listing. */
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
/* Qunbound marks the character as having no code point in CCS. */
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
317 Qvietnamese_viscii_lower,
318 Qvietnamese_viscii_upper,
328 Lisp_Object Ql2r, Qr2l;
330 Lisp_Object Vcharset_hash_table;
332 /* Composite characters are characters constructed by overstriking two
333 or more regular characters.
335 1) The old Mule implementation involves storing composite characters
336 in a buffer as a tag followed by all of the actual characters
337 used to make up the composite character. I think this is a bad
338 idea; it greatly complicates code that wants to handle strings
339 one character at a time because it has to deal with the possibility
340 of great big ungainly characters. It's much more reasonable to
341 simply store an index into a table of composite characters.
343 2) The current implementation only allows for 16,384 separate
344 composite characters over the lifetime of the XEmacs process.
345 This could become a potential problem if the user
346 edited lots of different files that use composite characters.
347 Due to FSF bogosity, increasing the number of allowable
348 composite characters under Mule would decrease the number
349 of possible faces that can exist. Mule already has shrunk
350 this to 2048, and further shrinkage would become uncomfortable.
351 No such problems exist in XEmacs.
353 Composite characters could be represented as 0x80 C1 C2 C3,
354 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
355 for slightly under 2^20 (one million) composite characters
356 over the XEmacs process lifetime, and you only need to
357 increase the size of a Mule character from 19 to 21 bits.
358 Or you could use 0x80 C1 C2 C3 C4, allowing for about
359 85 million (slightly over 2^26) composite characters. */
362 /************************************************************************/
363 /* Basic Emchar functions */
364 /************************************************************************/
366 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
367 string in STR. Returns the number of bytes stored.
368 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Store the encoded form of C at STR.
   Two encodings are visible in the body:
   - a UTF-8 style multi-byte form: 2 bytes up to 0x7ff, 3 up to
     0xffff, 4 up to 0x1fffff, 5 up to 0x3ffffff and 6 bytes otherwise,
     each continuation byte carrying 6 payload bits under 0x80;
   - a leading-byte form built from BREAKUP_CHAR / CHAR_LEADING_BYTE,
     with an extra prefix byte for private leading bytes.
   NOTE(review): presumably these two forms are alternatives chosen by
   a preprocessor conditional that is not visible here -- confirm.
   The first branch of the chain and the return are also not visible. */
372 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two-byte form: lead byte 0xc0 plus the top bits. */
387 else if ( c <= 0x7ff )
389 *p++ = (c >> 6) | 0xc0;
390 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form: lead byte 0xe0. */
392 else if ( c <= 0xffff )
394 *p++ = (c >> 12) | 0xe0;
395 *p++ = ((c >> 6) & 0x3f) | 0x80;
396 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form: lead byte 0xf0. */
398 else if ( c <= 0x1fffff )
400 *p++ = (c >> 18) | 0xf0;
401 *p++ = ((c >> 12) & 0x3f) | 0x80;
402 *p++ = ((c >> 6) & 0x3f) | 0x80;
403 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form: lead byte 0xf8. */
405 else if ( c <= 0x3ffffff )
407 *p++ = (c >> 24) | 0xf8;
408 *p++ = ((c >> 18) & 0x3f) | 0x80;
409 *p++ = ((c >> 12) & 0x3f) | 0x80;
410 *p++ = ((c >> 6) & 0x3f) | 0x80;
411 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form: lead byte 0xfc. */
415 *p++ = (c >> 30) | 0xfc;
416 *p++ = ((c >> 24) & 0x3f) | 0x80;
417 *p++ = ((c >> 18) & 0x3f) | 0x80;
418 *p++ = ((c >> 12) & 0x3f) | 0x80;
419 *p++ = ((c >> 6) & 0x3f) | 0x80;
420 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte form: split C into charset and position codes. */
423 BREAKUP_CHAR (c, charset, c1, c2);
424 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by their prefix byte. */
425 if (LEADING_BYTE_PRIVATE_P (lb))
426 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
428 if (EQ (charset, Vcharset_control_1))
437 /* Return the first character from a Mule-encoded string in STR,
438 assuming it's non-ASCII. Do not call this directly.
439 Use the macro charptr_emchar() instead. */
/* Decode the character stored at STR.
   Two decoders are visible in the body:
   - a UTF-8 style decoder that classifies the first byte against
     0xf8 / 0xf0 / 0xe0 / 0xc0 and then shifts in 6 bits per following
     byte for LEN bytes;
   - a leading-byte decoder that special-cases LEADING_BYTE_CONTROL_1,
     handles prefix bytes, looks the charset up by leading byte and
     builds the result with MAKE_CHAR.
   NOTE(review): presumably these are alternatives chosen by a
   preprocessor conditional that is not visible here -- confirm. */
442 non_ascii_charptr_emchar (const Bufbyte *str)
455 else if ( b >= 0xf8 )
460 else if ( b >= 0xf0 )
465 else if ( b >= 0xe0 )
470 else if ( b >= 0xc0 )
/* Fold in the low 6 bits of each remaining byte. */
480 for( ; len > 0; len-- )
483 ch = ( ch << 6 ) | ( b & 0x3f );
487 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the following byte minus 0x20. */
490 if (i0 == LEADING_BYTE_CONTROL_1)
491 return (Emchar) (*++str - 0x20);
493 if (LEADING_BYTE_PREFIX_P (i0))
498 charset = CHARSET_BY_LEADING_BYTE (i0);
/* Only dimension-2 charsets take a second position byte. */
499 if (XCHARSET_DIMENSION (charset) == 2)
502 return MAKE_CHAR (charset, i1, i2);
506 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
507 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validate the non-ASCII character CH by inspecting its three fields
   (CHAR_FIELD1 / CHAR_FIELD2 / CHAR_FIELD3).
   Visible logic: field values are range-checked against the official
   and private MIN / MAX limits, the charset is looked up by leading
   byte, and positions 0x20 and 0x7F are accepted only when
   XCHARSET_CHARS (charset) == 96.  With ENABLE_COMPOSITE_CHARS defined,
   composite characters are checked against
   Vcomposite_char_char2string_hash_table.
   Most return statements and the test that separates the FIELD1 == 0
   case are on lines not visible in this listing. */
511 non_ascii_valid_char_p (Emchar ch)
515 /* Must have only lowest 19 bits set */
519 f1 = CHAR_FIELD1 (ch);
520 f2 = CHAR_FIELD2 (ch);
521 f3 = CHAR_FIELD3 (ch);
527 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
528 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
529 f2 > MAX_CHAR_FIELD2_PRIVATE)
534 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
535 f2 <= MAX_CHAR_FIELD2_PRIVATE))
539 NOTE: This takes advantage of the fact that
540 FIELD2_TO_OFFICIAL_LEADING_BYTE and
541 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
543 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
544 if (EQ (charset, Qnil))
546 return (XCHARSET_CHARS (charset) == 96);
552 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
553 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
554 f1 > MAX_CHAR_FIELD1_PRIVATE)
555 if (f2 < 0x20 || f3 < 0x20)
559 #ifdef ENABLE_COMPOSITE_CHARS
560 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
562 if (UNBOUNDP (Fgethash (make_int (ch),
563 Vcomposite_char_char2string_hash_table,
568 #endif /* ENABLE_COMPOSITE_CHARS */
570 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
571 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
574 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
581 if (EQ (charset, Qnil))
583 return (XCHARSET_CHARS (charset) == 96);
589 /************************************************************************/
590 /* Basic string functions */
591 /************************************************************************/
593 /* Copy the character pointed to by SRC into DST. Do not call this
594 directly. Use the macro charptr_copy_char() instead.
595 Return the number of bytes copied. */
/* Copy one encoded character from SRC to DST.  The number of bytes to
   copy is taken from REP_BYTES_BY_FIRST_BYTE applied to the first byte
   of SRC; the loop advances both pointers once per byte.  The loop
   body and the return statement are not visible in this listing. */
598 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
600 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
602 for (i = bytes; i; i--, dst++, src++)
608 /************************************************************************/
609 /* streams of Emchars */
610 /************************************************************************/
612 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
613 The functions below are not meant to be called directly; use
614 the macros in insdel.h. */
/* Finish reading one character from STREAM given its first byte CH.
   CH is stored as the first byte of a local buffer, the remaining
   REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are fetched with Lstream_getc,
   and the assembled bytes are decoded with charptr_emchar.  Each
   fetched byte is asserted to be non-negative via
   bufpos_checking_assert. */
617 Lstream_get_emchar_1 (Lstream *stream, int ch)
619 Bufbyte str[MAX_EMCHAR_LEN];
620 Bufbyte *strptr = str;
623 str[0] = (Bufbyte) ch;
625 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
627 int c = Lstream_getc (stream);
628 bufpos_checking_assert (c >= 0);
629 *++strptr = (Bufbyte) c;
631 return charptr_emchar (str);
/* Write the character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar and pass the resulting bytes to Lstream_write,
   whose result is returned. */
635 Lstream_fput_emchar (Lstream *stream, Emchar ch)
637 Bufbyte str[MAX_EMCHAR_LEN];
638 Bytecount len = set_charptr_emchar (str, ch);
639 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: encode it into a local
   buffer with set_charptr_emchar and hand the bytes to
   Lstream_unread. */
643 Lstream_funget_emchar (Lstream *stream, Emchar ch)
645 Bufbyte str[MAX_EMCHAR_LEN];
646 Bytecount len = set_charptr_emchar (str, ch);
647 Lstream_unread (stream, str, len);
651 /************************************************************************/
653 /************************************************************************/
/* Mark method for charset objects: calls mark_object on the Lisp
   fields of the charset OBJ that are listed below.  The return
   statement (and any field handled there) is on a line not visible in
   this listing. */
656 mark_charset (Lisp_Object obj)
658 Lisp_Charset *cs = XCHARSET (obj);
660 mark_object (cs->short_name);
661 mark_object (cs->long_name);
662 mark_object (cs->doc_string);
663 mark_object (cs->registry);
664 mark_object (cs->ccl_program);
666 mark_object (cs->decoding_table);
667 mark_object (cs->mother);
/* Print method for charset objects.  Writes to PRINTCHARFUN a
   representation of the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the middle section is formatted from the charset's dimension,
   direction ("l2r" or "r2l"), columns and graphic fields, among other
   arguments on lines not visible here.
   An error "printing unreadable object" is raised under a condition
   that is on a line not visible in this listing.
   NOTE(review): the declaration of buf is not visible; confirm it is
   large enough for the sprintf calls below. */
673 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
675 Lisp_Charset *cs = XCHARSET (obj);
679 error ("printing unreadable object #<charset %s 0x%x>",
680 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
683 write_c_string ("#<charset ", printcharfun);
684 print_internal (CHARSET_NAME (cs), printcharfun, 0);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
691 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
693 CHARSET_DIMENSION (cs),
694 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
695 CHARSET_COLUMNS (cs),
696 CHARSET_GRAPHIC (cs),
698 write_c_string (buf, printcharfun);
699 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the representation with the object's uid. */
700 sprintf (buf, " 0x%x>", cs->header.uid);
701 write_c_string (buf, printcharfun);
/* Record description for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object member, giving its offset within the structure.  The
   terminating entry and closing brace are not visible in this
   listing. */
704 static const struct lrecord_description charset_description[] = {
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
719 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
720 mark_charset, print_charset, 0, 0, 0,
724 /* Make a new charset. */
725 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset, fill in its fields from the arguments and
   register it in the lookup structures.
   Visible logic:
   - every CHARSET_* field is assigned from the like-named argument;
     the CCL program and reverse-direction charset start as Qnil;
   - CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII, otherwise
     dimension + 1 or dimension + 2 (the selecting conditions are not
     visible in this listing);
   - the charset is entered into chlook->charset_by_attributes (indexed
     by an ISO 2022 type derived from DIMENSION and CHARS, then by
     FINAL, and in one variant also by DIRECTION), into
     chlook->charset_by_leading_byte at ID - MIN_LEADING_BYTE, and into
     Vcharset_hash_table under NAME; the table slots are asserted to be
     nil beforehand.
   NOTE(review): in the visible lines the decoding_table argument is
   never stored; CHARSET_DECODING_TABLE is set to Qunbound.  The
   argument named reg used below is declared on a line not visible
   here.  Confirm both against the full file. */
727 make_charset (Charset_ID id, Lisp_Object name,
728 unsigned short chars, unsigned char dimension,
729 unsigned char columns, unsigned char graphic,
730 Bufbyte final, unsigned char direction, Lisp_Object short_name,
731 Lisp_Object long_name, Lisp_Object doc,
733 Lisp_Object decoding_table,
734 Emchar min_code, Emchar max_code,
735 Emchar code_offset, unsigned char byte_offset,
736 Lisp_Object mother, unsigned char conversion)
739 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
743 XSETCHARSET (obj, cs);
745 CHARSET_ID (cs) = id;
746 CHARSET_NAME (cs) = name;
747 CHARSET_SHORT_NAME (cs) = short_name;
748 CHARSET_LONG_NAME (cs) = long_name;
749 CHARSET_CHARS (cs) = chars;
750 CHARSET_DIMENSION (cs) = dimension;
751 CHARSET_DIRECTION (cs) = direction;
752 CHARSET_COLUMNS (cs) = columns;
753 CHARSET_GRAPHIC (cs) = graphic;
754 CHARSET_FINAL (cs) = final;
755 CHARSET_DOC_STRING (cs) = doc;
756 CHARSET_REGISTRY (cs) = reg;
757 CHARSET_CCL_PROGRAM (cs) = Qnil;
758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
760 CHARSET_DECODING_TABLE(cs) = Qunbound;
761 CHARSET_MIN_CODE (cs) = min_code;
762 CHARSET_MAX_CODE (cs) = max_code;
763 CHARSET_CODE_OFFSET (cs) = code_offset;
764 CHARSET_BYTE_OFFSET (cs) = byte_offset;
765 CHARSET_MOTHER (cs) = mother;
766 CHARSET_CONVERSION (cs) = conversion;
770 if (id == LEADING_BYTE_ASCII)
771 CHARSET_REP_BYTES (cs) = 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
780 /* some charsets do not have final characters. This includes
781 ASCII, Control-1, Composite, and the two faux private
783 unsigned char iso2022_type
784 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
786 if (code_offset == 0)
788 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
789 chlook->charset_by_attributes[iso2022_type][final] = obj;
793 (chlook->charset_by_attributes[iso2022_type][final][direction]));
794 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
798 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
799 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
801 /* Some charsets are "faux" and don't have names or really exist at
802 all except in the leading-byte table. */
804 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.
   Three counters in chlook are visible: a single shared counter
   checked against MAX_LEADING_BYTE_PRIVATE, and separate 1-byte and
   2-byte counters checked against MAX_LEADING_BYTE_PRIVATE_1 and
   MAX_LEADING_BYTE_PRIVATE_2; the chosen counter is post-incremented.
   The error "No more character sets free for this dimension" is
   signalled with DIMENSION as its datum; the triggering condition and
   the exhausted-counter handling are on lines not visible here.
   NOTE(review): presumably the shared counter and the per-dimension
   counters belong to different build configurations -- confirm. */
809 get_unallocated_leading_byte (int dimension)
814 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
817 lb = chlook->next_allocated_leading_byte++;
821 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
824 lb = chlook->next_allocated_1_byte_leading_byte++;
828 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
831 lb = chlook->next_allocated_2_byte_leading_byte++;
837 ("No more character sets free for this dimension",
838 make_int (dimension));
844 /* Number of Big5 characters which have the same code in 1st byte. */
/* The expression evaluates to (0xFF - 0xA1) + (0x7F - 0x40)
   = 94 + 63 = 157. */
846 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character explicitly defined for CODE_POINT in the coded
   charset CCS.
   Visible logic:
   - the decoding table is descended one octet of CODE_POINT at a time
     with get_ccs_octet_table; a character found there is returned;
   - when the table entry is Qunloaded, the entry is loaded on demand
     with load_char_decoding_entry_maybe (inside a HAVE_CHISE section);
   - otherwise, when CCS has a mother charset, the lookup is delegated
     to the mother: directly for CONVERSION_IDENTICAL (through
     DECODE_CHAR when the mother is Vcharset_ucs), and after converting
     the 94x94 code point to a Big5 two-byte code for CONVERSION_BIG5_1
     and CONVERSION_BIG5_2.
   The loop around the table descent and the final fallback return are
   on lines not visible in this listing. */
849 decode_defined_char (Lisp_Object ccs, int code_point)
851 int dim = XCHARSET_DIMENSION (ccs);
852 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
860 = get_ccs_octet_table (decoding_table, ccs,
861 (code_point >> (dim * 8)) & 255);
863 if (CHARP (decoding_table))
864 return XCHAR (decoding_table);
866 if (EQ (decoding_table, Qunloaded))
868 char_id = load_char_decoding_entry_maybe (ccs, code_point);
870 #endif /* HAVE_CHISE */
873 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
875 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
877 if ( EQ (mother, Vcharset_ucs) )
878 return DECODE_CHAR (mother, code_point);
880 return decode_defined_char (mother, code_point);
882 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 position, then split into Big5 bytes. */
885 = (((code_point >> 8) & 0x7F) - 33) * 94
886 + (( code_point & 0x7F) - 33);
887 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
888 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Second byte: offsets below 0x3F start at 0x40, the rest at 0xA1. */
890 b2 += b2 < 0x3F ? 0x40 : 0x62;
891 return decode_defined_char (mother, (b1 << 8) | b2);
893 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
/* Same conversion, with the index shifted so rows start at 0xC9. */
896 = (((code_point >> 8) & 0x7F) - 33) * 94
897 + (( code_point & 0x7F) - 33)
898 + BIG5_SAME_ROW * (0xC9 - 0xA1);
899 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
900 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
902 b2 += b2 < 0x3F ? 0x40 : 0x62;
903 return decode_defined_char (mother, (b1 << 8) | b2);
/* Compute algorithmically the character that CODE_POINT denotes in
   CHARSET, without consulting a decoding table.
   Visible logic:
   - when the charset has a positive max code and a mother charset,
     CODE_POINT is converted according to XCHARSET_CONVERSION
     (94x60, 94x94x60, Big5-1, Big5-2) into a code in the mother, the
     charset's code offset is added, and the function recurses on the
     mother;
   - when it has a positive max code but no mother, a character id is
     computed from the byte offset, chars per dimension and code offset
     and range-checked against the min / max codes;
   - otherwise, when the final byte is at least '0', the character is
     placed in the MIN_CHAR_94x94 / MIN_CHAR_96x96 (and, for dimension
     1, the corresponding 94 / 96) ranges indexed by the final byte.
   Several assignments, case labels and the failure returns are on
   lines not visible in this listing. */
910 decode_builtin_char (Lisp_Object charset, int code_point)
912 Lisp_Object mother = XCHARSET_MOTHER (charset);
915 if ( XCHARSET_MAX_CODE (charset) > 0 )
917 if ( CHARSETP (mother) )
919 int code = code_point;
921 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
923 int row = code_point >> 8;
924 int cell = code_point & 255;
928 else if (row < 16 + 32 + 30)
929 code = (row - (16 + 32)) * 94 + cell - 33;
930 else if (row < 18 + 32 + 30)
932 else if (row < 18 + 32 + 60)
933 code = (row - (18 + 32)) * 94 + cell - 33;
935 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
937 int plane = code_point >> 16;
938 int row = (code_point >> 8) & 255;
939 int cell = code_point & 255;
943 else if (row < 16 + 32 + 30)
945 = (plane - 33) * 94 * 60
946 + (row - (16 + 32)) * 94
948 else if (row < 18 + 32 + 30)
950 else if (row < 18 + 32 + 60)
952 = (plane - 33) * 94 * 60
953 + (row - (18 + 32)) * 94
956 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 position, then split into Big5 bytes. */
959 = (((code_point >> 8) & 0x7F) - 33) * 94
960 + (( code_point & 0x7F) - 33);
961 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
962 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
964 b2 += b2 < 0x3F ? 0x40 : 0x62;
965 code = (b1 << 8) | b2;
967 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
970 = (((code_point >> 8) & 0x7F) - 33) * 94
971 + (( code_point & 0x7F) - 33)
972 + BIG5_SAME_ROW * (0xC9 - 0xA1);
973 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
974 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
976 b2 += b2 < 0x3F ? 0x40 : 0x62;
977 code = (b1 << 8) | b2;
/* Delegate to the mother charset with the converted, offset code. */
980 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
985 = (XCHARSET_DIMENSION (charset) == 1
987 code_point - XCHARSET_BYTE_OFFSET (charset)
989 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
990 * XCHARSET_CHARS (charset)
991 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
992 + XCHARSET_CODE_OFFSET (charset);
/* Reject ids outside the charset's [min code, max code] range. */
993 if ((cid < XCHARSET_MIN_CODE (charset))
994 || (XCHARSET_MAX_CODE (charset) < cid))
999 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1001 if (XCHARSET_DIMENSION (charset) == 1)
1003 switch (XCHARSET_CHARS (charset))
1007 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1010 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1018 switch (XCHARSET_CHARS (charset))
1021 return MIN_CHAR_94x94
1022 + (final - '0') * 94 * 94
1023 + (((code_point >> 8) & 0x7F) - 33) * 94
1024 + ((code_point & 0x7F) - 33);
1026 return MIN_CHAR_96x96
1027 + (final - '0') * 96 * 96
1028 + (((code_point >> 8) & 0x7F) - 32) * 96
1029 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.
   Visible logic:
   - first the charset's encoding table (a char table) is consulted for
     an integer entry;
   - otherwise a code is derived: from the mother charset by a
     recursive call (forcing defined_only to 1 when the charset's final
     byte is at least '0'), or from CH itself when it lies in the
     charset's [min, max] range;
   - that code, less the charset's code offset, is turned into a code
     point according to XCHARSET_CONVERSION (identical, 94, 96, 94x60,
     Big5-1, Big5-2, 94x94, 96x96, 94x94x60 and the 3- and 4-octet
     94 / 96 forms); an unknown conversion is reported with printf;
   - as a last resort, for charsets with a final byte of at least '0',
     the code point is derived from the MIN_CHAR_94 / 96 / 94x94 /
     96x96 ranges indexed by the final byte.
   DEFINED_ONLY short-circuits the algorithmic fallbacks; the value
   returned in that case, and the other failure returns, are on lines
   not visible in this listing. */
1041 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1043 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1046 if ( CHAR_TABLEP (encoding_table)
1047 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1052 Lisp_Object mother = XCHARSET_MOTHER (charset);
1053 int min = XCHARSET_MIN_CODE (charset);
1054 int max = XCHARSET_MAX_CODE (charset);
1057 if ( CHARSETP (mother) )
1059 if (XCHARSET_FINAL (charset) >= '0')
1060 code = charset_code_point (mother, ch, 1);
1062 code = charset_code_point (mother, ch, defined_only);
1064 else if (defined_only)
1066 else if ( ((max == 0) && CHARSETP (mother)
1067 && (XCHARSET_FINAL (charset) == 0))
1068 || ((min <= ch) && (ch <= max)) )
1070 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1071 || ((min <= code) && (code <= max)) )
/* D is the zero-based index of the code within this charset. */
1073 int d = code - XCHARSET_CODE_OFFSET (charset);
1075 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1084 int cell = d % 94 + 33;
1090 return (row << 8) | cell;
1092 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Split the Big5 code into its two bytes and linearise it. */
1094 int B1 = d >> 8, B2 = d & 0xFF;
1096 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1097 - (B2 < 0x7F ? 0x40 : 0x62);
1101 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1104 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1106 int B1 = d >> 8, B2 = d & 0xFF;
1108 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1109 - (B2 < 0x7F ? 0x40 : 0x62);
/* Rebase the index so that Big5 row 0xC9 maps to the first row. */
1113 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1114 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1117 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1118 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1120 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1123 int plane = d / (94 * 60) + 33;
1124 int row = (d % (94 * 60)) / 94;
1125 int cell = d % 94 + 33;
1131 return (plane << 16) | (row << 8) | cell;
1133 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1135 ( (d / (94 * 94) + 33) << 16)
1136 | ((d / 94 % 94 + 33) << 8)
1138 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1140 ( (d / (96 * 96) + 32) << 16)
1141 | ((d / 96 % 96 + 32) << 8)
1143 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1145 ( (d / (94 * 94 * 94) + 33) << 24)
1146 | ((d / (94 * 94) % 94 + 33) << 16)
1147 | ((d / 94 % 94 + 33) << 8)
1149 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1151 ( (d / (96 * 96 * 96) + 32) << 24)
1152 | ((d / (96 * 96) % 96 + 32) << 16)
1153 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): diagnostic goes to stdout without a trailing newline. */
1157 printf ("Unknown CCS-conversion %d is specified!",
1158 XCHARSET_CONVERSION (charset));
1162 else if (defined_only)
1164 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1165 ( XCHARSET_MIN_CODE (charset) == 0 )
1167 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1168 (XCHARSET_CODE_OFFSET (charset)
1169 == XCHARSET_MIN_CODE (charset))
1174 if (XCHARSET_DIMENSION (charset) == 1)
1176 if (XCHARSET_CHARS (charset) == 94)
1178 if (((d = ch - (MIN_CHAR_94
1179 + (XCHARSET_FINAL (charset) - '0') * 94))
1184 else if (XCHARSET_CHARS (charset) == 96)
1186 if (((d = ch - (MIN_CHAR_96
1187 + (XCHARSET_FINAL (charset) - '0') * 96))
1195 else if (XCHARSET_DIMENSION (charset) == 2)
1197 if (XCHARSET_CHARS (charset) == 94)
1199 if (((d = ch - (MIN_CHAR_94x94
1201 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1204 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1206 else if (XCHARSET_CHARS (charset) == 96)
1208 if (((d = ch - (MIN_CHAR_96x96
1210 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1213 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map the character C to a built-in charset and a code point in it.
   The chosen charset is stored through CHARSET and the code point is
   the return value.
   Visible logic: C is classified by range.  ASCII, Control-1, Latin-1,
   Hebrew and Thai map to their ISO 8859 / TIS 620 charsets; the BMP,
   SMP and SIP ranges map to the ucs-bmp / ucs-smp / ucs-sip charsets;
   ranges starting at MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 and
   MIN_CHAR_96x96 are resolved to a charset via CHARSET_BY_ATTRIBUTES
   using a final byte computed from the offset, falling back to
   Vcharset_ucs when no such charset exists.  Everything else maps to
   Vcharset_ucs.  Several conditions and return statements are on
   lines not visible in this listing. */
1224 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1226 if (c <= MAX_CHAR_BASIC_LATIN)
1228 *charset = Vcharset_ascii;
1233 *charset = Vcharset_control_1;
1238 *charset = Vcharset_latin_iso8859_1;
1242 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1244 *charset = Vcharset_hebrew_iso8859_8;
1245 return c - MIN_CHAR_HEBREW + 0x20;
1248 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1250 *charset = Vcharset_thai_tis620;
1251 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns a list2 value and does not set
   *charset, unlike every other branch; presumably it is disabled by a
   preprocessor conditional that is not visible here -- confirm. */
1254 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1255 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1257 return list2 (Vcharset_katakana_jisx0201,
1258 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1261 else if (c <= MAX_CHAR_BMP)
1263 *charset = Vcharset_ucs_bmp;
1266 else if (c <= MAX_CHAR_SMP)
1268 *charset = Vcharset_ucs_smp;
1269 return c - MIN_CHAR_SMP;
1271 else if (c <= MAX_CHAR_SIP)
1273 *charset = Vcharset_ucs_sip;
1274 return c - MIN_CHAR_SIP;
1276 else if (c < MIN_CHAR_94)
1278 *charset = Vcharset_ucs;
1281 else if (c <= MAX_CHAR_94)
/* Final byte = '0' plus the index of the 94-character block. */
1283 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1284 ((c - MIN_CHAR_94) / 94) + '0',
1285 CHARSET_LEFT_TO_RIGHT);
1286 if (!NILP (*charset))
1287 return ((c - MIN_CHAR_94) % 94) + 33;
1290 *charset = Vcharset_ucs;
1294 else if (c <= MAX_CHAR_96)
1296 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1297 ((c - MIN_CHAR_96) / 96) + '0',
1298 CHARSET_LEFT_TO_RIGHT);
1299 if (!NILP (*charset))
1300 return ((c - MIN_CHAR_96) % 96) + 32;
1303 *charset = Vcharset_ucs;
1307 else if (c <= MAX_CHAR_94x94)
1310 = CHARSET_BY_ATTRIBUTES (94, 2,
1311 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1312 CHARSET_LEFT_TO_RIGHT);
1313 if (!NILP (*charset))
/* Row in the high octet, cell in the low octet, both based at 33. */
1314 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1315 | (((c - MIN_CHAR_94x94) % 94) + 33);
1318 *charset = Vcharset_ucs;
1322 else if (c <= MAX_CHAR_96x96)
1325 = CHARSET_BY_ATTRIBUTES (96, 2,
1326 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1327 CHARSET_LEFT_TO_RIGHT);
1328 if (!NILP (*charset))
1329 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1330 | (((c - MIN_CHAR_96x96) % 96) + 32);
1333 *charset = Vcharset_ucs;
1339 *charset = Vcharset_ucs;
1344 Lisp_Object Vdefault_coded_charset_priority_list;
1348 /************************************************************************/
1349 /* Basic charset Lisp functions */
1350 /************************************************************************/
/* Lisp primitive `charsetp': one argument; yields Qt when the argument
   satisfies CHARSETP and Qnil otherwise. */
1352 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1353 Return non-nil if OBJECT is a charset.
1357 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': a charset object is returned as is;
   any other argument must pass CHECK_SYMBOL and is looked up in
   Vcharset_hash_table, with Qnil as the default when no entry
   exists. */
1360 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1361 Retrieve the charset of the given name.
1362 If CHARSET-OR-NAME is a charset object, it is simply returned.
1363 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1364 nil is returned. Otherwise the associated charset object is returned.
1368 if (CHARSETP (charset_or_name))
1369 return charset_or_name;
1371 CHECK_SYMBOL (charset_or_name);
1372 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': resolves NAME through Ffind_charset
   and signals "No such charset" with NAME as datum on failure.  The
   failure test and the return statement are on lines not visible in
   this listing. */
1375 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1376 Retrieve the charset of the given name.
1377 Same as `find-charset' except an error is signalled if there is no such
1378 charset instead of returning nil.
1382 Lisp_Object charset = Ffind_charset (name);
1385 signal_simple_error ("No such charset", name);
1389 /* We store the charsets in hash tables with the names as the key and the
1390 actual charset object as the value. Occasionally we need to use them
1391 in a list format. These routines provide us with that. */
1392 struct charset_list_closure
1394 Lisp_Object *charset_list;
/* Hash-table mapping callback: conses KEY onto the front of the list
   that the closure's charset_list member points to.  VALUE is not used
   in the visible lines.  The untyped closure argument is cast to
   struct charset_list_closure.  The return statement is on a line not
   visible in this listing. */
1398 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1399 void *charset_list_closure)
1401 /* This function can GC */
1402 struct charset_list_closure *chcl =
1403 (struct charset_list_closure*) charset_list_closure;
1404 Lisp_Object *charset_list = chcl->charset_list;
1406 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list': builds a list by mapping
   add_charset_to_list_mapper over Vcharset_hash_table, so the list
   holds the hash table's keys.  The accumulating list is protected
   with GCPRO1 while the mapping runs; the matching unprotect is on a
   line not visible in this listing. */
1410 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1411 Return a list of the names of all defined charsets.
1415 Lisp_Object charset_list = Qnil;
1416 struct gcpro gcpro1;
1417 struct charset_list_closure charset_list_closure;
1419 GCPRO1 (charset_list);
1420 charset_list_closure.charset_list = &charset_list;
1421 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1422 &charset_list_closure);
1425 return charset_list;
/* Lisp primitive `charset-name': resolves the argument through
   Fget_charset and returns the XCHARSET_NAME of the result. */
1428 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1429 Return the name of charset CHARSET.
1433 return XCHARSET_NAME (Fget_charset (charset));
1436 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.  Outline of the visible code:
   - NAME must be a symbol and DOC-STRING, when non-nil, a string;
     a NAME that Ffind_charset already knows is rejected.
   - PROPS is walked with EXTERNAL_PROPERTY_LIST_LOOP_3.  Each recognized
     keyword validates its value and fills a local; any other keyword
     signals "Unrecognized property".
   - After the loop: 'final is mandatory, must not exceed 0x5F when the
     dimension is 2, and the (chars, dimension, final) triple must not be
     registered yet for either direction (CHARSET_BY_ATTRIBUTES).
   - Defaults: empty strings for a nil doc string and registry, the
     symbol's print name for short_name, the doc string for long_name,
     and the dimension for columns.
   - The charset is created by make_charset; a supplied CCL program is
     stored on it afterwards.
   NOTE(review): the docstring spells two properties 'code-min and
   'code-max, but the keywords tested below are Qmin_code and Qmax_code,
   which syms_of_mule_charset interns as "min-code" and "max-code".
   NOTE(review): the Qiso_ir keyword handled below is not mentioned in
   the docstring. */
1437 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1438 Define a new character set.
1439 This function is for use with Mule support.
1440 NAME is a symbol, the name by which the character set is normally referred.
1441 DOC-STRING is a string describing the character set.
1442 PROPS is a property list, describing the specific nature of the
1443 character set. Recognized properties are:
1445 'short-name Short version of the charset name (ex: Latin-1)
1446 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1447 'registry A regular expression matching the font registry field for
1449 'dimension Number of octets used to index a character in this charset.
1450 Either 1 or 2. Defaults to 1.
1451 If UTF-2000 feature is enabled, 3 or 4 are also available.
1452 'columns Number of columns used to display a character in this charset.
1453 Only used in TTY mode. (Under X, the actual width of a
1454 character can be derived from the font used to display the
1455 characters.) If unspecified, defaults to the dimension
1456 (this is almost always the correct value).
1457 'chars Number of characters in each dimension (94 or 96).
1458 Defaults to 94. Note that if the dimension is 2, the
1459 character set thus described is 94x94 or 96x96.
1460 If UTF-2000 feature is enabled, 128 or 256 are also available.
1461 'final Final byte of ISO 2022 escape sequence. Must be
1462 supplied. Each combination of (DIMENSION, CHARS) defines a
1463 separate namespace for final bytes. Note that ISO
1464 2022 restricts the final byte to the range
1465 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1466 dimension == 2. Note also that final bytes in the range
1467 0x30 - 0x3F are reserved for user-defined (not official)
1469 'graphic 0 (use left half of font on output) or 1 (use right half
1470 of font on output). Defaults to 0. For example, for
1471 a font whose registry is ISO8859-1, the left half
1472 (octets 0x20 - 0x7F) is the `ascii' character set, while
1473 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1474 character set. With 'graphic set to 0, the octets
1475 will have their high bit cleared; with it set to 1,
1476 the octets will have their high bit set.
1477 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1479 'ccl-program A compiled CCL program used to convert a character in
1480 this charset into an index into the font. This is in
1481 addition to the 'graphic property. The CCL program
1482 is passed the octets of the character, with the high
1483 bit cleared and set depending upon whether the value
1484 of the 'graphic property is 0 or 1.
1485 'mother [UTF-2000 only] Base coded-charset.
1486 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1487 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1488 'code-offset [UTF-2000 only] Offset for a code-point of a base
1490 'conversion [UTF-2000 only] Conversion for a code-point of a base
1491 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1493 (name, doc_string, props))
1495 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1496 int direction = CHARSET_LEFT_TO_RIGHT;
1497 Lisp_Object registry = Qnil;
1498 Lisp_Object charset;
1499 Lisp_Object ccl_program = Qnil;
1500 Lisp_Object short_name = Qnil, long_name = Qnil;
1501 Lisp_Object mother = Qnil;
1502 int min_code = 0, max_code = 0, code_offset = 0;
1503 int byte_offset = -1;
1506 CHECK_SYMBOL (name);
1507 if (!NILP (doc_string))
1508 CHECK_STRING (doc_string);
1510 charset = Ffind_charset (name);
1511 if (!NILP (charset))
1512 signal_simple_error ("Cannot redefine existing charset", name);
1515 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1517 if (EQ (keyword, Qshort_name))
1519 CHECK_STRING (value);
1523 else if (EQ (keyword, Qlong_name))
1525 CHECK_STRING (value);
1529 else if (EQ (keyword, Qiso_ir))
1533 id = - XINT (value);
1537 else if (EQ (keyword, Qdimension))
1540 dimension = XINT (value);
1541 if (dimension < 1 ||
1548 signal_simple_error ("Invalid value for 'dimension", value);
1551 else if (EQ (keyword, Qchars))
1554 chars = XINT (value);
1555 if (chars != 94 && chars != 96
1557 && chars != 128 && chars != 256
1560 signal_simple_error ("Invalid value for 'chars", value);
1563 else if (EQ (keyword, Qcolumns))
1566 columns = XINT (value);
1567 if (columns != 1 && columns != 2)
1568 signal_simple_error ("Invalid value for 'columns", value);
1571 else if (EQ (keyword, Qgraphic))
1574 graphic = XINT (value);
1582 signal_simple_error ("Invalid value for 'graphic", value);
1585 else if (EQ (keyword, Qregistry))
1587 CHECK_STRING (value);
1591 else if (EQ (keyword, Qdirection))
1593 if (EQ (value, Ql2r))
1594 direction = CHARSET_LEFT_TO_RIGHT;
1595 else if (EQ (value, Qr2l))
1596 direction = CHARSET_RIGHT_TO_LEFT;
1598 signal_simple_error ("Invalid value for 'direction", value);
1601 else if (EQ (keyword, Qfinal))
1603 CHECK_CHAR_COERCE_INT (value);
1604 final = XCHAR (value);
1605 if (final < '0' || final > '~')
1606 signal_simple_error ("Invalid value for 'final", value);
1610 else if (EQ (keyword, Qmother))
1612 mother = Fget_charset (value);
1615 else if (EQ (keyword, Qmin_code))
1618 min_code = XUINT (value);
1621 else if (EQ (keyword, Qmax_code))
1624 max_code = XUINT (value);
1627 else if (EQ (keyword, Qcode_offset))
1630 code_offset = XUINT (value);
1633 else if (EQ (keyword, Qconversion))
1635 if (EQ (value, Q94x60))
1636 conversion = CONVERSION_94x60;
1637 else if (EQ (value, Q94x94x60))
1638 conversion = CONVERSION_94x94x60;
1639 else if (EQ (value, Qbig5_1))
1640 conversion = CONVERSION_BIG5_1;
1641 else if (EQ (value, Qbig5_2))
1642 conversion = CONVERSION_BIG5_2;
1644 signal_simple_error ("Unrecognized conversion", value);
1648 else if (EQ (keyword, Qccl_program))
1650 struct ccl_program test_ccl;
1652 if (setup_ccl_program (&test_ccl, value) < 0)
1653 signal_simple_error ("Invalid value for 'ccl-program", value);
1654 ccl_program = value;
1658 signal_simple_error ("Unrecognized property", keyword);
1664 error ("'final must be specified");
1666 if (dimension == 2 && final > 0x5F)
1668 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1671 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1672 CHARSET_LEFT_TO_RIGHT)) ||
1673 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1674 CHARSET_RIGHT_TO_LEFT)))
1676 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1679 id = get_unallocated_leading_byte (dimension);
1681 if (NILP (doc_string))
1682 doc_string = build_string ("");
1684 if (NILP (registry))
1685 registry = build_string ("");
1687 if (NILP (short_name))
1688 XSETSTRING (short_name, XSYMBOL (name)->name);
1690 if (NILP (long_name))
1691 long_name = doc_string;
1694 columns = dimension;
1696 if (byte_offset < 0)
1700 else if (chars == 96)
1706 charset = make_charset (id, name, chars, dimension, columns, graphic,
1707 final, direction, short_name, long_name,
1708 doc_string, registry,
1709 Qnil, min_code, max_code, code_offset, byte_offset,
1710 mother, conversion);
1711 if (!NILP (ccl_program))
1712 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Visible steps:
   - CHARSET is resolved with Fget_charset and rejected if it already
     has a reverse-direction partner.
   - NEW-NAME must be a symbol that does not name an existing charset.
   - chars, dimension, columns, graphic, final, doc string, short and
     long names and registry are copied from the source charset; the
     direction is flipped and a fresh id is taken from
     get_unallocated_leading_byte.
   - make_charset builds the new charset.  Two alternative tails of its
     argument list are visible (one copying the decoding table and code
     range from the source, one passing Qnil and zeros); presumably they
     are selected by a preprocessor conditional not shown here -- confirm.
   - Finally the two charsets are linked to each other through their
     reverse-direction fields. */
1716 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1718 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1719 NEW-NAME is the name of the new charset. Return the new charset.
1721 (charset, new_name))
1723 Lisp_Object new_charset = Qnil;
1724 int id, chars, dimension, columns, graphic, final;
1726 Lisp_Object registry, doc_string, short_name, long_name;
1729 charset = Fget_charset (charset);
1730 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1731 signal_simple_error ("Charset already has reverse-direction charset",
1734 CHECK_SYMBOL (new_name);
1735 if (!NILP (Ffind_charset (new_name)))
1736 signal_simple_error ("Cannot redefine existing charset", new_name);
1738 cs = XCHARSET (charset);
1740 chars = CHARSET_CHARS (cs);
1741 dimension = CHARSET_DIMENSION (cs);
1742 columns = CHARSET_COLUMNS (cs);
1743 id = get_unallocated_leading_byte (dimension);
1745 graphic = CHARSET_GRAPHIC (cs);
1746 final = CHARSET_FINAL (cs);
1747 direction = CHARSET_RIGHT_TO_LEFT;
1748 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1749 direction = CHARSET_LEFT_TO_RIGHT;
1750 doc_string = CHARSET_DOC_STRING (cs);
1751 short_name = CHARSET_SHORT_NAME (cs);
1752 long_name = CHARSET_LONG_NAME (cs);
1753 registry = CHARSET_REGISTRY (cs);
1755 new_charset = make_charset (id, new_name, chars, dimension, columns,
1756 graphic, final, direction, short_name, long_name,
1757 doc_string, registry,
1759 CHARSET_DECODING_TABLE(cs),
1760 CHARSET_MIN_CODE(cs),
1761 CHARSET_MAX_CODE(cs),
1762 CHARSET_CODE_OFFSET(cs),
1763 CHARSET_BYTE_OFFSET(cs),
1765 CHARSET_CONVERSION (cs)
1767 Qnil, 0, 0, 0, 0, Qnil, 0
1771 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1772 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol;
   CHARSET is resolved with Fget_charset (error if unknown).  The charset
   object is then stored in Vcharset_hash_table under the key ALIAS, and
   the value of that Fputhash call is returned. */
1777 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1778 Define symbol ALIAS as an alias for CHARSET.
1782 CHECK_SYMBOL (alias);
1783 charset = Fget_charset (charset);
1784 return Fputhash (alias, charset, Vcharset_hash_table);
1787 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  Resolves CHARSET
   with Fget_charset and returns whatever is stored in its
   reverse-direction field, unchanged. */
1789 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1791 Return the reverse-direction charset parallel to CHARSET, if any.
1792 This is the charset with the same properties (in particular, the same
1793 dimension, number of characters per dimension, and final byte) as
1794 CHARSET but whose characters are displayed in the opposite direction.
1798 charset = Fget_charset (charset);
1799 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Argument checks visible
   below: DIMENSION must be the integer 1 or 2, CHARS must be 94 or 96,
   FINAL must lie in '0'..'~' (and not exceed 0x5F when DIMENSION is 2),
   and DIRECTION must be 'l2r, 'r2l or nil.  The lookup goes through
   CHARSET_BY_ATTRIBUTES, either for the requested direction or for
   left-to-right and then right-to-left.
   NOTE(review): the visible return statement yields XCHARSET_NAME of
   the match, i.e. the charset's name, whereas the docstring says
   "Return a charset". */
1803 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1804 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1805 If DIRECTION is omitted, both directions will be checked (left-to-right
1806 will be returned if character sets exist for both directions).
1808 (dimension, chars, final, direction))
1810 int dm, ch, fi, di = -1;
1811 Lisp_Object obj = Qnil;
1813 CHECK_INT (dimension);
1814 dm = XINT (dimension);
1815 if (dm < 1 || dm > 2)
1816 signal_simple_error ("Invalid value for DIMENSION", dimension);
1820 if (ch != 94 && ch != 96)
1821 signal_simple_error ("Invalid value for CHARS", chars);
1823 CHECK_CHAR_COERCE_INT (final);
1825 if (fi < '0' || fi > '~')
1826 signal_simple_error ("Invalid value for FINAL", final);
1828 if (EQ (direction, Ql2r))
1829 di = CHARSET_LEFT_TO_RIGHT;
1830 else if (EQ (direction, Qr2l))
1831 di = CHARSET_RIGHT_TO_LEFT;
1832 else if (!NILP (direction))
1833 signal_simple_error ("Invalid value for DIRECTION", direction);
1835 if (dm == 2 && fi > 0x5F)
1837 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1841 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1843 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1846 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1849 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_SHORT_NAME field. */
1853 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1854 Return short name of CHARSET.
1858 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_LONG_NAME field. */
1861 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1862 Return long name of CHARSET.
1866 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.  Resolves CHARSET with
   Fget_charset and returns its doc string (XCHARSET_DOC_STRING). */
1869 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1870 Return description of CHARSET.
1874 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.  Resolves CHARSET with
   Fget_charset and returns XCHARSET_DIMENSION wrapped as a Lisp integer
   by make_int. */
1877 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1878 Return dimension of CHARSET.
1882 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  CHARSET is resolved with
   Fget_charset and PROP must be a symbol.  PROP is compared with EQ
   against each known property symbol in turn and the matching field is
   returned; integer fields are wrapped with make_int.  Special cases
   visible below: 'final yields nil when the stored final byte is 0,
   'direction is mapped back to the symbols l2r / r2l, and
   'reverse-direction-charset yields the partner's name rather than the
   charset object when a partner exists.  Any other PROP signals
   "Unrecognized charset property name".
   NOTE(review): no branch for Qcode_offset, Qconversion or Qiso_ir is
   visible here although `make-charset' accepts them. */
1885 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1886 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1887 Recognized properties are those listed in `make-charset', as well as
1888 'name and 'doc-string.
1894 charset = Fget_charset (charset);
1895 cs = XCHARSET (charset);
1897 CHECK_SYMBOL (prop);
1898 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1899 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1900 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1901 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1902 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1903 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1904 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1905 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1906 Qnil : make_char (CHARSET_FINAL (cs));
1907 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1908 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1909 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1910 if (EQ (prop, Qdirection))
1911 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1912 if (EQ (prop, Qreverse_direction_charset))
1914 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1915 /* #### Is this translation OK? If so, error checking sufficient? */
1916 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1919 if (EQ (prop, Qmother))
1920 return CHARSET_MOTHER (cs);
1921 if (EQ (prop, Qmin_code))
1922 return make_int (CHARSET_MIN_CODE (cs));
1923 if (EQ (prop, Qmax_code))
1924 return make_int (CHARSET_MAX_CODE (cs));
1926 signal_simple_error ("Unrecognized charset property name", prop);
1927 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.  Resolves CHARSET with Fget_charset and
   returns XCHARSET_LEADING_BYTE of it as a Lisp integer. */
1930 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1931 Return charset identification number of CHARSET.
1935 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is resolved with
   Fget_charset.  CCL-PROGRAM is validated by running setup_ccl_program
   on a scratch struct; a negative result signals "Invalid ccl-program".
   Only after that check is the program stored on the charset. */
1938 /* #### We need to figure out which properties we really want to
1941 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1942 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1944 (charset, ccl_program))
1946 struct ccl_program test_ccl;
1948 charset = Fget_charset (charset);
1949 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1950 signal_simple_error ("Invalid ccl-program", ccl_program);
1951 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop the cached font lookups for CHARSET on every device.
   CHARSET is used directly as the key into each device's
   charset_font_cache table. */
1956 invalidate_charset_font_caches (Lisp_Object charset)
1958 /* Invalidate font cache entries for charset on all devices. */
1959 Lisp_Object devcons, concons, hash_table;
1960 DEVICE_LOOP_NO_BREAK (devcons, concons)
1962 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the "not found" default, so a device with no cache for
   this charset is simply skipped. */
1963 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1964 if (!UNBOUNDP (hash_table))
1965 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  CHARSET is resolved with
   Fget_charset and REGISTRY must be a string.  After storing the new
   registry, the per-device font caches for the charset are invalidated
   and face_property_was_changed is called for the font property of
   Vdefault_face. */
1969 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1970 Set the 'registry property of CHARSET to REGISTRY.
1972 (charset, registry))
1974 charset = Fget_charset (charset);
1975 CHECK_STRING (registry);
1976 XCHARSET_REGISTRY (charset) = registry;
1977 invalidate_charset_font_caches (charset);
1978 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.  Resolves CHARSET with
   Fget_charset and returns its XCHARSET_DECODING_TABLE field as is. */
1983 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1984 Return mapping-table of CHARSET.
1988 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  Visible behaviour:
   - CHARSET is resolved with Fget_charset.
   - A vector TABLE is checked by decoding_table_check_elements against
     the charset's dimension; the failure cases signal "Too big table",
     "Invalid element is found" or "Something wrong".
   - A TABLE that is neither accepted case signals wrong-type-argument
     with the predicate name "vector-or-nil-p".
   - The charset's decoding table is reset to Qnil, then the entries of
     TABLE are walked according to the charset's dimension and each
     character is registered through Fput_char_attribute under the
     charset's name.  The code point is the index plus the charset's
     byte offset; for nested vectors the outer index is shifted left by
     8 bits first.  An outer element handled by the last visible call is
     stored with just the outer index plus byte offset. */
1991 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1992 Set mapping-table of CHARSET to TABLE.
1996 struct Lisp_Charset *cs;
2000 charset = Fget_charset (charset);
2001 cs = XCHARSET (charset);
2005 CHARSET_DECODING_TABLE(cs) = Qnil;
2008 else if (VECTORP (table))
2010 int ccs_len = CHARSET_BYTE_SIZE (cs);
2011 int ret = decoding_table_check_elements (table,
2012 CHARSET_DIMENSION (cs),
2017 signal_simple_error ("Too big table", table);
2019 signal_simple_error ("Invalid element is found", table);
2021 signal_simple_error ("Something wrong", table);
2023 CHARSET_DECODING_TABLE(cs) = Qnil;
2026 signal_error (Qwrong_type_argument,
2027 list2 (build_translated_string ("vector-or-nil-p"),
2030 byte_offset = CHARSET_BYTE_OFFSET (cs);
2031 switch (CHARSET_DIMENSION (cs))
2034 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2036 Lisp_Object c = XVECTOR_DATA(table)[i];
2039 Fput_char_attribute (c, XCHARSET_NAME (charset),
2040 make_int (i + byte_offset));
2044 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2046 Lisp_Object v = XVECTOR_DATA(table)[i];
2052 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2054 Lisp_Object c = XVECTOR_DATA(v)[j];
2058 (c, XCHARSET_NAME (charset),
2059 make_int ( ( (i + byte_offset) << 8 )
2065 Fput_char_attribute (v, XCHARSET_NAME (charset),
2066 make_int (i + byte_offset));
/* Lisp primitive `save-charset-mapping-table'.  Writes the decoding
   table of CHARSET to persistent storage.  Two back ends are visible:
   - HAVE_LIBCHISE: the CCS handle is obtained from
     default_chise_data_source by the charset's symbol name, entries are
     written with chise_ccs_set_decoded_char and flushed with
     chise_ccs_sync.  A failed lookup prints "Can't open decoding-table".
   - otherwise: a database file located by char_attribute_system_db_file
     is opened in "w+" mode, entries are written with Fput_database
     (key and value are both printed with Fprin1_to_string), and the
     result of Fclose_database is returned.
   Iteration: every octet position runs from the charset's byte offset
   up to, but not including, byte offset plus byte size.  The switch on
   the charset's dimension nests one such loop per octet (cell; row and
   cell; plane, row and cell; group, plane, row and cell), fetching each
   level with get_ccs_octet_table and packing the octets into one code
   point, 8 bits per octet with the outermost octet most significant. */
2074 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2075 Save mapping-table of CHARSET.
2079 struct Lisp_Charset *cs;
2080 int byte_min, byte_max;
2081 #ifdef HAVE_LIBCHISE
2083 #else /* HAVE_LIBCHISE */
2085 Lisp_Object db_file;
2086 #endif /* not HAVE_LIBCHISE */
2088 charset = Fget_charset (charset);
2089 cs = XCHARSET (charset);
2091 #ifdef HAVE_LIBCHISE
2092 if ( open_chise_data_source_maybe () )
2096 = chise_ds_get_ccs (default_chise_data_source,
2097 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2100 printf ("Can't open decoding-table %s\n",
2101 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2104 #else /* HAVE_LIBCHISE */
2105 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2106 Qsystem_char_id, 1);
2107 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2108 #endif /* not HAVE_LIBCHISE */
2110 byte_min = CHARSET_BYTE_OFFSET (cs);
2111 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2112 switch (CHARSET_DIMENSION (cs))
2116 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2119 for (cell = byte_min; cell < byte_max; cell++)
2121 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2125 #ifdef HAVE_LIBCHISE
2126 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2127 #else /* HAVE_LIBCHISE */
2128 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2129 Fprin1_to_string (c, Qnil),
2131 #endif /* not HAVE_LIBCHISE */
2138 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2141 for (row = byte_min; row < byte_max; row++)
2143 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2146 for (cell = byte_min; cell < byte_max; cell++)
2148 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2152 #ifdef HAVE_LIBCHISE
2153 chise_ccs_set_decoded_char
2155 (row << 8) | cell, XCHAR (c));
2156 #else /* HAVE_LIBCHISE */
2157 Fput_database (Fprin1_to_string (make_int ((row << 8)
2160 Fprin1_to_string (c, Qnil),
2162 #endif /* not HAVE_LIBCHISE */
2170 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2173 for (plane = byte_min; plane < byte_max; plane++)
2176 = get_ccs_octet_table (table_p, charset, plane);
2179 for (row = byte_min; row < byte_max; row++)
2182 = get_ccs_octet_table (table_r, charset, row);
2185 for (cell = byte_min; cell < byte_max; cell++)
2187 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2192 #ifdef HAVE_LIBCHISE
2193 chise_ccs_set_decoded_char
2198 #else /* HAVE_LIBCHISE */
2199 Fput_database (Fprin1_to_string
2200 (make_int ((plane << 16)
2204 Fprin1_to_string (c, Qnil),
2206 #endif /* not HAVE_LIBCHISE */
2215 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2218 for (group = byte_min; group < byte_max; group++)
2221 = get_ccs_octet_table (table_g, charset, group);
2224 for (plane = byte_min; plane < byte_max; plane++)
2227 = get_ccs_octet_table (table_p, charset, plane);
2230 for (row = byte_min; row < byte_max; row++)
2233 = get_ccs_octet_table (table_r, charset, row);
2236 for (cell = byte_min; cell < byte_max; cell++)
2239 = get_ccs_octet_table (table_c, charset, cell);
2243 #ifdef HAVE_LIBCHISE
2244 chise_ccs_set_decoded_char
2250 #else /* HAVE_LIBCHISE */
2251 Fput_database (Fprin1_to_string
2252 (make_int (( group << 24)
2257 Fprin1_to_string (c, Qnil),
2259 #endif /* not HAVE_LIBCHISE */
2267 #ifdef HAVE_LIBCHISE
2268 chise_ccs_sync (dt_ccs);
2270 #else /* HAVE_LIBCHISE */
2271 return Fclose_database (db);
2272 #endif /* not HAVE_LIBCHISE */
/* Lisp primitive `reset-charset-mapping-table'.  CCS is resolved with
   Fget_charset.  The code then tests whether a backing store exists:
   with HAVE_LIBCHISE by fetching the CCS from default_chise_data_source
   and checking that chise_ccs_setup_db returns 0, otherwise by checking
   with Ffile_exists_p the file named by char_attribute_system_db_file.
   The decoding table is set to Qunloaded, presumably only when that
   test succeeds (the enclosing condition is only partly visible here)
   -- confirm. */
2275 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2276 Reset mapping-table of CCS with database file.
2280 #ifdef HAVE_LIBCHISE
2281 CHISE_CCS chise_ccs;
2283 Lisp_Object db_file;
2286 ccs = Fget_charset (ccs);
2288 #ifdef HAVE_LIBCHISE
2289 if ( open_chise_data_source_maybe () )
2292 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2293 XSTRING_DATA (Fsymbol_name
2294 (XCHARSET_NAME(ccs))));
2295 if (chise_ccs == NULL)
2298 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2299 Qsystem_char_id, 0);
2303 #ifdef HAVE_LIBCHISE
2304 chise_ccs_setup_db (chise_ccs, 0) == 0
2306 !NILP (Ffile_exists_p (db_file))
2310 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetch the character for CODE_POINT of charset CCS from the backing
   store and cache the answer in the in-memory decoding table with
   decoding_table_put_char.  Both back ends also record Qnil for the
   code point in one of their branches, presumably the lookup-failure
   case (the branch conditions are not visible here) -- confirm.
   With HAVE_LIBCHISE the lookup is chise_ds_decode_char on
   default_chise_data_source; otherwise the database file named by
   char_attribute_system_db_file is opened in "r" mode, queried with
   Fget_database using the printed code point as key, and closed again
   on both visible paths. */
2317 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2319 #ifdef HAVE_LIBCHISE
2320 CHISE_Char_ID char_id;
2322 if ( open_chise_data_source_maybe () )
2326 = chise_ds_decode_char (default_chise_data_source,
2327 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2330 decoding_table_put_char (ccs, code_point, make_char (char_id));
2332 decoding_table_put_char (ccs, code_point, Qnil);
2334 /* chise_ccst_close (dt_ccs); */
2336 #else /* HAVE_LIBCHISE */
2339 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2342 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2346 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2353 decoding_table_put_char (ccs, code_point, ret);
2354 Fclose_database (db);
2358 decoding_table_put_char (ccs, code_point, Qnil);
2359 Fclose_database (db);
2362 #endif /* not HAVE_LIBCHISE */
2364 #endif /* HAVE_CHISE */
2365 #endif /* UTF2000 */
2368 /************************************************************************/
2369 /* Lisp primitives for working with characters */
2370 /************************************************************************/
/* Lisp primitive `decode-char'.  CHARSET is resolved with Fget_charset.
   The code point is decoded with DECODE_CHAR when DEFINED_ONLY is nil,
   and with decode_defined_char otherwise.  A negative result becomes
   nil; anything else is wrapped with make_char.
   NOTE(review): there is a test for XCHARSET_GRAPHIC == 1 before the
   decode, but the adjustment it guards is not visible in this excerpt. */
2373 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2374 Make a character from CHARSET and code-point CODE.
2375 If DEFINED_ONLY is non-nil, builtin character is not returned.
2376 If corresponding character is not found, nil is returned.
2378 (charset, code, defined_only))
2382 charset = Fget_charset (charset);
2385 if (XCHARSET_GRAPHIC (charset) == 1)
2387 if (NILP (defined_only))
2388 c = DECODE_CHAR (charset, c);
2390 c = decode_defined_char (charset, c);
2391 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.  CHARSET is resolved with
   Fget_charset.  For Vcharset_latin_viscii the character is first
   decoded with Fdecode_char and its attributes under
   Vcharset_latin_viscii_lower and Vcharset_latin_viscii_upper are
   consulted; CHARSET is switched to whichever of the two applies
   (the exact conditions are not visible here).  The code point then
   goes through decode_builtin_char; if that yields a negative value the
   function falls back to Fdecode_char with DEFINED_ONLY nil. */
2394 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2395 Make a builtin character from CHARSET and code-point CODE.
2401 charset = Fget_charset (charset);
2403 if (EQ (charset, Vcharset_latin_viscii))
2405 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2411 (ret = Fget_char_attribute (chr,
2412 Vcharset_latin_viscii_lower,
2415 charset = Vcharset_latin_viscii_lower;
2419 (ret = Fget_char_attribute (chr,
2420 Vcharset_latin_viscii_upper,
2423 charset = Vcharset_latin_viscii_upper;
2430 if (XCHARSET_GRAPHIC (charset) == 1)
2433 c = decode_builtin_char (charset, c);
2434 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.  CHARSET is resolved with Fget_charset.
   The legal octet range is chosen per charset: 0..127 for ascii, 0..31
   for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  ARG2 is masked with 0x7f
   before the range check (the treatment of ARG1 is not visible here).
   An octet outside the range signals args_out_of_range_3.  For a
   dimension-1 charset the character is built from ARG1 alone, and the
   error text shows that a non-nil ARG2 is rejected there; otherwise
   both octets are passed to MAKE_CHAR. */
2438 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2439 Make a character from CHARSET and octets ARG1 and ARG2.
2440 ARG2 is required only for characters from two-dimensional charsets.
2441 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2442 character s with caron.
2444 (charset, arg1, arg2))
2448 int lowlim, highlim;
2450 charset = Fget_charset (charset);
2451 cs = XCHARSET (charset);
2453 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2454 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2456 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2458 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2459 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2462 /* It is useful (and safe, according to Olivier Galibert) to strip
2463 the 8th bit off ARG1 and ARG2 because it allows programmers to
2464 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2465 Latin 2 code of the character. */
2473 if (a1 < lowlim || a1 > highlim)
2474 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2476 if (CHARSET_DIMENSION (cs) == 1)
2480 ("Charset is of dimension one; second octet must be nil", arg2);
2481 return make_char (MAKE_CHAR (charset, a1, 0));
2490 a2 = XINT (arg2) & 0x7f;
2492 if (a2 < lowlim || a2 > highlim)
2493 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2495 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CHARACTER is checked and coerced with
   CHECK_CHAR_COERCE_INT; the value returned is the name of its charset
   (XCHARSET_NAME of CHAR_CHARSET), not the charset object. */
2498 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2499 Return the character set of CHARACTER.
2503 CHECK_CHAR_COERCE_INT (character);
2505 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.  CHARACTER is split with BREAKUP_CHAR
   into its charset and two octets.  N of nil or 0 selects the first
   octet and N of 1 the second; any other N signals
   "Octet number must be 0 or 1". */
2508 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2509 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2510 N defaults to 0 if omitted.
2514 Lisp_Object charset;
2517 CHECK_CHAR_COERCE_INT (character);
2519 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2521 if (NILP (n) || EQ (n, Qzero))
2522 return make_int (octet0);
2523 else if (EQ (n, make_int (1)))
2524 return make_int (octet1);
2526 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.  CHARACTER is checked and coerced with
   CHECK_CHAR_COERCE_INT and CHARSET resolved with Fget_charset.  The
   code point comes from charset_code_point, whose last argument is
   true exactly when DEFINED_ONLY is non-nil.  A non-negative code point
   is returned as a Lisp integer; the value returned for a negative one
   is not visible in this excerpt. */
2530 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2531 Return code-point of CHARACTER in specified CHARSET.
2533 (character, charset, defined_only))
2537 CHECK_CHAR_COERCE_INT (character);
2538 charset = Fget_charset (charset);
2539 code_point = charset_code_point (charset, XCHAR (character),
2540 !NILP (defined_only));
2541 if (code_point >= 0)
2542 return make_int (code_point);
/* Lisp primitive `split-char'.  The local charset and the result list
   rc are GC-protected with GCPRO2.  Two implementations are visible,
   presumably selected by a preprocessor conditional not shown here
   -- confirm:
   - one takes the code point from ENCODE_CHAR and, once per dimension,
     conses the low 8 bits of code_point onto rc (the step that shifts
     code_point between iterations is not visible), then puts the
     charset name in front;
   - the other splits the character with BREAKUP_CHAR and builds
     (NAME C1 C2) for a dimension-2 charset or (NAME C1) otherwise. */
2548 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2549 Return list of charset and one or two position-codes of CHARACTER.
2553 /* This function can GC */
2554 struct gcpro gcpro1, gcpro2;
2555 Lisp_Object charset = Qnil;
2556 Lisp_Object rc = Qnil;
2564 GCPRO2 (charset, rc);
2565 CHECK_CHAR_COERCE_INT (character);
2568 code_point = ENCODE_CHAR (XCHAR (character), charset);
2569 dimension = XCHARSET_DIMENSION (charset);
2570 while (dimension > 0)
2572 rc = Fcons (make_int (code_point & 255), rc);
2576 rc = Fcons (XCHARSET_NAME (charset), rc);
2578 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2580 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2582 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2586 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2595 #ifdef ENABLE_COMPOSITE_CHARS
2596 /************************************************************************/
2597 /* composite character functions */
2598 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation code below
   creates a new character in Vcharset_composite and records it in both
   hash tables; presumably it runs only when the lookup misses (the
   test itself is not visible here) -- confirm. */
2601 lookup_composite_char (Bufbyte *str, int len)
2603 Lisp_Object lispstr = make_string (str, len);
2604 Lisp_Object ch = Fgethash (lispstr,
2605 Vcomposite_char_string2char_hash_table,
/* Row 128 and above is out of range: the composite code space is full. */
2611 if (composite_char_row_next >= 128)
2612 signal_simple_error ("No more composite chars available", lispstr);
2613 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2614 composite_char_col_next);
/* Record the mapping in both directions. */
2615 Fputhash (make_char (emch), lispstr,
2616 Vcomposite_char_char2string_hash_table);
2617 Fputhash (lispstr, make_char (emch),
2618 Vcomposite_char_string2char_hash_table);
/* Advance the allocation cursor; columns wrap from 127 back to 32 and
   bump the row. */
2619 composite_char_col_next++;
2620 if (composite_char_col_next >= 128)
2622 composite_char_col_next = 32;
2623 composite_char_row_next++;
/* Look up composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires the
   entry to exist, so CH must have been registered beforehand. */
2632 composite_char_string (Emchar ch)
2634 Lisp_Object str = Fgethash (make_char (ch),
2635 Vcomposite_char_char2string_hash_table,
2637 assert (!UNBOUNDP (str));
/* Lisp primitive `make-composite-char' (declared with xxDEFUN).
   STRING must be a Lisp string; its data and length are handed to
   lookup_composite_char and the resulting character is returned. */
2641 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2642 Convert a string into a single composite character.
2643 The character is the result of overstriking all the characters in
2648 CHECK_STRING (string);
2649 return make_char (lookup_composite_char (XSTRING_DATA (string),
2650 XSTRING_LENGTH (string)));
/* Lisp primitive `composite-char-string' (declared with xxDEFUN).
   A character whose leading byte is not LEADING_BYTE_COMPOSITE signals
   "Must be composite char"; otherwise the string stored for it is
   returned via composite_char_string. */
2653 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2654 Return a string of the characters comprising a composite character.
2662 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2663 signal_simple_error ("Must be composite char", ch);
2664 return composite_char_string (emch);
2666 #endif /* ENABLE_COMPOSITE_CHARS */
2669 /************************************************************************/
2670 /* initialization */
2671 /************************************************************************/
/* Initialization hook for this file's symbols: sets up the charset
   lrecord implementation, registers each Lisp primitive with DEFSUBR,
   and interns with defsymbol the symbols used as property keywords,
   direction and conversion values, and charset names. */
2674 syms_of_mule_charset (void)
2676 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp-visible primitives. */
2678 DEFSUBR (Fcharsetp);
2679 DEFSUBR (Ffind_charset);
2680 DEFSUBR (Fget_charset);
2681 DEFSUBR (Fcharset_list);
2682 DEFSUBR (Fcharset_name);
2683 DEFSUBR (Fmake_charset);
2684 DEFSUBR (Fmake_reverse_direction_charset);
2685 /* DEFSUBR (Freverse_direction_charset); */
2686 DEFSUBR (Fdefine_charset_alias);
2687 DEFSUBR (Fcharset_from_attributes);
2688 DEFSUBR (Fcharset_short_name);
2689 DEFSUBR (Fcharset_long_name);
2690 DEFSUBR (Fcharset_description);
2691 DEFSUBR (Fcharset_dimension);
2692 DEFSUBR (Fcharset_property);
2693 DEFSUBR (Fcharset_id);
2694 DEFSUBR (Fset_charset_ccl_program);
2695 DEFSUBR (Fset_charset_registry);
2698 DEFSUBR (Fcharset_mapping_table);
2699 DEFSUBR (Fset_charset_mapping_table);
2701 DEFSUBR (Fsave_charset_mapping_table);
2702 DEFSUBR (Freset_charset_mapping_table);
2703 #endif /* HAVE_CHISE */
2704 DEFSUBR (Fdecode_char);
2705 DEFSUBR (Fdecode_builtin_char);
2706 DEFSUBR (Fencode_char);
2709 DEFSUBR (Fmake_char);
2710 DEFSUBR (Fchar_charset);
2711 DEFSUBR (Fchar_octet);
2712 DEFSUBR (Fsplit_char);
2714 #ifdef ENABLE_COMPOSITE_CHARS
2715 DEFSUBR (Fmake_composite_char);
2716 DEFSUBR (Fcomposite_char_string);
/* Property keywords accepted by `make-charset' and `charset-property'. */
2719 defsymbol (&Qcharsetp, "charsetp");
2720 defsymbol (&Qregistry, "registry");
2721 defsymbol (&Qfinal, "final");
2722 defsymbol (&Qgraphic, "graphic");
2723 defsymbol (&Qdirection, "direction");
2724 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2725 defsymbol (&Qshort_name, "short-name");
2726 defsymbol (&Qlong_name, "long-name");
2727 defsymbol (&Qiso_ir, "iso-ir");
2729 defsymbol (&Qmother, "mother");
2730 defsymbol (&Qmin_code, "min-code");
2731 defsymbol (&Qmax_code, "max-code");
2732 defsymbol (&Qcode_offset, "code-offset");
2733 defsymbol (&Qconversion, "conversion");
2734 defsymbol (&Q94x60, "94x60");
2735 defsymbol (&Q94x94x60, "94x94x60");
2736 defsymbol (&Qbig5_1, "big5-1");
2737 defsymbol (&Qbig5_2, "big5-2");
/* Values of the 'direction property. */
2740 defsymbol (&Ql2r, "l2r");
2741 defsymbol (&Qr2l, "r2l");
2743 /* Charsets, compatible with FSF 20.3
2744 Naming convention is Script-Charset[-Edition] */
2745 defsymbol (&Qascii, "ascii");
2746 defsymbol (&Qcontrol_1, "control-1");
2747 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2748 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2749 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2750 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2751 defsymbol (&Qthai_tis620, "thai-tis620");
2752 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2753 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2754 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2755 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2756 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2757 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2758 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2759 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2760 defsymbol (&Qmap_gb2312, "=gb2312");
2761 defsymbol (&Qmap_gb12345, "=gb12345");
2762 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2763 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2764 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2765 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2766 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2768 defsymbol (&Qmap_ucs, "=ucs");
2769 defsymbol (&Qucs, "ucs");
2770 defsymbol (&Qucs_bmp, "ucs-bmp");
2771 defsymbol (&Qucs_smp, "ucs-smp");
2772 defsymbol (&Qucs_sip, "ucs-sip");
2773 defsymbol (&Qlatin_viscii, "latin-viscii");
2774 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2775 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2776 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2777 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2778 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2779 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2780 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2781 defsymbol (&Qmap_big5, "=big5");
2782 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2784 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2785 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2787 defsymbol (&Qcomposite, "composite");
/* Set up the charset lookup state and two Lisp-visible variables.

   Visible steps, in order:
     - allocate a zero-filled `struct charset_lookup', store it in
       `chlook', and pass `chlook' together with
       `charset_lookup_description' to dump_add_root_struct_ptr;
     - set every slot of the lookup tables to Qnil;
     - seed the "next allocated leading byte" counters from the
       MIN_LEADING_BYTE_PRIVATE* constants;
     - set `leading_code_private_11' to PRE_LEADING_BYTE_PRIVATE_1 and
       declare it with DEFVAR_INT as "leading-code-private-11";
     - set `Vdefault_coded_charset_priority_list' to Qnil and declare it
       with DEFVAR_LISP as "default-coded-charset-priority-list".

   Takes no arguments.

   NOTE(review): several statements below cannot all be compiled against
   a single declaration of `struct charset_lookup' (see the per-section
   notes); presumably they sit in alternate preprocessor branches whose
   directives are not visible in this excerpt -- confirm against the
   full file before changing anything here.  */
2791 vars_of_mule_charset (void)
2798 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2799 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2801 /* Table of charsets indexed by leading byte. */
/* Every slot starts out as Qnil.  */
2802 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2803 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): the next two loop nests both clear
   `charset_by_attributes', first as a two-dimensional table and then
   as a three-dimensional one.  A single array cannot have both ranks,
   so these look like the two arms of a conditional-compilation block
   -- TODO confirm which configuration selects which arm.  */
2806 /* Table of charsets indexed by type/final-byte. */
2807 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2808 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2809 chlook->charset_by_attributes[i][j] = Qnil;
2811 /* Table of charsets indexed by type/final-byte/direction. */
2812 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2813 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2814 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2815 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Starting values for leading-byte allocation.
   NOTE(review): a single counter is seeded first, then separate
   1-byte and 2-byte counters; presumably only one of the two forms
   exists in any given build -- confirm.  */
2819 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2821 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2822 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* `leading_code_private_11' is assigned the same value both before and
   after the DEFVAR_INT that follows.
   NOTE(review): the second assignment looks redundant unless
   DEFVAR_INT can modify the variable -- confirm before removing.  */
2826 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2827 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2828 Leading-code of private TYPE9N charset of column-width 1.
2830 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2834 Vdefault_coded_charset_priority_list = Qnil;
2835 DEFVAR_LISP ("default-coded-charset-priority-list",
2836 &Vdefault_coded_charset_priority_list /*
2837 Default order of preferred coded-character-sets.
2843 complex_vars_of_mule_charset (void)
2845 staticpro (&Vcharset_hash_table);
2846 Vcharset_hash_table =
2847 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2849 /* Predefined character sets. We store them into variables for
2853 staticpro (&Vcharset_ucs);
2855 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2856 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2857 build_string ("UCS"),
2858 build_string ("UCS"),
2859 build_string ("ISO/IEC 10646"),
2861 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_ucs_bmp);
2864 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2865 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2866 build_string ("BMP"),
2867 build_string ("UCS-BMP"),
2868 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2870 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2871 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_ucs_smp);
2874 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2875 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2876 build_string ("SMP"),
2877 build_string ("UCS-SMP"),
2878 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2879 build_string ("UCS00-1"),
2880 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2881 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2882 staticpro (&Vcharset_ucs_sip);
2884 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2885 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2886 build_string ("SIP"),
2887 build_string ("UCS-SIP"),
2888 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2889 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2890 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2891 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2893 # define MIN_CHAR_THAI 0
2894 # define MAX_CHAR_THAI 0
2895 /* # define MIN_CHAR_HEBREW 0 */
2896 /* # define MAX_CHAR_HEBREW 0 */
2897 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2898 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2900 staticpro (&Vcharset_ascii);
2902 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2903 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2904 build_string ("ASCII"),
2905 build_string ("ASCII)"),
2906 build_string ("ASCII (ISO646 IRV)"),
2907 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2908 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2909 staticpro (&Vcharset_control_1);
2910 Vcharset_control_1 =
2911 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2912 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2913 build_string ("C1"),
2914 build_string ("Control characters"),
2915 build_string ("Control characters 128-191"),
2917 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2918 staticpro (&Vcharset_latin_iso8859_1);
2919 Vcharset_latin_iso8859_1 =
2920 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2921 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2922 build_string ("Latin-1"),
2923 build_string ("ISO8859-1 (Latin-1)"),
2924 build_string ("ISO8859-1 (Latin-1)"),
2925 build_string ("iso8859-1"),
2926 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2927 staticpro (&Vcharset_latin_iso8859_2);
2928 Vcharset_latin_iso8859_2 =
2929 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2930 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2931 build_string ("Latin-2"),
2932 build_string ("ISO8859-2 (Latin-2)"),
2933 build_string ("ISO8859-2 (Latin-2)"),
2934 build_string ("iso8859-2"),
2935 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2936 staticpro (&Vcharset_latin_iso8859_3);
2937 Vcharset_latin_iso8859_3 =
2938 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2939 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2940 build_string ("Latin-3"),
2941 build_string ("ISO8859-3 (Latin-3)"),
2942 build_string ("ISO8859-3 (Latin-3)"),
2943 build_string ("iso8859-3"),
2944 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2945 staticpro (&Vcharset_latin_iso8859_4);
2946 Vcharset_latin_iso8859_4 =
2947 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2948 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2949 build_string ("Latin-4"),
2950 build_string ("ISO8859-4 (Latin-4)"),
2951 build_string ("ISO8859-4 (Latin-4)"),
2952 build_string ("iso8859-4"),
2953 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2954 staticpro (&Vcharset_thai_tis620);
2955 Vcharset_thai_tis620 =
2956 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2957 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2958 build_string ("TIS620"),
2959 build_string ("TIS620 (Thai)"),
2960 build_string ("TIS620.2529 (Thai)"),
2961 build_string ("tis620"),
2962 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2963 staticpro (&Vcharset_greek_iso8859_7);
2964 Vcharset_greek_iso8859_7 =
2965 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2966 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2967 build_string ("ISO8859-7"),
2968 build_string ("ISO8859-7 (Greek)"),
2969 build_string ("ISO8859-7 (Greek)"),
2970 build_string ("iso8859-7"),
2971 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2972 staticpro (&Vcharset_arabic_iso8859_6);
2973 Vcharset_arabic_iso8859_6 =
2974 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2975 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2976 build_string ("ISO8859-6"),
2977 build_string ("ISO8859-6 (Arabic)"),
2978 build_string ("ISO8859-6 (Arabic)"),
2979 build_string ("iso8859-6"),
2980 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2981 staticpro (&Vcharset_hebrew_iso8859_8);
2982 Vcharset_hebrew_iso8859_8 =
2983 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2984 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2985 build_string ("ISO8859-8"),
2986 build_string ("ISO8859-8 (Hebrew)"),
2987 build_string ("ISO8859-8 (Hebrew)"),
2988 build_string ("iso8859-8"),
2990 0 /* MIN_CHAR_HEBREW */,
2991 0 /* MAX_CHAR_HEBREW */, 0, 32,
2992 Qnil, CONVERSION_IDENTICAL);
2993 staticpro (&Vcharset_katakana_jisx0201);
2994 Vcharset_katakana_jisx0201 =
2995 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2996 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2997 build_string ("JISX0201 Kana"),
2998 build_string ("JISX0201.1976 (Japanese Kana)"),
2999 build_string ("JISX0201.1976 Japanese Kana"),
3000 build_string ("jisx0201\\.1976"),
3001 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3002 staticpro (&Vcharset_latin_jisx0201);
3003 Vcharset_latin_jisx0201 =
3004 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3005 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3006 build_string ("JISX0201 Roman"),
3007 build_string ("JISX0201.1976 (Japanese Roman)"),
3008 build_string ("JISX0201.1976 Japanese Roman"),
3009 build_string ("jisx0201\\.1976"),
3010 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3011 staticpro (&Vcharset_cyrillic_iso8859_5);
3012 Vcharset_cyrillic_iso8859_5 =
3013 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3014 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3015 build_string ("ISO8859-5"),
3016 build_string ("ISO8859-5 (Cyrillic)"),
3017 build_string ("ISO8859-5 (Cyrillic)"),
3018 build_string ("iso8859-5"),
3019 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3020 staticpro (&Vcharset_latin_iso8859_9);
3021 Vcharset_latin_iso8859_9 =
3022 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3023 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3024 build_string ("Latin-5"),
3025 build_string ("ISO8859-9 (Latin-5)"),
3026 build_string ("ISO8859-9 (Latin-5)"),
3027 build_string ("iso8859-9"),
3028 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3030 staticpro (&Vcharset_jis_x0208);
3031 Vcharset_jis_x0208 =
3032 make_charset (LEADING_BYTE_JIS_X0208,
3033 Qmap_jis_x0208, 94, 2,
3034 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3035 build_string ("JIS X0208"),
3036 build_string ("JIS X0208 Common"),
3037 build_string ("JIS X0208 Common part"),
3038 build_string ("jisx0208\\.1990"),
3040 MIN_CHAR_JIS_X0208_1990,
3041 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3042 Qnil, CONVERSION_94x94);
3044 staticpro (&Vcharset_japanese_jisx0208_1978);
3045 Vcharset_japanese_jisx0208_1978 =
3046 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3047 Qmap_jis_x0208_1978, 94, 2,
3048 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3049 build_string ("JIS X0208:1978"),
3050 build_string ("JIS X0208:1978 (Japanese)"),
3052 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3053 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3060 CONVERSION_IDENTICAL);
3061 staticpro (&Vcharset_chinese_gb2312);
3062 Vcharset_chinese_gb2312 =
3063 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3064 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("GB2312"),
3066 build_string ("GB2312)"),
3067 build_string ("GB2312 Chinese simplified"),
3068 build_string ("gb2312"),
3069 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3070 staticpro (&Vcharset_chinese_gb12345);
3071 Vcharset_chinese_gb12345 =
3072 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3073 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3074 build_string ("G1"),
3075 build_string ("GB 12345)"),
3076 build_string ("GB 12345-1990"),
3077 build_string ("GB12345\\(\\.1990\\)?-0"),
3078 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3079 staticpro (&Vcharset_japanese_jisx0208);
3080 Vcharset_japanese_jisx0208 =
3081 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3082 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3083 build_string ("JISX0208"),
3084 build_string ("JIS X0208:1983 (Japanese)"),
3085 build_string ("JIS X0208:1983 Japanese Kanji"),
3086 build_string ("jisx0208\\.1983"),
3093 CONVERSION_IDENTICAL);
3095 staticpro (&Vcharset_japanese_jisx0208_1990);
3096 Vcharset_japanese_jisx0208_1990 =
3097 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3098 Qmap_jis_x0208_1990, 94, 2,
3099 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3100 build_string ("JISX0208-1990"),
3101 build_string ("JIS X0208:1990 (Japanese)"),
3102 build_string ("JIS X0208:1990 Japanese Kanji"),
3103 build_string ("jisx0208\\.1990"),
3105 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3106 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3107 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3108 Vcharset_jis_x0208 /* Qnil */,
3109 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3111 staticpro (&Vcharset_korean_ksc5601);
3112 Vcharset_korean_ksc5601 =
3113 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3114 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3115 build_string ("KSC5601"),
3116 build_string ("KSC5601 (Korean"),
3117 build_string ("KSC5601 Korean Hangul and Hanja"),
3118 build_string ("ksc5601"),
3119 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3120 staticpro (&Vcharset_japanese_jisx0212);
3121 Vcharset_japanese_jisx0212 =
3122 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3123 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3124 build_string ("JISX0212"),
3125 build_string ("JISX0212 (Japanese)"),
3126 build_string ("JISX0212 Japanese Supplement"),
3127 build_string ("jisx0212"),
3128 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3130 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3131 staticpro (&Vcharset_chinese_cns11643_1);
3132 Vcharset_chinese_cns11643_1 =
3133 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3134 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3135 build_string ("CNS11643-1"),
3136 build_string ("CNS11643-1 (Chinese traditional)"),
3138 ("CNS 11643 Plane 1 Chinese traditional"),
3139 build_string (CHINESE_CNS_PLANE_RE("1")),
3140 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3141 staticpro (&Vcharset_chinese_cns11643_2);
3142 Vcharset_chinese_cns11643_2 =
3143 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3144 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3145 build_string ("CNS11643-2"),
3146 build_string ("CNS11643-2 (Chinese traditional)"),
3148 ("CNS 11643 Plane 2 Chinese traditional"),
3149 build_string (CHINESE_CNS_PLANE_RE("2")),
3150 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3152 staticpro (&Vcharset_latin_tcvn5712);
3153 Vcharset_latin_tcvn5712 =
3154 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3155 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3156 build_string ("TCVN 5712"),
3157 build_string ("TCVN 5712 (VSCII-2)"),
3158 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3159 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3160 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3161 staticpro (&Vcharset_latin_viscii_lower);
3162 Vcharset_latin_viscii_lower =
3163 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3164 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3165 build_string ("VISCII lower"),
3166 build_string ("VISCII lower (Vietnamese)"),
3167 build_string ("VISCII lower (Vietnamese)"),
3168 build_string ("MULEVISCII-LOWER"),
3169 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3170 staticpro (&Vcharset_latin_viscii_upper);
3171 Vcharset_latin_viscii_upper =
3172 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3173 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3174 build_string ("VISCII upper"),
3175 build_string ("VISCII upper (Vietnamese)"),
3176 build_string ("VISCII upper (Vietnamese)"),
3177 build_string ("MULEVISCII-UPPER"),
3178 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3179 staticpro (&Vcharset_latin_viscii);
3180 Vcharset_latin_viscii =
3181 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3182 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3183 build_string ("VISCII"),
3184 build_string ("VISCII 1.1 (Vietnamese)"),
3185 build_string ("VISCII 1.1 (Vietnamese)"),
3186 build_string ("VISCII1\\.1"),
3187 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3188 staticpro (&Vcharset_chinese_big5);
3189 Vcharset_chinese_big5 =
3190 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3191 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3192 build_string ("Big5"),
3193 build_string ("Big5"),
3194 build_string ("Big5 Chinese traditional"),
3195 build_string ("big5-0"),
3197 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3198 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3200 staticpro (&Vcharset_ethiopic_ucs);
3201 Vcharset_ethiopic_ucs =
3202 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3203 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3204 build_string ("Ethiopic (UCS)"),
3205 build_string ("Ethiopic (UCS)"),
3206 build_string ("Ethiopic of UCS"),
3207 build_string ("Ethiopic-Unicode"),
3208 Qnil, 0x1200, 0x137F, 0, 0,
3209 Qnil, CONVERSION_IDENTICAL);
3211 staticpro (&Vcharset_chinese_big5_1);
3212 Vcharset_chinese_big5_1 =
3213 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3214 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3215 build_string ("Big5"),
3216 build_string ("Big5 (Level-1)"),
3218 ("Big5 Level-1 Chinese traditional"),
3219 build_string ("big5"),
3220 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3221 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3222 staticpro (&Vcharset_chinese_big5_2);
3223 Vcharset_chinese_big5_2 =
3224 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3225 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3226 build_string ("Big5"),
3227 build_string ("Big5 (Level-2)"),
3229 ("Big5 Level-2 Chinese traditional"),
3230 build_string ("big5"),
3231 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3232 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3234 #ifdef ENABLE_COMPOSITE_CHARS
3235 /* #### For simplicity, we put composite chars into a 96x96 charset.
3236 This is going to lead to problems because you can run out of
3237 room, esp. as we don't yet recycle numbers. */
3238 staticpro (&Vcharset_composite);
3239 Vcharset_composite =
3240 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3241 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3242 build_string ("Composite"),
3243 build_string ("Composite characters"),
3244 build_string ("Composite characters"),
3247 /* #### not dumped properly */
3248 composite_char_row_next = 32;
3249 composite_char_col_next = 32;
3251 Vcomposite_char_string2char_hash_table =
3252 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3253 Vcomposite_char_char2string_hash_table =
3254 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3255 staticpro (&Vcomposite_char_string2char_hash_table);
3256 staticpro (&Vcomposite_char_char2string_hash_table);
3257 #endif /* ENABLE_COMPOSITE_CHARS */