1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
180 decoding_table_put_char (Lisp_Object ccs,
181 int code_point, Lisp_Object character)
184 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
185 int dim = XCHARSET_DIMENSION (ccs);
188 XCHARSET_DECODING_TABLE (ccs)
189 = put_ccs_octet_table (table1, ccs, code_point, character);
193 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
195 table2 = put_ccs_octet_table (table2, ccs,
196 (unsigned char)code_point, character);
197 XCHARSET_DECODING_TABLE (ccs)
198 = put_ccs_octet_table (table1, ccs,
199 (unsigned char)(code_point >> 8), table2);
204 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
206 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
208 table3 = put_ccs_octet_table (table3, ccs,
209 (unsigned char)code_point, character);
210 table2 = put_ccs_octet_table (table2, ccs,
211 (unsigned char)(code_point >> 8), table3);
212 XCHARSET_DECODING_TABLE (ccs)
213 = put_ccs_octet_table (table1, ccs,
214 (unsigned char)(code_point >> 16), table2);
216 else /* if (dim == 4) */
219 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
221 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
223 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
225 table4 = put_ccs_octet_table (table4, ccs,
226 (unsigned char)code_point, character);
227 table3 = put_ccs_octet_table (table3, ccs,
228 (unsigned char)(code_point >> 8), table4);
229 table2 = put_ccs_octet_table (table2, ccs,
230 (unsigned char)(code_point >> 16), table3);
231 XCHARSET_DECODING_TABLE (ccs)
232 = put_ccs_octet_table (table1, ccs,
233 (unsigned char)(code_point >> 24), table2);
236 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
237 int dim = XCHARSET_DIMENSION (ccs);
238 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
241 int ccs_len = XVECTOR_LENGTH (v);
246 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
247 nv = XVECTOR_DATA(v)[i];
252 if (EQ (nv, character))
255 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
262 XVECTOR_DATA(v)[i] = character;
267 put_char_ccs_code_point (Lisp_Object character,
268 Lisp_Object ccs, Lisp_Object value)
270 if ( !( EQ (XCHARSET_NAME (ccs), Qmap_ucs)
271 && INTP (value) && (XINT (value) < 0xF0000)
272 && XCHAR (character) == XINT (value) )
275 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
279 { /* obsolete representation: value must be a list of bytes */
280 Lisp_Object ret = Fcar (value);
284 signal_simple_error ("Invalid value for coded-charset", value);
285 code_point = XINT (ret);
286 if (XCHARSET_GRAPHIC (ccs) == 1)
294 signal_simple_error ("Invalid value for coded-charset",
298 signal_simple_error ("Invalid value for coded-charset",
301 if (XCHARSET_GRAPHIC (ccs) == 1)
303 code_point = (code_point << 8) | j;
306 value = make_int (code_point);
308 else if (INTP (value))
310 code_point = XINT (value);
311 if (XCHARSET_GRAPHIC (ccs) == 1)
313 code_point &= 0x7F7F7F7F;
314 value = make_int (code_point);
318 signal_simple_error ("Invalid value for coded-charset", value);
322 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
325 decoding_table_remove_char (ccs, XINT (cpos));
328 decoding_table_put_char (ccs, code_point, character);
334 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
336 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
337 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
339 if (VECTORP (decoding_table))
341 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
345 decoding_table_remove_char (ccs, XINT (cpos));
348 if (CHAR_TABLEP (encoding_table))
350 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
371 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
372 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
407 Qvietnamese_viscii_lower,
408 Qvietnamese_viscii_upper,
418 Lisp_Object Ql2r, Qr2l;
420 Lisp_Object Vcharset_hash_table;
422 /* Composite characters are characters constructed by overstriking two
423 or more regular characters.
425 1) The old Mule implementation involves storing composite characters
426 in a buffer as a tag followed by all of the actual characters
427 used to make up the composite character. I think this is a bad
428 idea; it greatly complicates code that wants to handle strings
429 one character at a time because it has to deal with the possibility
430 of great big ungainly characters. It's much more reasonable to
431 simply store an index into a table of composite characters.
433 2) The current implementation only allows for 16,384 separate
434 composite characters over the lifetime of the XEmacs process.
435 This could become a potential problem if the user
436 edited lots of different files that use composite characters.
437 Due to FSF bogosity, increasing the number of allowable
438 composite characters under Mule would decrease the number
439 of possible faces that can exist. Mule already has shrunk
440 this to 2048, and further shrinkage would become uncomfortable.
441 No such problems exist in XEmacs.
443 Composite characters could be represented as 0x80 C1 C2 C3,
444 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
445 for slightly under 2^20 (one million) composite characters
446 over the XEmacs process lifetime, and you only need to
447 increase the size of a Mule character from 19 to 21 bits.
448 Or you could use 0x80 C1 C2 C3 C4, allowing for about
449 85 million (slightly over 2^26) composite characters. */
452 /************************************************************************/
453 /* Basic Emchar functions */
454 /************************************************************************/
456 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
457 string in STR. Returns the number of bytes stored.
458 Do not call this directly. Use the macro set_charptr_emchar() instead.
462 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
477 else if ( c <= 0x7ff )
479 *p++ = (c >> 6) | 0xc0;
480 *p++ = (c & 0x3f) | 0x80;
482 else if ( c <= 0xffff )
484 *p++ = (c >> 12) | 0xe0;
485 *p++ = ((c >> 6) & 0x3f) | 0x80;
486 *p++ = (c & 0x3f) | 0x80;
488 else if ( c <= 0x1fffff )
490 *p++ = (c >> 18) | 0xf0;
491 *p++ = ((c >> 12) & 0x3f) | 0x80;
492 *p++ = ((c >> 6) & 0x3f) | 0x80;
493 *p++ = (c & 0x3f) | 0x80;
495 else if ( c <= 0x3ffffff )
497 *p++ = (c >> 24) | 0xf8;
498 *p++ = ((c >> 18) & 0x3f) | 0x80;
499 *p++ = ((c >> 12) & 0x3f) | 0x80;
500 *p++ = ((c >> 6) & 0x3f) | 0x80;
501 *p++ = (c & 0x3f) | 0x80;
505 *p++ = (c >> 30) | 0xfc;
506 *p++ = ((c >> 24) & 0x3f) | 0x80;
507 *p++ = ((c >> 18) & 0x3f) | 0x80;
508 *p++ = ((c >> 12) & 0x3f) | 0x80;
509 *p++ = ((c >> 6) & 0x3f) | 0x80;
510 *p++ = (c & 0x3f) | 0x80;
513 BREAKUP_CHAR (c, charset, c1, c2);
514 lb = CHAR_LEADING_BYTE (c);
515 if (LEADING_BYTE_PRIVATE_P (lb))
516 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
518 if (EQ (charset, Vcharset_control_1))
527 /* Return the first character from a Mule-encoded string in STR,
528 assuming it's non-ASCII. Do not call this directly.
529 Use the macro charptr_emchar() instead. */
532 non_ascii_charptr_emchar (const Bufbyte *str)
545 else if ( b >= 0xf8 )
550 else if ( b >= 0xf0 )
555 else if ( b >= 0xe0 )
560 else if ( b >= 0xc0 )
570 for( ; len > 0; len-- )
573 ch = ( ch << 6 ) | ( b & 0x3f );
577 Bufbyte i0 = *str, i1, i2 = 0;
580 if (i0 == LEADING_BYTE_CONTROL_1)
581 return (Emchar) (*++str - 0x20);
583 if (LEADING_BYTE_PREFIX_P (i0))
588 charset = CHARSET_BY_LEADING_BYTE (i0);
589 if (XCHARSET_DIMENSION (charset) == 2)
592 return MAKE_CHAR (charset, i1, i2);
596 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
597 Do not call this directly. Use the macro valid_char_p() instead. */
601 non_ascii_valid_char_p (Emchar ch)
605 /* Must have only lowest 19 bits set */
609 f1 = CHAR_FIELD1 (ch);
610 f2 = CHAR_FIELD2 (ch);
611 f3 = CHAR_FIELD3 (ch);
617 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
618 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
619 f2 > MAX_CHAR_FIELD2_PRIVATE)
624 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
625 f2 <= MAX_CHAR_FIELD2_PRIVATE))
629 NOTE: This takes advantage of the fact that
630 FIELD2_TO_OFFICIAL_LEADING_BYTE and
631 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
633 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
634 if (EQ (charset, Qnil))
636 return (XCHARSET_CHARS (charset) == 96);
642 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
643 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
644 f1 > MAX_CHAR_FIELD1_PRIVATE)
646 if (f2 < 0x20 || f3 < 0x20)
649 #ifdef ENABLE_COMPOSITE_CHARS
650 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
652 if (UNBOUNDP (Fgethash (make_int (ch),
653 Vcomposite_char_char2string_hash_table,
658 #endif /* ENABLE_COMPOSITE_CHARS */
660 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
661 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
664 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
666 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
669 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
671 if (EQ (charset, Qnil))
673 return (XCHARSET_CHARS (charset) == 96);
679 /************************************************************************/
680 /* Basic string functions */
681 /************************************************************************/
683 /* Copy the character pointed to by SRC into DST. Do not call this
684 directly. Use the macro charptr_copy_char() instead.
685 Return the number of bytes copied. */
688 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
690 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
692 for (i = bytes; i; i--, dst++, src++)
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
713 str[0] = (Bufbyte) ch;
715 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
717 int c = Lstream_getc (stream);
718 bufpos_checking_assert (c >= 0);
719 *++strptr = (Bufbyte) c;
721 return charptr_emchar (str);
725 Lstream_fput_emchar (Lstream *stream, Emchar ch)
727 Bufbyte str[MAX_EMCHAR_LEN];
728 Bytecount len = set_charptr_emchar (str, ch);
729 return Lstream_write (stream, str, len);
733 Lstream_funget_emchar (Lstream *stream, Emchar ch)
735 Bufbyte str[MAX_EMCHAR_LEN];
736 Bytecount len = set_charptr_emchar (str, ch);
737 Lstream_unread (stream, str, len);
741 /************************************************************************/
743 /************************************************************************/
746 mark_charset (Lisp_Object obj)
748 Lisp_Charset *cs = XCHARSET (obj);
750 mark_object (cs->short_name);
751 mark_object (cs->long_name);
752 mark_object (cs->doc_string);
753 mark_object (cs->registry);
754 mark_object (cs->ccl_program);
756 mark_object (cs->decoding_table);
757 mark_object (cs->mother);
763 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
765 Lisp_Charset *cs = XCHARSET (obj);
769 error ("printing unreadable object #<charset %s 0x%x>",
770 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
773 write_c_string ("#<charset ", printcharfun);
774 print_internal (CHARSET_NAME (cs), printcharfun, 0);
775 write_c_string (" ", printcharfun);
776 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
777 write_c_string (" ", printcharfun);
778 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
779 write_c_string (" ", printcharfun);
780 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
781 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
783 CHARSET_DIMENSION (cs),
784 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
785 CHARSET_COLUMNS (cs),
786 CHARSET_GRAPHIC (cs),
788 write_c_string (buf, printcharfun);
789 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
790 sprintf (buf, " 0x%x>", cs->header.uid);
791 write_c_string (buf, printcharfun);
794 static const struct lrecord_description charset_description[] = {
795 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
796 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
797 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
803 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
809 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
810 mark_charset, print_charset, 0, 0, 0,
814 /* Make a new charset. */
815 /* #### SJT Should generic properties be allowed? */
817 make_charset (Charset_ID id, Lisp_Object name,
818 unsigned short chars, unsigned char dimension,
819 unsigned char columns, unsigned char graphic,
820 Bufbyte final, unsigned char direction, Lisp_Object short_name,
821 Lisp_Object long_name, Lisp_Object doc,
823 Lisp_Object decoding_table,
824 Emchar min_code, Emchar max_code,
825 Emchar code_offset, unsigned char byte_offset,
826 Lisp_Object mother, unsigned char conversion)
829 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
833 XSETCHARSET (obj, cs);
835 CHARSET_ID (cs) = id;
836 CHARSET_NAME (cs) = name;
837 CHARSET_SHORT_NAME (cs) = short_name;
838 CHARSET_LONG_NAME (cs) = long_name;
839 CHARSET_CHARS (cs) = chars;
840 CHARSET_DIMENSION (cs) = dimension;
841 CHARSET_DIRECTION (cs) = direction;
842 CHARSET_COLUMNS (cs) = columns;
843 CHARSET_GRAPHIC (cs) = graphic;
844 CHARSET_FINAL (cs) = final;
845 CHARSET_DOC_STRING (cs) = doc;
846 CHARSET_REGISTRY (cs) = reg;
847 CHARSET_CCL_PROGRAM (cs) = Qnil;
848 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
850 CHARSET_DECODING_TABLE(cs) = Qunbound;
851 CHARSET_MIN_CODE (cs) = min_code;
852 CHARSET_MAX_CODE (cs) = max_code;
853 CHARSET_CODE_OFFSET (cs) = code_offset;
854 CHARSET_BYTE_OFFSET (cs) = byte_offset;
855 CHARSET_MOTHER (cs) = mother;
856 CHARSET_CONVERSION (cs) = conversion;
860 if (id == LEADING_BYTE_ASCII)
861 CHARSET_REP_BYTES (cs) = 1;
863 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
865 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
870 /* some charsets do not have final characters. This includes
871 ASCII, Control-1, Composite, and the two faux private
873 unsigned char iso2022_type
874 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
876 if (code_offset == 0)
878 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
879 chlook->charset_by_attributes[iso2022_type][final] = obj;
883 (chlook->charset_by_attributes[iso2022_type][final][direction]));
884 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
888 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
889 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
891 /* Some charsets are "faux" and don't have names or really exist at
892 all except in the leading-byte table. */
894 Fputhash (name, obj, Vcharset_hash_table);
899 get_unallocated_leading_byte (int dimension)
904 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
907 lb = chlook->next_allocated_leading_byte++;
911 if (chlook->next_allocated_1_byte_leading_byte >
912 MAX_LEADING_BYTE_PRIVATE_1)
915 lb = chlook->next_allocated_1_byte_leading_byte++;
919 /* awfully fragile, but correct */
920 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
921 if (chlook->next_allocated_2_byte_leading_byte == 0)
923 if (chlook->next_allocated_2_byte_leading_byte >
924 MAX_LEADING_BYTE_PRIVATE_2)
928 lb = chlook->next_allocated_2_byte_leading_byte++;
934 ("No more character sets free for this dimension",
935 make_int (dimension));
941 /* Number of Big5 characters which have the same code in 1st byte. */
943 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
946 decode_ccs_conversion (int conv_type, int code_point)
948 if ( conv_type == CONVERSION_IDENTICAL )
952 if ( conv_type == CONVERSION_94x60 )
954 int row = code_point >> 8;
955 int cell = code_point & 255;
959 else if (row < 16 + 32 + 30)
960 return (row - (16 + 32)) * 94 + cell - 33;
961 else if (row < 18 + 32 + 30)
963 else if (row < 18 + 32 + 60)
964 return (row - (18 + 32)) * 94 + cell - 33;
966 else if ( conv_type == CONVERSION_94x94x60 )
968 int plane = code_point >> 16;
969 int row = (code_point >> 8) & 255;
970 int cell = code_point & 255;
974 else if (row < 16 + 32 + 30)
976 (plane - 33) * 94 * 60
977 + (row - (16 + 32)) * 94
979 else if (row < 18 + 32 + 30)
981 else if (row < 18 + 32 + 60)
983 (plane - 33) * 94 * 60
984 + (row - (18 + 32)) * 94
987 else if ( conv_type == CONVERSION_BIG5_1 )
990 = (((code_point >> 8) & 0x7F) - 33) * 94
991 + (( code_point & 0x7F) - 33);
992 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
993 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
995 b2 += b2 < 0x3F ? 0x40 : 0x62;
996 return (b1 << 8) | b2;
998 else if ( conv_type == CONVERSION_BIG5_2 )
1001 = (((code_point >> 8) & 0x7F) - 33) * 94
1002 + (( code_point & 0x7F) - 33)
1003 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1004 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1005 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1007 b2 += b2 < 0x3F ? 0x40 : 0x62;
1008 return (b1 << 8) | b2;
1014 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1016 int dim = XCHARSET_DIMENSION (ccs);
1017 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1018 Emchar char_id = -1;
1025 = get_ccs_octet_table (decoding_table, ccs,
1026 (code_point >> (dim * 8)) & 255);
1028 if (CHARP (decoding_table))
1029 return XCHAR (decoding_table);
1031 if (EQ (decoding_table, Qunloaded))
1033 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1035 #endif /* HAVE_CHISE */
1038 else if ( !without_inheritance
1039 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1042 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1046 code += XCHARSET_CODE_OFFSET(ccs);
1047 if ( EQ (mother, Vcharset_ucs) )
1048 return DECODE_CHAR (mother, code, without_inheritance);
1050 return decode_defined_char (mother, code,
1051 without_inheritance);
1058 decode_builtin_char (Lisp_Object charset, int code_point)
1060 Lisp_Object mother = XCHARSET_MOTHER (charset);
1063 if ( XCHARSET_MAX_CODE (charset) > 0 )
1065 if ( CHARSETP (mother) )
1068 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1073 decode_builtin_char (mother,
1074 code + XCHARSET_CODE_OFFSET(charset));
1081 = (XCHARSET_DIMENSION (charset) == 1
1083 code_point - XCHARSET_BYTE_OFFSET (charset)
1085 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1086 * XCHARSET_CHARS (charset)
1087 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1088 + XCHARSET_CODE_OFFSET (charset);
1089 if ((cid < XCHARSET_MIN_CODE (charset))
1090 || (XCHARSET_MAX_CODE (charset) < cid))
1095 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1097 if (XCHARSET_DIMENSION (charset) == 1)
1099 switch (XCHARSET_CHARS (charset))
1103 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1106 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1114 switch (XCHARSET_CHARS (charset))
1117 return MIN_CHAR_94x94
1118 + (final - '0') * 94 * 94
1119 + (((code_point >> 8) & 0x7F) - 33) * 94
1120 + ((code_point & 0x7F) - 33);
1122 return MIN_CHAR_96x96
1123 + (final - '0') * 96 * 96
1124 + (((code_point >> 8) & 0x7F) - 32) * 96
1125 + ((code_point & 0x7F) - 32);
1137 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1139 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1142 if ( CHAR_TABLEP (encoding_table)
1143 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1148 Lisp_Object mother = XCHARSET_MOTHER (charset);
1149 int min = XCHARSET_MIN_CODE (charset);
1150 int max = XCHARSET_MAX_CODE (charset);
1153 if ( CHARSETP (mother) )
1155 if (XCHARSET_FINAL (charset) >= '0')
1156 code = charset_code_point (mother, ch, 1);
1158 code = charset_code_point (mother, ch, defined_only);
1160 else if (defined_only)
1162 else if ( ((max == 0) && CHARSETP (mother)
1163 && (XCHARSET_FINAL (charset) == 0))
1164 || ((min <= ch) && (ch <= max)) )
1166 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1167 || ((min <= code) && (code <= max)) )
1169 int d = code - XCHARSET_CODE_OFFSET (charset);
1171 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1173 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1175 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1177 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1180 int cell = d % 94 + 33;
1186 return (row << 8) | cell;
1188 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1190 int B1 = d >> 8, B2 = d & 0xFF;
1192 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1193 - (B2 < 0x7F ? 0x40 : 0x62);
1197 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1200 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1202 int B1 = d >> 8, B2 = d & 0xFF;
1204 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1205 - (B2 < 0x7F ? 0x40 : 0x62);
1209 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1210 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1213 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1214 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1215 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1216 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1217 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1219 int plane = d / (94 * 60) + 33;
1220 int row = (d % (94 * 60)) / 94;
1221 int cell = d % 94 + 33;
1227 return (plane << 16) | (row << 8) | cell;
1229 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1231 ( (d / (94 * 94) + 33) << 16)
1232 | ((d / 94 % 94 + 33) << 8)
1234 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1236 ( (d / (96 * 96) + 32) << 16)
1237 | ((d / 96 % 96 + 32) << 8)
1239 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1241 ( (d / (94 * 94 * 94) + 33) << 24)
1242 | ((d / (94 * 94) % 94 + 33) << 16)
1243 | ((d / 94 % 94 + 33) << 8)
1245 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1247 ( (d / (96 * 96 * 96) + 32) << 24)
1248 | ((d / (96 * 96) % 96 + 32) << 16)
1249 | ((d / 96 % 96 + 32) << 8)
1253 printf ("Unknown CCS-conversion %d is specified!",
1254 XCHARSET_CONVERSION (charset));
1258 else if (defined_only)
1260 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1261 ( XCHARSET_MIN_CODE (charset) == 0 )
1263 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1264 (XCHARSET_CODE_OFFSET (charset)
1265 == XCHARSET_MIN_CODE (charset))
1270 if (XCHARSET_DIMENSION (charset) == 1)
1272 if (XCHARSET_CHARS (charset) == 94)
1274 if (((d = ch - (MIN_CHAR_94
1275 + (XCHARSET_FINAL (charset) - '0') * 94))
1280 else if (XCHARSET_CHARS (charset) == 96)
1282 if (((d = ch - (MIN_CHAR_96
1283 + (XCHARSET_FINAL (charset) - '0') * 96))
1291 else if (XCHARSET_DIMENSION (charset) == 2)
1293 if (XCHARSET_CHARS (charset) == 94)
1295 if (((d = ch - (MIN_CHAR_94x94
1297 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1300 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1302 else if (XCHARSET_CHARS (charset) == 96)
1304 if (((d = ch - (MIN_CHAR_96x96
1306 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1309 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1320 encode_char_2 (Emchar ch, Lisp_Object* charset)
1322 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
1325 while (!NILP (charsets))
1327 *charset = Ffind_charset (Fcar (charsets));
1328 if ( !NILP (*charset)
1329 && (XCHARSET_DIMENSION (*charset) <= 2) )
1331 code_point = charset_code_point (*charset, ch, 0);
1332 if (code_point >= 0)
1335 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1336 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1338 code_point = encode_char_2_search_children (ch, charset);
1339 if (code_point >= 0)
1343 charsets = Fcdr (charsets);
1346 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1347 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1349 charsets = Vdefault_coded_charset_priority_list;
1350 while (!NILP (charsets))
1352 *charset = Ffind_charset (Fcar (charsets));
1353 if ( !NILP (*charset)
1354 && (XCHARSET_DIMENSION (*charset) <= 2) )
1356 code_point = encode_char_2_search_children (ch, charset);
1357 if (code_point >= 0)
1360 charsets = Fcdr (charsets);
1364 /* otherwise --- maybe for bootstrap */
1365 return encode_builtin_char_1 (ch, charset);
1369 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1371 if (c <= MAX_CHAR_BASIC_LATIN)
1373 *charset = Vcharset_ascii;
1378 *charset = Vcharset_control_1;
1383 *charset = Vcharset_latin_iso8859_1;
1387 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1389 *charset = Vcharset_hebrew_iso8859_8;
1390 return c - MIN_CHAR_HEBREW + 0x20;
1393 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1395 *charset = Vcharset_thai_tis620;
1396 return c - MIN_CHAR_THAI + 0x20;
1399 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1400 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1402 return list2 (Vcharset_katakana_jisx0201,
1403 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1406 else if (c <= MAX_CHAR_BMP)
1408 *charset = Vcharset_ucs_bmp;
1411 else if (c <= MAX_CHAR_SMP)
1413 *charset = Vcharset_ucs_smp;
1414 return c - MIN_CHAR_SMP;
1416 else if (c <= MAX_CHAR_SIP)
1418 *charset = Vcharset_ucs_sip;
1419 return c - MIN_CHAR_SIP;
1421 else if (c < MIN_CHAR_94)
1423 *charset = Vcharset_ucs;
1426 else if (c <= MAX_CHAR_94)
1428 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1429 ((c - MIN_CHAR_94) / 94) + '0',
1430 CHARSET_LEFT_TO_RIGHT);
1431 if (!NILP (*charset))
1432 return ((c - MIN_CHAR_94) % 94) + 33;
1435 *charset = Vcharset_ucs;
1439 else if (c <= MAX_CHAR_96)
1441 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1442 ((c - MIN_CHAR_96) / 96) + '0',
1443 CHARSET_LEFT_TO_RIGHT);
1444 if (!NILP (*charset))
1445 return ((c - MIN_CHAR_96) % 96) + 32;
1448 *charset = Vcharset_ucs;
1452 else if (c <= MAX_CHAR_94x94)
1455 = CHARSET_BY_ATTRIBUTES (94, 2,
1456 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1457 CHARSET_LEFT_TO_RIGHT);
1458 if (!NILP (*charset))
1459 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1460 | (((c - MIN_CHAR_94x94) % 94) + 33);
1463 *charset = Vcharset_ucs;
1467 else if (c <= MAX_CHAR_96x96)
1470 = CHARSET_BY_ATTRIBUTES (96, 2,
1471 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1472 CHARSET_LEFT_TO_RIGHT);
1473 if (!NILP (*charset))
1474 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1475 | (((c - MIN_CHAR_96x96) % 96) + 32);
1478 *charset = Vcharset_ucs;
1484 *charset = Vcharset_ucs;
1489 Lisp_Object Vdefault_coded_charset_priority_list;
1490 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1491 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1495 /************************************************************************/
1496 /* Basic charset Lisp functions */
1497 /************************************************************************/
1499 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1500 Return non-nil if OBJECT is a charset.
1504 return CHARSETP (object) ? Qt : Qnil;
1507 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1508 Retrieve the charset of the given name.
1509 If CHARSET-OR-NAME is a charset object, it is simply returned.
1510 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1511 nil is returned. Otherwise the associated charset object is returned.
1515 if (CHARSETP (charset_or_name))
1516 return charset_or_name;
1518 CHECK_SYMBOL (charset_or_name);
1519 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1522 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1523 Retrieve the charset of the given name.
1524 Same as `find-charset' except an error is signalled if there is no such
1525 charset instead of returning nil.
1529 Lisp_Object charset = Ffind_charset (name);
1532 signal_simple_error ("No such charset", name);
1536 /* We store the charsets in hash tables with the names as the key and the
1537 actual charset object as the value. Occasionally we need to use them
1538 in a list format. These routines provide us with that. */
1539 struct charset_list_closure
1541 Lisp_Object *charset_list;
1545 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1546 void *charset_list_closure)
1548 /* This function can GC */
1549 struct charset_list_closure *chcl =
1550 (struct charset_list_closure*) charset_list_closure;
1551 Lisp_Object *charset_list = chcl->charset_list;
1553 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1557 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1558 Return a list of the names of all defined charsets.
1562 Lisp_Object charset_list = Qnil;
1563 struct gcpro gcpro1;
1564 struct charset_list_closure charset_list_closure;
1566 GCPRO1 (charset_list);
1567 charset_list_closure.charset_list = &charset_list;
1568 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1569 &charset_list_closure);
1572 return charset_list;
1575 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1576 Return the name of charset CHARSET.
1580 return XCHARSET_NAME (Fget_charset (charset));
1583 /* #### SJT Should generic properties be allowed? */
1584 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1585 Define a new character set.
1586 This function is for use with Mule support.
1587 NAME is a symbol, the name by which the character set is normally referred.
1588 DOC-STRING is a string describing the character set.
1589 PROPS is a property list, describing the specific nature of the
1590 character set. Recognized properties are:
1592 'short-name Short version of the charset name (ex: Latin-1)
1593 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1594 'registry A regular expression matching the font registry field for
1596 'dimension Number of octets used to index a character in this charset.
1597 Either 1 or 2. Defaults to 1.
1598 If UTF-2000 feature is enabled, 3 or 4 are also available.
1599 'columns Number of columns used to display a character in this charset.
1600 Only used in TTY mode. (Under X, the actual width of a
1601 character can be derived from the font used to display the
1602 characters.) If unspecified, defaults to the dimension
1603 (this is almost always the correct value).
1604 'chars Number of characters in each dimension (94 or 96).
1605 Defaults to 94. Note that if the dimension is 2, the
1606 character set thus described is 94x94 or 96x96.
1607 If UTF-2000 feature is enabled, 128 or 256 are also available.
1608 'final Final byte of ISO 2022 escape sequence. Must be
1609 supplied. Each combination of (DIMENSION, CHARS) defines a
1610 separate namespace for final bytes. Note that ISO
1611 2022 restricts the final byte to the range
1612 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1613 dimension == 2. Note also that final bytes in the range
1614 0x30 - 0x3F are reserved for user-defined (not official)
1616 'graphic 0 (use left half of font on output) or 1 (use right half
1617 of font on output). Defaults to 0. For example, for
1618 a font whose registry is ISO8859-1, the left half
1619 (octets 0x20 - 0x7F) is the `ascii' character set, while
1620 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1621 character set. With 'graphic set to 0, the octets
1622 will have their high bit cleared; with it set to 1,
1623 the octets will have their high bit set.
1624 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1626 'ccl-program A compiled CCL program used to convert a character in
1627 this charset into an index into the font. This is in
1628 addition to the 'graphic property. The CCL program
1629 is passed the octets of the character, with the high
1630 bit cleared and set depending upon whether the value
1631 of the 'graphic property is 0 or 1.
1632 'mother [UTF-2000 only] Base coded-charset.
1633 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1634 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1635 'code-offset [UTF-2000 only] Offset for a code-point of a base
1637 'conversion [UTF-2000 only] Conversion for a code-point of a base
1638 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1640 (name, doc_string, props))
1642 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1643 int direction = CHARSET_LEFT_TO_RIGHT;
1644 Lisp_Object registry = Qnil;
1645 Lisp_Object charset;
1646 Lisp_Object ccl_program = Qnil;
1647 Lisp_Object short_name = Qnil, long_name = Qnil;
1648 Lisp_Object mother = Qnil;
1649 int min_code = 0, max_code = 0, code_offset = 0;
1650 int byte_offset = -1;
1653 CHECK_SYMBOL (name);
1654 if (!NILP (doc_string))
1655 CHECK_STRING (doc_string);
1657 charset = Ffind_charset (name);
1658 if (!NILP (charset))
1659 signal_simple_error ("Cannot redefine existing charset", name);
1662 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1664 if (EQ (keyword, Qshort_name))
1666 CHECK_STRING (value);
1670 else if (EQ (keyword, Qlong_name))
1672 CHECK_STRING (value);
1676 else if (EQ (keyword, Qiso_ir))
1680 id = - XINT (value);
1684 else if (EQ (keyword, Qdimension))
1687 dimension = XINT (value);
1688 if (dimension < 1 ||
1695 signal_simple_error ("Invalid value for 'dimension", value);
1698 else if (EQ (keyword, Qchars))
1701 chars = XINT (value);
1702 if (chars != 94 && chars != 96
1704 && chars != 128 && chars != 256
1707 signal_simple_error ("Invalid value for 'chars", value);
1710 else if (EQ (keyword, Qcolumns))
1713 columns = XINT (value);
1714 if (columns != 1 && columns != 2)
1715 signal_simple_error ("Invalid value for 'columns", value);
1718 else if (EQ (keyword, Qgraphic))
1721 graphic = XINT (value);
1729 signal_simple_error ("Invalid value for 'graphic", value);
1732 else if (EQ (keyword, Qregistry))
1734 CHECK_STRING (value);
1738 else if (EQ (keyword, Qdirection))
1740 if (EQ (value, Ql2r))
1741 direction = CHARSET_LEFT_TO_RIGHT;
1742 else if (EQ (value, Qr2l))
1743 direction = CHARSET_RIGHT_TO_LEFT;
1745 signal_simple_error ("Invalid value for 'direction", value);
1748 else if (EQ (keyword, Qfinal))
1750 CHECK_CHAR_COERCE_INT (value);
1751 final = XCHAR (value);
1752 if (final < '0' || final > '~')
1753 signal_simple_error ("Invalid value for 'final", value);
1757 else if (EQ (keyword, Qmother))
1759 mother = Fget_charset (value);
1762 else if (EQ (keyword, Qmin_code))
1765 min_code = XUINT (value);
1768 else if (EQ (keyword, Qmax_code))
1771 max_code = XUINT (value);
1774 else if (EQ (keyword, Qcode_offset))
1777 code_offset = XUINT (value);
1780 else if (EQ (keyword, Qconversion))
1782 if (EQ (value, Q94x60))
1783 conversion = CONVERSION_94x60;
1784 else if (EQ (value, Q94x94x60))
1785 conversion = CONVERSION_94x94x60;
1786 else if (EQ (value, Qbig5_1))
1787 conversion = CONVERSION_BIG5_1;
1788 else if (EQ (value, Qbig5_2))
1789 conversion = CONVERSION_BIG5_2;
1791 signal_simple_error ("Unrecognized conversion", value);
1795 else if (EQ (keyword, Qccl_program))
1797 struct ccl_program test_ccl;
1799 if (setup_ccl_program (&test_ccl, value) < 0)
1800 signal_simple_error ("Invalid value for 'ccl-program", value);
1801 ccl_program = value;
1805 signal_simple_error ("Unrecognized property", keyword);
1811 error ("'final must be specified");
1813 if (dimension == 2 && final > 0x5F)
1815 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1818 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1819 CHARSET_LEFT_TO_RIGHT)) ||
1820 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1821 CHARSET_RIGHT_TO_LEFT)))
1823 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1826 id = get_unallocated_leading_byte (dimension);
1828 if (NILP (doc_string))
1829 doc_string = build_string ("");
1831 if (NILP (registry))
1832 registry = build_string ("");
1834 if (NILP (short_name))
1835 XSETSTRING (short_name, XSYMBOL (name)->name);
1837 if (NILP (long_name))
1838 long_name = doc_string;
1841 columns = dimension;
1843 if (byte_offset < 0)
1847 else if (chars == 96)
1853 charset = make_charset (id, name, chars, dimension, columns, graphic,
1854 final, direction, short_name, long_name,
1855 doc_string, registry,
1856 Qnil, min_code, max_code, code_offset, byte_offset,
1857 mother, conversion);
1858 if (!NILP (ccl_program))
1859 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1863 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1865 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1866 NEW-NAME is the name of the new charset. Return the new charset.
1868 (charset, new_name))
1870 Lisp_Object new_charset = Qnil;
1871 int id, chars, dimension, columns, graphic, final;
1873 Lisp_Object registry, doc_string, short_name, long_name;
1876 charset = Fget_charset (charset);
1877 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1878 signal_simple_error ("Charset already has reverse-direction charset",
1881 CHECK_SYMBOL (new_name);
1882 if (!NILP (Ffind_charset (new_name)))
1883 signal_simple_error ("Cannot redefine existing charset", new_name);
1885 cs = XCHARSET (charset);
1887 chars = CHARSET_CHARS (cs);
1888 dimension = CHARSET_DIMENSION (cs);
1889 columns = CHARSET_COLUMNS (cs);
1890 id = get_unallocated_leading_byte (dimension);
1892 graphic = CHARSET_GRAPHIC (cs);
1893 final = CHARSET_FINAL (cs);
1894 direction = CHARSET_RIGHT_TO_LEFT;
1895 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1896 direction = CHARSET_LEFT_TO_RIGHT;
1897 doc_string = CHARSET_DOC_STRING (cs);
1898 short_name = CHARSET_SHORT_NAME (cs);
1899 long_name = CHARSET_LONG_NAME (cs);
1900 registry = CHARSET_REGISTRY (cs);
1902 new_charset = make_charset (id, new_name, chars, dimension, columns,
1903 graphic, final, direction, short_name, long_name,
1904 doc_string, registry,
1906 CHARSET_DECODING_TABLE(cs),
1907 CHARSET_MIN_CODE(cs),
1908 CHARSET_MAX_CODE(cs),
1909 CHARSET_CODE_OFFSET(cs),
1910 CHARSET_BYTE_OFFSET(cs),
1912 CHARSET_CONVERSION (cs)
1914 Qnil, 0, 0, 0, 0, Qnil, 0
1918 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1919 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1924 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1925 Define symbol ALIAS as an alias for CHARSET.
1929 CHECK_SYMBOL (alias);
1930 charset = Fget_charset (charset);
1931 return Fputhash (alias, charset, Vcharset_hash_table);
1934 /* #### Reverse direction charsets not yet implemented. */
1936 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1938 Return the reverse-direction charset parallel to CHARSET, if any.
1939 This is the charset with the same properties (in particular, the same
1940 dimension, number of characters per dimension, and final byte) as
1941 CHARSET but whose characters are displayed in the opposite direction.
1945 charset = Fget_charset (charset);
1946 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1950 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1951 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1952 If DIRECTION is omitted, both directions will be checked (left-to-right
1953 will be returned if character sets exist for both directions).
1955 (dimension, chars, final, direction))
1957 int dm, ch, fi, di = -1;
1958 Lisp_Object obj = Qnil;
1960 CHECK_INT (dimension);
1961 dm = XINT (dimension);
1962 if (dm < 1 || dm > 2)
1963 signal_simple_error ("Invalid value for DIMENSION", dimension);
1967 if (ch != 94 && ch != 96)
1968 signal_simple_error ("Invalid value for CHARS", chars);
1970 CHECK_CHAR_COERCE_INT (final);
1972 if (fi < '0' || fi > '~')
1973 signal_simple_error ("Invalid value for FINAL", final);
1975 if (EQ (direction, Ql2r))
1976 di = CHARSET_LEFT_TO_RIGHT;
1977 else if (EQ (direction, Qr2l))
1978 di = CHARSET_RIGHT_TO_LEFT;
1979 else if (!NILP (direction))
1980 signal_simple_error ("Invalid value for DIRECTION", direction);
1982 if (dm == 2 && fi > 0x5F)
1984 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1988 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1990 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1993 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1996 return XCHARSET_NAME (obj);
2000 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2001 Return short name of CHARSET.
2005 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2008 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2009 Return long name of CHARSET.
2013 return XCHARSET_LONG_NAME (Fget_charset (charset));
2016 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2017 Return description of CHARSET.
2021 return XCHARSET_DOC_STRING (Fget_charset (charset));
2024 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2025 Return dimension of CHARSET.
2029 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2032 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2033 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2034 Recognized properties are those listed in `make-charset', as well as
2035 'name and 'doc-string.
2041 charset = Fget_charset (charset);
2042 cs = XCHARSET (charset);
2044 CHECK_SYMBOL (prop);
2045 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2046 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2047 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2048 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2049 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2050 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2051 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2052 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2053 Qnil : make_char (CHARSET_FINAL (cs));
2054 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2055 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2056 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2057 if (EQ (prop, Qdirection))
2058 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2059 if (EQ (prop, Qreverse_direction_charset))
2061 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2062 /* #### Is this translation OK? If so, error checking sufficient? */
2063 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2066 if (EQ (prop, Qmother))
2067 return CHARSET_MOTHER (cs);
2068 if (EQ (prop, Qmin_code))
2069 return make_int (CHARSET_MIN_CODE (cs));
2070 if (EQ (prop, Qmax_code))
2071 return make_int (CHARSET_MAX_CODE (cs));
2073 signal_simple_error ("Unrecognized charset property name", prop);
2074 return Qnil; /* not reached */
2077 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2078 Return charset identification number of CHARSET.
2082 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2085 /* #### We need to figure out which properties we really want to
2088 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2089 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2091 (charset, ccl_program))
2093 struct ccl_program test_ccl;
2095 charset = Fget_charset (charset);
2096 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2097 signal_simple_error ("Invalid ccl-program", ccl_program);
2098 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2103 invalidate_charset_font_caches (Lisp_Object charset)
2105 /* Invalidate font cache entries for charset on all devices. */
2106 Lisp_Object devcons, concons, hash_table;
2107 DEVICE_LOOP_NO_BREAK (devcons, concons)
2109 struct device *d = XDEVICE (XCAR (devcons));
2110 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2111 if (!UNBOUNDP (hash_table))
2112 Fclrhash (hash_table);
2116 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2117 Set the 'registry property of CHARSET to REGISTRY.
2119 (charset, registry))
2121 charset = Fget_charset (charset);
2122 CHECK_STRING (registry);
2123 XCHARSET_REGISTRY (charset) = registry;
2124 invalidate_charset_font_caches (charset);
2125 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2130 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2131 Return mapping-table of CHARSET.
2135 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2138 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2139 Set mapping-table of CHARSET to TABLE.
2143 struct Lisp_Charset *cs;
2147 charset = Fget_charset (charset);
2148 cs = XCHARSET (charset);
2152 CHARSET_DECODING_TABLE(cs) = Qnil;
2155 else if (VECTORP (table))
2157 int ccs_len = CHARSET_BYTE_SIZE (cs);
2158 int ret = decoding_table_check_elements (table,
2159 CHARSET_DIMENSION (cs),
2164 signal_simple_error ("Too big table", table);
2166 signal_simple_error ("Invalid element is found", table);
2168 signal_simple_error ("Something wrong", table);
2170 CHARSET_DECODING_TABLE(cs) = Qnil;
2173 signal_error (Qwrong_type_argument,
2174 list2 (build_translated_string ("vector-or-nil-p"),
2177 byte_offset = CHARSET_BYTE_OFFSET (cs);
2178 switch (CHARSET_DIMENSION (cs))
2181 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2183 Lisp_Object c = XVECTOR_DATA(table)[i];
2186 Fput_char_attribute (c, XCHARSET_NAME (charset),
2187 make_int (i + byte_offset));
2191 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2193 Lisp_Object v = XVECTOR_DATA(table)[i];
2199 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2201 Lisp_Object c = XVECTOR_DATA(v)[j];
2205 (c, XCHARSET_NAME (charset),
2206 make_int ( ( (i + byte_offset) << 8 )
2212 Fput_char_attribute (v, XCHARSET_NAME (charset),
2213 make_int (i + byte_offset));
2221 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2222 Save mapping-table of CHARSET.
2226 struct Lisp_Charset *cs;
2227 int byte_min, byte_max;
2228 #ifdef HAVE_LIBCHISE
2230 #else /* HAVE_LIBCHISE */
2232 Lisp_Object db_file;
2233 #endif /* not HAVE_LIBCHISE */
2235 charset = Fget_charset (charset);
2236 cs = XCHARSET (charset);
2238 #ifdef HAVE_LIBCHISE
2239 if ( open_chise_data_source_maybe () )
2243 = chise_ds_get_ccs (default_chise_data_source,
2244 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2247 printf ("Can't open decoding-table %s\n",
2248 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2251 #else /* HAVE_LIBCHISE */
2252 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2253 Qsystem_char_id, 1);
2254 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2255 #endif /* not HAVE_LIBCHISE */
2257 byte_min = CHARSET_BYTE_OFFSET (cs);
2258 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2259 switch (CHARSET_DIMENSION (cs))
2263 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2266 for (cell = byte_min; cell < byte_max; cell++)
2268 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2272 #ifdef HAVE_LIBCHISE
2273 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2274 #else /* HAVE_LIBCHISE */
2275 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2276 Fprin1_to_string (c, Qnil),
2278 #endif /* not HAVE_LIBCHISE */
2285 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2288 for (row = byte_min; row < byte_max; row++)
2290 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2293 for (cell = byte_min; cell < byte_max; cell++)
2295 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2299 #ifdef HAVE_LIBCHISE
2300 chise_ccs_set_decoded_char
2302 (row << 8) | cell, XCHAR (c));
2303 #else /* HAVE_LIBCHISE */
2304 Fput_database (Fprin1_to_string (make_int ((row << 8)
2307 Fprin1_to_string (c, Qnil),
2309 #endif /* not HAVE_LIBCHISE */
2317 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2320 for (plane = byte_min; plane < byte_max; plane++)
2323 = get_ccs_octet_table (table_p, charset, plane);
2326 for (row = byte_min; row < byte_max; row++)
2329 = get_ccs_octet_table (table_r, charset, row);
2332 for (cell = byte_min; cell < byte_max; cell++)
2334 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2339 #ifdef HAVE_LIBCHISE
2340 chise_ccs_set_decoded_char
2345 #else /* HAVE_LIBCHISE */
2346 Fput_database (Fprin1_to_string
2347 (make_int ((plane << 16)
2351 Fprin1_to_string (c, Qnil),
2353 #endif /* not HAVE_LIBCHISE */
2362 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2365 for (group = byte_min; group < byte_max; group++)
2368 = get_ccs_octet_table (table_g, charset, group);
2371 for (plane = byte_min; plane < byte_max; plane++)
2374 = get_ccs_octet_table (table_p, charset, plane);
2377 for (row = byte_min; row < byte_max; row++)
2380 = get_ccs_octet_table (table_r, charset, row);
2383 for (cell = byte_min; cell < byte_max; cell++)
2386 = get_ccs_octet_table (table_c, charset, cell);
2390 #ifdef HAVE_LIBCHISE
2391 chise_ccs_set_decoded_char
2397 #else /* HAVE_LIBCHISE */
2398 Fput_database (Fprin1_to_string
2399 (make_int (( group << 24)
2404 Fprin1_to_string (c, Qnil),
2406 #endif /* not HAVE_LIBCHISE */
2414 #ifdef HAVE_LIBCHISE
2415 chise_ccs_sync (dt_ccs);
2417 #else /* HAVE_LIBCHISE */
2418 return Fclose_database (db);
2419 #endif /* not HAVE_LIBCHISE */
2422 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2423 Reset mapping-table of CCS with database file.
2427 #ifdef HAVE_LIBCHISE
2428 CHISE_CCS chise_ccs;
2430 Lisp_Object db_file;
2433 ccs = Fget_charset (ccs);
2435 #ifdef HAVE_LIBCHISE
2436 if ( open_chise_data_source_maybe () )
2439 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2440 XSTRING_DATA (Fsymbol_name
2441 (XCHARSET_NAME(ccs))));
2442 if (chise_ccs == NULL)
2445 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2446 Qsystem_char_id, 0);
2450 #ifdef HAVE_LIBCHISE
2451 chise_ccs_setup_db (chise_ccs, 0) == 0
2453 !NILP (Ffile_exists_p (db_file))
2457 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2464 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2466 #ifdef HAVE_LIBCHISE
2467 CHISE_Char_ID char_id;
2469 if ( open_chise_data_source_maybe () )
2473 = chise_ds_decode_char (default_chise_data_source,
2474 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2477 decoding_table_put_char (ccs, code_point, make_char (char_id));
2479 decoding_table_put_char (ccs, code_point, Qnil);
2481 /* chise_ccst_close (dt_ccs); */
2483 #else /* HAVE_LIBCHISE */
2486 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2489 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2493 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2500 decoding_table_put_char (ccs, code_point, ret);
2501 Fclose_database (db);
2505 decoding_table_put_char (ccs, code_point, Qnil);
2506 Fclose_database (db);
2509 #endif /* not HAVE_LIBCHISE */
2512 #ifdef HAVE_LIBCHISE
2513 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2514 Save properties of CHARSET.
2518 struct Lisp_Charset *cs;
2519 CHISE_Property property;
2521 unsigned char* feature_name;
2523 ccs = Fget_charset (charset);
2524 cs = XCHARSET (ccs);
2526 if ( open_chise_data_source_maybe () )
2529 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2531 property = chise_ds_get_property (default_chise_data_source,
2533 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2534 chise_feature_set_property_value
2535 (chise_ds_get_feature (default_chise_data_source, feature_name),
2536 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2538 chise_property_sync (property);
2540 charset = XCHARSET_NAME (ccs);
2541 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2543 property = chise_ds_get_property (default_chise_data_source,
2545 chise_feature_set_property_value
2546 (chise_ds_get_feature (default_chise_data_source, feature_name),
2547 property, XSTRING_DATA (Fprin1_to_string
2548 (CHARSET_DOC_STRING (cs), Qnil)));
2549 chise_property_sync (property);
2551 property = chise_ds_get_property (default_chise_data_source, "type");
2552 chise_feature_set_property_value
2553 (chise_ds_get_feature (default_chise_data_source, feature_name),
2555 chise_property_sync (property);
2557 property = chise_ds_get_property (default_chise_data_source, "chars");
2558 chise_feature_set_property_value
2559 (chise_ds_get_feature (default_chise_data_source, feature_name),
2560 property, XSTRING_DATA (Fprin1_to_string (make_int
2561 (CHARSET_CHARS (cs)),
2563 chise_property_sync (property);
2565 property = chise_ds_get_property (default_chise_data_source, "dimension");
2566 chise_feature_set_property_value
2567 (chise_ds_get_feature (default_chise_data_source, feature_name),
2568 property, XSTRING_DATA (Fprin1_to_string (make_int
2569 (CHARSET_DIMENSION (cs)),
2571 chise_property_sync (property);
2573 if ( CHARSET_FINAL (cs) != 0 )
2575 property = chise_ds_get_property (default_chise_data_source,
2577 chise_feature_set_property_value
2578 (chise_ds_get_feature (default_chise_data_source, feature_name),
2579 property, XSTRING_DATA (Fprin1_to_string (make_int
2580 (CHARSET_FINAL (cs)),
2582 chise_property_sync (property);
2585 if ( !NILP (CHARSET_MOTHER (cs)) )
2587 Lisp_Object mother = CHARSET_MOTHER (cs);
2589 if ( CHARSETP (mother) )
2590 mother = XCHARSET_NAME (mother);
2592 property = chise_ds_get_property (default_chise_data_source,
2594 chise_feature_set_property_value
2595 (chise_ds_get_feature (default_chise_data_source, feature_name),
2596 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2597 chise_property_sync (property);
2600 if ( CHARSET_MAX_CODE (cs) != 0 )
2604 property = chise_ds_get_property (default_chise_data_source,
2606 if ( CHARSET_MIN_CODE (cs) == 0 )
2607 chise_feature_set_property_value
2608 (chise_ds_get_feature (default_chise_data_source, feature_name),
2612 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2613 chise_feature_set_property_value
2614 (chise_ds_get_feature (default_chise_data_source, feature_name),
2617 chise_property_sync (property);
2619 property = chise_ds_get_property (default_chise_data_source,
2621 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2622 chise_feature_set_property_value
2623 (chise_ds_get_feature (default_chise_data_source, feature_name),
2625 chise_property_sync (property);
2627 property = chise_ds_get_property (default_chise_data_source,
2628 "mother-code-offset");
2629 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2630 chise_feature_set_property_value
2631 (chise_ds_get_feature (default_chise_data_source, feature_name),
2635 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2636 chise_feature_set_property_value
2637 (chise_ds_get_feature (default_chise_data_source, feature_name),
2640 chise_property_sync (property);
2642 property = chise_ds_get_property (default_chise_data_source,
2643 "mother-code-conversion");
2644 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2645 chise_feature_set_property_value
2646 (chise_ds_get_feature (default_chise_data_source, feature_name),
2647 property, "identical");
2650 Lisp_Object sym = Qnil;
2652 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2654 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2656 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2658 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2661 chise_feature_set_property_value
2662 (chise_ds_get_feature (default_chise_data_source, feature_name),
2663 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2665 chise_feature_set_property_value
2666 (chise_ds_get_feature (default_chise_data_source, feature_name),
2667 property, "unknown");
2669 chise_property_sync (property);
2673 #endif /* HAVE_LIBCHISE */
2675 #endif /* HAVE_CHISE */
2676 #endif /* UTF2000 */
2679 /************************************************************************/
2680 /* Lisp primitives for working with characters */
2681 /************************************************************************/
2684 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2685 Make a character from CHARSET and code-point CODE.
2686 If DEFINED_ONLY is non-nil, builtin character is not returned.
2687 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2688 If corresponding character is not found, nil is returned.
2690 (charset, code, defined_only, without_inheritance))
2694 charset = Fget_charset (charset);
2697 if (XCHARSET_GRAPHIC (charset) == 1)
2699 if (NILP (defined_only))
2700 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2702 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2703 return c >= 0 ? make_char (c) : Qnil;
2706 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2707 Make a builtin character from CHARSET and code-point CODE.
2714 charset = Fget_charset (charset);
2716 if (EQ (charset, Vcharset_latin_viscii))
2718 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2724 (ret = Fget_char_attribute (chr,
2725 Vcharset_latin_viscii_lower,
2728 charset = Vcharset_latin_viscii_lower;
2732 (ret = Fget_char_attribute (chr,
2733 Vcharset_latin_viscii_upper,
2736 charset = Vcharset_latin_viscii_upper;
2743 if (XCHARSET_GRAPHIC (charset) == 1)
2746 ch = decode_builtin_char (charset, c);
2748 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
2752 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2753 Make a character from CHARSET and octets ARG1 and ARG2.
2754 ARG2 is required only for characters from two-dimensional charsets.
2755 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2756 character s with caron.
2758 (charset, arg1, arg2))
2762 int lowlim, highlim;
2764 charset = Fget_charset (charset);
2765 cs = XCHARSET (charset);
2767 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2768 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2770 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2772 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2773 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2776 /* It is useful (and safe, according to Olivier Galibert) to strip
2777 the 8th bit off ARG1 and ARG2 because it allows programmers to
2778 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2779 Latin 2 code of the character. */
2787 if (a1 < lowlim || a1 > highlim)
2788 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2790 if (CHARSET_DIMENSION (cs) == 1)
2794 ("Charset is of dimension one; second octet must be nil", arg2);
2795 return make_char (MAKE_CHAR (charset, a1, 0));
2804 a2 = XINT (arg2) & 0x7f;
2806 if (a2 < lowlim || a2 > highlim)
2807 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2809 return make_char (MAKE_CHAR (charset, a1, a2));
2812 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2813 Return the character set of CHARACTER.
2817 CHECK_CHAR_COERCE_INT (character);
2819 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2822 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2823 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2824 N defaults to 0 if omitted.
2828 Lisp_Object charset;
2831 CHECK_CHAR_COERCE_INT (character);
2833 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2835 if (NILP (n) || EQ (n, Qzero))
2836 return make_int (octet0);
2837 else if (EQ (n, make_int (1)))
2838 return make_int (octet1);
2840 signal_simple_error ("Octet number must be 0 or 1", n);
2844 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2845 Return code-point of CHARACTER in specified CHARSET.
2847 (character, charset, defined_only))
2851 CHECK_CHAR_COERCE_INT (character);
2852 charset = Fget_charset (charset);
2853 code_point = charset_code_point (charset, XCHAR (character),
2854 !NILP (defined_only));
2855 if (code_point >= 0)
2856 return make_int (code_point);
2862 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2863 Return list of charset and one or two position-codes of CHARACTER.
2867 /* This function can GC */
2868 struct gcpro gcpro1, gcpro2;
2869 Lisp_Object charset = Qnil;
2870 Lisp_Object rc = Qnil;
2878 GCPRO2 (charset, rc);
2879 CHECK_CHAR_COERCE_INT (character);
2882 code_point = ENCODE_CHAR (XCHAR (character), charset);
2883 dimension = XCHARSET_DIMENSION (charset);
2884 while (dimension > 0)
2886 rc = Fcons (make_int (code_point & 255), rc);
2890 rc = Fcons (XCHARSET_NAME (charset), rc);
2892 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2894 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2896 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2900 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2909 #ifdef ENABLE_COMPOSITE_CHARS
2910 /************************************************************************/
2911 /* composite character functions */
2912 /************************************************************************/
2915 lookup_composite_char (Bufbyte *str, int len)
2917 Lisp_Object lispstr = make_string (str, len);
2918 Lisp_Object ch = Fgethash (lispstr,
2919 Vcomposite_char_string2char_hash_table,
2925 if (composite_char_row_next >= 128)
2926 signal_simple_error ("No more composite chars available", lispstr);
2927 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2928 composite_char_col_next);
2929 Fputhash (make_char (emch), lispstr,
2930 Vcomposite_char_char2string_hash_table);
2931 Fputhash (lispstr, make_char (emch),
2932 Vcomposite_char_string2char_hash_table);
2933 composite_char_col_next++;
2934 if (composite_char_col_next >= 128)
2936 composite_char_col_next = 32;
2937 composite_char_row_next++;
2946 composite_char_string (Emchar ch)
2948 Lisp_Object str = Fgethash (make_char (ch),
2949 Vcomposite_char_char2string_hash_table,
2951 assert (!UNBOUNDP (str));
2955 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2956 Convert a string into a single composite character.
2957 The character is the result of overstriking all the characters in
2962 CHECK_STRING (string);
2963 return make_char (lookup_composite_char (XSTRING_DATA (string),
2964 XSTRING_LENGTH (string)));
2967 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2968 Return a string of the characters comprising a composite character.
2976 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2977 signal_simple_error ("Must be composite char", ch);
2978 return composite_char_string (emch);
2980 #endif /* ENABLE_COMPOSITE_CHARS */
2983 /************************************************************************/
2984 /* initialization */
2985 /************************************************************************/
2988 syms_of_mule_charset (void)
2990 INIT_LRECORD_IMPLEMENTATION (charset);
2992 DEFSUBR (Fcharsetp);
2993 DEFSUBR (Ffind_charset);
2994 DEFSUBR (Fget_charset);
2995 DEFSUBR (Fcharset_list);
2996 DEFSUBR (Fcharset_name);
2997 DEFSUBR (Fmake_charset);
2998 DEFSUBR (Fmake_reverse_direction_charset);
2999 /* DEFSUBR (Freverse_direction_charset); */
3000 DEFSUBR (Fdefine_charset_alias);
3001 DEFSUBR (Fcharset_from_attributes);
3002 DEFSUBR (Fcharset_short_name);
3003 DEFSUBR (Fcharset_long_name);
3004 DEFSUBR (Fcharset_description);
3005 DEFSUBR (Fcharset_dimension);
3006 DEFSUBR (Fcharset_property);
3007 DEFSUBR (Fcharset_id);
3008 DEFSUBR (Fset_charset_ccl_program);
3009 DEFSUBR (Fset_charset_registry);
3012 DEFSUBR (Fcharset_mapping_table);
3013 DEFSUBR (Fset_charset_mapping_table);
3015 DEFSUBR (Fsave_charset_mapping_table);
3016 DEFSUBR (Freset_charset_mapping_table);
3017 #ifdef HAVE_LIBCHISE
3018 DEFSUBR (Fsave_charset_properties);
3019 #endif /* HAVE_LIBCHISE */
3020 #endif /* HAVE_CHISE */
3021 DEFSUBR (Fdecode_char);
3022 DEFSUBR (Fdecode_builtin_char);
3023 DEFSUBR (Fencode_char);
3026 DEFSUBR (Fmake_char);
3027 DEFSUBR (Fchar_charset);
3028 DEFSUBR (Fchar_octet);
3029 DEFSUBR (Fsplit_char);
3031 #ifdef ENABLE_COMPOSITE_CHARS
3032 DEFSUBR (Fmake_composite_char);
3033 DEFSUBR (Fcomposite_char_string);
3036 defsymbol (&Qcharsetp, "charsetp");
3037 defsymbol (&Qregistry, "registry");
3038 defsymbol (&Qfinal, "final");
3039 defsymbol (&Qgraphic, "graphic");
3040 defsymbol (&Qdirection, "direction");
3041 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3042 defsymbol (&Qshort_name, "short-name");
3043 defsymbol (&Qlong_name, "long-name");
3044 defsymbol (&Qiso_ir, "iso-ir");
3046 defsymbol (&Qmother, "mother");
3047 defsymbol (&Qmin_code, "min-code");
3048 defsymbol (&Qmax_code, "max-code");
3049 defsymbol (&Qcode_offset, "code-offset");
3050 defsymbol (&Qconversion, "conversion");
3051 defsymbol (&Q94x60, "94x60");
3052 defsymbol (&Q94x94x60, "94x94x60");
3053 defsymbol (&Qbig5_1, "big5-1");
3054 defsymbol (&Qbig5_2, "big5-2");
3057 defsymbol (&Ql2r, "l2r");
3058 defsymbol (&Qr2l, "r2l");
3060 /* Charsets, compatible with FSF 20.3
3061 Naming convention is Script-Charset[-Edition] */
3062 defsymbol (&Qascii, "ascii");
3063 defsymbol (&Qcontrol_1, "control-1");
3064 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3065 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3066 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3067 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3068 defsymbol (&Qthai_tis620, "thai-tis620");
3069 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3070 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3071 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3072 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3073 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3074 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3075 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3076 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208@1978");
3077 defsymbol (&Qmap_gb2312, "=gb2312");
3078 defsymbol (&Qmap_gb12345, "=gb12345");
3079 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208@1983");
3080 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
3081 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
3082 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
3083 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
3085 defsymbol (&Qsystem_char_id, "system-char-id");
3086 defsymbol (&Qmap_ucs, "=ucs");
3087 defsymbol (&Qucs, "ucs");
3088 defsymbol (&Qucs_bmp, "ucs-bmp");
3089 defsymbol (&Qucs_smp, "ucs-smp");
3090 defsymbol (&Qucs_sip, "ucs-sip");
3091 defsymbol (&Qlatin_viscii, "latin-viscii");
3092 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3093 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3094 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3095 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3096 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3097 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
3098 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208@1990");
3099 defsymbol (&Qmap_big5, "=big5");
3100 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3102 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3103 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3105 defsymbol (&Qcomposite, "composite");
3109 vars_of_mule_charset (void)
3116 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3117 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3119 /* Table of charsets indexed by leading byte. */
3120 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3121 chlook->charset_by_leading_byte[i] = Qnil;
3124 /* Table of charsets indexed by type/final-byte. */
3125 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3126 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3127 chlook->charset_by_attributes[i][j] = Qnil;
3129 /* Table of charsets indexed by type/final-byte/direction. */
3130 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3131 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3132 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3133 chlook->charset_by_attributes[i][j][k] = Qnil;
3137 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3139 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3140 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3144 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3145 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3146 Leading-code of private TYPE9N charset of column-width 1.
3148 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3152 Vdefault_coded_charset_priority_list = Qnil;
3153 DEFVAR_LISP ("default-coded-charset-priority-list",
3154 &Vdefault_coded_charset_priority_list /*
3155 Default order of preferred coded-character-sets.
3157 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3158 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3159 &Vdisplay_coded_charset_priority_use_inheritance /*
3160 If non-nil, use character inheritance.
3162 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3163 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3164 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3165 If non-nil, prefer nearest character in hierarchy order.
3171 complex_vars_of_mule_charset (void)
3173 staticpro (&Vcharset_hash_table);
3174 Vcharset_hash_table =
3175 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3177 /* Predefined character sets. We store them into variables for
3181 staticpro (&Vcharset_system_char_id);
3182 Vcharset_system_char_id =
3183 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3184 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3185 build_string ("SCID"),
3186 build_string ("CHAR-ID"),
3187 build_string ("System char-id"),
3189 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3190 staticpro (&Vcharset_ucs);
3192 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
3193 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3194 build_string ("UCS"),
3195 build_string ("UCS"),
3196 build_string ("ISO/IEC 10646"),
3198 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3199 staticpro (&Vcharset_ucs_bmp);
3201 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3202 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3203 build_string ("BMP"),
3204 build_string ("UCS-BMP"),
3205 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3207 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3208 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3209 staticpro (&Vcharset_ucs_smp);
3211 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3212 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3213 build_string ("SMP"),
3214 build_string ("UCS-SMP"),
3215 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3216 build_string ("UCS00-1"),
3217 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3218 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
3219 staticpro (&Vcharset_ucs_sip);
3221 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3222 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3223 build_string ("SIP"),
3224 build_string ("UCS-SIP"),
3225 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3226 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3227 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3228 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
3230 # define MIN_CHAR_THAI 0
3231 # define MAX_CHAR_THAI 0
3232 /* # define MIN_CHAR_HEBREW 0 */
3233 /* # define MAX_CHAR_HEBREW 0 */
3234 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3235 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3237 staticpro (&Vcharset_ascii);
3239 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3240 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3241 build_string ("ASCII"),
3242 build_string ("ASCII)"),
3243 build_string ("ASCII (ISO646 IRV)"),
3244 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3245 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
3246 staticpro (&Vcharset_control_1);
3247 Vcharset_control_1 =
3248 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3249 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3250 build_string ("C1"),
3251 build_string ("Control characters"),
3252 build_string ("Control characters 128-191"),
3254 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
3255 staticpro (&Vcharset_latin_iso8859_1);
3256 Vcharset_latin_iso8859_1 =
3257 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3258 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3259 build_string ("Latin-1"),
3260 build_string ("ISO8859-1 (Latin-1)"),
3261 build_string ("ISO8859-1 (Latin-1)"),
3262 build_string ("iso8859-1"),
3263 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3264 staticpro (&Vcharset_latin_iso8859_2);
3265 Vcharset_latin_iso8859_2 =
3266 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3267 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3268 build_string ("Latin-2"),
3269 build_string ("ISO8859-2 (Latin-2)"),
3270 build_string ("ISO8859-2 (Latin-2)"),
3271 build_string ("iso8859-2"),
3272 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3273 staticpro (&Vcharset_latin_iso8859_3);
3274 Vcharset_latin_iso8859_3 =
3275 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3276 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3277 build_string ("Latin-3"),
3278 build_string ("ISO8859-3 (Latin-3)"),
3279 build_string ("ISO8859-3 (Latin-3)"),
3280 build_string ("iso8859-3"),
3281 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3282 staticpro (&Vcharset_latin_iso8859_4);
3283 Vcharset_latin_iso8859_4 =
3284 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3285 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3286 build_string ("Latin-4"),
3287 build_string ("ISO8859-4 (Latin-4)"),
3288 build_string ("ISO8859-4 (Latin-4)"),
3289 build_string ("iso8859-4"),
3290 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3291 staticpro (&Vcharset_thai_tis620);
3292 Vcharset_thai_tis620 =
3293 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3294 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3295 build_string ("TIS620"),
3296 build_string ("TIS620 (Thai)"),
3297 build_string ("TIS620.2529 (Thai)"),
3298 build_string ("tis620"),
3299 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3300 staticpro (&Vcharset_greek_iso8859_7);
3301 Vcharset_greek_iso8859_7 =
3302 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3303 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3304 build_string ("ISO8859-7"),
3305 build_string ("ISO8859-7 (Greek)"),
3306 build_string ("ISO8859-7 (Greek)"),
3307 build_string ("iso8859-7"),
3308 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3309 staticpro (&Vcharset_arabic_iso8859_6);
3310 Vcharset_arabic_iso8859_6 =
3311 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3312 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3313 build_string ("ISO8859-6"),
3314 build_string ("ISO8859-6 (Arabic)"),
3315 build_string ("ISO8859-6 (Arabic)"),
3316 build_string ("iso8859-6"),
3317 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3318 staticpro (&Vcharset_hebrew_iso8859_8);
3319 Vcharset_hebrew_iso8859_8 =
3320 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3321 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3322 build_string ("ISO8859-8"),
3323 build_string ("ISO8859-8 (Hebrew)"),
3324 build_string ("ISO8859-8 (Hebrew)"),
3325 build_string ("iso8859-8"),
3327 0 /* MIN_CHAR_HEBREW */,
3328 0 /* MAX_CHAR_HEBREW */, 0, 32,
3329 Qnil, CONVERSION_IDENTICAL);
3330 staticpro (&Vcharset_katakana_jisx0201);
3331 Vcharset_katakana_jisx0201 =
3332 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3333 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3334 build_string ("JISX0201 Kana"),
3335 build_string ("JISX0201.1976 (Japanese Kana)"),
3336 build_string ("JISX0201.1976 Japanese Kana"),
3337 build_string ("jisx0201\\.1976"),
3338 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3339 staticpro (&Vcharset_latin_jisx0201);
3340 Vcharset_latin_jisx0201 =
3341 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3342 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3343 build_string ("JISX0201 Roman"),
3344 build_string ("JISX0201.1976 (Japanese Roman)"),
3345 build_string ("JISX0201.1976 Japanese Roman"),
3346 build_string ("jisx0201\\.1976"),
3347 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3348 staticpro (&Vcharset_cyrillic_iso8859_5);
3349 Vcharset_cyrillic_iso8859_5 =
3350 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3351 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3352 build_string ("ISO8859-5"),
3353 build_string ("ISO8859-5 (Cyrillic)"),
3354 build_string ("ISO8859-5 (Cyrillic)"),
3355 build_string ("iso8859-5"),
3356 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3357 staticpro (&Vcharset_latin_iso8859_9);
3358 Vcharset_latin_iso8859_9 =
3359 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3360 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3361 build_string ("Latin-5"),
3362 build_string ("ISO8859-9 (Latin-5)"),
3363 build_string ("ISO8859-9 (Latin-5)"),
3364 build_string ("iso8859-9"),
3365 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3367 staticpro (&Vcharset_jis_x0208);
3368 Vcharset_jis_x0208 =
3369 make_charset (LEADING_BYTE_JIS_X0208,
3370 Qmap_jis_x0208, 94, 2,
3371 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3372 build_string ("JIS X0208"),
3373 build_string ("JIS X0208 Common"),
3374 build_string ("JIS X0208 Common part"),
3375 build_string ("jisx0208\\.1990"),
3377 MIN_CHAR_JIS_X0208_1990,
3378 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3379 Qnil, CONVERSION_94x94);
3381 staticpro (&Vcharset_japanese_jisx0208_1978);
3382 Vcharset_japanese_jisx0208_1978 =
3383 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3384 Qmap_jis_x0208_1978, 94, 2,
3385 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3386 build_string ("JIS X0208:1978"),
3387 build_string ("JIS X0208:1978 (Japanese)"),
3389 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3390 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3397 CONVERSION_IDENTICAL);
3398 staticpro (&Vcharset_chinese_gb2312);
3399 Vcharset_chinese_gb2312 =
3400 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3401 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3402 build_string ("GB2312"),
3403 build_string ("GB2312)"),
3404 build_string ("GB2312 Chinese simplified"),
3405 build_string ("gb2312"),
3406 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3407 staticpro (&Vcharset_chinese_gb12345);
3408 Vcharset_chinese_gb12345 =
3409 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3410 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3411 build_string ("G1"),
3412 build_string ("GB 12345)"),
3413 build_string ("GB 12345-1990"),
3414 build_string ("GB12345\\(\\.1990\\)?-0"),
3415 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3416 staticpro (&Vcharset_japanese_jisx0208);
3417 Vcharset_japanese_jisx0208 =
3418 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3419 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3420 build_string ("JISX0208"),
3421 build_string ("JIS X0208:1983 (Japanese)"),
3422 build_string ("JIS X0208:1983 Japanese Kanji"),
3423 build_string ("jisx0208\\.1983"),
3430 CONVERSION_IDENTICAL);
3432 staticpro (&Vcharset_japanese_jisx0208_1990);
3433 Vcharset_japanese_jisx0208_1990 =
3434 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3435 Qmap_jis_x0208_1990, 94, 2,
3436 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3437 build_string ("JISX0208-1990"),
3438 build_string ("JIS X0208:1990 (Japanese)"),
3439 build_string ("JIS X0208:1990 Japanese Kanji"),
3440 build_string ("jisx0208\\.1990"),
3442 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3443 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3444 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3445 Vcharset_jis_x0208 /* Qnil */,
3446 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3448 staticpro (&Vcharset_korean_ksc5601);
3449 Vcharset_korean_ksc5601 =
3450 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3451 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3452 build_string ("KSC5601"),
3453 build_string ("KSC5601 (Korean"),
3454 build_string ("KSC5601 Korean Hangul and Hanja"),
3455 build_string ("ksc5601"),
3456 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3457 staticpro (&Vcharset_japanese_jisx0212);
3458 Vcharset_japanese_jisx0212 =
3459 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3460 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3461 build_string ("JISX0212"),
3462 build_string ("JISX0212 (Japanese)"),
3463 build_string ("JISX0212 Japanese Supplement"),
3464 build_string ("jisx0212"),
3465 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3467 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3468 staticpro (&Vcharset_chinese_cns11643_1);
3469 Vcharset_chinese_cns11643_1 =
3470 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3471 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3472 build_string ("CNS11643-1"),
3473 build_string ("CNS11643-1 (Chinese traditional)"),
3475 ("CNS 11643 Plane 1 Chinese traditional"),
3476 build_string (CHINESE_CNS_PLANE_RE("1")),
3477 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3478 staticpro (&Vcharset_chinese_cns11643_2);
3479 Vcharset_chinese_cns11643_2 =
3480 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3481 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3482 build_string ("CNS11643-2"),
3483 build_string ("CNS11643-2 (Chinese traditional)"),
3485 ("CNS 11643 Plane 2 Chinese traditional"),
3486 build_string (CHINESE_CNS_PLANE_RE("2")),
3487 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3489 staticpro (&Vcharset_latin_tcvn5712);
3490 Vcharset_latin_tcvn5712 =
3491 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3492 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3493 build_string ("TCVN 5712"),
3494 build_string ("TCVN 5712 (VSCII-2)"),
3495 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3496 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3497 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3498 staticpro (&Vcharset_latin_viscii_lower);
3499 Vcharset_latin_viscii_lower =
3500 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3501 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3502 build_string ("VISCII lower"),
3503 build_string ("VISCII lower (Vietnamese)"),
3504 build_string ("VISCII lower (Vietnamese)"),
3505 build_string ("MULEVISCII-LOWER"),
3506 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3507 staticpro (&Vcharset_latin_viscii_upper);
3508 Vcharset_latin_viscii_upper =
3509 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3510 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3511 build_string ("VISCII upper"),
3512 build_string ("VISCII upper (Vietnamese)"),
3513 build_string ("VISCII upper (Vietnamese)"),
3514 build_string ("MULEVISCII-UPPER"),
3515 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3516 staticpro (&Vcharset_latin_viscii);
3517 Vcharset_latin_viscii =
3518 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3519 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3520 build_string ("VISCII"),
3521 build_string ("VISCII 1.1 (Vietnamese)"),
3522 build_string ("VISCII 1.1 (Vietnamese)"),
3523 build_string ("VISCII1\\.1"),
3524 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3525 staticpro (&Vcharset_chinese_big5);
3526 Vcharset_chinese_big5 =
3527 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3528 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3529 build_string ("Big5"),
3530 build_string ("Big5"),
3531 build_string ("Big5 Chinese traditional"),
3532 build_string ("big5-0"),
3534 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3535 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3537 staticpro (&Vcharset_ethiopic_ucs);
3538 Vcharset_ethiopic_ucs =
3539 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3540 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3541 build_string ("Ethiopic (UCS)"),
3542 build_string ("Ethiopic (UCS)"),
3543 build_string ("Ethiopic of UCS"),
3544 build_string ("Ethiopic-Unicode"),
3545 Qnil, 0x1200, 0x137F, 0, 0,
3546 Qnil, CONVERSION_IDENTICAL);
3548 staticpro (&Vcharset_chinese_big5_1);
3549 Vcharset_chinese_big5_1 =
3550 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3551 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3552 build_string ("Big5"),
3553 build_string ("Big5 (Level-1)"),
3555 ("Big5 Level-1 Chinese traditional"),
3556 build_string ("big5"),
3557 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3558 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3559 staticpro (&Vcharset_chinese_big5_2);
3560 Vcharset_chinese_big5_2 =
3561 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3562 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3563 build_string ("Big5"),
3564 build_string ("Big5 (Level-2)"),
3566 ("Big5 Level-2 Chinese traditional"),
3567 build_string ("big5"),
3568 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3569 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3571 #ifdef ENABLE_COMPOSITE_CHARS
3572 /* #### For simplicity, we put composite chars into a 96x96 charset.
3573 This is going to lead to problems because you can run out of
3574 room, esp. as we don't yet recycle numbers. */
3575 staticpro (&Vcharset_composite);
3576 Vcharset_composite =
3577 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3578 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3579 build_string ("Composite"),
3580 build_string ("Composite characters"),
3581 build_string ("Composite characters"),
3584 /* #### not dumped properly */
3585 composite_char_row_next = 32;
3586 composite_char_col_next = 32;
3588 Vcomposite_char_string2char_hash_table =
3589 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3590 Vcomposite_char_char2string_hash_table =
3591 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3592 staticpro (&Vcomposite_char_string2char_hash_table);
3593 staticpro (&Vcomposite_char_char2string_hash_table);
3594 #endif /* ENABLE_COMPOSITE_CHARS */