1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the other way. */
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table giving the byte length of a character's string
   representation, indexed by the first byte of that representation.
   The array is declared with 0xA0 entries, i.e. first bytes 0x00-0x9f. */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for the single range 0x80-0x8f.
   They differ only in the last entry (3 versus 2).  Presumably they are
   alternatives selected by a preprocessor conditional that is not
   visible here -- confirm before relying on either value. */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
/* Fifteen entries of 3 bytes, then 4 bytes for the final slot (0x9f). */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Validate the elements of the decoding-table vector V.
   Visible logic: V's length is compared against CCS_LEN, then every
   element is examined; an element that is neither nil nor a character
   is passed to a recursive call with DIM reduced by one.
   NOTE(review): the value returned for each outcome is not visible in
   this excerpt. */
/* Forward declaration. */
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is treated as a special case. */
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Anything that is not nil and not a character is checked one level
   further down. */
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Store CHARACTER at CODE_POINT in the decoding table of CCS.
   In the first visible variant the table is a nest of per-octet tables:
   each level is fetched with get_ccs_octet_table() keyed by one octet of
   CODE_POINT (most significant octet first), the innermost table is
   updated with put_ccs_octet_table(), and each modified table is then
   written back into its parent up to XCHARSET_DECODING_TABLE (ccs).
   The number of levels follows the charset dimension (1 to 4). */
180 decoding_table_put_char (Lisp_Object ccs,
181 int code_point, Lisp_Object character)
184 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
185 int dim = XCHARSET_DIMENSION (ccs);
/* One level: CODE_POINT itself is the key into the top-level table
   (presumably the dim == 1 case; the test is not visible here). */
188 XCHARSET_DECODING_TABLE (ccs)
189 = put_ccs_octet_table (table1, ccs, code_point, character);
/* Two levels: key the top table by bits 8-15, the leaf by bits 0-7. */
193 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
195 table2 = put_ccs_octet_table (table2, ccs,
196 (unsigned char)code_point, character);
197 XCHARSET_DECODING_TABLE (ccs)
198 = put_ccs_octet_table (table1, ccs,
199 (unsigned char)(code_point >> 8), table2);
/* Three levels: bits 16-23, then 8-15, then 0-7. */
204 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
206 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
208 table3 = put_ccs_octet_table (table3, ccs,
209 (unsigned char)code_point, character);
210 table2 = put_ccs_octet_table (table2, ccs,
211 (unsigned char)(code_point >> 8), table3);
212 XCHARSET_DECODING_TABLE (ccs)
213 = put_ccs_octet_table (table1, ccs,
214 (unsigned char)(code_point >> 16), table2);
/* Four levels: bits 24-31, 16-23, 8-15, then 0-7. */
216 else /* if (dim == 4) */
219 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
221 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
223 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
225 table4 = put_ccs_octet_table (table4, ccs,
226 (unsigned char)code_point, character);
227 table3 = put_ccs_octet_table (table3, ccs,
228 (unsigned char)(code_point >> 8), table4);
229 table2 = put_ccs_octet_table (table2, ccs,
230 (unsigned char)(code_point >> 16), table3);
231 XCHARSET_DECODING_TABLE (ccs)
232 = put_ccs_octet_table (table1, ccs,
233 (unsigned char)(code_point >> 24), table2);
/* NOTE(review): the statements below declare `dim` a second time and work
   on plain Lisp vectors instead of octet tables.  They look like an
   alternative implementation selected by a preprocessor conditional that
   is not visible in this excerpt -- confirm. */
236 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
237 int dim = XCHARSET_DIMENSION (ccs);
238 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
241 int ccs_len = XVECTOR_LENGTH (v);
/* Index for the current level: the selected octet of CODE_POINT minus
   the charset byte offset. */
246 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
247 nv = XVECTOR_DATA(v)[i];
252 if (EQ (nv, character))
/* Create a nil-filled sub-vector of the same length as the top-level
   vector and install it in slot i. */
255 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
/* Leaf level: store the character itself. */
262 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in coded charset CCS.
   VALUE is accepted either as an integer or, in the obsolete form, as a
   list of bytes that is folded into a single integer one byte at a time.
   Other values are rejected with "Invalid value for coded-charset".
   The visible tail looks up the character's existing attribute for CCS,
   removes an old decoding-table entry and installs the new one.
   NOTE(review): the return value is not visible in this excerpt. */
267 put_char_ccs_code_point (Lisp_Object character,
268 Lisp_Object ccs, Lisp_Object value)
/* Negated guard: the inner test holds when CCS is named Qmap_ucs and
   VALUE is an integer below 0xF0000 equal to CHARACTER's own code.
   NOTE(review): the end of this condition is not visible here. */
270 if ( !( EQ (XCHARSET_NAME (ccs), Qmap_ucs)
271 && INTP (value) && (XINT (value) < 0xF0000)
272 && XCHAR (character) == XINT (value) )
275 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
279 { /* obsolete representation: value must be a list of bytes */
280 Lisp_Object ret = Fcar (value);
284 signal_simple_error ("Invalid value for coded-charset", value);
/* The first list element seeds the code point. */
285 code_point = XINT (ret);
/* NOTE(review): the statement guarded by this test is not visible; the
   integer path further down masks with 0x7F7F7F7F under the same test. */
286 if (XCHARSET_GRAPHIC (ccs) == 1)
294 signal_simple_error ("Invalid value for coded-charset",
298 signal_simple_error ("Invalid value for coded-charset",
301 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte j as the new low octet. */
303 code_point = (code_point << 8) | j;
/* Normalise VALUE to the integer form. */
306 value = make_int (code_point);
308 else if (INTP (value))
310 code_point = XINT (value);
311 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the top bit of every octet. */
313 code_point &= 0x7F7F7F7F;
314 value = make_int (code_point);
318 signal_simple_error ("Invalid value for coded-charset", value);
/* Fetch the value CHARACTER currently has for CCS (default nil) so the
   stale decoding-table entry can be dropped before the new one is put. */
322 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
325 decoding_table_remove_char (ccs, XINT (cpos));
328 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS.
   If the decoding table is a vector, the character's current code point
   for CCS is looked up and removed from it; if the encoding table is a
   char-table, the character's entry there is set to Qunbound.
   NOTE(review): the return value is not visible in this excerpt. */
334 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
336 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
337 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
339 if (VECTORP (decoding_table))
/* Current code point of CHARACTER in CCS; nil is the default. */
341 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
345 decoding_table_remove_char (ccs, XINT (cpos));
348 if (CHAR_TABLEP (encoding_table))
/* Qunbound marks the slot as having no value. */
350 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
371 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
372 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
407 Qvietnamese_viscii_lower,
408 Qvietnamese_viscii_upper,
418 Lisp_Object Ql2r, Qr2l;
420 Lisp_Object Vcharset_hash_table;
422 /* Composite characters are characters constructed by overstriking two
423 or more regular characters.
425 1) The old Mule implementation involves storing composite characters
426 in a buffer as a tag followed by all of the actual characters
427 used to make up the composite character. I think this is a bad
428 idea; it greatly complicates code that wants to handle strings
429 one character at a time because it has to deal with the possibility
430 of great big ungainly characters. It's much more reasonable to
431 simply store an index into a table of composite characters.
433 2) The current implementation only allows for 16,384 separate
434 composite characters over the lifetime of the XEmacs process.
435 This could become a potential problem if the user
436 edited lots of different files that use composite characters.
437 Due to FSF bogosity, increasing the number of allowable
438 composite characters under Mule would decrease the number
439 of possible faces that can exist. Mule already has shrunk
440 this to 2048, and further shrinkage would become uncomfortable.
441 No such problems exist in XEmacs.
443 Composite characters could be represented as 0x80 C1 C2 C3,
444 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
445 for slightly under 2^20 (one million) composite characters
446 over the XEmacs process lifetime, and you only need to
447 increase the size of a Mule character from 19 to 21 bits.
448 Or you could use 0x80 C1 C2 C3 C4, allowing for about
449 85 million (slightly over 2^26) composite characters. */
452 /************************************************************************/
453 /* Basic Emchar functions */
454 /************************************************************************/
456 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
457 string in STR. Returns the number of bytes stored.
458 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write character C to STR through the cursor p.
   First visible scheme: a UTF-8-style layout.  The range test on C picks
   the sequence length (2 bytes up to 0x7ff, 3 up to 0xffff, 4 up to
   0x1fffff, 5 up to 0x3ffffff, otherwise 6); the lead byte carries the
   top bits tagged with 0xc0/0xe0/0xf0/0xf8/0xfc and every following byte
   carries 6 bits tagged with 0x80.
   NOTE(review): the branch handling the smallest values and the final
   return are not visible in this excerpt. */
462 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* 2-byte form. */
477 else if ( c <= 0x7ff )
479 *p++ = (c >> 6) | 0xc0;
480 *p++ = (c & 0x3f) | 0x80;
/* 3-byte form. */
482 else if ( c <= 0xffff )
484 *p++ = (c >> 12) | 0xe0;
485 *p++ = ((c >> 6) & 0x3f) | 0x80;
486 *p++ = (c & 0x3f) | 0x80;
/* 4-byte form. */
488 else if ( c <= 0x1fffff )
490 *p++ = (c >> 18) | 0xf0;
491 *p++ = ((c >> 12) & 0x3f) | 0x80;
492 *p++ = ((c >> 6) & 0x3f) | 0x80;
493 *p++ = (c & 0x3f) | 0x80;
/* 5-byte form. */
495 else if ( c <= 0x3ffffff )
497 *p++ = (c >> 24) | 0xf8;
498 *p++ = ((c >> 18) & 0x3f) | 0x80;
499 *p++ = ((c >> 12) & 0x3f) | 0x80;
500 *p++ = ((c >> 6) & 0x3f) | 0x80;
501 *p++ = (c & 0x3f) | 0x80;
/* 6-byte form for everything larger. */
505 *p++ = (c >> 30) | 0xfc;
506 *p++ = ((c >> 24) & 0x3f) | 0x80;
507 *p++ = ((c >> 18) & 0x3f) | 0x80;
508 *p++ = ((c >> 12) & 0x3f) | 0x80;
509 *p++ = ((c >> 6) & 0x3f) | 0x80;
510 *p++ = (c & 0x3f) | 0x80;
/* NOTE(review): from here on the code uses a leading-byte scheme
   (BREAKUP_CHAR, CHAR_LEADING_BYTE).  Presumably this is an alternative
   implementation under a preprocessor conditional that is not visible
   in this excerpt -- confirm. */
513 BREAKUP_CHAR (c, charset, c1, c2);
514 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by an extra prefix byte. */
515 if (LEADING_BYTE_PRIVATE_P (lb))
516 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
518 if (EQ (charset, Vcharset_control_1))
527 /* Return the first character from a Mule-encoded string in STR,
528 assuming it's non-ASCII. Do not call this directly.
529 Use the macro charptr_emchar() instead. */
/* Decode the first character stored at STR.
   Two schemes are visible.  The first classifies a byte b against the
   thresholds 0xf8, 0xf0, 0xe0 and 0xc0 and then runs a loop that shifts
   6 payload bits per remaining byte into ch.  The second reads a leading
   byte and builds the character with MAKE_CHAR.
   NOTE(review): presumably the two are alternatives under a preprocessor
   conditional that is not visible in this excerpt; the statements inside
   each threshold branch are also not visible. */
532 non_ascii_charptr_emchar (const Bufbyte *str)
545 else if ( b >= 0xf8 )
550 else if ( b >= 0xf0 )
555 else if ( b >= 0xe0 )
560 else if ( b >= 0xc0 )
/* Consume the remaining bytes, 6 bits each. */
570 for( ; len > 0; len-- )
573 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte scheme: i0 is the first byte, i1 and i2 the position
   bytes (i2 stays 0 unless it is assigned on a line not visible here). */
577 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the following byte minus 0x20 is the result. */
580 if (i0 == LEADING_BYTE_CONTROL_1)
581 return (Emchar) (*++str - 0x20);
583 if (LEADING_BYTE_PREFIX_P (i0))
588 charset = CHARSET_BY_LEADING_BYTE (i0);
589 if (XCHARSET_DIMENSION (charset) == 2)
592 return MAKE_CHAR (charset, i1, i2);
596 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
597 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII character CH.
   CH is split into three fields with CHAR_FIELD1/2/3.  The visible tests
   first range-check f2 (and later f1) against the official and private
   field bounds, then look up the charset through its leading byte and
   return whether that charset has 96 characters.
   NOTE(review): most of the return statements are not visible in this
   excerpt, so the outcome of the individual tests cannot be stated. */
601 non_ascii_valid_char_p (Emchar ch)
605 /* Must have only lowest 19 bits set */
609 f1 = CHAR_FIELD1 (ch);
610 f2 = CHAR_FIELD2 (ch);
611 f3 = CHAR_FIELD3 (ch);
/* f2 must lie in the official range or in the private range. */
617 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
618 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
619 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* True when f3 is neither 0x20 nor 0x7F and f2 is not a private field. */
624 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
625 f2 <= MAX_CHAR_FIELD2_PRIVATE))
/* NOTE(review): the next three lines are prose, not code; the comment
   delimiters that should enclose them are missing from this excerpt. */
629 NOTE: This takes advantage of the fact that
630 FIELD2_TO_OFFICIAL_LEADING_BYTE and
631 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
633 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
634 if (EQ (charset, Qnil))
/* Result is true only for a charset with 96 characters. */
636 return (XCHARSET_CHARS (charset) == 96);
/* Same range check, this time on f1. */
642 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
643 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
644 f1 > MAX_CHAR_FIELD1_PRIVATE)
646 if (f2 < 0x20 || f3 < 0x20)
649 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the char-to-string hash
   table instead. */
650 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
652 if (UNBOUNDP (Fgethash (make_int (ch),
653 Vcomposite_char_char2string_hash_table,
658 #endif /* ENABLE_COMPOSITE_CHARS */
/* True when neither f2 nor f3 is 0x20 or 0x7F and f1 is not private. */
660 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
661 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private fields use different leading-byte offsets here. */
664 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
666 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
669 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
671 if (EQ (charset, Qnil))
673 return (XCHARSET_CHARS (charset) == 96);
679 /************************************************************************/
680 /* Basic string functions */
681 /************************************************************************/
683 /* Copy the character pointed to by SRC into DST. Do not call this
684 directly. Use the macro charptr_copy_char() instead.
685 Return the number of bytes copied. */
/* Copy one character from SRC to DST.  The number of bytes to copy is
   taken from the first source byte via REP_BYTES_BY_FIRST_BYTE. */
688 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
690 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Count i down from the byte count while advancing both pointers; the
   loop body is not visible in this excerpt. */
692 for (i = bytes; i; i--, dst++, src++)
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
/* Finish reading one character from STREAM given its first byte CH.
   CH is stored as the first byte of a local buffer, the remaining bytes
   (count derived from CH via REP_BYTES_BY_FIRST_BYTE) are read with
   Lstream_getc, and the buffer is decoded with charptr_emchar. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
713 str[0] = (Bufbyte) ch;
/* One byte is already in the buffer, hence the minus one. */
715 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
717 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc is only caught by this assertion. */
718 bufpos_checking_assert (c >= 0);
719 *++strptr = (Bufbyte) c;
721 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar and pass the resulting bytes to Lstream_write,
   whose result is returned. */
725 Lstream_fput_emchar (Lstream *stream, Emchar ch)
727 Bufbyte str[MAX_EMCHAR_LEN];
728 Bytecount len = set_charptr_emchar (str, ch);
729 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer with
   set_charptr_emchar and hand the bytes to Lstream_unread. */
733 Lstream_funget_emchar (Lstream *stream, Emchar ch)
735 Bufbyte str[MAX_EMCHAR_LEN];
736 Bytecount len = set_charptr_emchar (str, ch);
737 Lstream_unread (stream, str, len);
741 /************************************************************************/
743 /************************************************************************/
/* Mark method for charset objects: calls mark_object on the Lisp-valued
   slots of the charset held in OBJ.
   NOTE(review): the return statement, and any slots marked on lines
   missing from this excerpt, are not visible here. */
746 mark_charset (Lisp_Object obj)
748 Lisp_Charset *cs = XCHARSET (obj);
750 mark_object (cs->short_name);
751 mark_object (cs->long_name);
752 mark_object (cs->doc_string);
753 mark_object (cs->registry);
754 mark_object (cs->ccl_program);
756 mark_object (cs->decoding_table);
757 mark_object (cs->mother);
/* Print method for charset objects.  Emits, in order: the prefix
   "#<charset ", the name, short name, long name and doc string, a
   formatted summary (dimension, direction, columns, graphic, final),
   the registry, and finally the object uid in hex followed by ">".
   NOTE(review): `buf` is filled with sprintf; its declaration and size
   are not visible in this excerpt -- confirm it is large enough. */
763 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
765 Lisp_Charset *cs = XCHARSET (obj);
/* Error path for unreadable objects; the guarding test is not visible
   here. */
769 error ("printing unreadable object #<charset %s 0x%x>",
770 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
773 write_c_string ("#<charset ", printcharfun);
/* The name is printed with a last argument of 0, the three strings
   below with 1. */
774 print_internal (CHARSET_NAME (cs), printcharfun, 0);
775 write_c_string (" ", printcharfun);
776 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
777 write_c_string (" ", printcharfun);
778 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
779 write_c_string (" ", printcharfun);
780 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): the format has two %d fields before %s but only one
   matching argument is visible before the direction string; the first
   argument is presumably on a line missing from this excerpt. */
781 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
783 CHARSET_DIMENSION (cs),
784 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
785 CHARSET_COLUMNS (cs),
786 CHARSET_GRAPHIC (cs),
788 write_c_string (buf, printcharfun);
789 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
790 sprintf (buf, " 0x%x>", cs->header.uid);
791 write_c_string (buf, printcharfun);
/* Record description for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object slot, located by its offset within the structure.
   NOTE(review): the terminating entry and closing brace are not visible
   in this excerpt. */
794 static const struct lrecord_description charset_description[] = {
795 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
796 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
797 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
803 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
809 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
810 mark_charset, print_charset, 0, 0, 0,
814 /* Make a new charset. */
815 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, fill its slots from the arguments and
   register it in the lookup structures.
   Registration, as far as visible below:
   - an iso2022_type index is computed as (dimension == 1 ? 0 : 2) plus
     (chars == 94 ? 0 : 1) and used, together with FINAL (and in one
     visible variant DIRECTION), to store the object in
     chlook->charset_by_attributes after asserting the slot is nil;
   - the object is stored in chlook->charset_by_leading_byte at index
     id - MIN_LEADING_BYTE, again after asserting the slot is nil;
   - the object is entered in Vcharset_hash_table under NAME.
   NOTE(review): the conditions guarding several of these steps, the
   declaration of `obj` and `reg`, and the return statement are not
   visible in this excerpt. */
817 make_charset (Charset_ID id, Lisp_Object name,
818 unsigned short chars, unsigned char dimension,
819 unsigned char columns, unsigned char graphic,
820 Bufbyte final, unsigned char direction, Lisp_Object short_name,
821 Lisp_Object long_name, Lisp_Object doc,
823 Lisp_Object decoding_table,
824 Emchar min_code, Emchar max_code,
825 Emchar code_offset, unsigned char byte_offset,
826 Lisp_Object mother, unsigned char conversion)
829 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
/* Wrap the raw record pointer in the Lisp object obj. */
833 XSETCHARSET (obj, cs);
835 CHARSET_ID (cs) = id;
836 CHARSET_NAME (cs) = name;
837 CHARSET_SHORT_NAME (cs) = short_name;
838 CHARSET_LONG_NAME (cs) = long_name;
839 CHARSET_CHARS (cs) = chars;
840 CHARSET_DIMENSION (cs) = dimension;
841 CHARSET_DIRECTION (cs) = direction;
842 CHARSET_COLUMNS (cs) = columns;
843 CHARSET_GRAPHIC (cs) = graphic;
844 CHARSET_FINAL (cs) = final;
845 CHARSET_DOC_STRING (cs) = doc;
846 CHARSET_REGISTRY (cs) = reg;
/* No CCL program and no reverse-direction partner initially. */
847 CHARSET_CCL_PROGRAM (cs) = Qnil;
848 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the slot is set to Qunbound here; the DECODING_TABLE
   argument is not stored on any line visible in this excerpt. */
850 CHARSET_DECODING_TABLE(cs) = Qunbound;
851 CHARSET_MIN_CODE (cs) = min_code;
852 CHARSET_MAX_CODE (cs) = max_code;
853 CHARSET_CODE_OFFSET (cs) = code_offset;
854 CHARSET_BYTE_OFFSET (cs) = byte_offset;
855 CHARSET_MOTHER (cs) = mother;
856 CHARSET_CONVERSION (cs) = conversion;
/* Representation length: 1 for ASCII, otherwise dimension + 1 or
   dimension + 2 (the test choosing between the latter two is not
   visible here). */
860 if (id == LEADING_BYTE_ASCII)
861 CHARSET_REP_BYTES (cs) = 1;
863 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
865 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
870 /* some charsets do not have final characters. This includes
871 ASCII, Control-1, Composite, and the two faux private
873 unsigned char iso2022_type
874 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
876 if (code_offset == 0)
878 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
879 chlook->charset_by_attributes[iso2022_type][final] = obj;
883 (chlook->charset_by_attributes[iso2022_type][final][direction]));
884 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
888 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
889 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
891 /* Some charsets are "faux" and don't have names or really exist at
892 all except in the leading-byte table. */
/* Make the charset findable by name. */
894 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of DIMENSION.
   Each visible branch compares a counter in chlook against its upper
   limit and otherwise takes the counter's value and post-increments it.
   When nothing is available an error is raised with the message
   "No more character sets free for this dimension" and DIMENSION.
   NOTE(review): one branch uses a single counter, the other two use
   separate 1-byte and 2-byte counters; presumably these belong to
   different build configurations whose conditionals are not visible. */
899 get_unallocated_leading_byte (int dimension)
904 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
907 lb = chlook->next_allocated_leading_byte++;
911 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
914 lb = chlook->next_allocated_1_byte_leading_byte++;
918 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
921 lb = chlook->next_allocated_2_byte_leading_byte++;
927 ("No more character sets free for this dimension",
928 make_int (dimension));
934 /* Number of Big5 characters which have the same code in 1st byte. */
936 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Convert CODE_POINT of a derived charset into the code used by its
   parent, according to CONV_TYPE.
   - CONVERSION_94x60 / CONVERSION_94x94x60: the row octet is mapped onto
     a 60-row space in two bands of 30 rows, 94 cells per row.
   - CONVERSION_BIG5_1 / CONVERSION_BIG5_2: a 94x94 position is turned
     into a linear index I and then into a two-byte code b1/b2.
   NOTE(review): the results for CONVERSION_IDENTICAL, for rows outside
   the two bands, and for unknown types are not visible in this excerpt. */
939 decode_ccs_conversion (int conv_type, int code_point)
941 if ( conv_type == CONVERSION_IDENTICAL )
945 if ( conv_type == CONVERSION_94x60 )
947 int row = code_point >> 8;
948 int cell = code_point & 255;
/* First band: rows 48..77 become linear rows 0..29. */
952 else if (row < 16 + 32 + 30)
953 return (row - (16 + 32)) * 94 + cell - 33;
/* Rows 78 and 79 fall in the gap between the two bands. */
954 else if (row < 18 + 32 + 30)
/* Second band: rows 80..109 become linear rows 30..59. */
956 else if (row < 18 + 32 + 60)
957 return (row - (18 + 32)) * 94 + cell - 33;
959 else if ( conv_type == CONVERSION_94x94x60 )
961 int plane = code_point >> 16;
962 int row = (code_point >> 8) & 255;
963 int cell = code_point & 255;
/* Same two bands as above, with each plane contributing 94 * 60. */
967 else if (row < 16 + 32 + 30)
969 (plane - 33) * 94 * 60
970 + (row - (16 + 32)) * 94
972 else if (row < 18 + 32 + 30)
974 else if (row < 18 + 32 + 60)
976 (plane - 33) * 94 * 60
977 + (row - (18 + 32)) * 94
980 else if ( conv_type == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 position (both octets masked to 7 bits). */
983 = (((code_point >> 8) & 0x7F) - 33) * 94
984 + (( code_point & 0x7F) - 33);
/* The divisor is the same expression that BIG5_SAME_ROW expands to,
   written out in full. */
985 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
986 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets below 0x3F land at 0x40 and up; the rest land at 0xA1 and up. */
988 b2 += b2 < 0x3F ? 0x40 : 0x62;
989 return (b1 << 8) | b2;
991 else if ( conv_type == CONVERSION_BIG5_2 )
/* As for BIG5_1, but the index starts BIG5_SAME_ROW * (0xC9 - 0xA1)
   positions further on. */
994 = (((code_point >> 8) & 0x7F) - 33) * 94
995 + (( code_point & 0x7F) - 33)
996 + BIG5_SAME_ROW * (0xC9 - 0xA1);
997 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
998 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1000 b2 += b2 < 0x3F ? 0x40 : 0x62;
1001 return (b1 << 8) | b2;
/* Look up the character explicitly defined for CODE_POINT in CCS.
   The decoding table is descended one octet of CODE_POINT at a time; a
   character found there is returned directly.  Otherwise, unless
   WITHOUT_INHERITANCE is set, the code is converted with
   decode_ccs_conversion, shifted by the charset code offset and looked
   up in the mother charset.
   NOTE(review): char_id starts at -1; the final return and the loop
   around the table descent are not visible in this excerpt. */
1007 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1009 int dim = XCHARSET_DIMENSION (ccs);
1010 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1011 Emchar char_id = -1;
/* Step down one table level using the octet selected by dim. */
1018 = get_ccs_octet_table (decoding_table, ccs,
1019 (code_point >> (dim * 8)) & 255);
1021 if (CHARP (decoding_table))
1022 return XCHAR (decoding_table);
/* An entry marked Qunloaded is fetched on demand. */
1024 if (EQ (decoding_table, Qunloaded))
1026 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1028 #endif /* HAVE_CHISE */
/* Inheritance path: only when allowed and a mother charset exists. */
1031 else if ( !without_inheritance
1032 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1035 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1039 code += XCHARSET_CODE_OFFSET(ccs);
/* A mother of Vcharset_ucs goes through DECODE_CHAR; any other mother
   is handled by recursion. */
1040 if ( EQ (mother, Vcharset_ucs) )
1041 return DECODE_CHAR (mother, code, without_inheritance);
1043 return decode_defined_char (mother, code,
1044 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   numeric parameters rather than from a table.
   - With a positive max code: either delegate to the mother charset
     (after decode_ccs_conversion and adding the code offset) or compute
     a linear id from the byte offset, chars-per-dimension and code
     offset, and range-check it against min/max code.
   - Otherwise, with a final byte of at least '0': compute an id from the
     final byte and the 7-bit position octets, for 94/96 and 94x94/96x96
     charsets.
   NOTE(review): several return statements, the case labels and the
   names of the one-dimensional base constants are not visible in this
   excerpt. */
1051 decode_builtin_char (Lisp_Object charset, int code_point)
1053 Lisp_Object mother = XCHARSET_MOTHER (charset);
1056 if ( XCHARSET_MAX_CODE (charset) > 0 )
1058 if ( CHARSETP (mother) )
1061 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1066 decode_builtin_char (mother,
1067 code + XCHARSET_CODE_OFFSET(charset));
/* Linear id: one-dimensional charsets use the code point minus the
   byte offset; two-dimensional ones combine both octets. */
1074 = (XCHARSET_DIMENSION (charset) == 1
1076 code_point - XCHARSET_BYTE_OFFSET (charset)
1078 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1079 * XCHARSET_CHARS (charset)
1080 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1081 + XCHARSET_CODE_OFFSET (charset);
/* Ids outside [min code, max code] are treated separately. */
1082 if ((cid < XCHARSET_MIN_CODE (charset))
1083 || (XCHARSET_MAX_CODE (charset) < cid))
1088 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1090 if (XCHARSET_DIMENSION (charset) == 1)
1092 switch (XCHARSET_CHARS (charset))
/* 94-char set: positions start at 33; 96-char set: positions start at 32. */
1096 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1099 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1107 switch (XCHARSET_CHARS (charset))
1110 return MIN_CHAR_94x94
1111 + (final - '0') * 94 * 94
1112 + (((code_point >> 8) & 0x7F) - 33) * 94
1113 + ((code_point & 0x7F) - 33);
1115 return MIN_CHAR_96x96
1116 + (final - '0') * 96 * 96
1117 + (((code_point >> 8) & 0x7F) - 32) * 96
1118 + ((code_point & 0x7F) - 32);
/* Return the code point of character CH in CHARSET.
   Order of attempts, as far as visible:
   1. the charset encoding table, when it is a char-table holding an
      integer for CH;
   2. a code obtained from the mother charset (or CH itself when it lies
      in [min, max]), reduced by the code offset to d and then laid out
      according to the charset conversion type;
   3. for charsets with a final byte of at least '0', an offset from the
      built-in 94/96/94x94/96x96 ranges.
   DEFINED_ONLY suppresses the computed fallbacks.
   NOTE(review): the values returned on failure and several guarding
   tests are not visible in this excerpt. */
1130 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1132 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1135 if ( CHAR_TABLEP (encoding_table)
1136 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1141 Lisp_Object mother = XCHARSET_MOTHER (charset);
1142 int min = XCHARSET_MIN_CODE (charset);
1143 int max = XCHARSET_MAX_CODE (charset);
1146 if ( CHARSETP (mother) )
/* With a final byte the mother is asked with defined_only forced to 1;
   otherwise the caller's flag is passed through. */
1148 if (XCHARSET_FINAL (charset) >= '0')
1149 code = charset_code_point (mother, ch, 1);
1151 code = charset_code_point (mother, ch, defined_only);
1153 else if (defined_only)
1155 else if ( ((max == 0) && CHARSETP (mother)
1156 && (XCHARSET_FINAL (charset) == 0))
1157 || ((min <= ch) && (ch <= max)) )
/* Proceed only when the code is usable: non-negative with an unbounded
   mother-based charset, or inside [min, max]. */
1159 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1160 || ((min <= code) && (code <= max)) )
/* d is the zero-based position within this charset. */
1162 int d = code - XCHARSET_CODE_OFFSET (charset);
1164 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1166 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1168 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1170 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1173 int cell = d % 94 + 33;
1179 return (row << 8) | cell;
1181 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Split the two-byte code, then undo the second-byte offsets
   (0x40 for bytes below 0x7F, 0x62 for the rest) to get a linear index. */
1183 int B1 = d >> 8, B2 = d & 0xFF;
1185 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1186 - (B2 < 0x7F ? 0x40 : 0x62);
/* Re-express the index as a 94x94 row/cell pair starting at 33. */
1190 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1193 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1195 int B1 = d >> 8, B2 = d & 0xFF;
1197 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1198 - (B2 < 0x7F ? 0x40 : 0x62);
/* The second Big5 part starts at first byte 0xC9. */
1202 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1203 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1206 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1207 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1208 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1209 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1210 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1212 int plane = d / (94 * 60) + 33;
1213 int row = (d % (94 * 60)) / 94;
1214 int cell = d % 94 + 33;
1220 return (plane << 16) | (row << 8) | cell;
/* The remaining layouts place one base-94 or base-96 digit per octet,
   most significant digit in the highest octet. */
1222 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1224 ( (d / (94 * 94) + 33) << 16)
1225 | ((d / 94 % 94 + 33) << 8)
1227 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1229 ( (d / (96 * 96) + 32) << 16)
1230 | ((d / 96 % 96 + 32) << 8)
1232 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1234 ( (d / (94 * 94 * 94) + 33) << 24)
1235 | ((d / (94 * 94) % 94 + 33) << 16)
1236 | ((d / 94 % 94 + 33) << 8)
1238 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1240 ( (d / (96 * 96 * 96) + 32) << 24)
1241 | ((d / (96 * 96) % 96 + 32) << 16)
1242 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): an unknown conversion is reported with a bare printf
   and no trailing newline; consider the file's normal error channel. */
1246 printf ("Unknown CCS-conversion %d is specified!",
1247 XCHARSET_CONVERSION (charset));
1251 else if (defined_only)
/* Built-in ranges: only for charsets with a final byte, a min code of 0
   and a code offset that is 0 or equal to the min code. */
1253 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1254 ( XCHARSET_MIN_CODE (charset) == 0 )
1256 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1257 (XCHARSET_CODE_OFFSET (charset)
1258 == XCHARSET_MIN_CODE (charset))
1263 if (XCHARSET_DIMENSION (charset) == 1)
1265 if (XCHARSET_CHARS (charset) == 94)
/* d is the distance of CH from the start of this charset's slice. */
1267 if (((d = ch - (MIN_CHAR_94
1268 + (XCHARSET_FINAL (charset) - '0') * 94))
1273 else if (XCHARSET_CHARS (charset) == 96)
1275 if (((d = ch - (MIN_CHAR_96
1276 + (XCHARSET_FINAL (charset) - '0') * 96))
1284 else if (XCHARSET_DIMENSION (charset) == 2)
1286 if (XCHARSET_CHARS (charset) == 94)
1288 if (((d = ch - (MIN_CHAR_94x94
1290 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1293 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1295 else if (XCHARSET_CHARS (charset) == 96)
1297 if (((d = ch - (MIN_CHAR_96x96
1299 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1302 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Find a charset and code point for character CH.
   Vdefault_coded_charset_priority_list is walked in order; each entry is
   resolved with Ffind_charset into *CHARSET and, when it names a charset
   of dimension at most 2, asked for a code point.  A non-negative code
   point is taken as success.  Depending on the two
   Vdisplay_coded_charset_priority_* flags, child charsets are searched
   either inside the first pass or in a second pass over the same list.
   If nothing matches, encode_builtin_char_1 supplies the answer.
   NOTE(review): the statements executed on success are not visible in
   this excerpt. */
1313 encode_char_2 (Emchar ch, Lisp_Object* charset)
1315 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
/* First pass over the priority list. */
1318 while (!NILP (charsets))
1320 *charset = Ffind_charset (Fcar (charsets));
1321 if ( !NILP (*charset)
1322 && (XCHARSET_DIMENSION (*charset) <= 2) )
1324 code_point = charset_code_point (*charset, ch, 0);
1325 if (code_point >= 0)
/* Inheritance enabled without hierarchy ordering: search the children
   right away. */
1328 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1329 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1331 code_point = encode_char_2_search_children (ch, charset);
1332 if (code_point >= 0)
1336 charsets = Fcdr (charsets);
/* Inheritance enabled with hierarchy ordering: children are searched in
   a separate second pass. */
1339 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1340 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1342 charsets = Vdefault_coded_charset_priority_list;
1343 while (!NILP (charsets))
1345 *charset = Ffind_charset (Fcar (charsets));
1346 if ( !NILP (*charset)
1347 && (XCHARSET_DIMENSION (*charset) <= 2) )
1349 code_point = encode_char_2_search_children (ch, charset);
1350 if (code_point >= 0)
1353 charsets = Fcdr (charsets);
1357 /* otherwise --- maybe for bootstrap */
1358 return encode_builtin_char_1 (ch, charset);
/* Choose a charset for character C purely from its numeric range, store
   it in *CHARSET and return the code point within that charset.
   The ranges are tested in ascending order.  For the 94, 96, 94x94 and
   96x96 ranges the charset is looked up by its attributes (size,
   dimension, final byte derived from the position, left-to-right); when
   no such charset exists *CHARSET falls back to Vcharset_ucs.
   NOTE(review): many return statements and some range tests are not
   visible in this excerpt. */
1362 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1364 if (c <= MAX_CHAR_BASIC_LATIN)
1366 *charset = Vcharset_ascii;
1371 *charset = Vcharset_control_1;
1376 *charset = Vcharset_latin_iso8859_1;
1380 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1382 *charset = Vcharset_hebrew_iso8859_8;
1383 return c - MIN_CHAR_HEBREW + 0x20;
1386 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1388 *charset = Vcharset_thai_tis620;
1389 return c - MIN_CHAR_THAI + 0x20;
1392 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1393 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
/* NOTE(review): this branch returns a Lisp list instead of setting
   *CHARSET and returning an integer like its neighbours.  It is likely
   disabled code whose preprocessor guard is not visible -- confirm. */
1395 return list2 (Vcharset_katakana_jisx0201,
1396 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1399 else if (c <= MAX_CHAR_BMP)
1401 *charset = Vcharset_ucs_bmp;
1404 else if (c <= MAX_CHAR_SMP)
1406 *charset = Vcharset_ucs_smp;
1407 return c - MIN_CHAR_SMP;
1409 else if (c <= MAX_CHAR_SIP)
1411 *charset = Vcharset_ucs_sip;
1412 return c - MIN_CHAR_SIP;
1414 else if (c < MIN_CHAR_94)
1416 *charset = Vcharset_ucs;
/* One-dimensional 94-character sets: the quotient picks the final byte,
   the remainder plus 33 is the code point. */
1419 else if (c <= MAX_CHAR_94)
1421 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1422 ((c - MIN_CHAR_94) / 94) + '0',
1423 CHARSET_LEFT_TO_RIGHT);
1424 if (!NILP (*charset))
1425 return ((c - MIN_CHAR_94) % 94) + 33;
1428 *charset = Vcharset_ucs;
/* One-dimensional 96-character sets: code points start at 32. */
1432 else if (c <= MAX_CHAR_96)
1434 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1435 ((c - MIN_CHAR_96) / 96) + '0',
1436 CHARSET_LEFT_TO_RIGHT);
1437 if (!NILP (*charset))
1438 return ((c - MIN_CHAR_96) % 96) + 32;
1441 *charset = Vcharset_ucs;
/* Two-dimensional 94x94 sets: row in the high octet, cell in the low. */
1445 else if (c <= MAX_CHAR_94x94)
1448 = CHARSET_BY_ATTRIBUTES (94, 2,
1449 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1450 CHARSET_LEFT_TO_RIGHT);
1451 if (!NILP (*charset))
1452 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1453 | (((c - MIN_CHAR_94x94) % 94) + 33);
1456 *charset = Vcharset_ucs;
/* Two-dimensional 96x96 sets. */
1460 else if (c <= MAX_CHAR_96x96)
1463 = CHARSET_BY_ATTRIBUTES (96, 2,
1464 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1465 CHARSET_LEFT_TO_RIGHT);
1466 if (!NILP (*charset))
1467 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1468 | (((c - MIN_CHAR_96x96) % 96) + 32);
1471 *charset = Vcharset_ucs;
/* Everything else is attributed to Vcharset_ucs. */
1477 *charset = Vcharset_ucs;
1482 Lisp_Object Vdefault_coded_charset_priority_list;
1483 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1484 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1488 /************************************************************************/
1489 /* Basic charset Lisp functions */
1490 /************************************************************************/
1492 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1493 Return non-nil if OBJECT is a charset.
1497 return CHARSETP (object) ? Qt : Qnil;
1500 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1501 Retrieve the charset of the given name.
1502 If CHARSET-OR-NAME is a charset object, it is simply returned.
1503 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1504 nil is returned. Otherwise the associated charset object is returned.
1508 if (CHARSETP (charset_or_name))
1509 return charset_or_name;
1511 CHECK_SYMBOL (charset_or_name);
1512 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1515 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1516 Retrieve the charset of the given name.
1517 Same as `find-charset' except an error is signalled if there is no such
1518 charset instead of returning nil.
1522 Lisp_Object charset = Ffind_charset (name);
1525 signal_simple_error ("No such charset", name);
1529 /* We store the charsets in hash tables with the names as the key and the
1530 actual charset object as the value. Occasionally we need to use them
1531 in a list format. These routines provide us with that. */
1532 struct charset_list_closure
1534 Lisp_Object *charset_list;
1538 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1539 void *charset_list_closure)
1541 /* This function can GC */
1542 struct charset_list_closure *chcl =
1543 (struct charset_list_closure*) charset_list_closure;
1544 Lisp_Object *charset_list = chcl->charset_list;
1546 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1550 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1551 Return a list of the names of all defined charsets.
1555 Lisp_Object charset_list = Qnil;
1556 struct gcpro gcpro1;
1557 struct charset_list_closure charset_list_closure;
1559 GCPRO1 (charset_list);
1560 charset_list_closure.charset_list = &charset_list;
1561 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1562 &charset_list_closure);
1565 return charset_list;
1568 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1569 Return the name of charset CHARSET.
1573 return XCHARSET_NAME (Fget_charset (charset));
1576 /* #### SJT Should generic properties be allowed? */
1577 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1578 Define a new character set.
1579 This function is for use with Mule support.
1580 NAME is a symbol, the name by which the character set is normally referred.
1581 DOC-STRING is a string describing the character set.
1582 PROPS is a property list, describing the specific nature of the
1583 character set. Recognized properties are:
1585 'short-name Short version of the charset name (ex: Latin-1)
1586 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1587 'registry A regular expression matching the font registry field for
1589 'dimension Number of octets used to index a character in this charset.
1590 Either 1 or 2. Defaults to 1.
1591 If UTF-2000 feature is enabled, 3 or 4 are also available.
1592 'columns Number of columns used to display a character in this charset.
1593 Only used in TTY mode. (Under X, the actual width of a
1594 character can be derived from the font used to display the
1595 characters.) If unspecified, defaults to the dimension
1596 (this is almost always the correct value).
1597 'chars Number of characters in each dimension (94 or 96).
1598 Defaults to 94. Note that if the dimension is 2, the
1599 character set thus described is 94x94 or 96x96.
1600 If UTF-2000 feature is enabled, 128 or 256 are also available.
1601 'final Final byte of ISO 2022 escape sequence. Must be
1602 supplied. Each combination of (DIMENSION, CHARS) defines a
1603 separate namespace for final bytes. Note that ISO
1604 2022 restricts the final byte to the range
1605 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1606 dimension == 2. Note also that final bytes in the range
1607 0x30 - 0x3F are reserved for user-defined (not official)
1609 'graphic 0 (use left half of font on output) or 1 (use right half
1610 of font on output). Defaults to 0. For example, for
1611 a font whose registry is ISO8859-1, the left half
1612 (octets 0x20 - 0x7F) is the `ascii' character set, while
1613 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1614 character set. With 'graphic set to 0, the octets
1615 will have their high bit cleared; with it set to 1,
1616 the octets will have their high bit set.
1617 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1619 'ccl-program A compiled CCL program used to convert a character in
1620 this charset into an index into the font. This is in
1621 addition to the 'graphic property. The CCL program
1622 is passed the octets of the character, with the high
1623 bit cleared and set depending upon whether the value
1624 of the 'graphic property is 0 or 1.
1625 'mother [UTF-2000 only] Base coded-charset.
1626 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1627 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1628 'code-offset [UTF-2000 only] Offset for a code-point of a base
1630 'conversion [UTF-2000 only] Conversion for a code-point of a base
1631 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1633 (name, doc_string, props))
1635 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1636 int direction = CHARSET_LEFT_TO_RIGHT;
1637 Lisp_Object registry = Qnil;
1638 Lisp_Object charset;
1639 Lisp_Object ccl_program = Qnil;
1640 Lisp_Object short_name = Qnil, long_name = Qnil;
1641 Lisp_Object mother = Qnil;
1642 int min_code = 0, max_code = 0, code_offset = 0;
1643 int byte_offset = -1;
1646 CHECK_SYMBOL (name);
1647 if (!NILP (doc_string))
1648 CHECK_STRING (doc_string);
1650 charset = Ffind_charset (name);
1651 if (!NILP (charset))
1652 signal_simple_error ("Cannot redefine existing charset", name);
1655 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1657 if (EQ (keyword, Qshort_name))
1659 CHECK_STRING (value);
1663 else if (EQ (keyword, Qlong_name))
1665 CHECK_STRING (value);
1669 else if (EQ (keyword, Qiso_ir))
1673 id = - XINT (value);
1677 else if (EQ (keyword, Qdimension))
1680 dimension = XINT (value);
1681 if (dimension < 1 ||
1688 signal_simple_error ("Invalid value for 'dimension", value);
1691 else if (EQ (keyword, Qchars))
1694 chars = XINT (value);
1695 if (chars != 94 && chars != 96
1697 && chars != 128 && chars != 256
1700 signal_simple_error ("Invalid value for 'chars", value);
1703 else if (EQ (keyword, Qcolumns))
1706 columns = XINT (value);
1707 if (columns != 1 && columns != 2)
1708 signal_simple_error ("Invalid value for 'columns", value);
1711 else if (EQ (keyword, Qgraphic))
1714 graphic = XINT (value);
1722 signal_simple_error ("Invalid value for 'graphic", value);
1725 else if (EQ (keyword, Qregistry))
1727 CHECK_STRING (value);
1731 else if (EQ (keyword, Qdirection))
1733 if (EQ (value, Ql2r))
1734 direction = CHARSET_LEFT_TO_RIGHT;
1735 else if (EQ (value, Qr2l))
1736 direction = CHARSET_RIGHT_TO_LEFT;
1738 signal_simple_error ("Invalid value for 'direction", value);
1741 else if (EQ (keyword, Qfinal))
1743 CHECK_CHAR_COERCE_INT (value);
1744 final = XCHAR (value);
1745 if (final < '0' || final > '~')
1746 signal_simple_error ("Invalid value for 'final", value);
1750 else if (EQ (keyword, Qmother))
1752 mother = Fget_charset (value);
1755 else if (EQ (keyword, Qmin_code))
1758 min_code = XUINT (value);
1761 else if (EQ (keyword, Qmax_code))
1764 max_code = XUINT (value);
1767 else if (EQ (keyword, Qcode_offset))
1770 code_offset = XUINT (value);
1773 else if (EQ (keyword, Qconversion))
1775 if (EQ (value, Q94x60))
1776 conversion = CONVERSION_94x60;
1777 else if (EQ (value, Q94x94x60))
1778 conversion = CONVERSION_94x94x60;
1779 else if (EQ (value, Qbig5_1))
1780 conversion = CONVERSION_BIG5_1;
1781 else if (EQ (value, Qbig5_2))
1782 conversion = CONVERSION_BIG5_2;
1784 signal_simple_error ("Unrecognized conversion", value);
1788 else if (EQ (keyword, Qccl_program))
1790 struct ccl_program test_ccl;
1792 if (setup_ccl_program (&test_ccl, value) < 0)
1793 signal_simple_error ("Invalid value for 'ccl-program", value);
1794 ccl_program = value;
1798 signal_simple_error ("Unrecognized property", keyword);
1804 error ("'final must be specified");
1806 if (dimension == 2 && final > 0x5F)
1808 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1811 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1812 CHARSET_LEFT_TO_RIGHT)) ||
1813 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1814 CHARSET_RIGHT_TO_LEFT)))
1816 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1819 id = get_unallocated_leading_byte (dimension);
1821 if (NILP (doc_string))
1822 doc_string = build_string ("");
1824 if (NILP (registry))
1825 registry = build_string ("");
1827 if (NILP (short_name))
1828 XSETSTRING (short_name, XSYMBOL (name)->name);
1830 if (NILP (long_name))
1831 long_name = doc_string;
1834 columns = dimension;
1836 if (byte_offset < 0)
1840 else if (chars == 96)
1846 charset = make_charset (id, name, chars, dimension, columns, graphic,
1847 final, direction, short_name, long_name,
1848 doc_string, registry,
1849 Qnil, min_code, max_code, code_offset, byte_offset,
1850 mother, conversion);
1851 if (!NILP (ccl_program))
1852 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1856 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1858 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1859 NEW-NAME is the name of the new charset. Return the new charset.
1861 (charset, new_name))
1863 Lisp_Object new_charset = Qnil;
1864 int id, chars, dimension, columns, graphic, final;
1866 Lisp_Object registry, doc_string, short_name, long_name;
1869 charset = Fget_charset (charset);
1870 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1871 signal_simple_error ("Charset already has reverse-direction charset",
1874 CHECK_SYMBOL (new_name);
1875 if (!NILP (Ffind_charset (new_name)))
1876 signal_simple_error ("Cannot redefine existing charset", new_name);
1878 cs = XCHARSET (charset);
1880 chars = CHARSET_CHARS (cs);
1881 dimension = CHARSET_DIMENSION (cs);
1882 columns = CHARSET_COLUMNS (cs);
1883 id = get_unallocated_leading_byte (dimension);
1885 graphic = CHARSET_GRAPHIC (cs);
1886 final = CHARSET_FINAL (cs);
1887 direction = CHARSET_RIGHT_TO_LEFT;
1888 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1889 direction = CHARSET_LEFT_TO_RIGHT;
1890 doc_string = CHARSET_DOC_STRING (cs);
1891 short_name = CHARSET_SHORT_NAME (cs);
1892 long_name = CHARSET_LONG_NAME (cs);
1893 registry = CHARSET_REGISTRY (cs);
1895 new_charset = make_charset (id, new_name, chars, dimension, columns,
1896 graphic, final, direction, short_name, long_name,
1897 doc_string, registry,
1899 CHARSET_DECODING_TABLE(cs),
1900 CHARSET_MIN_CODE(cs),
1901 CHARSET_MAX_CODE(cs),
1902 CHARSET_CODE_OFFSET(cs),
1903 CHARSET_BYTE_OFFSET(cs),
1905 CHARSET_CONVERSION (cs)
1907 Qnil, 0, 0, 0, 0, Qnil, 0
1911 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1912 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1917 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1918 Define symbol ALIAS as an alias for CHARSET.
1922 CHECK_SYMBOL (alias);
1923 charset = Fget_charset (charset);
1924 return Fputhash (alias, charset, Vcharset_hash_table);
1927 /* #### Reverse direction charsets not yet implemented. */
1929 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1931 Return the reverse-direction charset parallel to CHARSET, if any.
1932 This is the charset with the same properties (in particular, the same
1933 dimension, number of characters per dimension, and final byte) as
1934 CHARSET but whose characters are displayed in the opposite direction.
1938 charset = Fget_charset (charset);
1939 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1943 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1944 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1945 If DIRECTION is omitted, both directions will be checked (left-to-right
1946 will be returned if character sets exist for both directions).
1948 (dimension, chars, final, direction))
1950 int dm, ch, fi, di = -1;
1951 Lisp_Object obj = Qnil;
1953 CHECK_INT (dimension);
1954 dm = XINT (dimension);
1955 if (dm < 1 || dm > 2)
1956 signal_simple_error ("Invalid value for DIMENSION", dimension);
1960 if (ch != 94 && ch != 96)
1961 signal_simple_error ("Invalid value for CHARS", chars);
1963 CHECK_CHAR_COERCE_INT (final);
1965 if (fi < '0' || fi > '~')
1966 signal_simple_error ("Invalid value for FINAL", final);
1968 if (EQ (direction, Ql2r))
1969 di = CHARSET_LEFT_TO_RIGHT;
1970 else if (EQ (direction, Qr2l))
1971 di = CHARSET_RIGHT_TO_LEFT;
1972 else if (!NILP (direction))
1973 signal_simple_error ("Invalid value for DIRECTION", direction);
1975 if (dm == 2 && fi > 0x5F)
1977 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1981 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1983 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1986 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1989 return XCHARSET_NAME (obj);
1993 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1994 Return short name of CHARSET.
1998 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2001 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2002 Return long name of CHARSET.
2006 return XCHARSET_LONG_NAME (Fget_charset (charset));
2009 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2010 Return description of CHARSET.
2014 return XCHARSET_DOC_STRING (Fget_charset (charset));
2017 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2018 Return dimension of CHARSET.
2022 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2025 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2026 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2027 Recognized properties are those listed in `make-charset', as well as
2028 'name and 'doc-string.
2034 charset = Fget_charset (charset);
2035 cs = XCHARSET (charset);
2037 CHECK_SYMBOL (prop);
2038 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2039 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2040 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2041 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2042 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2043 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2044 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2045 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2046 Qnil : make_char (CHARSET_FINAL (cs));
2047 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2048 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2049 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2050 if (EQ (prop, Qdirection))
2051 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2052 if (EQ (prop, Qreverse_direction_charset))
2054 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2055 /* #### Is this translation OK? If so, error checking sufficient? */
2056 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2059 if (EQ (prop, Qmother))
2060 return CHARSET_MOTHER (cs);
2061 if (EQ (prop, Qmin_code))
2062 return make_int (CHARSET_MIN_CODE (cs));
2063 if (EQ (prop, Qmax_code))
2064 return make_int (CHARSET_MAX_CODE (cs));
2066 signal_simple_error ("Unrecognized charset property name", prop);
2067 return Qnil; /* not reached */
2070 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2071 Return charset identification number of CHARSET.
2075 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2078 /* #### We need to figure out which properties we really want to
2081 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2082 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2084 (charset, ccl_program))
2086 struct ccl_program test_ccl;
2088 charset = Fget_charset (charset);
2089 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2090 signal_simple_error ("Invalid ccl-program", ccl_program);
2091 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2096 invalidate_charset_font_caches (Lisp_Object charset)
2098 /* Invalidate font cache entries for charset on all devices. */
2099 Lisp_Object devcons, concons, hash_table;
2100 DEVICE_LOOP_NO_BREAK (devcons, concons)
2102 struct device *d = XDEVICE (XCAR (devcons));
2103 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2104 if (!UNBOUNDP (hash_table))
2105 Fclrhash (hash_table);
2109 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2110 Set the 'registry property of CHARSET to REGISTRY.
2112 (charset, registry))
2114 charset = Fget_charset (charset);
2115 CHECK_STRING (registry);
2116 XCHARSET_REGISTRY (charset) = registry;
2117 invalidate_charset_font_caches (charset);
2118 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2123 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2124 Return mapping-table of CHARSET.
2128 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2131 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2132 Set mapping-table of CHARSET to TABLE.
2136 struct Lisp_Charset *cs;
2140 charset = Fget_charset (charset);
2141 cs = XCHARSET (charset);
2145 CHARSET_DECODING_TABLE(cs) = Qnil;
2148 else if (VECTORP (table))
2150 int ccs_len = CHARSET_BYTE_SIZE (cs);
2151 int ret = decoding_table_check_elements (table,
2152 CHARSET_DIMENSION (cs),
2157 signal_simple_error ("Too big table", table);
2159 signal_simple_error ("Invalid element is found", table);
2161 signal_simple_error ("Something wrong", table);
2163 CHARSET_DECODING_TABLE(cs) = Qnil;
2166 signal_error (Qwrong_type_argument,
2167 list2 (build_translated_string ("vector-or-nil-p"),
2170 byte_offset = CHARSET_BYTE_OFFSET (cs);
2171 switch (CHARSET_DIMENSION (cs))
2174 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2176 Lisp_Object c = XVECTOR_DATA(table)[i];
2179 Fput_char_attribute (c, XCHARSET_NAME (charset),
2180 make_int (i + byte_offset));
2184 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2186 Lisp_Object v = XVECTOR_DATA(table)[i];
2192 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2194 Lisp_Object c = XVECTOR_DATA(v)[j];
2198 (c, XCHARSET_NAME (charset),
2199 make_int ( ( (i + byte_offset) << 8 )
2205 Fput_char_attribute (v, XCHARSET_NAME (charset),
2206 make_int (i + byte_offset));
2214 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2215 Save mapping-table of CHARSET.
2219 struct Lisp_Charset *cs;
2220 int byte_min, byte_max;
2221 #ifdef HAVE_LIBCHISE
2223 #else /* HAVE_LIBCHISE */
2225 Lisp_Object db_file;
2226 #endif /* not HAVE_LIBCHISE */
2228 charset = Fget_charset (charset);
2229 cs = XCHARSET (charset);
2231 #ifdef HAVE_LIBCHISE
2232 if ( open_chise_data_source_maybe () )
2236 = chise_ds_get_ccs (default_chise_data_source,
2237 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2240 printf ("Can't open decoding-table %s\n",
2241 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2244 #else /* HAVE_LIBCHISE */
2245 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2246 Qsystem_char_id, 1);
2247 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2248 #endif /* not HAVE_LIBCHISE */
2250 byte_min = CHARSET_BYTE_OFFSET (cs);
2251 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2252 switch (CHARSET_DIMENSION (cs))
2256 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2259 for (cell = byte_min; cell < byte_max; cell++)
2261 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2265 #ifdef HAVE_LIBCHISE
2266 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2267 #else /* HAVE_LIBCHISE */
2268 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2269 Fprin1_to_string (c, Qnil),
2271 #endif /* not HAVE_LIBCHISE */
2278 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2281 for (row = byte_min; row < byte_max; row++)
2283 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2286 for (cell = byte_min; cell < byte_max; cell++)
2288 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2292 #ifdef HAVE_LIBCHISE
2293 chise_ccs_set_decoded_char
2295 (row << 8) | cell, XCHAR (c));
2296 #else /* HAVE_LIBCHISE */
2297 Fput_database (Fprin1_to_string (make_int ((row << 8)
2300 Fprin1_to_string (c, Qnil),
2302 #endif /* not HAVE_LIBCHISE */
2310 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2313 for (plane = byte_min; plane < byte_max; plane++)
2316 = get_ccs_octet_table (table_p, charset, plane);
2319 for (row = byte_min; row < byte_max; row++)
2322 = get_ccs_octet_table (table_r, charset, row);
2325 for (cell = byte_min; cell < byte_max; cell++)
2327 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2332 #ifdef HAVE_LIBCHISE
2333 chise_ccs_set_decoded_char
2338 #else /* HAVE_LIBCHISE */
2339 Fput_database (Fprin1_to_string
2340 (make_int ((plane << 16)
2344 Fprin1_to_string (c, Qnil),
2346 #endif /* not HAVE_LIBCHISE */
2355 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2358 for (group = byte_min; group < byte_max; group++)
2361 = get_ccs_octet_table (table_g, charset, group);
2364 for (plane = byte_min; plane < byte_max; plane++)
2367 = get_ccs_octet_table (table_p, charset, plane);
2370 for (row = byte_min; row < byte_max; row++)
2373 = get_ccs_octet_table (table_r, charset, row);
2376 for (cell = byte_min; cell < byte_max; cell++)
2379 = get_ccs_octet_table (table_c, charset, cell);
2383 #ifdef HAVE_LIBCHISE
2384 chise_ccs_set_decoded_char
2390 #else /* HAVE_LIBCHISE */
2391 Fput_database (Fprin1_to_string
2392 (make_int (( group << 24)
2397 Fprin1_to_string (c, Qnil),
2399 #endif /* not HAVE_LIBCHISE */
2407 #ifdef HAVE_LIBCHISE
2408 chise_ccs_sync (dt_ccs);
2410 #else /* HAVE_LIBCHISE */
2411 return Fclose_database (db);
2412 #endif /* not HAVE_LIBCHISE */
2415 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2416 Reset mapping-table of CCS with database file.
2420 #ifdef HAVE_LIBCHISE
2421 CHISE_CCS chise_ccs;
2423 Lisp_Object db_file;
2426 ccs = Fget_charset (ccs);
2428 #ifdef HAVE_LIBCHISE
2429 if ( open_chise_data_source_maybe () )
2432 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2433 XSTRING_DATA (Fsymbol_name
2434 (XCHARSET_NAME(ccs))));
2435 if (chise_ccs == NULL)
2438 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2439 Qsystem_char_id, 0);
2443 #ifdef HAVE_LIBCHISE
2444 chise_ccs_setup_db (chise_ccs, 0) == 0
2446 !NILP (Ffile_exists_p (db_file))
2450 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2457 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2459 #ifdef HAVE_LIBCHISE
2460 CHISE_Char_ID char_id;
2462 if ( open_chise_data_source_maybe () )
2466 = chise_ds_decode_char (default_chise_data_source,
2467 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2470 decoding_table_put_char (ccs, code_point, make_char (char_id));
2472 decoding_table_put_char (ccs, code_point, Qnil);
2474 /* chise_ccst_close (dt_ccs); */
2476 #else /* HAVE_LIBCHISE */
2479 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2482 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2486 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2493 decoding_table_put_char (ccs, code_point, ret);
2494 Fclose_database (db);
2498 decoding_table_put_char (ccs, code_point, Qnil);
2499 Fclose_database (db);
2502 #endif /* not HAVE_LIBCHISE */
2505 #ifdef HAVE_LIBCHISE
2506 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2507 Save properties of CHARSET.
2511 struct Lisp_Charset *cs;
2512 CHISE_Property property;
2514 unsigned char* feature_name;
2516 ccs = Fget_charset (charset);
2517 cs = XCHARSET (ccs);
2519 if ( open_chise_data_source_maybe () )
2522 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2524 property = chise_ds_get_property (default_chise_data_source,
2526 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2527 chise_feature_set_property_value
2528 (chise_ds_get_feature (default_chise_data_source, feature_name),
2529 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2531 chise_property_sync (property);
2533 charset = XCHARSET_NAME (ccs);
2534 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2536 property = chise_ds_get_property (default_chise_data_source,
2538 chise_feature_set_property_value
2539 (chise_ds_get_feature (default_chise_data_source, feature_name),
2540 property, XSTRING_DATA (Fprin1_to_string
2541 (CHARSET_DOC_STRING (cs), Qnil)));
2542 chise_property_sync (property);
2544 property = chise_ds_get_property (default_chise_data_source, "type");
2545 chise_feature_set_property_value
2546 (chise_ds_get_feature (default_chise_data_source, feature_name),
2548 chise_property_sync (property);
2550 property = chise_ds_get_property (default_chise_data_source, "chars");
2551 chise_feature_set_property_value
2552 (chise_ds_get_feature (default_chise_data_source, feature_name),
2553 property, XSTRING_DATA (Fprin1_to_string (make_int
2554 (CHARSET_CHARS (cs)),
2556 chise_property_sync (property);
2558 property = chise_ds_get_property (default_chise_data_source, "dimension");
2559 chise_feature_set_property_value
2560 (chise_ds_get_feature (default_chise_data_source, feature_name),
2561 property, XSTRING_DATA (Fprin1_to_string (make_int
2562 (CHARSET_DIMENSION (cs)),
2564 chise_property_sync (property);
2566 if ( CHARSET_FINAL (cs) != 0 )
2568 property = chise_ds_get_property (default_chise_data_source,
2570 chise_feature_set_property_value
2571 (chise_ds_get_feature (default_chise_data_source, feature_name),
2572 property, XSTRING_DATA (Fprin1_to_string (make_int
2573 (CHARSET_FINAL (cs)),
2575 chise_property_sync (property);
2578 if ( !NILP (CHARSET_MOTHER (cs)) )
2580 Lisp_Object mother = CHARSET_MOTHER (cs);
2582 if ( CHARSETP (mother) )
2583 mother = XCHARSET_NAME (mother);
2585 property = chise_ds_get_property (default_chise_data_source,
2587 chise_feature_set_property_value
2588 (chise_ds_get_feature (default_chise_data_source, feature_name),
2589 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2590 chise_property_sync (property);
2593 if ( CHARSET_MAX_CODE (cs) != 0 )
2597 property = chise_ds_get_property (default_chise_data_source,
2599 if ( CHARSET_MIN_CODE (cs) == 0 )
2600 chise_feature_set_property_value
2601 (chise_ds_get_feature (default_chise_data_source, feature_name),
2605 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2606 chise_feature_set_property_value
2607 (chise_ds_get_feature (default_chise_data_source, feature_name),
2610 chise_property_sync (property);
2612 property = chise_ds_get_property (default_chise_data_source,
2614 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2615 chise_feature_set_property_value
2616 (chise_ds_get_feature (default_chise_data_source, feature_name),
2618 chise_property_sync (property);
2620 property = chise_ds_get_property (default_chise_data_source,
2621 "mother-code-offset");
2622 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2623 chise_feature_set_property_value
2624 (chise_ds_get_feature (default_chise_data_source, feature_name),
2628 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2629 chise_feature_set_property_value
2630 (chise_ds_get_feature (default_chise_data_source, feature_name),
2633 chise_property_sync (property);
2635 property = chise_ds_get_property (default_chise_data_source,
2636 "mother-code-conversion");
2637 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2638 chise_feature_set_property_value
2639 (chise_ds_get_feature (default_chise_data_source, feature_name),
2640 property, "identical");
2643 Lisp_Object sym = Qnil;
2645 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2647 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2649 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2651 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2654 chise_feature_set_property_value
2655 (chise_ds_get_feature (default_chise_data_source, feature_name),
2656 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2658 chise_feature_set_property_value
2659 (chise_ds_get_feature (default_chise_data_source, feature_name),
2660 property, "unknown");
2662 chise_property_sync (property);
2666 #endif /* HAVE_LIBCHISE */
2668 #endif /* HAVE_CHISE */
2669 #endif /* UTF2000 */
2672 /************************************************************************/
2673 /* Lisp primitives for working with characters */
2674 /************************************************************************/
2677 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2678 Make a character from CHARSET and code-point CODE.
2679 If DEFINED_ONLY is non-nil, builtin character is not returned.
2680 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2681 If corresponding character is not found, nil is returned.
2683 (charset, code, defined_only, without_inheritance))
2687 charset = Fget_charset (charset);
2690 if (XCHARSET_GRAPHIC (charset) == 1)
2692 if (NILP (defined_only))
2693 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2695 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2696 return c >= 0 ? make_char (c) : Qnil;
2699 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2700 Make a builtin character from CHARSET and code-point CODE.
2707 charset = Fget_charset (charset);
2709 if (EQ (charset, Vcharset_latin_viscii))
2711 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2717 (ret = Fget_char_attribute (chr,
2718 Vcharset_latin_viscii_lower,
2721 charset = Vcharset_latin_viscii_lower;
2725 (ret = Fget_char_attribute (chr,
2726 Vcharset_latin_viscii_upper,
2729 charset = Vcharset_latin_viscii_upper;
2736 if (XCHARSET_GRAPHIC (charset) == 1)
2739 ch = decode_builtin_char (charset, c);
2741 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
2745 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2746 Make a character from CHARSET and octets ARG1 and ARG2.
2747 ARG2 is required only for characters from two-dimensional charsets.
2748 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2749 character s with caron.
2751 (charset, arg1, arg2))
2755 int lowlim, highlim;
2757 charset = Fget_charset (charset);
2758 cs = XCHARSET (charset);
2760 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2761 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2763 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2765 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2766 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2769 /* It is useful (and safe, according to Olivier Galibert) to strip
2770 the 8th bit off ARG1 and ARG2 because it allows programmers to
2771 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2772 Latin 2 code of the character. */
2780 if (a1 < lowlim || a1 > highlim)
2781 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2783 if (CHARSET_DIMENSION (cs) == 1)
2787 ("Charset is of dimension one; second octet must be nil", arg2);
2788 return make_char (MAKE_CHAR (charset, a1, 0));
2797 a2 = XINT (arg2) & 0x7f;
2799 if (a2 < lowlim || a2 > highlim)
2800 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2802 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  Coerces CHARACTER with
   CHECK_CHAR_COERCE_INT and returns XCHARSET_NAME of the charset that
   CHAR_CHARSET reports for it, i.e. the charset's name rather than the
   charset object itself. */
2805 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2806 Return the character set of CHARACTER.
2810 CHECK_CHAR_COERCE_INT (character);
2812 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.  Splits CHARACTER with BREAKUP_CHAR into a
   charset and two octets, then returns octet0 when N is nil or 0 and
   octet1 when N is 1.  Any other N signals
   "Octet number must be 0 or 1". */
2815 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2816 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2817 N defaults to 0 if omitted.
2821 Lisp_Object charset;
2824 CHECK_CHAR_COERCE_INT (character);
2826 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2828 if (NILP (n) || EQ (n, Qzero))
2829 return make_int (octet0);
2830 else if (EQ (n, make_int (1)))
2831 return make_int (octet1);
2833 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.  Resolves CHARSET with Fget_charset and
   asks charset_code_point for the code point of CHARACTER, passing
   !NILP (defined_only) as the third argument.  A non-negative result is
   returned as a Lisp integer.
   NOTE(review): the handling of a negative code point falls on lines
   that are not visible in this listing. */
2837 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2838 Return code-point of CHARACTER in specified CHARSET.
2840 (character, charset, defined_only))
2844 CHECK_CHAR_COERCE_INT (character);
2845 charset = Fget_charset (charset);
2846 code_point = charset_code_point (charset, XCHAR (character),
2847 !NILP (defined_only));
2848 if (code_point >= 0)
2849 return make_int (code_point);
/* Lisp primitive `split-char'.  Builds a list whose first element is the
   name of CHARACTER's charset, followed by its position codes.  Two
   implementations are visible below.
   NOTE(review): the preprocessor lines that presumably select between
   them, and the final return, are not visible in this listing. */
2855 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2856 Return list of charset and one or two position-codes of CHARACTER.
2860 /* This function can GC */
2861 struct gcpro gcpro1, gcpro2;
2862 Lisp_Object charset = Qnil;
2863 Lisp_Object rc = Qnil;
/* Protect charset and rc from garbage collection while the result list
   is being built. */
2871 GCPRO2 (charset, rc);
2872 CHECK_CHAR_COERCE_INT (character);
/* First implementation: ENCODE_CHAR yields a single code point, and the
   loop prepends its low 8 bits to rc while dimension is positive.
   NOTE(review): the statements that shift code_point and decrement
   dimension are not visible here.  The charset name is prepended last so
   that it ends up at the head of the list. */
2875 code_point = ENCODE_CHAR (XCHAR (character), charset);
2876 dimension = XCHARSET_DIMENSION (charset);
2877 while (dimension > 0)
2879 rc = Fcons (make_int (code_point & 255), rc);
2883 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Second implementation: BREAKUP_CHAR yields the charset and two octets;
   a dimension-2 charset produces a three-element list, anything else a
   two-element list. */
2885 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2887 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2889 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2893 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2902 #ifdef ENABLE_COMPOSITE_CHARS
2903 /************************************************************************/
2904 /* composite character functions */
2905 /************************************************************************/
/* Map the LEN-byte string STR to a composite character.  The string is
   first looked up in Vcomposite_char_string2char_hash_table.  The code
   visible below allocates the next free (row, column) position in
   Vcharset_composite, records the mapping in both hash tables and
   advances the allocation counters.
   NOTE(review): the test on the lookup result and the return statement
   fall on lines that are not visible in this listing. */
2908 lookup_composite_char (Bufbyte *str, int len)
2910 Lisp_Object lispstr = make_string (str, len);
2911 Lisp_Object ch = Fgethash (lispstr,
2912 Vcomposite_char_string2char_hash_table,
/* Row counter has reached 128: no position is left to hand out. */
2918 if (composite_char_row_next >= 128)
2919 signal_simple_error ("No more composite chars available", lispstr);
2920 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2921 composite_char_col_next);
/* Register the mapping in both directions: character -> string and
   string -> character. */
2922 Fputhash (make_char (emch), lispstr,
2923 Vcomposite_char_char2string_hash_table);
2924 Fputhash (lispstr, make_char (emch),
2925 Vcomposite_char_string2char_hash_table);
/* Advance to the next column; once the column reaches 128, restart at
   column 32 of the next row. */
2926 composite_char_col_next++;
2927 if (composite_char_col_next >= 128)
2929 composite_char_col_next = 32;
2930 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that the
   lookup did not produce an unbound value, i.e. CH is expected to have
   been registered already.
   NOTE(review): the return statement is not visible in this listing. */
2939 composite_char_string (Emchar ch)
2941 Lisp_Object str = Fgethash (make_char (ch),
2942 Vcomposite_char_char2string_hash_table,
2944 assert (!UNBOUNDP (str));
/* Lisp-level wrapper (declared with xxDEFUN) around
   lookup_composite_char: checks that STRING is a string, then passes its
   data and length and wraps the resulting character with make_char. */
2948 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2949 Convert a string into a single composite character.
2950 The character is the result of overstriking all the characters in
2955 CHECK_STRING (string);
2956 return make_char (lookup_composite_char (XSTRING_DATA (string),
2957 XSTRING_LENGTH (string)));
/* Lisp-level wrapper (declared with xxDEFUN) around
   composite_char_string.  Signals "Must be composite char" unless the
   leading byte of emch is LEADING_BYTE_COMPOSITE.
   NOTE(review): the lines that validate CH and derive emch from it are
   not visible in this listing. */
2960 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2961 Return a string of the characters comprising a composite character.
2969 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2970 signal_simple_error ("Must be composite char", ch);
2971 return composite_char_string (emch);
2973 #endif /* ENABLE_COMPOSITE_CHARS */
2976 /************************************************************************/
2977 /* initialization */
2978 /************************************************************************/
/* Symbol-level initialization for this file: sets up the `charset'
   lrecord implementation, registers the Lisp primitives with DEFSUBR and
   creates the symbols held in the Q... variables with defsymbol.
   NOTE(review): several preprocessor lines are not visible in this
   listing (for example the opening of the HAVE_CHISE conditional whose
   #endif appears below), so which registrations are conditional cannot
   be fully determined from here. */
2981 syms_of_mule_charset (void)
2983 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives. */
2985 DEFSUBR (Fcharsetp);
2986 DEFSUBR (Ffind_charset);
2987 DEFSUBR (Fget_charset);
2988 DEFSUBR (Fcharset_list);
2989 DEFSUBR (Fcharset_name);
2990 DEFSUBR (Fmake_charset);
2991 DEFSUBR (Fmake_reverse_direction_charset);
2992 /* DEFSUBR (Freverse_direction_charset); */
2993 DEFSUBR (Fdefine_charset_alias);
2994 DEFSUBR (Fcharset_from_attributes);
2995 DEFSUBR (Fcharset_short_name);
2996 DEFSUBR (Fcharset_long_name);
2997 DEFSUBR (Fcharset_description);
2998 DEFSUBR (Fcharset_dimension);
2999 DEFSUBR (Fcharset_property);
3000 DEFSUBR (Fcharset_id);
3001 DEFSUBR (Fset_charset_ccl_program);
3002 DEFSUBR (Fset_charset_registry);
3005 DEFSUBR (Fcharset_mapping_table);
3006 DEFSUBR (Fset_charset_mapping_table);
3008 DEFSUBR (Fsave_charset_mapping_table);
3009 DEFSUBR (Freset_charset_mapping_table);
3010 #ifdef HAVE_LIBCHISE
3011 DEFSUBR (Fsave_charset_properties);
3012 #endif /* HAVE_LIBCHISE */
3013 #endif /* HAVE_CHISE */
3014 DEFSUBR (Fdecode_char);
3015 DEFSUBR (Fdecode_builtin_char);
3016 DEFSUBR (Fencode_char);
3019 DEFSUBR (Fmake_char);
3020 DEFSUBR (Fchar_charset);
3021 DEFSUBR (Fchar_octet);
3022 DEFSUBR (Fsplit_char);
/* NOTE(review): the two primitives below are declared with xxDEFUN
   rather than DEFUN in this file; confirm that xxDEFUN still provides
   what DEFSUBR needs when ENABLE_COMPOSITE_CHARS is defined. */
3024 #ifdef ENABLE_COMPOSITE_CHARS
3025 DEFSUBR (Fmake_composite_char);
3026 DEFSUBR (Fcomposite_char_string);
/* Symbols stored in the Q... variables. */
3029 defsymbol (&Qcharsetp, "charsetp");
3030 defsymbol (&Qregistry, "registry");
3031 defsymbol (&Qfinal, "final");
3032 defsymbol (&Qgraphic, "graphic");
3033 defsymbol (&Qdirection, "direction");
3034 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3035 defsymbol (&Qshort_name, "short-name");
3036 defsymbol (&Qlong_name, "long-name");
3037 defsymbol (&Qiso_ir, "iso-ir");
3039 defsymbol (&Qmother, "mother");
3040 defsymbol (&Qmin_code, "min-code");
3041 defsymbol (&Qmax_code, "max-code");
3042 defsymbol (&Qcode_offset, "code-offset");
3043 defsymbol (&Qconversion, "conversion");
3044 defsymbol (&Q94x60, "94x60");
3045 defsymbol (&Q94x94x60, "94x94x60");
3046 defsymbol (&Qbig5_1, "big5-1");
3047 defsymbol (&Qbig5_2, "big5-2");
3050 defsymbol (&Ql2r, "l2r");
3051 defsymbol (&Qr2l, "r2l");
3053 /* Charsets, compatible with FSF 20.3
3054 Naming convention is Script-Charset[-Edition] */
3055 defsymbol (&Qascii, "ascii");
3056 defsymbol (&Qcontrol_1, "control-1");
3057 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3058 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3059 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3060 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3061 defsymbol (&Qthai_tis620, "thai-tis620");
3062 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3063 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3064 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3065 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3066 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3067 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3068 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
/* The Qmap_... symbols below use names of the form "=name" (for example
   "=gb2312") instead of the Script-Charset form used above. */
3069 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208@1978");
3070 defsymbol (&Qmap_gb2312, "=gb2312");
3071 defsymbol (&Qmap_gb12345, "=gb12345");
3072 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208@1983");
3073 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
3074 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
3075 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
3076 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
3078 defsymbol (&Qsystem_char_id, "system-char-id");
3079 defsymbol (&Qmap_ucs, "=ucs");
3080 defsymbol (&Qucs, "ucs");
3081 defsymbol (&Qucs_bmp, "ucs-bmp");
3082 defsymbol (&Qucs_smp, "ucs-smp");
3083 defsymbol (&Qucs_sip, "ucs-sip");
3084 defsymbol (&Qlatin_viscii, "latin-viscii");
3085 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3086 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3087 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3088 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3089 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3090 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
3091 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208@1990");
3092 defsymbol (&Qmap_big5, "=big5");
3093 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3095 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3096 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3098 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  Allocates the
   zero-filled lookup structure `chlook' and registers it with
   dump_add_root_struct_ptr, fills its lookup tables with Qnil, sets the
   starting values of the private leading-byte allocation counters, and
   declares the Lisp-visible variables `leading-code-private-11',
   `default-coded-charset-priority-list',
   `display-coded-charset-priority-use-inheritance' and
   `display-coded-charset-priority-use-hierarchy-order' with their
   initial values.  leading_code_private_11 is assigned
   PRE_LEADING_BYTE_PRIVATE_1 both before and after its DEFVAR_INT.
   NOTE(review): charset_by_attributes is initialized below both as a
   two-level and as a three-level table, and two different sets of
   allocation counters are set; the preprocessor lines that presumably
   select between these alternatives are not visible in this listing. */
3102 vars_of_mule_charset (void)
3109 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3110 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3112 /* Table of charsets indexed by leading byte. */
3113 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3114 chlook->charset_by_leading_byte[i] = Qnil;
3117 /* Table of charsets indexed by type/final-byte. */
3118 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3119 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3120 chlook->charset_by_attributes[i][j] = Qnil;
3122 /* Table of charsets indexed by type/final-byte/direction. */
3123 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3124 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3125 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3126 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Private leading bytes are handed out starting from the MIN_... values. */
3130 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3132 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3133 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3137 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3138 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3139 Leading-code of private TYPE9N charset of column-width 1.
3141 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3145 Vdefault_coded_charset_priority_list = Qnil;
3146 DEFVAR_LISP ("default-coded-charset-priority-list",
3147 &Vdefault_coded_charset_priority_list /*
3148 Default order of preferred coded-character-sets.
3150 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3151 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3152 &Vdisplay_coded_charset_priority_use_inheritance /*
3153 If non-nil, use character inheritance.
3155 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3156 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3157 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3158 If non-nil, prefer nearest character in hierarchy order.
3164 complex_vars_of_mule_charset (void)
3166 staticpro (&Vcharset_hash_table);
3167 Vcharset_hash_table =
3168 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3170 /* Predefined character sets. We store them into variables for
3174 staticpro (&Vcharset_system_char_id);
3175 Vcharset_system_char_id =
3176 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3177 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3178 build_string ("SCID"),
3179 build_string ("CHAR-ID"),
3180 build_string ("System char-id"),
3182 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3183 staticpro (&Vcharset_ucs);
3185 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
3186 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3187 build_string ("UCS"),
3188 build_string ("UCS"),
3189 build_string ("ISO/IEC 10646"),
3191 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3192 staticpro (&Vcharset_ucs_bmp);
3194 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3195 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3196 build_string ("BMP"),
3197 build_string ("UCS-BMP"),
3198 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3200 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3201 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3202 staticpro (&Vcharset_ucs_smp);
3204 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3205 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3206 build_string ("SMP"),
3207 build_string ("UCS-SMP"),
3208 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3209 build_string ("UCS00-1"),
3210 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3211 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
3212 staticpro (&Vcharset_ucs_sip);
3214 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3215 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3216 build_string ("SIP"),
3217 build_string ("UCS-SIP"),
3218 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3219 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3220 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3221 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
3223 # define MIN_CHAR_THAI 0
3224 # define MAX_CHAR_THAI 0
3225 /* # define MIN_CHAR_HEBREW 0 */
3226 /* # define MAX_CHAR_HEBREW 0 */
3227 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3228 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* Predefined `ascii' charset.  The second name string (apparently the
   long name, going by the other charsets defined here) is plain "ASCII",
   with no unbalanced parenthesis.
   NOTE(review): the line holding the assignment target is not visible in
   this listing. */
3230 staticpro (&Vcharset_ascii);
3232 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3233 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3234 build_string ("ASCII"),
3235 build_string ("ASCII"),
3236 build_string ("ASCII (ISO646 IRV)"),
3237 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3238 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Predefined `control-1' charset.  The description string states the
   range 128-159, matching the 0x80 and 0x9F values passed below.
   NOTE(review): one argument line of this call is not visible in this
   listing. */
3239 staticpro (&Vcharset_control_1);
3240 Vcharset_control_1 =
3241 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3242 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3243 build_string ("C1"),
3244 build_string ("Control characters"),
3245 build_string ("Control characters 128-159"),
3247 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
3248 staticpro (&Vcharset_latin_iso8859_1);
3249 Vcharset_latin_iso8859_1 =
3250 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3251 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3252 build_string ("Latin-1"),
3253 build_string ("ISO8859-1 (Latin-1)"),
3254 build_string ("ISO8859-1 (Latin-1)"),
3255 build_string ("iso8859-1"),
3256 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3257 staticpro (&Vcharset_latin_iso8859_2);
3258 Vcharset_latin_iso8859_2 =
3259 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3260 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3261 build_string ("Latin-2"),
3262 build_string ("ISO8859-2 (Latin-2)"),
3263 build_string ("ISO8859-2 (Latin-2)"),
3264 build_string ("iso8859-2"),
3265 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3266 staticpro (&Vcharset_latin_iso8859_3);
3267 Vcharset_latin_iso8859_3 =
3268 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3269 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3270 build_string ("Latin-3"),
3271 build_string ("ISO8859-3 (Latin-3)"),
3272 build_string ("ISO8859-3 (Latin-3)"),
3273 build_string ("iso8859-3"),
3274 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3275 staticpro (&Vcharset_latin_iso8859_4);
3276 Vcharset_latin_iso8859_4 =
3277 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3278 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Latin-4"),
3280 build_string ("ISO8859-4 (Latin-4)"),
3281 build_string ("ISO8859-4 (Latin-4)"),
3282 build_string ("iso8859-4"),
3283 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3284 staticpro (&Vcharset_thai_tis620);
3285 Vcharset_thai_tis620 =
3286 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3287 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3288 build_string ("TIS620"),
3289 build_string ("TIS620 (Thai)"),
3290 build_string ("TIS620.2529 (Thai)"),
3291 build_string ("tis620"),
3292 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3293 staticpro (&Vcharset_greek_iso8859_7);
3294 Vcharset_greek_iso8859_7 =
3295 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3296 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3297 build_string ("ISO8859-7"),
3298 build_string ("ISO8859-7 (Greek)"),
3299 build_string ("ISO8859-7 (Greek)"),
3300 build_string ("iso8859-7"),
3301 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3302 staticpro (&Vcharset_arabic_iso8859_6);
3303 Vcharset_arabic_iso8859_6 =
3304 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3305 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3306 build_string ("ISO8859-6"),
3307 build_string ("ISO8859-6 (Arabic)"),
3308 build_string ("ISO8859-6 (Arabic)"),
3309 build_string ("iso8859-6"),
3310 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3311 staticpro (&Vcharset_hebrew_iso8859_8);
3312 Vcharset_hebrew_iso8859_8 =
3313 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3314 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3315 build_string ("ISO8859-8"),
3316 build_string ("ISO8859-8 (Hebrew)"),
3317 build_string ("ISO8859-8 (Hebrew)"),
3318 build_string ("iso8859-8"),
3320 0 /* MIN_CHAR_HEBREW */,
3321 0 /* MAX_CHAR_HEBREW */, 0, 32,
3322 Qnil, CONVERSION_IDENTICAL);
3323 staticpro (&Vcharset_katakana_jisx0201);
3324 Vcharset_katakana_jisx0201 =
3325 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3326 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3327 build_string ("JISX0201 Kana"),
3328 build_string ("JISX0201.1976 (Japanese Kana)"),
3329 build_string ("JISX0201.1976 Japanese Kana"),
3330 build_string ("jisx0201\\.1976"),
3331 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3332 staticpro (&Vcharset_latin_jisx0201);
3333 Vcharset_latin_jisx0201 =
3334 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3335 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3336 build_string ("JISX0201 Roman"),
3337 build_string ("JISX0201.1976 (Japanese Roman)"),
3338 build_string ("JISX0201.1976 Japanese Roman"),
3339 build_string ("jisx0201\\.1976"),
3340 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3341 staticpro (&Vcharset_cyrillic_iso8859_5);
3342 Vcharset_cyrillic_iso8859_5 =
3343 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3344 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3345 build_string ("ISO8859-5"),
3346 build_string ("ISO8859-5 (Cyrillic)"),
3347 build_string ("ISO8859-5 (Cyrillic)"),
3348 build_string ("iso8859-5"),
3349 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3350 staticpro (&Vcharset_latin_iso8859_9);
3351 Vcharset_latin_iso8859_9 =
3352 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3353 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3354 build_string ("Latin-5"),
3355 build_string ("ISO8859-9 (Latin-5)"),
3356 build_string ("ISO8859-9 (Latin-5)"),
3357 build_string ("iso8859-9"),
3358 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3360 staticpro (&Vcharset_jis_x0208);
3361 Vcharset_jis_x0208 =
3362 make_charset (LEADING_BYTE_JIS_X0208,
3363 Qmap_jis_x0208, 94, 2,
3364 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3365 build_string ("JIS X0208"),
3366 build_string ("JIS X0208 Common"),
3367 build_string ("JIS X0208 Common part"),
3368 build_string ("jisx0208\\.1990"),
3370 MIN_CHAR_JIS_X0208_1990,
3371 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3372 Qnil, CONVERSION_94x94);
3374 staticpro (&Vcharset_japanese_jisx0208_1978);
3375 Vcharset_japanese_jisx0208_1978 =
3376 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3377 Qmap_jis_x0208_1978, 94, 2,
3378 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3379 build_string ("JIS X0208:1978"),
3380 build_string ("JIS X0208:1978 (Japanese)"),
3382 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3383 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3390 CONVERSION_IDENTICAL);
/* Predefined GB2312 charset.  The second name string (apparently the
   long name, going by the other charsets defined here) is plain
   "GB2312", with no unbalanced parenthesis. */
3391 staticpro (&Vcharset_chinese_gb2312);
3392 Vcharset_chinese_gb2312 =
3393 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3394 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3395 build_string ("GB2312"),
3396 build_string ("GB2312"),
3397 build_string ("GB2312 Chinese simplified"),
3398 build_string ("gb2312"),
3399 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Predefined GB 12345 charset.  The second name string (apparently the
   long name, going by the other charsets defined here) is plain
   "GB 12345", with no unbalanced parenthesis. */
3400 staticpro (&Vcharset_chinese_gb12345);
3401 Vcharset_chinese_gb12345 =
3402 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3403 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3404 build_string ("G1"),
3405 build_string ("GB 12345"),
3406 build_string ("GB 12345-1990"),
3407 build_string ("GB12345\\(\\.1990\\)?-0"),
3408 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3409 staticpro (&Vcharset_japanese_jisx0208);
3410 Vcharset_japanese_jisx0208 =
3411 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3412 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3413 build_string ("JISX0208"),
3414 build_string ("JIS X0208:1983 (Japanese)"),
3415 build_string ("JIS X0208:1983 Japanese Kanji"),
3416 build_string ("jisx0208\\.1983"),
3423 CONVERSION_IDENTICAL);
3425 staticpro (&Vcharset_japanese_jisx0208_1990);
3426 Vcharset_japanese_jisx0208_1990 =
3427 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3428 Qmap_jis_x0208_1990, 94, 2,
3429 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3430 build_string ("JISX0208-1990"),
3431 build_string ("JIS X0208:1990 (Japanese)"),
3432 build_string ("JIS X0208:1990 Japanese Kanji"),
3433 build_string ("jisx0208\\.1990"),
3435 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3436 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3437 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3438 Vcharset_jis_x0208 /* Qnil */,
3439 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* Predefined KSC5601 charset.  The second name string (apparently the
   long name) closes its parenthesis, in the same "NAME (Language)" form
   used by the JISX0212 charset defined next to it. */
3441 staticpro (&Vcharset_korean_ksc5601);
3442 Vcharset_korean_ksc5601 =
3443 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3444 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3445 build_string ("KSC5601"),
3446 build_string ("KSC5601 (Korean)"),
3447 build_string ("KSC5601 Korean Hangul and Hanja"),
3448 build_string ("ksc5601"),
3449 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3450 staticpro (&Vcharset_japanese_jisx0212);
3451 Vcharset_japanese_jisx0212 =
3452 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3453 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3454 build_string ("JISX0212"),
3455 build_string ("JISX0212 (Japanese)"),
3456 build_string ("JISX0212 Japanese Supplement"),
3457 build_string ("jisx0212"),
3458 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3460 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3461 staticpro (&Vcharset_chinese_cns11643_1);
3462 Vcharset_chinese_cns11643_1 =
3463 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3464 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3465 build_string ("CNS11643-1"),
3466 build_string ("CNS11643-1 (Chinese traditional)"),
3468 ("CNS 11643 Plane 1 Chinese traditional"),
3469 build_string (CHINESE_CNS_PLANE_RE("1")),
3470 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3471 staticpro (&Vcharset_chinese_cns11643_2);
3472 Vcharset_chinese_cns11643_2 =
3473 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3474 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3475 build_string ("CNS11643-2"),
3476 build_string ("CNS11643-2 (Chinese traditional)"),
3478 ("CNS 11643 Plane 2 Chinese traditional"),
3479 build_string (CHINESE_CNS_PLANE_RE("2")),
3480 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3482 staticpro (&Vcharset_latin_tcvn5712);
3483 Vcharset_latin_tcvn5712 =
3484 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3485 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3486 build_string ("TCVN 5712"),
3487 build_string ("TCVN 5712 (VSCII-2)"),
3488 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3489 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3490 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3491 staticpro (&Vcharset_latin_viscii_lower);
3492 Vcharset_latin_viscii_lower =
3493 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3494 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3495 build_string ("VISCII lower"),
3496 build_string ("VISCII lower (Vietnamese)"),
3497 build_string ("VISCII lower (Vietnamese)"),
3498 build_string ("MULEVISCII-LOWER"),
3499 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3500 staticpro (&Vcharset_latin_viscii_upper);
3501 Vcharset_latin_viscii_upper =
3502 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3503 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3504 build_string ("VISCII upper"),
3505 build_string ("VISCII upper (Vietnamese)"),
3506 build_string ("VISCII upper (Vietnamese)"),
3507 build_string ("MULEVISCII-UPPER"),
3508 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3509 staticpro (&Vcharset_latin_viscii);
3510 Vcharset_latin_viscii =
3511 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3512 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3513 build_string ("VISCII"),
3514 build_string ("VISCII 1.1 (Vietnamese)"),
3515 build_string ("VISCII 1.1 (Vietnamese)"),
3516 build_string ("VISCII1\\.1"),
3517 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3518 staticpro (&Vcharset_chinese_big5);
3519 Vcharset_chinese_big5 =
3520 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3521 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3522 build_string ("Big5"),
3523 build_string ("Big5"),
3524 build_string ("Big5 Chinese traditional"),
3525 build_string ("big5-0"),
3527 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3528 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3530 staticpro (&Vcharset_ethiopic_ucs);
3531 Vcharset_ethiopic_ucs =
3532 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3533 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3534 build_string ("Ethiopic (UCS)"),
3535 build_string ("Ethiopic (UCS)"),
3536 build_string ("Ethiopic of UCS"),
3537 build_string ("Ethiopic-Unicode"),
3538 Qnil, 0x1200, 0x137F, 0, 0,
3539 Qnil, CONVERSION_IDENTICAL);
3541 staticpro (&Vcharset_chinese_big5_1);
3542 Vcharset_chinese_big5_1 =
3543 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3544 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3545 build_string ("Big5"),
3546 build_string ("Big5 (Level-1)"),
3548 ("Big5 Level-1 Chinese traditional"),
3549 build_string ("big5"),
3550 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3551 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3552 staticpro (&Vcharset_chinese_big5_2);
3553 Vcharset_chinese_big5_2 =
3554 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3555 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3556 build_string ("Big5"),
3557 build_string ("Big5 (Level-2)"),
3559 ("Big5 Level-2 Chinese traditional"),
3560 build_string ("big5"),
3561 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3562 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3564 #ifdef ENABLE_COMPOSITE_CHARS
3565 /* #### For simplicity, we put composite chars into a 96x96 charset.
3566 This is going to lead to problems because you can run out of
3567 room, esp. as we don't yet recycle numbers. */
3568 staticpro (&Vcharset_composite);
3569 Vcharset_composite =
3570 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3571 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3572 build_string ("Composite"),
3573 build_string ("Composite characters"),
3574 build_string ("Composite characters"),
3577 /* #### not dumped properly */
3578 composite_char_row_next = 32;
3579 composite_char_col_next = 32;
3581 Vcomposite_char_string2char_hash_table =
3582 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3583 Vcomposite_char_char2string_hash_table =
3584 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3585 staticpro (&Vcomposite_char_string2char_hash_table);
3586 staticpro (&Vcomposite_char_char2string_hash_table);
3587 #endif /* ENABLE_COMPOSITE_CHARS */