1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004,2008 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 /* Lisp_Object Vcharset_japanese_jisx0208_1978; */
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the other way. */
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
/* Global charset lookup state.  Other code in this file reaches the
   charset_by_leading_byte and charset_by_attributes tables and the
   next_allocated_* leading-byte counters through this pointer.  */
100 struct charset_lookup *chlook;
/* Layout description for struct charset_lookup: one array of Lisp
   objects starting at charset_by_leading_byte, with
   NUM_LEADING_BYTES+4*128 entries.
   NOTE(review): presumably consumed by the portable dumper -- confirm.  */
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
/* Pairs the structure size with the field description above.  */
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The array is declared with 0xA0 (160) entries, i.e. ten rows of 16.
   NOTE(review): eleven rows are visible below; the two rows for
   0x80 - 0x8f look like alternatives (they differ only in the last
   entry, 3 versus 2), presumably selected by a preprocessor
   conditional -- confirm.  */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check decoding-table vector V for a charset with DIM remaining
   dimensions and CCS_LEN slots per level.  The visible checks are that
   V is not longer than CCS_LEN and that each element is inspected;
   elements that are neither nil nor a character may be checked
   recursively with DIM - 1.
   NOTE(review): the values returned on success and on each failure are
   not visible here -- confirm against the full definition.  */
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector with more slots than the charset allows is rejected.  */
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
/* nil and character elements need no further inspection.  */
164 if (!NILP (c) && !CHARP (c))
/* Descend into the nested element with one dimension fewer.  */
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record CHARACTER as the decoding of CODE_POINT in the decoding table
   of CCS.  The table is nested one level per charset dimension and is
   indexed by the octets of CODE_POINT, most significant octet first.
   NOTE(review): two implementations are visible below (one built on
   get_ccs_octet_table/put_ccs_octet_table, one on plain nested
   vectors); presumably a preprocessor conditional selects one of
   them -- confirm.  */
180 decoding_table_put_char (Lisp_Object ccs,
181 int code_point, Lisp_Object character)
184 Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
185 int dim = XCHARSET_DIMENSION (ccs);
/* Apparently the one-octet case: CHARACTER goes straight into the
   top-level table.  */
188 XCHARSET_DECODING_TABLE (ccs)
189 = put_ccs_octet_table (table1, ccs, code_point, character);
/* Two octets: fetch the sub-table for the high octet, store CHARACTER
   in it under the low octet, then store the sub-table back.  */
193 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
195 table2 = put_ccs_octet_table (table2, ccs,
196 (unsigned char)code_point, character);
197 XCHARSET_DECODING_TABLE (ccs)
198 = put_ccs_octet_table (table1, ccs,
199 (unsigned char)(code_point >> 8), table2);
/* Three octets: same scheme, walking down two levels and writing the
   updated tables back from the innermost level outwards.  */
204 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
206 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
208 table3 = put_ccs_octet_table (table3, ccs,
209 (unsigned char)code_point, character);
210 table2 = put_ccs_octet_table (table2, ccs,
211 (unsigned char)(code_point >> 8), table3);
212 XCHARSET_DECODING_TABLE (ccs)
213 = put_ccs_octet_table (table1, ccs,
214 (unsigned char)(code_point >> 16), table2);
216 else /* if (dim == 4) */
219 = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
221 = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
223 = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
225 table4 = put_ccs_octet_table (table4, ccs,
226 (unsigned char)code_point, character);
227 table3 = put_ccs_octet_table (table3, ccs,
228 (unsigned char)(code_point >> 8), table4);
229 table2 = put_ccs_octet_table (table2, ccs,
230 (unsigned char)(code_point >> 16), table3);
231 XCHARSET_DECODING_TABLE (ccs)
232 = put_ccs_octet_table (table1, ccs,
233 (unsigned char)(code_point >> 24), table2);
/* Second visible implementation: the decoding table is a vector of
   nested vectors, each with CCS_LEN slots.  */
236 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
237 int dim = XCHARSET_DIMENSION (ccs);
238 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
241 int ccs_len = XVECTOR_LENGTH (v);
/* Slot index at this level: the octet selected by DIM, rebased by the
   charset's byte offset.  */
246 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
247 nv = XVECTOR_DATA(v)[i];
252 if (EQ (nv, character))
/* Install a fresh nil-filled vector of CCS_LEN slots in slot I.  */
255 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
/* Finally store CHARACTER in the slot reached.  */
262 XVECTOR_DATA(v)[i] = character;
/* Associate CHARACTER with code point VALUE in coded charset CCS.
   VALUE may be an integer code point or, in the obsolete form, a list
   of bytes that is folded into an integer; other values signal
   "Invalid value for coded-charset".  A code point already recorded
   for CHARACTER in CCS (looked up with Fget_char_attribute) is removed
   from the decoding table before the new one is stored.
   NOTE(review): the return value is not visible here -- confirm.  */
267 put_char_ccs_code_point (Lisp_Object character,
268 Lisp_Object ccs, Lisp_Object value)
/* The work below is guarded against the trivial mapping: CCS named
   Qrep_ucs with an integer VALUE below 0xF0000 that equals CHARACTER's
   own code.  */
270 if ( !( EQ (XCHARSET_NAME (ccs), Qrep_ucs)
271 && INTP (value) && (XINT (value) < 0xF0000)
272 && XCHAR (character) == XINT (value) )
275 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
279 { /* obsolete representation: value must be a list of bytes */
280 Lisp_Object ret = Fcar (value);
284 signal_simple_error ("Invalid value for coded-charset", value);
285 code_point = XINT (ret);
286 if (XCHARSET_GRAPHIC (ccs) == 1)
294 signal_simple_error ("Invalid value for coded-charset",
298 signal_simple_error ("Invalid value for coded-charset",
301 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J as the new least significant octet.  */
303 code_point = (code_point << 8) | j;
/* From here on VALUE is the folded integer form.  */
306 value = make_int (code_point);
308 else if (INTP (value))
310 code_point = XINT (value);
311 if (XCHARSET_GRAPHIC (ccs) == 1)
/* For graphic == 1 charsets the high bit of every octet is cleared.  */
313 code_point &= 0x7F7F7F7F;
314 value = make_int (code_point);
318 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the old decoding-table entry for CHARACTER, then add the new.  */
322 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
325 decoding_table_remove_char (ccs, XINT (cpos));
328 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS from
   both directions of the mapping: the decoding table (code point to
   character) and the encoding table (character to code point).
   NOTE(review): the return value is not visible here -- confirm.  */
334 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
336 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
337 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
/* Decoding side: look up CHARACTER's current code point in CCS and
   remove that entry.  */
339 if (VECTORP (decoding_table))
341 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
345 decoding_table_remove_char (ccs, XINT (cpos));
/* Encoding side: mark CHARACTER's entry as unbound.  */
348 if (CHAR_TABLEP (encoding_table))
350 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
371 Lisp_Object Qpartial;
372 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
373 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
390 /* Qrep_jis_x0208_1978, */
408 Qvietnamese_viscii_lower,
409 Qvietnamese_viscii_upper,
419 Lisp_Object Ql2r, Qr2l;
421 Lisp_Object Vcharset_hash_table;
423 /* Composite characters are characters constructed by overstriking two
424 or more regular characters.
426 1) The old Mule implementation involves storing composite characters
427 in a buffer as a tag followed by all of the actual characters
428 used to make up the composite character. I think this is a bad
429 idea; it greatly complicates code that wants to handle strings
430 one character at a time because it has to deal with the possibility
431 of great big ungainly characters. It's much more reasonable to
432 simply store an index into a table of composite characters.
434 2) The current implementation only allows for 16,384 separate
435 composite characters over the lifetime of the XEmacs process.
436 This could become a potential problem if the user
437 edited lots of different files that use composite characters.
438 Due to FSF bogosity, increasing the number of allowable
439 composite characters under Mule would decrease the number
440 of possible faces that can exist. Mule already has shrunk
441 this to 2048, and further shrinkage would become uncomfortable.
442 No such problems exist in XEmacs.
444 Composite characters could be represented as 0x80 C1 C2 C3,
445 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
446 for slightly under 2^20 (one million) composite characters
447 over the XEmacs process lifetime, and you only need to
448 increase the size of a Mule character from 19 to 21 bits.
449 Or you could use 0x80 C1 C2 C3 C4, allowing for about
450 85 million (slightly over 2^26) composite characters. */
453 /************************************************************************/
454 /* Basic Emchar functions */
455 /************************************************************************/
457 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
458 string in STR. Returns the number of bytes stored.
459 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multibyte representation of C at STR through the cursor P.
   Two encoders are visible: a UTF-8 style one producing 2 to 6 bytes
   depending on the magnitude of C, and a leading-byte based one built
   on BREAKUP_CHAR.
   NOTE(review): presumably a preprocessor conditional selects one of
   them -- confirm.  */
463 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 11 bits: lead byte 0xc0 plus one continuation byte.  */
478 else if ( c <= 0x7ff )
480 *p++ = (c >> 6) | 0xc0;
481 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: lead byte 0xe0 plus two continuation bytes.  */
483 else if ( c <= 0xffff )
485 *p++ = (c >> 12) | 0xe0;
486 *p++ = ((c >> 6) & 0x3f) | 0x80;
487 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: lead byte 0xf0 plus three continuation bytes.  */
489 else if ( c <= 0x1fffff )
491 *p++ = (c >> 18) | 0xf0;
492 *p++ = ((c >> 12) & 0x3f) | 0x80;
493 *p++ = ((c >> 6) & 0x3f) | 0x80;
494 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: lead byte 0xf8 plus four continuation bytes.  */
496 else if ( c <= 0x3ffffff )
498 *p++ = (c >> 24) | 0xf8;
499 *p++ = ((c >> 18) & 0x3f) | 0x80;
500 *p++ = ((c >> 12) & 0x3f) | 0x80;
501 *p++ = ((c >> 6) & 0x3f) | 0x80;
502 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: lead byte 0xfc plus five continuation bytes.  */
506 *p++ = (c >> 30) | 0xfc;
507 *p++ = ((c >> 24) & 0x3f) | 0x80;
508 *p++ = ((c >> 18) & 0x3f) | 0x80;
509 *p++ = ((c >> 12) & 0x3f) | 0x80;
510 *p++ = ((c >> 6) & 0x3f) | 0x80;
511 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into its charset and position codes,
   and emit the private-charset prefix byte first when the leading
   byte is a private one.  */
514 BREAKUP_CHAR (c, charset, c1, c2);
515 lb = CHAR_LEADING_BYTE (c);
516 if (LEADING_BYTE_PRIVATE_P (lb))
517 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* Control-1 characters are treated specially here.  */
519 if (EQ (charset, Vcharset_control_1))
528 /* Return the first character from a Mule-encoded string in STR,
529 assuming it's non-ASCII. Do not call this directly.
530 Use the macro charptr_emchar() instead. */
/* Two decoders are visible: a UTF-8 style one that classifies the
   first byte B by threshold and then folds in continuation bytes, and
   a leading-byte based one.
   NOTE(review): presumably a preprocessor conditional selects one of
   them -- confirm.  */
533 non_ascii_charptr_emchar (const Bufbyte *str)
/* The first byte is tested against decreasing thresholds.  */
546 else if ( b >= 0xf8 )
551 else if ( b >= 0xf0 )
556 else if ( b >= 0xe0 )
561 else if ( b >= 0xc0 )
/* Each continuation byte contributes its low six bits.  */
571 for( ; len > 0; len-- )
574 ch = ( ch << 6 ) | ( b & 0x3f );
578 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the following byte minus 0x20.  */
581 if (i0 == LEADING_BYTE_CONTROL_1)
582 return (Emchar) (*++str - 0x20);
584 if (LEADING_BYTE_PREFIX_P (i0))
/* Look up the charset by leading byte; dimension-2 charsets are
   handled separately before the character is built from I1 and I2.  */
589 charset = CHARSET_BY_LEADING_BYTE (i0);
590 if (XCHARSET_DIMENSION (charset) == 2)
593 return MAKE_CHAR (charset, i1, i2);
597 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
598 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields (F1, F2, F3).  Two groups of checks are
   visible: one driven by F2 and F3, and one driven by F1 that also
   range-checks F2 and F3.
   NOTE(review): the condition choosing between the two groups and the
   values returned by the failing checks are not visible here --
   confirm.  */
602 non_ascii_valid_char_p (Emchar ch)
606 /* Must have only lowest 19 bits set */
610 f1 = CHAR_FIELD1 (ch);
611 f2 = CHAR_FIELD2 (ch);
612 f3 = CHAR_FIELD3 (ch);
/* F2 must fall inside the official or the private field-2 range.  */
618 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
619 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
620 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position codes other than 0x20 and 0x7F in a non-private charset
   need no charset lookup; the remaining cases are resolved below by
   checking whether the charset has 96 characters.  */
625 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
626 f2 <= MAX_CHAR_FIELD2_PRIVATE))
630 NOTE: This takes advantage of the fact that
631 FIELD2_TO_OFFICIAL_LEADING_BYTE and
632 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
634 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
635 if (EQ (charset, Qnil))
637 return (XCHARSET_CHARS (charset) == 96);
/* F1 must fall inside the official or the private field-1 range.  */
643 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
644 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
645 f1 > MAX_CHAR_FIELD1_PRIVATE)
647 if (f2 < 0x20 || f3 < 0x20)
650 #ifdef ENABLE_COMPOSITE_CHARS
651 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
653 if (UNBOUNDP (Fgethash (make_int (ch),
654 Vcomposite_char_char2string_hash_table,
659 #endif /* ENABLE_COMPOSITE_CHARS */
/* Same shortcut as above, applied to both position codes.  */
661 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
662 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private F1 values use different leading-byte offsets.  */
665 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
667 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
670 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
672 if (EQ (charset, Qnil))
674 return (XCHARSET_CHARS (charset) == 96);
680 /************************************************************************/
681 /* Basic string functions */
682 /************************************************************************/
684 /* Copy the character pointed to by SRC into DST. Do not call this
685 directly. Use the macro charptr_copy_char() instead.
686 Return the number of bytes copied. */
689 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The length of the character is derived from its first byte alone.  */
691 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Step SRC and DST forward together, once per byte of the character.  */
693 for (i = bytes; i; i--, dst++, src++)
699 /************************************************************************/
700 /* streams of Emchars */
701 /************************************************************************/
703 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
704 The functions below are not meant to be called directly; use
705 the macros in insdel.h. */
/* Finish reading one character from STREAM.  CH is the first byte,
   already obtained by the caller; the remaining bytes of the
   representation are pulled from STREAM and the assembled buffer is
   decoded with charptr_emchar.  */
708 Lstream_get_emchar_1 (Lstream *stream, int ch)
710 Bufbyte str[MAX_EMCHAR_LEN];
711 Bufbyte *strptr = str;
714 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes belong to this
   character.  */
716 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
718 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc is not expected here.  */
719 bufpos_checking_assert (c >= 0);
720 *++strptr = (Bufbyte) c;
722 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, pass the resulting bytes to Lstream_write and
   return that call's result.  */
726 Lstream_fput_emchar (Lstream *stream, Emchar ch)
728 Bufbyte str[MAX_EMCHAR_LEN];
729 Bytecount len = set_charptr_emchar (str, ch);
730 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar and hand the bytes to Lstream_unread.  */
734 Lstream_funget_emchar (Lstream *stream, Emchar ch)
736 Bufbyte str[MAX_EMCHAR_LEN];
737 Bytecount len = set_charptr_emchar (str, ch);
738 Lstream_unread (stream, str, len);
742 /************************************************************************/
744 /************************************************************************/
/* Mark method for charset objects: marks the Lisp objects held in the
   charset OBJ so the garbage collector keeps them alive.
   NOTE(review): the value returned to the collector is not visible
   here -- confirm.  */
747 mark_charset (Lisp_Object obj)
749 Lisp_Charset *cs = XCHARSET (obj);
751 mark_object (cs->short_name);
752 mark_object (cs->long_name);
753 mark_object (cs->doc_string);
754 mark_object (cs->registry);
755 mark_object (cs->ccl_program);
757 mark_object (cs->decoding_table);
758 mark_object (cs->mother);
/* Print method for charset objects.  Writes to PRINTCHARFUN a
   description of the form
   #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   NOTE(review): the condition under which the "unreadable object"
   error is raised is not visible here (presumably readable printing
   was requested) -- confirm.  */
764 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
766 Lisp_Charset *cs = XCHARSET (obj);
770 error ("printing unreadable object #<charset %s 0x%x>",
771 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
774 write_c_string ("#<charset ", printcharfun);
775 print_internal (CHARSET_NAME (cs), printcharfun, 0);
776 write_c_string (" ", printcharfun);
777 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
778 write_c_string (" ", printcharfun);
779 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
780 write_c_string (" ", printcharfun);
781 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The numeric properties are formatted into BUF first.
   NOTE(review): this uses unbounded sprintf; the size of BUF is not
   visible here -- confirm it is large enough for this format.  */
782 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
784 CHARSET_DIMENSION (cs),
785 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
786 CHARSET_COLUMNS (cs),
787 CHARSET_GRAPHIC (cs),
789 write_c_string (buf, printcharfun);
790 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the description with the object's uid.  */
791 sprintf (buf, " 0x%x>", cs->header.uid);
792 write_c_string (buf, printcharfun);
/* Layout description of Lisp_Charset: one entry per Lisp_Object field,
   located by its offset inside the structure.  */
795 static const struct lrecord_description charset_description[] = {
796 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
797 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
798 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
799 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
800 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
801 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
802 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
804 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
805 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Registers the "charset" record type with mark_charset and
   print_charset as its mark and print methods.  */
810 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
811 mark_charset, print_charset, 0, 0, 0,
815 /* Make a new charset. */
816 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset record, copies the arguments into its
   fields, derives the number of representation bytes from the
   dimension, and registers the new object in the lookup tables reached
   through chlook and in Vcharset_hash_table under NAME.
   NOTE(review): parts of the parameter list and the return statement
   are not visible here -- confirm against the full definition.  */
818 make_charset (Charset_ID id, Lisp_Object name,
819 unsigned short chars, unsigned char dimension,
820 unsigned char columns, unsigned char graphic,
821 Bufbyte final, unsigned char direction, Lisp_Object short_name,
822 Lisp_Object long_name, Lisp_Object doc,
824 Lisp_Object decoding_table,
825 Emchar min_code, Emchar max_code,
826 Emchar code_offset, unsigned char byte_offset,
827 Lisp_Object mother, unsigned char conversion,
831 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
835 XSETCHARSET (obj, cs);
837 CHARSET_ID (cs) = id;
838 CHARSET_NAME (cs) = name;
839 CHARSET_SHORT_NAME (cs) = short_name;
840 CHARSET_LONG_NAME (cs) = long_name;
841 CHARSET_CHARS (cs) = chars;
842 CHARSET_DIMENSION (cs) = dimension;
843 CHARSET_DIRECTION (cs) = direction;
844 CHARSET_COLUMNS (cs) = columns;
845 CHARSET_GRAPHIC (cs) = graphic;
846 CHARSET_FINAL (cs) = final;
847 CHARSET_DOC_STRING (cs) = doc;
848 CHARSET_REGISTRY (cs) = reg;
/* The CCL program and the reverse-direction charset start out nil.  */
849 CHARSET_CCL_PROGRAM (cs) = Qnil;
850 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding table field is set to Qunbound here, not
   to the DECODING_TABLE argument; lines around this one are not
   visible -- confirm how the argument is used.  */
852 CHARSET_DECODING_TABLE(cs) = Qunbound;
853 CHARSET_MIN_CODE (cs) = min_code;
854 CHARSET_MAX_CODE (cs) = max_code;
855 CHARSET_CODE_OFFSET (cs) = code_offset;
856 CHARSET_BYTE_OFFSET (cs) = byte_offset;
857 CHARSET_MOTHER (cs) = mother;
858 CHARSET_CONVERSION (cs) = conversion;
/* ASCII takes one byte per character; otherwise the representation is
   the dimension plus one or two extra bytes.  */
862 if (id == LEADING_BYTE_ASCII)
863 CHARSET_REP_BYTES (cs) = 1;
865 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
867 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
872 /* some charsets do not have final characters. This includes
873 ASCII, Control-1, Composite, and the two faux private
875 unsigned char iso2022_type
876 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
878 if ( ( !partial ) && ( code_offset == 0 ) )
880 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
881 chlook->charset_by_attributes[iso2022_type][final] = obj;
885 (chlook->charset_by_attributes[iso2022_type][final][direction]));
886 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
890 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
891 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
893 /* Some charsets are "faux" and don't have names or really exist at
894 all except in the leading-byte table. */
896 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION, advancing the matching counter in chlook.  When no
   leading byte is left, signals "No more character sets free for this
   dimension" with DIMENSION as data.
   NOTE(review): several counters are visible (a single one, and
   separate 1-byte and 2-byte ones); presumably preprocessor
   conditionals and DIMENSION choose among them -- confirm.  */
901 get_unallocated_leading_byte (int dimension)
906 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
909 lb = chlook->next_allocated_leading_byte++;
913 if (chlook->next_allocated_1_byte_leading_byte >
914 MAX_LEADING_BYTE_PRIVATE_1)
917 lb = chlook->next_allocated_1_byte_leading_byte++;
921 /* awfully fragile, but correct */
922 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
923 if (chlook->next_allocated_2_byte_leading_byte == 0)
925 if (chlook->next_allocated_2_byte_leading_byte >
926 MAX_LEADING_BYTE_PRIVATE_2)
930 lb = chlook->next_allocated_2_byte_leading_byte++;
936 ("No more character sets free for this dimension",
937 make_int (dimension));
943 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: 94 (0xFF - 0xA1) plus 63 (0x7F - 0x40).  */
945 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Map CODE_POINT of a derived charset to the corresponding code in its
   mother charset, according to CONV_TYPE.
   NOTE(review): the results for CONVERSION_IDENTICAL, for rows outside
   the handled ranges and for unknown conversion types are not visible
   here -- confirm.  */
948 decode_ccs_conversion (int conv_type, int code_point)
950 if ( conv_type == CONVERSION_IDENTICAL )
/* 94x60: CODE_POINT is row/cell; two bands of rows are turned into a
   linear index with 94 cells per row.  */
954 if ( conv_type == CONVERSION_94x60 )
956 int row = code_point >> 8;
957 int cell = code_point & 255;
961 else if (row < 16 + 32 + 30)
962 return (row - (16 + 32)) * 94 + cell - 33;
963 else if (row < 18 + 32 + 30)
965 else if (row < 18 + 32 + 60)
966 return (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60: as above with a leading plane octet; each plane
   contributes 94 * 60 positions.  */
968 else if ( conv_type == CONVERSION_94x94x60 )
970 int plane = code_point >> 16;
971 int row = (code_point >> 8) & 255;
972 int cell = code_point & 255;
976 else if (row < 16 + 32 + 30)
978 (plane - 33) * 94 * 60
979 + (row - (16 + 32)) * 94
981 else if (row < 18 + 32 + 30)
983 else if (row < 18 + 32 + 60)
985 (plane - 33) * 94 * 60
986 + (row - (18 + 32)) * 94
/* Big5 part 1: linearise the 94x94 code, then split the index into a
   Big5 first byte (from 0xA1) and second byte.  */
989 else if ( conv_type == CONVERSION_BIG5_1 )
992 = (((code_point >> 8) & 0x7F) - 33) * 94
993 + (( code_point & 0x7F) - 33);
994 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
995 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets 0..62 become second bytes from 0x40; offsets from 63 become
   second bytes from 0xA1 (63 + 0x62).  */
997 b2 += b2 < 0x3F ? 0x40 : 0x62;
998 return (b1 << 8) | b2;
/* Big5 part 2: same as part 1, but the index starts after the rows
   with first bytes 0xA1 up to (not including) 0xC9.  */
1000 else if ( conv_type == CONVERSION_BIG5_2 )
1003 = (((code_point >> 8) & 0x7F) - 33) * 94
1004 + (( code_point & 0x7F) - 33)
1005 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1006 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1007 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1009 b2 += b2 < 0x3F ? 0x40 : 0x62;
1010 return (b1 << 8) | b2;
/* Look up the character explicitly defined for CODE_POINT in coded
   charset CCS.  The decoding table is walked one octet per level; when
   that does not end in a character and WITHOUT_INHERITANCE is zero,
   the lookup is retried in the mother charset after converting the
   code point.
   NOTE(review): CHAR_ID starts at -1, presumably the "not found"
   result; the final return is not visible here -- confirm.  */
1016 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
1018 int dim = XCHARSET_DIMENSION (ccs);
1019 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1020 Emchar char_id = -1;
/* Descend one level using the octet of CODE_POINT selected by DIM.  */
1027 = get_ccs_octet_table (decoding_table, ccs,
1028 (code_point >> (dim * 8)) & 255);
1030 if (CHARP (decoding_table))
1031 return XCHAR (decoding_table);
/* An entry marked Qunloaded is loaded on demand.  */
1033 if (EQ (decoding_table, Qunloaded))
1035 char_id = load_char_decoding_entry_maybe (ccs, code_point);
1037 #endif /* HAVE_CHISE */
/* Inheritance: convert the code point into the mother charset's code
   space and add this charset's code offset.  */
1040 else if ( !without_inheritance
1041 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
1044 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
1048 code += XCHARSET_CODE_OFFSET(ccs);
/* A mother equal to Vcharset_ucs goes through DECODE_CHAR; any other
   mother is handled by recursion.  */
1049 if ( EQ (mother, Vcharset_ucs) )
1050 return DECODE_CHAR (mother, code, without_inheritance);
1052 return decode_defined_char (mother, code,
1053 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   numeric properties alone.  Charsets with a positive max code either
   delegate to their mother charset or compute an id arithmetically and
   check it against [min code, max code]; charsets with a final byte of
   at least '0' map into the MIN_CHAR_94 / MIN_CHAR_96 /
   MIN_CHAR_94x94 / MIN_CHAR_96x96 ranges.
   NOTE(review): the results for out-of-range ids and unsupported
   charsets are not visible here -- confirm.  */
1060 decode_builtin_char (Lisp_Object charset, int code_point)
1062 Lisp_Object mother = XCHARSET_MOTHER (charset);
1065 if ( XCHARSET_MAX_CODE (charset) > 0 )
/* With a mother charset: convert the code point and recurse, adding
   this charset's code offset.  */
1067 if ( CHARSETP (mother) )
1070 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
1075 decode_builtin_char (mother,
1076 code + XCHARSET_CODE_OFFSET(charset));
/* Without one: linearise the code point (one or two octets, each
   rebased by the byte offset) and add the code offset.  */
1083 = (XCHARSET_DIMENSION (charset) == 1
1085 code_point - XCHARSET_BYTE_OFFSET (charset)
1087 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1088 * XCHARSET_CHARS (charset)
1089 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1090 + XCHARSET_CODE_OFFSET (charset);
/* The computed id must lie inside the charset's code range.  */
1091 if ((cid < XCHARSET_MIN_CODE (charset))
1092 || (XCHARSET_MAX_CODE (charset) < cid))
/* ISO 2022 style charsets: the final byte selects a block, and the
   position inside it comes from the 7-bit octets of CODE_POINT.  */
1097 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1099 if (XCHARSET_DIMENSION (charset) == 1)
1101 switch (XCHARSET_CHARS (charset))
1105 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1108 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1116 switch (XCHARSET_CHARS (charset))
1119 return MIN_CHAR_94x94
1120 + (final - '0') * 94 * 94
1121 + (((code_point >> 8) & 0x7F) - 33) * 94
1122 + ((code_point & 0x7F) - 33);
1124 return MIN_CHAR_96x96
1125 + (final - '0') * 96 * 96
1126 + (((code_point >> 8) & 0x7F) - 32) * 96
1127 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.  The charset's
   encoding table is consulted first; otherwise the code is derived
   from the mother charset or from CH itself and converted according to
   the charset's conversion type; finally charsets with a final byte
   fall back to the builtin MIN_CHAR_94 / MIN_CHAR_96 /
   MIN_CHAR_94x94 / MIN_CHAR_96x96 ranges.  DEFINED_ONLY restricts the
   lookup to explicitly defined mappings.
   NOTE(review): the value returned when nothing matches is not visible
   here; callers in this file test the result with >= 0 -- confirm.  */
1139 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1141 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* An integer entry in the encoding table is tried first.  */
1144 if ( CHAR_TABLEP (encoding_table)
1145 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1150 Lisp_Object mother = XCHARSET_MOTHER (charset);
1151 int min = XCHARSET_MIN_CODE (charset);
1152 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, start from CH's code point there; charsets
   with a final byte force a defined-only lookup in the mother.  */
1155 if ( CHARSETP (mother) )
1157 if (XCHARSET_FINAL (charset) >= '0')
1158 code = charset_code_point (mother, ch, 1);
1160 code = charset_code_point (mother, ch, defined_only);
1162 else if (defined_only)
1164 else if ( ((max == 0) && CHARSETP (mother)
1165 && (XCHARSET_FINAL (charset) == 0))
1166 || ((min <= ch) && (ch <= max)) )
/* Proceed only if the code is usable: non-negative for a mother-based
   charset without a code range, or inside [min, max].  */
1168 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1169 || ((min <= code) && (code <= max)) )
/* D is the code rebased by this charset's code offset; the branches
   below split it into octets according to the conversion type.  */
1171 int d = code - XCHARSET_CODE_OFFSET (charset);
1173 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1175 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1177 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1179 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1182 int cell = d % 94 + 33;
1188 return (row << 8) | cell;
/* Big5 part 1: D is a two-byte Big5 code; linearise it with
   BIG5_SAME_ROW positions per first byte, then re-split into 94x94.  */
1190 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1192 int B1 = d >> 8, B2 = d & 0xFF;
1194 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1195 - (B2 < 0x7F ? 0x40 : 0x62);
1199 return ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Big5 part 2: as part 1, minus the positions that precede first byte
   0xC9.  */
1202 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1204 int B1 = d >> 8, B2 = d & 0xFF;
1206 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1207 - (B2 < 0x7F ? 0x40 : 0x62);
1211 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1212 return ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Plain two-octet layouts: base 33 for 94-sets, base 32 for 96-sets.  */
1215 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1216 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1217 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1218 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1219 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1221 int plane = d / (94 * 60) + 33;
1222 int row = (d % (94 * 60)) / 94;
1223 int cell = d % 94 + 33;
1229 return (plane << 16) | (row << 8) | cell;
/* Three- and four-octet layouts follow the same pattern.  */
1231 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1233 ( (d / (94 * 94) + 33) << 16)
1234 | ((d / 94 % 94 + 33) << 8)
1236 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1238 ( (d / (96 * 96) + 32) << 16)
1239 | ((d / 96 % 96 + 32) << 8)
1241 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1243 ( (d / (94 * 94 * 94) + 33) << 24)
1244 | ((d / (94 * 94) % 94 + 33) << 16)
1245 | ((d / 94 % 94 + 33) << 8)
1247 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1249 ( (d / (96 * 96 * 96) + 32) << 24)
1250 | ((d / (96 * 96) % 96 + 32) << 16)
1251 | ((d / 96 % 96 + 32) << 8)
/* An unrecognised conversion type is reported on standard output.  */
1255 printf ("Unknown CCS-conversion %d is specified!",
1256 XCHARSET_CONVERSION (charset));
1260 else if (defined_only)
/* Builtin fallback for charsets with a final byte, a zero min code and
   a code offset that is zero or equal to the min code: CH is located
   inside the block selected by the final byte.  */
1262 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1263 ( XCHARSET_MIN_CODE (charset) == 0 )
1265 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1266 (XCHARSET_CODE_OFFSET (charset)
1267 == XCHARSET_MIN_CODE (charset))
1272 if (XCHARSET_DIMENSION (charset) == 1)
1274 if (XCHARSET_CHARS (charset) == 94)
1276 if (((d = ch - (MIN_CHAR_94
1277 + (XCHARSET_FINAL (charset) - '0') * 94))
1282 else if (XCHARSET_CHARS (charset) == 96)
1284 if (((d = ch - (MIN_CHAR_96
1285 + (XCHARSET_FINAL (charset) - '0') * 96))
1293 else if (XCHARSET_DIMENSION (charset) == 2)
1295 if (XCHARSET_CHARS (charset) == 94)
1297 if (((d = ch - (MIN_CHAR_94x94
1299 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1302 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1304 else if (XCHARSET_CHARS (charset) == 96)
1306 if (((d = ch - (MIN_CHAR_96x96
1308 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1311 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Find a charset of dimension at most 2 that can encode CH.  The names
   in Vdefault_coded_charset_priority_list are tried in order; the
   charset tried is stored in *CHARSET and its code point for CH is
   computed.  Depending on the two
   Vdisplay_coded_charset_priority_use_* variables, child charsets are
   searched either right after each candidate or in a second pass over
   the list.  If nothing matches, encode_builtin_char_1 decides.
   NOTE(review): the statements taken when CODE_POINT >= 0 are not
   visible here (presumably the code point is returned) -- confirm.  */
1322 encode_char_2 (Emchar ch, Lisp_Object* charset)
1324 Lisp_Object charsets = Vdefault_coded_charset_priority_list;
/* First pass: each listed charset itself.  */
1327 while (!NILP (charsets))
1329 *charset = Ffind_charset (Fcar (charsets));
1330 if ( !NILP (*charset)
1331 && (XCHARSET_DIMENSION (*charset) <= 2) )
1333 code_point = charset_code_point (*charset, ch, 0);
1334 if (code_point >= 0)
/* Inheritance on, hierarchy order off: children are searched
   immediately after their parent candidate.  */
1337 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1338 NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1340 code_point = encode_char_2_search_children (ch, charset);
1341 if (code_point >= 0)
1345 charsets = Fcdr (charsets);
/* Inheritance on, hierarchy order on: children are searched in a
   second pass over the whole list.  */
1348 if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
1349 !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
1351 charsets = Vdefault_coded_charset_priority_list;
1352 while (!NILP (charsets))
1354 *charset = Ffind_charset (Fcar (charsets));
1355 if ( !NILP (*charset)
1356 && (XCHARSET_DIMENSION (*charset) <= 2) )
1358 code_point = encode_char_2_search_children (ch, charset);
1359 if (code_point >= 0)
1362 charsets = Fcdr (charsets);
1366 /* otherwise --- maybe for bootstrap */
1367 return encode_builtin_char_1 (ch, charset);
/* Choose a charset for character C from its numeric range alone, store
   it in *CHARSET and return the code point of C within it.  Ranges are
   tested in ascending order; for the MIN_CHAR_94 .. MAX_CHAR_96x96
   blocks the charset is looked up by its attributes (size, dimension,
   final byte, left-to-right), and Vcharset_ucs is used when no such
   charset exists.
   NOTE(review): several return statements and range tests are not
   visible here -- confirm against the full definition.  */
1371 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1373 if (c <= MAX_CHAR_BASIC_LATIN)
1375 *charset = Vcharset_ascii;
1380 *charset = Vcharset_control_1;
1385 *charset = Vcharset_latin_iso8859_1;
1389 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1391 *charset = Vcharset_hebrew_iso8859_8;
1392 return c - MIN_CHAR_HEBREW + 0x20;
1395 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1397 *charset = Vcharset_thai_tis620;
1398 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns the result of list2 and does not
   set *charset, unlike every other branch, which returns an integer
   code point; presumably it is compiled out by a preprocessor
   conditional -- confirm.  */
1401 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1402 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1404 return list2 (Vcharset_katakana_jisx0201,
1405 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1408 else if (c <= MAX_CHAR_BMP)
1410 *charset = Vcharset_ucs_bmp;
1413 else if (c <= MAX_CHAR_SMP)
1415 *charset = Vcharset_ucs_smp;
1416 return c - MIN_CHAR_SMP;
1418 else if (c <= MAX_CHAR_SIP)
1420 *charset = Vcharset_ucs_sip;
1421 return c - MIN_CHAR_SIP;
1423 else if (c < MIN_CHAR_94)
1425 *charset = Vcharset_ucs;
/* 94-character, dimension-1 block: the final byte is '0' plus the
   block index, and the code point is the offset in the block plus 33.  */
1428 else if (c <= MAX_CHAR_94)
1430 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1431 ((c - MIN_CHAR_94) / 94) + '0',
1432 CHARSET_LEFT_TO_RIGHT);
1433 if (!NILP (*charset))
1434 return ((c - MIN_CHAR_94) % 94) + 33;
1437 *charset = Vcharset_ucs;
/* 96-character, dimension-1 block: same scheme with base 32.  */
1441 else if (c <= MAX_CHAR_96)
1443 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1444 ((c - MIN_CHAR_96) / 96) + '0',
1445 CHARSET_LEFT_TO_RIGHT);
1446 if (!NILP (*charset))
1447 return ((c - MIN_CHAR_96) % 96) + 32;
1450 *charset = Vcharset_ucs;
/* 94x94 block: the high octet is the row and the low octet the cell,
   both based at 33.  */
1454 else if (c <= MAX_CHAR_94x94)
1457 = CHARSET_BY_ATTRIBUTES (94, 2,
1458 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1459 CHARSET_LEFT_TO_RIGHT);
1460 if (!NILP (*charset))
1461 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1462 | (((c - MIN_CHAR_94x94) % 94) + 33);
1465 *charset = Vcharset_ucs;
/* 96x96 block: row and cell based at 32.  */
1469 else if (c <= MAX_CHAR_96x96)
1472 = CHARSET_BY_ATTRIBUTES (96, 2,
1473 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1474 CHARSET_LEFT_TO_RIGHT);
1475 if (!NILP (*charset))
1476 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1477 | (((c - MIN_CHAR_96x96) % 96) + 32);
1480 *charset = Vcharset_ucs;
/* Everything else is attributed to Vcharset_ucs.  */
1486 *charset = Vcharset_ucs;
1491 Lisp_Object Vdefault_coded_charset_priority_list;
1492 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1493 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1497 /************************************************************************/
1498 /* Basic charset Lisp functions */
1499 /************************************************************************/
/* Lisp predicate: yields Qt when OBJECT satisfies CHARSETP and Qnil
   otherwise.  OBJECT is not otherwise inspected. */
1501 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1502 Return non-nil if OBJECT is a charset.
1506 return CHARSETP (object) ? Qt : Qnil;
/* Non-signalling charset lookup.  A charset object is handed back as is;
   anything else must pass CHECK_SYMBOL and is then looked up as a key in
   Vcharset_hash_table, with Qnil as the result for a missing key. */
1509 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1510 Retrieve the charset of the given name.
1511 If CHARSET-OR-NAME is a charset object, it is simply returned.
1512 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1513 nil is returned. Otherwise the associated charset object is returned.
1517 if (CHARSETP (charset_or_name))
1518 return charset_or_name;
1520 CHECK_SYMBOL (charset_or_name);
1521 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Signalling variant of Ffind_charset: performs the same lookup through
   Ffind_charset and raises the simple error No such charset, with NAME as
   the error datum, when the lookup finds nothing.  Most of the Lisp
   primitives in this file resolve their charset argument through this
   function. */
1524 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1525 Retrieve the charset of the given name.
1526 Same as `find-charset' except an error is signalled if there is no such
1527 charset instead of returning nil.
1531 Lisp_Object charset = Ffind_charset (name);
1534 signal_simple_error ("No such charset", name);
1538 /* We store the charsets in hash tables with the names as the key and the
1539 actual charset object as the value. Occasionally we need to use them
1540 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper below.  Its single field points
   at the Lisp list being accumulated, so the mapper can push onto a list
   owned by its caller. */
1541 struct charset_list_closure
1543 Lisp_Object *charset_list;
/* Hash-table mapping callback: conses KEY (the name under which a charset
   is registered) onto the front of the list reachable through the
   closure.  VALUE, the charset object, is not used; the inline comment on
   the Fcons line records the alternative of consing the name taken from
   VALUE instead. */
1547 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1548 void *charset_list_closure)
1550 /* This function can GC */
1551 struct charset_list_closure *chcl =
1552 (struct charset_list_closure*) charset_list_closure;
1553 Lisp_Object *charset_list = chcl->charset_list;
1555 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Builds a fresh list holding every key of Vcharset_hash_table by mapping
   add_charset_to_list_mapper over the table.  The list under construction
   is GC-protected with GCPRO1 because the mapper conses.  Keys are pushed
   at the front, so the order of the result follows the traversal order of
   the hash table. */
1559 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1560 Return a list of the names of all defined charsets.
1564 Lisp_Object charset_list = Qnil;
1565 struct gcpro gcpro1;
1566 struct charset_list_closure charset_list_closure;
1568 GCPRO1 (charset_list);
1569 charset_list_closure.charset_list = &charset_list;
1570 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1571 &charset_list_closure);
1574 return charset_list;
/* Resolves CHARSET through Fget_charset (which signals for an unknown
   charset) and returns the name stored in the charset object. */
1577 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1578 Return the name of charset CHARSET.
1582 return XCHARSET_NAME (Fget_charset (charset));
1585 /* #### SJT Should generic properties be allowed? */
/* Lisp entry point that validates NAME, DOC-STRING and the PROPS property
   list and then creates the charset with make_charset.

   Checks visible in the body:
   - NAME must be a symbol that does not already name a charset
     (Cannot redefine existing charset).
   - DOC-STRING must be a string unless it is nil.
   - Every recognized keyword in PROPS is checked and copied into a local:
     dimension, chars, columns (1 or 2), graphic, registry (string),
     direction (l2r or r2l), final (between the characters 0 and ~),
     partial, mother (resolved with Fget_charset), min-code, max-code,
     code-offset, conversion and ccl-program (accepted only if
     setup_ccl_program succeeds on a scratch structure).  An iso-ir value
     is stored negated in the id local.  Any other keyword signals
     Unrecognized property.
   - A final byte above 0x5F is rejected when dimension is 2.
   - The (chars, dimension, final) combination is rejected when a charset
     with those attributes already exists in either direction.

   Defaults applied after the property loop: an empty string for a nil
   doc string or registry, the print name of NAME for a missing short
   name, the doc string for a missing long name, and the dimension for
   columns (presumably only while columns still holds its initial -1 --
   the guarding test is not shown here; TODO confirm).  The id comes from
   get_unallocated_leading_byte, presumably only when no iso-ir value set
   it -- TODO confirm.  A ccl-program, when given, is installed on the new
   charset after make_charset returns. */
1586 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1587 Define a new character set.
1588 This function is for use with Mule support.
1589 NAME is a symbol, the name by which the character set is normally referred.
1590 DOC-STRING is a string describing the character set.
1591 PROPS is a property list, describing the specific nature of the
1592 character set. Recognized properties are:
1594 'short-name Short version of the charset name (ex: Latin-1)
1595 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1596 'registry A regular expression matching the font registry field for
1598 'dimension Number of octets used to index a character in this charset.
1599 Either 1 or 2. Defaults to 1.
1600 If UTF-2000 feature is enabled, 3 or 4 are also available.
1601 'columns Number of columns used to display a character in this charset.
1602 Only used in TTY mode. (Under X, the actual width of a
1603 character can be derived from the font used to display the
1604 characters.) If unspecified, defaults to the dimension
1605 (this is almost always the correct value).
1606 'chars Number of characters in each dimension (94 or 96).
1607 Defaults to 94. Note that if the dimension is 2, the
1608 character set thus described is 94x94 or 96x96.
1609 If UTF-2000 feature is enabled, 128 or 256 are also available.
1610 'final Final byte of ISO 2022 escape sequence. Must be
1611 supplied. Each combination of (DIMENSION, CHARS) defines a
1612 separate namespace for final bytes. Note that ISO
1613 2022 restricts the final byte to the range
1614 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1615 dimension == 2. Note also that final bytes in the range
1616 0x30 - 0x3F are reserved for user-defined (not official)
1618 'graphic 0 (use left half of font on output) or 1 (use right half
1619 of font on output). Defaults to 0. For example, for
1620 a font whose registry is ISO8859-1, the left half
1621 (octets 0x20 - 0x7F) is the `ascii' character set, while
1622 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1623 character set. With 'graphic set to 0, the octets
1624 will have their high bit cleared; with it set to 1,
1625 the octets will have their high bit set.
1626 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1628 'ccl-program A compiled CCL program used to convert a character in
1629 this charset into an index into the font. This is in
1630 addition to the 'graphic property. The CCL program
1631 is passed the octets of the character, with the high
1632 bit cleared and set depending upon whether the value
1633 of the 'graphic property is 0 or 1.
1634 'mother [UTF-2000 only] Base coded-charset.
1635 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1636 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1637 'code-offset [UTF-2000 only] Offset for a code-point of a base
1639 'conversion [UTF-2000 only] Conversion for a code-point of a base
1640 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1641 'partial [UTF-2000 only] If t, specify as a partial coded-charset.
1643 (name, doc_string, props))
1645 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1646 int direction = CHARSET_LEFT_TO_RIGHT;
1647 Lisp_Object registry = Qnil;
1648 Lisp_Object charset;
1649 Lisp_Object ccl_program = Qnil;
1650 Lisp_Object short_name = Qnil, long_name = Qnil;
1651 Lisp_Object mother = Qnil;
1653 int min_code = 0, max_code = 0, code_offset = 0;
1654 int byte_offset = -1;
1657 CHECK_SYMBOL (name);
1658 if (!NILP (doc_string))
1659 CHECK_STRING (doc_string);
1661 charset = Ffind_charset (name);
1662 if (!NILP (charset))
1663 signal_simple_error ("Cannot redefine existing charset", name);
1666 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1668 if (EQ (keyword, Qshort_name))
1670 CHECK_STRING (value);
1674 else if (EQ (keyword, Qlong_name))
1676 CHECK_STRING (value);
1680 else if (EQ (keyword, Qiso_ir))
1684 id = - XINT (value);
1688 else if (EQ (keyword, Qdimension))
1691 dimension = XINT (value);
1692 if (dimension < 1 ||
1699 signal_simple_error ("Invalid value for 'dimension", value);
1702 else if (EQ (keyword, Qchars))
1705 chars = XINT (value);
1706 if (chars != 94 && chars != 96
1708 && chars != 128 && chars != 256
1711 signal_simple_error ("Invalid value for 'chars", value);
1714 else if (EQ (keyword, Qcolumns))
1717 columns = XINT (value);
1718 if (columns != 1 && columns != 2)
1719 signal_simple_error ("Invalid value for 'columns", value);
1722 else if (EQ (keyword, Qgraphic))
1725 graphic = XINT (value);
1733 signal_simple_error ("Invalid value for 'graphic", value);
1736 else if (EQ (keyword, Qregistry))
1738 CHECK_STRING (value);
1742 else if (EQ (keyword, Qdirection))
1744 if (EQ (value, Ql2r))
1745 direction = CHARSET_LEFT_TO_RIGHT;
1746 else if (EQ (value, Qr2l))
1747 direction = CHARSET_RIGHT_TO_LEFT;
1749 signal_simple_error ("Invalid value for 'direction", value);
1752 else if (EQ (keyword, Qfinal))
1754 CHECK_CHAR_COERCE_INT (value);
1755 final = XCHAR (value);
1756 if (final < '0' || final > '~')
1757 signal_simple_error ("Invalid value for 'final", value);
1761 else if (EQ (keyword, Qpartial))
1763 partial = !NILP (value);
1766 else if (EQ (keyword, Qmother))
1768 mother = Fget_charset (value);
1771 else if (EQ (keyword, Qmin_code))
1774 min_code = XUINT (value);
1777 else if (EQ (keyword, Qmax_code))
1780 max_code = XUINT (value);
1783 else if (EQ (keyword, Qcode_offset))
1786 code_offset = XUINT (value);
1789 else if (EQ (keyword, Qconversion))
1791 if (EQ (value, Q94x60))
1792 conversion = CONVERSION_94x60;
1793 else if (EQ (value, Q94x94x60))
1794 conversion = CONVERSION_94x94x60;
1795 else if (EQ (value, Qbig5_1))
1796 conversion = CONVERSION_BIG5_1;
1797 else if (EQ (value, Qbig5_2))
1798 conversion = CONVERSION_BIG5_2;
1800 signal_simple_error ("Unrecognized conversion", value);
1804 else if (EQ (keyword, Qccl_program))
1806 struct ccl_program test_ccl;
1808 if (setup_ccl_program (&test_ccl, value) < 0)
1809 signal_simple_error ("Invalid value for 'ccl-program", value);
1810 ccl_program = value;
1814 signal_simple_error ("Unrecognized property", keyword);
1820 error ("'final must be specified");
1822 if (dimension == 2 && final > 0x5F)
1824 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1827 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1828 CHARSET_LEFT_TO_RIGHT)) ||
1829 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1830 CHARSET_RIGHT_TO_LEFT)))
1832 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1835 id = get_unallocated_leading_byte (dimension);
1837 if (NILP (doc_string))
1838 doc_string = build_string ("");
1840 if (NILP (registry))
1841 registry = build_string ("");
1843 if (NILP (short_name))
1844 XSETSTRING (short_name, XSYMBOL (name)->name);
1846 if (NILP (long_name))
1847 long_name = doc_string;
1850 columns = dimension;
1852 if (byte_offset < 0)
1856 else if (chars == 96)
1862 charset = make_charset (id, name, chars, dimension, columns, graphic,
1863 final, direction, short_name, long_name,
1864 doc_string, registry,
1865 Qnil, min_code, max_code, code_offset, byte_offset,
1866 mother, conversion, partial);
1867 if (!NILP (ccl_program))
1868 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Creates a twin of CHARSET that differs only in direction and name.

   Errors signalled: CHARSET already has a reverse-direction charset, or
   NEW-NAME is not a symbol, or NEW-NAME already names a charset.

   The chars, dimension, columns, graphic, final, doc string, short name,
   long name and registry are copied from the original; the direction is
   flipped; a fresh id is obtained from get_unallocated_leading_byte.
   After creation the two charsets are linked to each other through their
   reverse-direction slots.

   NOTE(review): the call to make_charset shows two alternative argument
   tails, one copying the decoding table and code range from the original
   and one passing nil and zeros.  They look like the two arms of a
   conditional-compilation switch whose directives are not shown here --
   confirm against the full file. */
1872 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1874 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1875 NEW-NAME is the name of the new charset. Return the new charset.
1877 (charset, new_name))
1879 Lisp_Object new_charset = Qnil;
1880 int id, chars, dimension, columns, graphic, final;
1882 Lisp_Object registry, doc_string, short_name, long_name;
1885 charset = Fget_charset (charset);
1886 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1887 signal_simple_error ("Charset already has reverse-direction charset",
1890 CHECK_SYMBOL (new_name);
1891 if (!NILP (Ffind_charset (new_name)))
1892 signal_simple_error ("Cannot redefine existing charset", new_name);
1894 cs = XCHARSET (charset);
1896 chars = CHARSET_CHARS (cs);
1897 dimension = CHARSET_DIMENSION (cs);
1898 columns = CHARSET_COLUMNS (cs);
1899 id = get_unallocated_leading_byte (dimension);
1901 graphic = CHARSET_GRAPHIC (cs);
1902 final = CHARSET_FINAL (cs);
1903 direction = CHARSET_RIGHT_TO_LEFT;
1904 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1905 direction = CHARSET_LEFT_TO_RIGHT;
1906 doc_string = CHARSET_DOC_STRING (cs);
1907 short_name = CHARSET_SHORT_NAME (cs);
1908 long_name = CHARSET_LONG_NAME (cs);
1909 registry = CHARSET_REGISTRY (cs);
1911 new_charset = make_charset (id, new_name, chars, dimension, columns,
1912 graphic, final, direction, short_name, long_name,
1913 doc_string, registry,
1915 CHARSET_DECODING_TABLE(cs),
1916 CHARSET_MIN_CODE(cs),
1917 CHARSET_MAX_CODE(cs),
1918 CHARSET_CODE_OFFSET(cs),
1919 CHARSET_BYTE_OFFSET(cs),
1921 CHARSET_CONVERSION (cs)
1923 Qnil, 0, 0, 0, 0, Qnil, 0
1927 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1928 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Registers the symbol ALIAS in Vcharset_hash_table as an additional key
   for the charset object designated by CHARSET.  CHARSET is resolved with
   Fget_charset, so an unknown charset signals before anything is stored.
   The value returned is whatever Fputhash returns.  No check is visible
   here for ALIAS already being in use as a key. */
1933 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1934 Define symbol ALIAS as an alias for CHARSET.
1938 CHECK_SYMBOL (alias);
1939 charset = Fget_charset (charset);
1940 return Fputhash (alias, charset, Vcharset_hash_table);
1943 /* #### Reverse direction charsets not yet implemented. */
/* Resolves CHARSET with Fget_charset and returns the raw contents of its
   reverse-direction slot, without translating it to a name. */
1945 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1947 Return the reverse-direction charset parallel to CHARSET, if any.
1948 This is the charset with the same properties (in particular, the same
1949 dimension, number of characters per dimension, and final byte) as
1950 CHARSET but whose characters are displayed in the opposite direction.
1954 charset = Fget_charset (charset);
1955 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Looks a charset up by its ISO 2022 attributes.

   Argument checks visible in the body: DIMENSION must be an integer equal
   to 1 or 2; CHARS must be 94 or 96; FINAL must lie between the
   characters 0 and ~, and may not exceed 0x5F when DIMENSION is 2;
   DIRECTION must be l2r, r2l or nil.  Each violation signals a simple
   error naming the offending argument.

   With DIRECTION nil the local di keeps its initial -1 and both
   directions are consulted, left-to-right first.  On success the value
   returned is the NAME of the charset (XCHARSET_NAME), not the charset
   object itself. */
1959 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1960 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1961 If DIRECTION is omitted, both directions will be checked (left-to-right
1962 will be returned if character sets exist for both directions).
1964 (dimension, chars, final, direction))
1966 int dm, ch, fi, di = -1;
1967 Lisp_Object obj = Qnil;
1969 CHECK_INT (dimension);
1970 dm = XINT (dimension);
1971 if (dm < 1 || dm > 2)
1972 signal_simple_error ("Invalid value for DIMENSION", dimension);
1976 if (ch != 94 && ch != 96)
1977 signal_simple_error ("Invalid value for CHARS", chars);
1979 CHECK_CHAR_COERCE_INT (final);
1981 if (fi < '0' || fi > '~')
1982 signal_simple_error ("Invalid value for FINAL", final);
1984 if (EQ (direction, Ql2r))
1985 di = CHARSET_LEFT_TO_RIGHT;
1986 else if (EQ (direction, Qr2l))
1987 di = CHARSET_RIGHT_TO_LEFT;
1988 else if (!NILP (direction))
1989 signal_simple_error ("Invalid value for DIRECTION", direction);
1991 if (dm == 2 && fi > 0x5F)
1993 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1997 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1999 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2002 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2005 return XCHARSET_NAME (obj);
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns the short name stored in the charset object. */
2009 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2010 Return short name of CHARSET.
2014 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns the long name stored in the charset object. */
2017 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2018 Return long name of CHARSET.
2022 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns its doc string slot, which is what the Lisp level calls the
   description. */
2025 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2026 Return description of CHARSET.
2030 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns its dimension boxed as a Lisp integer. */
2033 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2034 Return dimension of CHARSET.
2038 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property reader.  CHARSET is resolved with Fget_charset and PROP
   must be a symbol; PROP is then compared with EQ against each known
   property symbol in turn.

   Conversions applied to the stored values:
   - dimension, columns, graphic, chars, min-code and max-code are boxed
     with make_int;
   - final yields Qnil when the stored final byte is 0 and a character
     otherwise;
   - direction yields the symbol l2r or r2l;
   - reverse-direction-charset yields the name of the linked charset when
     the slot holds a charset, and the raw slot value otherwise;
   - the remaining properties return the stored Lisp object unchanged.

   A PROP matching none of the tests signals Unrecognized charset
   property name; the trailing return only pacifies the compiler. */
2041 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2042 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2043 Recognized properties are those listed in `make-charset', as well as
2044 'name and 'doc-string.
2050 charset = Fget_charset (charset);
2051 cs = XCHARSET (charset);
2053 CHECK_SYMBOL (prop);
2054 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2055 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2056 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2057 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2058 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2059 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2060 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2061 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
2062 Qnil : make_char (CHARSET_FINAL (cs));
2063 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2064 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2065 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2066 if (EQ (prop, Qdirection))
2067 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2068 if (EQ (prop, Qreverse_direction_charset))
2070 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2071 /* #### Is this translation OK? If so, error checking sufficient? */
2072 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2075 if (EQ (prop, Qmother))
2076 return CHARSET_MOTHER (cs);
2077 if (EQ (prop, Qmin_code))
2078 return make_int (CHARSET_MIN_CODE (cs));
2079 if (EQ (prop, Qmax_code))
2080 return make_int (CHARSET_MAX_CODE (cs));
2082 signal_simple_error ("Unrecognized charset property name", prop);
2083 return Qnil; /* not reached */
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns the value of its leading-byte slot boxed as a Lisp integer.
   That slot is what the Lisp level calls the identification number. */
2086 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2087 Return charset identification number of CHARSET.
2091 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2094 /* #### We need to figure out which properties we really want to
/* Replaces the ccl-program slot of CHARSET.  CCL-PROGRAM is validated
   first by running setup_ccl_program on a scratch structure; a negative
   result signals Invalid ccl-program and leaves the charset untouched.
   The scratch structure is used for validation only: the value stored is
   the original Lisp object. */
2097 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2098 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2100 (charset, ccl_program))
2102 struct ccl_program test_ccl;
2104 charset = Fget_charset (charset);
2105 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2106 signal_simple_error ("Invalid ccl-program", ccl_program);
2107 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empties the font cache kept for CHARSET on every device.  For each
   device the entry stored under CHARSET in the charset_font_cache table of
   the device is fetched; Qunbound is used as the lookup default so that a
   missing entry can be told apart from a stored value, and only an
   existing entry is cleared with Fclrhash.  The entry itself stays in the
   device table; only its contents are dropped. */
2112 invalidate_charset_font_caches (Lisp_Object charset)
2114 /* Invalidate font cache entries for charset on all devices. */
2115 Lisp_Object devcons, concons, hash_table;
2116 DEVICE_LOOP_NO_BREAK (devcons, concons)
2118 struct device *d = XDEVICE (XCAR (devcons));
2119 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2120 if (!UNBOUNDP (hash_table))
2121 Fclrhash (hash_table);
/* Replaces the registry slot of CHARSET with REGISTRY, which must be a
   string.  Because cached font lookups were made under the old registry,
   the per-device font caches for this charset are emptied afterwards and
   a global change of the font property of the default face is reported
   through face_property_was_changed (presumably so that dependent faces
   are recomputed -- TODO confirm). */
2125 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2126 Set the 'registry property of CHARSET to REGISTRY.
2128 (charset, registry))
2130 charset = Fget_charset (charset);
2131 CHECK_STRING (registry);
2132 XCHARSET_REGISTRY (charset) = registry;
2133 invalidate_charset_font_caches (charset);
2134 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Accessor: resolves CHARSET with Fget_charset (signals if unknown) and
   returns the contents of its decoding-table slot unchanged. */
2139 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2140 Return mapping-table of CHARSET.
2144 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Installs a code-point to character mapping for CHARSET from TABLE.

   TABLE must be nil or a vector; any other type signals
   wrong-type-argument with the predicate name vector-or-nil-p.  A vector
   is first checked by decoding_table_check_elements; depending on the
   result one of the errors Too big table, Invalid element is found or
   Something wrong is signalled (the result values selecting each error
   are not shown here).  In both the nil and the vector case the decoding
   table slot of the charset is reset to Qnil.

   The mapping itself is recorded per character through
   Fput_char_attribute, keyed by the charset name.  Indices are shifted by
   the byte offset of the charset:
   - dimension 1: element i maps to code point i + byte_offset;
   - dimension 2: element i is a row; when it is a vector, element j of
     the row gets a code point whose high byte is i + byte_offset, and
     when it is not a vector the element itself gets i + byte_offset. */
2147 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2148 Set mapping-table of CHARSET to TABLE.
2152 struct Lisp_Charset *cs;
2156 charset = Fget_charset (charset);
2157 cs = XCHARSET (charset);
2161 CHARSET_DECODING_TABLE(cs) = Qnil;
2164 else if (VECTORP (table))
2166 int ccs_len = CHARSET_BYTE_SIZE (cs);
2167 int ret = decoding_table_check_elements (table,
2168 CHARSET_DIMENSION (cs),
2173 signal_simple_error ("Too big table", table);
2175 signal_simple_error ("Invalid element is found", table);
2177 signal_simple_error ("Something wrong", table);
2179 CHARSET_DECODING_TABLE(cs) = Qnil;
2182 signal_error (Qwrong_type_argument,
2183 list2 (build_translated_string ("vector-or-nil-p"),
2186 byte_offset = CHARSET_BYTE_OFFSET (cs);
2187 switch (CHARSET_DIMENSION (cs))
2190 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2192 Lisp_Object c = XVECTOR_DATA(table)[i];
2195 Fput_char_attribute (c, XCHARSET_NAME (charset),
2196 make_int (i + byte_offset));
2200 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2202 Lisp_Object v = XVECTOR_DATA(table)[i];
2208 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2210 Lisp_Object c = XVECTOR_DATA(v)[j];
2214 (c, XCHARSET_NAME (charset),
2215 make_int ( ( (i + byte_offset) << 8 )
2221 Fput_char_attribute (v, XCHARSET_NAME (charset),
2222 make_int (i + byte_offset));
/* Writes the decoding table of CHARSET to persistent storage.

   Two back ends are selected at compile time:
   - with HAVE_LIBCHISE the entries go to the CHISE coded-charset object
     obtained from the default data source under the name of the charset,
     one chise_ccs_set_decoded_char call per entry, followed by a final
     chise_ccs_sync; a message is printed with printf when the
     coded-charset object cannot be obtained;
   - otherwise the entries go to the database file named by
     char_attribute_system_db_file, opened in w+ mode, with both key and
     value rendered by Fprin1_to_string, and the result of Fclose_database
     is returned.

   The table is walked with one nested loop per dimension (1 to 4).  In
   every level the octet runs from byte_min (the byte offset of the
   charset) up to but not including byte_max (byte offset plus byte size).
   The code point used as key packs the octets eight bits apiece, most
   significant octet first: row << 8 | cell for two dimensions, with the
   plane shifted by 16 and the group shifted by 24 in the deeper cases. */
2230 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2231 Save mapping-table of CHARSET.
2235 struct Lisp_Charset *cs;
2236 int byte_min, byte_max;
2237 #ifdef HAVE_LIBCHISE
2239 #else /* HAVE_LIBCHISE */
2241 Lisp_Object db_file;
2242 #endif /* not HAVE_LIBCHISE */
2244 charset = Fget_charset (charset);
2245 cs = XCHARSET (charset);
2247 #ifdef HAVE_LIBCHISE
2248 if ( open_chise_data_source_maybe () )
2252 = chise_ds_get_ccs (default_chise_data_source,
2253 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2256 printf ("Can't open decoding-table %s\n",
2257 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2260 #else /* HAVE_LIBCHISE */
2261 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2262 Qsystem_char_id, 1);
2263 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2264 #endif /* not HAVE_LIBCHISE */
2266 byte_min = CHARSET_BYTE_OFFSET (cs);
2267 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2268 switch (CHARSET_DIMENSION (cs))
2272 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2275 for (cell = byte_min; cell < byte_max; cell++)
2277 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2281 #ifdef HAVE_LIBCHISE
2282 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2283 #else /* HAVE_LIBCHISE */
2284 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2285 Fprin1_to_string (c, Qnil),
2287 #endif /* not HAVE_LIBCHISE */
2294 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2297 for (row = byte_min; row < byte_max; row++)
2299 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2302 for (cell = byte_min; cell < byte_max; cell++)
2304 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2308 #ifdef HAVE_LIBCHISE
2309 chise_ccs_set_decoded_char
2311 (row << 8) | cell, XCHAR (c));
2312 #else /* HAVE_LIBCHISE */
2313 Fput_database (Fprin1_to_string (make_int ((row << 8)
2316 Fprin1_to_string (c, Qnil),
2318 #endif /* not HAVE_LIBCHISE */
2326 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2329 for (plane = byte_min; plane < byte_max; plane++)
2332 = get_ccs_octet_table (table_p, charset, plane);
2335 for (row = byte_min; row < byte_max; row++)
2338 = get_ccs_octet_table (table_r, charset, row);
2341 for (cell = byte_min; cell < byte_max; cell++)
2343 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2348 #ifdef HAVE_LIBCHISE
2349 chise_ccs_set_decoded_char
2354 #else /* HAVE_LIBCHISE */
2355 Fput_database (Fprin1_to_string
2356 (make_int ((plane << 16)
2360 Fprin1_to_string (c, Qnil),
2362 #endif /* not HAVE_LIBCHISE */
2371 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2374 for (group = byte_min; group < byte_max; group++)
2377 = get_ccs_octet_table (table_g, charset, group);
2380 for (plane = byte_min; plane < byte_max; plane++)
2383 = get_ccs_octet_table (table_p, charset, plane);
2386 for (row = byte_min; row < byte_max; row++)
2389 = get_ccs_octet_table (table_r, charset, row);
2392 for (cell = byte_min; cell < byte_max; cell++)
2395 = get_ccs_octet_table (table_c, charset, cell);
2399 #ifdef HAVE_LIBCHISE
2400 chise_ccs_set_decoded_char
2406 #else /* HAVE_LIBCHISE */
2407 Fput_database (Fprin1_to_string
2408 (make_int (( group << 24)
2413 Fprin1_to_string (c, Qnil),
2415 #endif /* not HAVE_LIBCHISE */
2423 #ifdef HAVE_LIBCHISE
2424 chise_ccs_sync (dt_ccs);
2426 #else /* HAVE_LIBCHISE */
2427 return Fclose_database (db);
2428 #endif /* not HAVE_LIBCHISE */
/* Marks the decoding table of CCS as Qunloaded.  Judging by
   load_char_decoding_entry_maybe, entries are then presumably fetched
   from the backing store on demand -- TODO confirm.

   The availability test for the backing store depends on the build: with
   HAVE_LIBCHISE the coded-charset object is obtained from the default
   data source and chise_ccs_setup_db must return 0; otherwise the
   database file named by char_attribute_system_db_file must exist.  The
   statements executed when the store is unavailable are not shown
   here. */
2431 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2432 Reset mapping-table of CCS with database file.
2436 #ifdef HAVE_LIBCHISE
2437 CHISE_CCS chise_ccs;
2439 Lisp_Object db_file;
2442 ccs = Fget_charset (ccs);
2444 #ifdef HAVE_LIBCHISE
2445 if ( open_chise_data_source_maybe () )
2448 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2449 XSTRING_DATA (Fsymbol_name
2450 (XCHARSET_NAME(ccs))));
2451 if (chise_ccs == NULL)
2454 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2455 Qsystem_char_id, 0);
2459 #ifdef HAVE_LIBCHISE
2460 chise_ccs_setup_db (chise_ccs, 0) == 0
2462 !NILP (Ffile_exists_p (db_file))
2466 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetches the character for CODE_POINT of charset CCS from the backing
   store and records the outcome in the in-memory decoding table with
   decoding_table_put_char: the decoded character on a hit and Qnil on a
   miss, so a miss is recorded as well.

   With HAVE_LIBCHISE the lookup is chise_ds_decode_char on the default
   data source, keyed by the name of the charset.  Otherwise the database
   file named by char_attribute_system_db_file is opened in r mode, the
   printed form of CODE_POINT is used as the key, and the database is
   closed again on both the hit and the miss path. */
2473 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2475 #ifdef HAVE_LIBCHISE
2476 CHISE_Char_ID char_id;
2478 if ( open_chise_data_source_maybe () )
2482 = chise_ds_decode_char (default_chise_data_source,
2483 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2486 decoding_table_put_char (ccs, code_point, make_char (char_id));
2488 decoding_table_put_char (ccs, code_point, Qnil);
2490 /* chise_ccst_close (dt_ccs); */
2492 #else /* HAVE_LIBCHISE */
2495 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2498 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2502 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2509 decoding_table_put_char (ccs, code_point, ret);
2510 Fclose_database (db);
2514 decoding_table_put_char (ccs, code_point, Qnil);
2515 Fclose_database (db);
2518 #endif /* not HAVE_LIBCHISE */
2521 #ifdef HAVE_LIBCHISE
/* Stores the metadata of CHARSET as properties of a CHISE feature in the
   default data source.  Every property follows the same three steps:
   obtain the property handle with chise_ds_get_property, set its value on
   the feature with chise_feature_set_property_value, then flush it with
   chise_property_sync.

   When CHARSET is given as a symbol that differs from the real name of
   the charset (an alias), a property is first written on the feature
   named after the alias, with the printed real name as its value.  All
   further properties are written on the feature named after the real
   name.

   Properties written, in order: one holding the printed doc string, type,
   chars, dimension, a final-byte property only when the final byte is non
   zero, and a mother property only when a mother charset is set (a mother
   that is a charset object is replaced by its name first).  When the
   maximum code is non zero the code range, mother-code-offset and
   mother-code-conversion follow.  Code values are formatted with the #x%X
   pattern; a zero minimum code or zero offset is written through a
   separate branch whose literal is not shown here.  The conversion is
   written as identical, as the printed conversion symbol, or as unknown
   when the conversion is not one of the recognized kinds. */
2522 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2523 Save properties of CHARSET.
2527 struct Lisp_Charset *cs;
2528 CHISE_Property property;
2530 unsigned char* feature_name;
2532 ccs = Fget_charset (charset);
2533 cs = XCHARSET (ccs);
2535 if ( open_chise_data_source_maybe () )
2538 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2540 property = chise_ds_get_property (default_chise_data_source,
2542 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2543 chise_feature_set_property_value
2544 (chise_ds_get_feature (default_chise_data_source, feature_name),
2545 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2547 chise_property_sync (property);
2549 charset = XCHARSET_NAME (ccs);
2550 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2552 property = chise_ds_get_property (default_chise_data_source,
2554 chise_feature_set_property_value
2555 (chise_ds_get_feature (default_chise_data_source, feature_name),
2556 property, XSTRING_DATA (Fprin1_to_string
2557 (CHARSET_DOC_STRING (cs), Qnil)));
2558 chise_property_sync (property);
2560 property = chise_ds_get_property (default_chise_data_source, "type");
2561 chise_feature_set_property_value
2562 (chise_ds_get_feature (default_chise_data_source, feature_name),
2564 chise_property_sync (property);
2566 property = chise_ds_get_property (default_chise_data_source, "chars");
2567 chise_feature_set_property_value
2568 (chise_ds_get_feature (default_chise_data_source, feature_name),
2569 property, XSTRING_DATA (Fprin1_to_string (make_int
2570 (CHARSET_CHARS (cs)),
2572 chise_property_sync (property);
2574 property = chise_ds_get_property (default_chise_data_source, "dimension");
2575 chise_feature_set_property_value
2576 (chise_ds_get_feature (default_chise_data_source, feature_name),
2577 property, XSTRING_DATA (Fprin1_to_string (make_int
2578 (CHARSET_DIMENSION (cs)),
2580 chise_property_sync (property);
2582 if ( CHARSET_FINAL (cs) != 0 )
2584 property = chise_ds_get_property (default_chise_data_source,
2586 chise_feature_set_property_value
2587 (chise_ds_get_feature (default_chise_data_source, feature_name),
2588 property, XSTRING_DATA (Fprin1_to_string (make_int
2589 (CHARSET_FINAL (cs)),
2591 chise_property_sync (property);
2594 if ( !NILP (CHARSET_MOTHER (cs)) )
2596 Lisp_Object mother = CHARSET_MOTHER (cs);
2598 if ( CHARSETP (mother) )
2599 mother = XCHARSET_NAME (mother);
2601 property = chise_ds_get_property (default_chise_data_source,
2603 chise_feature_set_property_value
2604 (chise_ds_get_feature (default_chise_data_source, feature_name),
2605 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2606 chise_property_sync (property);
2609 if ( CHARSET_MAX_CODE (cs) != 0 )
2613 property = chise_ds_get_property (default_chise_data_source,
2615 if ( CHARSET_MIN_CODE (cs) == 0 )
2616 chise_feature_set_property_value
2617 (chise_ds_get_feature (default_chise_data_source, feature_name),
2621 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2622 chise_feature_set_property_value
2623 (chise_ds_get_feature (default_chise_data_source, feature_name),
2626 chise_property_sync (property);
2628 property = chise_ds_get_property (default_chise_data_source,
2630 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2631 chise_feature_set_property_value
2632 (chise_ds_get_feature (default_chise_data_source, feature_name),
2634 chise_property_sync (property);
2636 property = chise_ds_get_property (default_chise_data_source,
2637 "mother-code-offset");
2638 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2639 chise_feature_set_property_value
2640 (chise_ds_get_feature (default_chise_data_source, feature_name),
2644 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2645 chise_feature_set_property_value
2646 (chise_ds_get_feature (default_chise_data_source, feature_name),
2649 chise_property_sync (property);
2651 property = chise_ds_get_property (default_chise_data_source,
2652 "mother-code-conversion");
2653 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2654 chise_feature_set_property_value
2655 (chise_ds_get_feature (default_chise_data_source, feature_name),
2656 property, "identical");
2659 Lisp_Object sym = Qnil;
2661 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2663 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2665 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2667 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2670 chise_feature_set_property_value
2671 (chise_ds_get_feature (default_chise_data_source, feature_name),
2672 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2674 chise_feature_set_property_value
2675 (chise_ds_get_feature (default_chise_data_source, feature_name),
2676 property, "unknown");
2678 chise_property_sync (property);
2682 #endif /* HAVE_LIBCHISE */
2684 #endif /* HAVE_CHISE */
2685 #endif /* UTF2000 */
2688 /************************************************************************/
2689 /* Lisp primitives for working with characters */
2690 /************************************************************************/
/* Converts a (CHARSET, CODE) pair into a character object.  CHARSET is
   resolved with Fget_charset.  A charset whose graphic value is 1 gets
   special treatment of the code before decoding (the adjustment itself is
   not shown here).  Decoding uses DECODE_CHAR when DEFINED_ONLY is nil
   and decode_defined_char otherwise; WITHOUT_INHERITANCE is passed on as
   a C truth value.  A negative decoding result means no character and
   yields Qnil. */
2693 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2694 Make a character from CHARSET and code-point CODE.
2695 If DEFINED_ONLY is non-nil, builtin character is not returned.
2696 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2697 If corresponding character is not found, nil is returned.
2699 (charset, code, defined_only, without_inheritance))
2703 charset = Fget_charset (charset);
2706 if (XCHARSET_GRAPHIC (charset) == 1)
2708 if (NILP (defined_only))
2709 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2711 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2712 return c >= 0 ? make_char (c) : Qnil;
/* Converts a (CHARSET, CODE) pair into a builtin character.

   Special case for the latin-viscii charset: the pair is first decoded
   with Fdecode_char, the resulting character is probed for a code point
   in the viscii-lower and viscii-upper charsets, and CHARSET is switched
   to whichever of the two applies.

   The character is then computed with decode_builtin_char.  When that
   yields a negative value the function falls back to an ordinary
   Fdecode_char lookup with both optional flags nil. */
2715 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2716 Make a builtin character from CHARSET and code-point CODE.
2723 charset = Fget_charset (charset);
2725 if (EQ (charset, Vcharset_latin_viscii))
2727 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2733 (ret = Fget_char_attribute (chr,
2734 Vcharset_latin_viscii_lower,
2737 charset = Vcharset_latin_viscii_lower;
2741 (ret = Fget_char_attribute (chr,
2742 Vcharset_latin_viscii_upper,
2745 charset = Vcharset_latin_viscii_upper;
2752 if (XCHARSET_GRAPHIC (charset) == 1)
2755 ch = decode_builtin_char (charset, c);
2757 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
/* Builds a character from CHARSET and one or two octets.

   The permitted octet range depends on the charset: 0 to 127 for ascii,
   0 to 31 for control-1, 0 to 255 for a 256-character charset, 33 to 126
   for a 94-character charset and 32 to 127 otherwise.  An octet outside
   the range signals args-out-of-range, reporting the original argument
   together with both limits.

   For a charset of dimension one ARG2 must be nil, otherwise an error is
   signalled, and the character is made from ARG1 alone.  For higher
   dimensions ARG2 is range-checked like ARG1.  The visible assignment of
   a2 masks ARG2 with 0x7f, which is the high-bit stripping described by
   the comment inside the body. */
2761 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2762 Make a character from CHARSET and octets ARG1 and ARG2.
2763 ARG2 is required only for characters from two-dimensional charsets.
2764 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2765 character s with caron.
2767 (charset, arg1, arg2))
2771 int lowlim, highlim;
2773 charset = Fget_charset (charset);
2774 cs = XCHARSET (charset);
2776 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2777 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2779 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2781 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2782 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2785 /* It is useful (and safe, according to Olivier Galibert) to strip
2786 the 8th bit off ARG1 and ARG2 because it allows programmers to
2787 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2788 Latin 2 code of the character. */
2796 if (a1 < lowlim || a1 > highlim)
2797 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2799 if (CHARSET_DIMENSION (cs) == 1)
2803 ("Charset is of dimension one; second octet must be nil", arg2);
2804 return make_char (MAKE_CHAR (charset, a1, 0));
2813 a2 = XINT (arg2) & 0x7f;
2815 if (a2 < lowlim || a2 > highlim)
2816 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2818 return make_char (MAKE_CHAR (charset, a1, a2));
/* char-charset: after CHECK_CHAR_COERCE_INT on CHARACTER, return the
   XCHARSET_NAME of CHAR_CHARSET (XCHAR (character)) -- i.e. the value
   handed back is the charset's name, not the charset object itself.  */
2821 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2822 Return the character set of CHARACTER.
2826 CHECK_CHAR_COERCE_INT (character);
2828 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* char-octet: BREAKUP_CHAR splits CHARACTER into charset, octet0 and
   octet1.  N nil or 0 selects octet0, N equal to 1 selects octet1; the
   remaining visible path signals "Octet number must be 0 or 1" with N
   as the offending object.  */
2831 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2832 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2833 N defaults to 0 if omitted.
2837 Lisp_Object charset;
2840 CHECK_CHAR_COERCE_INT (character);
2842 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2844 if (NILP (n) || EQ (n, Qzero))
2845 return make_int (octet0);
2846 else if (EQ (n, make_int (1)))
2847 return make_int (octet1);
2849 signal_simple_error ("Octet number must be 0 or 1", n);
/* encode-char: resolve CHARSET with Fget_charset and ask
   charset_code_point for CHARACTER's code-point, passing
   !NILP (defined_only) as its third argument.  A non-negative result
   is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code-point is not
   visible here -- confirm.  */
2853 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2854 Return code-point of CHARACTER in specified CHARSET.
2856 (character, charset, defined_only))
2860 CHECK_CHAR_COERCE_INT (character);
2861 charset = Fget_charset (charset);
2862 code_point = charset_code_point (charset, XCHAR (character),
2863 !NILP (defined_only));
2864 if (code_point >= 0)
2865 return make_int (code_point);
/* split-char: build a list whose first element is the XCHARSET_NAME of
   CHARACTER's charset, followed by its position codes.  CHARSET and RC
   are protected with GCPRO2; the function is marked as able to GC.  */
2871 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2872 Return list of charset and one or two position-codes of CHARACTER.
2876 /* This function can GC */
2877 struct gcpro gcpro1, gcpro2;
2878 Lisp_Object charset = Qnil;
2879 Lisp_Object rc = Qnil;
2887 GCPRO2 (charset, rc);
2888 CHECK_CHAR_COERCE_INT (character);
/* Variant 1: take the code-point from ENCODE_CHAR and push its low
   byte onto RC while looping over the charset dimension, then push the
   charset name in front.
   NOTE(review): the loop's update statements are not visible here --
   confirm how code_point and dimension advance.  */
2891 code_point = ENCODE_CHAR (XCHAR (character), charset);
2892 dimension = XCHARSET_DIMENSION (charset);
2893 while (dimension > 0)
2895 rc = Fcons (make_int (code_point & 255), rc);
2899 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Variant 2: split with BREAKUP_CHAR; list3 (name, c1, c2) is built
   when the charset dimension is 2, list2 (name, c1) on the other
   visible path.
   NOTE(review): presumably the two variants are alternative
   preprocessor branches -- confirm.  */
2901 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2903 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2905 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2909 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2918 #ifdef ENABLE_COMPOSITE_CHARS
2919 /************************************************************************/
2920 /* composite character functions */
2921 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   builds a new character in Vcharset_composite from
   composite_char_row_next / composite_char_col_next, records it in
   both hash tables, and advances the column, resetting it to 32 and
   bumping the row once the column reaches 128.  When the row has
   reached 128 it signals "No more composite chars available".
   NOTE(review): the test that selects the allocation path (presumably
   a lookup miss) and the return statement are not visible here --
   confirm.  */
2924 lookup_composite_char (Bufbyte *str, int len)
2926 Lisp_Object lispstr = make_string (str, len);
2927 Lisp_Object ch = Fgethash (lispstr,
2928 Vcomposite_char_string2char_hash_table,
/* Row exhausted: no further composite characters can be numbered.  */
2934 if (composite_char_row_next >= 128)
2935 signal_simple_error ("No more composite chars available", lispstr);
2936 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2937 composite_char_col_next);
/* Register the new character in both directions: char -> string and
   string -> char.  */
2938 Fputhash (make_char (emch), lispstr,
2939 Vcomposite_char_char2string_hash_table);
2940 Fputhash (lispstr, make_char (emch),
2941 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell; columns restart at 32 on a new row.  */
2942 composite_char_col_next++;
2943 if (composite_char_col_next >= 128)
2945 composite_char_col_next = 32;
2946 composite_char_row_next++;
/* Look up CH in Vcomposite_char_char2string_hash_table and assert that
   the result is not unbound.
   NOTE(review): presumably the looked-up string is then returned -- the
   return statement is not visible here.  */
2955 composite_char_string (Emchar ch)
2957 Lisp_Object str = Fgethash (make_char (ch),
2958 Vcomposite_char_char2string_hash_table,
2960 assert (!UNBOUNDP (str));
/* make-composite-char (declared with xxDEFUN): check that STRING is a
   string, then return the character that lookup_composite_char gives
   for its data and length.  */
2964 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2965 Convert a string into a single composite character.
2966 The character is the result of overstriking all the characters in
2971 CHECK_STRING (string);
2972 return make_char (lookup_composite_char (XSTRING_DATA (string),
2973 XSTRING_LENGTH (string)));
/* composite-char-string (declared with xxDEFUN): signal "Must be
   composite char" when CHAR_LEADING_BYTE (emch) is not
   LEADING_BYTE_COMPOSITE; otherwise return composite_char_string (emch).  */
2976 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2977 Return a string of the characters comprising a composite character.
2985 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2986 signal_simple_error ("Must be composite char", ch);
2987 return composite_char_string (emch);
2989 #endif /* ENABLE_COMPOSITE_CHARS */
2992 /************************************************************************/
2993 /* initialization */
2994 /************************************************************************/
/* Symbol-level initialization for this file: runs
   INIT_LRECORD_IMPLEMENTATION for charset, a DEFSUBR for each
   primitive listed below, and a defsymbol for each Q... variable with
   the symbol name given as its second argument.  */
2997 syms_of_mule_charset (void)
2999 INIT_LRECORD_IMPLEMENTATION (charset);
/* Primitives operating on charset objects.  */
3001 DEFSUBR (Fcharsetp);
3002 DEFSUBR (Ffind_charset);
3003 DEFSUBR (Fget_charset);
3004 DEFSUBR (Fcharset_list);
3005 DEFSUBR (Fcharset_name);
3006 DEFSUBR (Fmake_charset);
3007 DEFSUBR (Fmake_reverse_direction_charset);
3008 /* DEFSUBR (Freverse_direction_charset); */
3009 DEFSUBR (Fdefine_charset_alias);
3010 DEFSUBR (Fcharset_from_attributes);
3011 DEFSUBR (Fcharset_short_name);
3012 DEFSUBR (Fcharset_long_name);
3013 DEFSUBR (Fcharset_description);
3014 DEFSUBR (Fcharset_dimension);
3015 DEFSUBR (Fcharset_property);
3016 DEFSUBR (Fcharset_id);
3017 DEFSUBR (Fset_charset_ccl_program);
3018 DEFSUBR (Fset_charset_registry);
/* Mapping-table primitives.
   NOTE(review): the conditionals that open before the two #endif lines
   below are only partly visible here -- confirm which of these
   registrations each one guards.  */
3021 DEFSUBR (Fcharset_mapping_table);
3022 DEFSUBR (Fset_charset_mapping_table);
3024 DEFSUBR (Fsave_charset_mapping_table);
3025 DEFSUBR (Freset_charset_mapping_table);
3026 #ifdef HAVE_LIBCHISE
3027 DEFSUBR (Fsave_charset_properties);
3028 #endif /* HAVE_LIBCHISE */
3029 #endif /* HAVE_CHISE */
/* Primitives converting between characters and code-points.  */
3030 DEFSUBR (Fdecode_char);
3031 DEFSUBR (Fdecode_builtin_char);
3032 DEFSUBR (Fencode_char);
3035 DEFSUBR (Fmake_char);
3036 DEFSUBR (Fchar_charset);
3037 DEFSUBR (Fchar_octet);
3038 DEFSUBR (Fsplit_char);
/* NOTE(review): the two composite primitives are declared with xxDEFUN
   in this file but registered with DEFSUBR here -- confirm this builds
   when ENABLE_COMPOSITE_CHARS is defined.  */
3040 #ifdef ENABLE_COMPOSITE_CHARS
3041 DEFSUBR (Fmake_composite_char);
3042 DEFSUBR (Fcomposite_char_string);
/* Symbols for the predicate and for charset property names.  */
3045 defsymbol (&Qcharsetp, "charsetp");
3046 defsymbol (&Qregistry, "registry");
3047 defsymbol (&Qfinal, "final");
3048 defsymbol (&Qgraphic, "graphic");
3049 defsymbol (&Qdirection, "direction");
3050 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3051 defsymbol (&Qshort_name, "short-name");
3052 defsymbol (&Qlong_name, "long-name");
3053 defsymbol (&Qiso_ir, "iso-ir");
3055 defsymbol (&Qpartial, "partial");
3056 defsymbol (&Qmother, "mother");
3057 defsymbol (&Qmin_code, "min-code");
3058 defsymbol (&Qmax_code, "max-code");
3059 defsymbol (&Qcode_offset, "code-offset");
3060 defsymbol (&Qconversion, "conversion");
3061 defsymbol (&Q94x60, "94x60");
3062 defsymbol (&Q94x94x60, "94x94x60");
3063 defsymbol (&Qbig5_1, "big5-1");
3064 defsymbol (&Qbig5_2, "big5-2");
/* Direction symbols.  */
3067 defsymbol (&Ql2r, "l2r");
3068 defsymbol (&Qr2l, "r2l");
3070 /* Charsets, compatible with FSF 20.3
3071 Naming convention is Script-Charset[-Edition] */
3072 defsymbol (&Qascii, "ascii");
3073 defsymbol (&Qcontrol_1, "control-1");
3074 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3075 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3076 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3077 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3078 defsymbol (&Qthai_tis620, "thai-tis620");
3079 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3080 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3081 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3082 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3083 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3084 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3085 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
/* The Qrep_... variables below are bound to symbol names that start
   with "=", unlike the Script-Charset names above.  */
3086 /* defsymbol (&Qrep_jis_x0208_1978, "=jis-x0208@1978"); */
3087 defsymbol (&Qrep_gb2312, "=gb2312");
3088 defsymbol (&Qrep_gb12345, "=gb12345");
3089 defsymbol (&Qrep_jis_x0208_1983, "=jis-x0208@1983");
3090 defsymbol (&Qrep_ks_x1001, "=ks-x1001");
3091 defsymbol (&Qrep_jis_x0212, "=jis-x0212");
3092 defsymbol (&Qrep_cns11643_1, "=cns11643-1");
3093 defsymbol (&Qrep_cns11643_2, "=cns11643-2");
3095 defsymbol (&Qsystem_char_id, "system-char-id");
3096 defsymbol (&Qrep_ucs, "=ucs");
3097 defsymbol (&Qucs, "ucs");
3098 defsymbol (&Qucs_bmp, "ucs-bmp");
3099 defsymbol (&Qucs_smp, "ucs-smp");
3100 defsymbol (&Qucs_sip, "ucs-sip");
3101 defsymbol (&Qlatin_viscii, "latin-viscii");
3102 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3103 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3104 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3105 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3106 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3107 defsymbol (&Qrep_jis_x0208, "=jis-x0208");
3108 defsymbol (&Qrep_jis_x0208_1990, "=jis-x0208@1990");
3109 defsymbol (&Qrep_big5, "=big5");
3110 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3112 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3113 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3115 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization: allocates the zeroed charset_lookup
   structure CHLOOK, registers it with dump_add_root_struct_ptr, fills
   its lookup tables with Qnil, seeds the next-allocated leading-byte
   fields, and declares the Lisp variables leading-code-private-11,
   default-coded-charset-priority-list (initially Qnil),
   display-coded-charset-priority-use-inheritance (initially Qt) and
   display-coded-charset-priority-use-hierarchy-order (initially Qt).  */
3119 vars_of_mule_charset (void)
3126 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3127 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3129 /* Table of charsets indexed by leading byte. */
3130 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3131 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): charset_by_attributes is cleared below once as a
   two-level table and once as a three-level table; presumably these
   are alternative preprocessor branches -- confirm.  */
3134 /* Table of charsets indexed by type/final-byte. */
3135 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3136 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3137 chlook->charset_by_attributes[i][j] = Qnil;
3139 /* Table of charsets indexed by type/final-byte/direction. */
3140 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3141 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3142 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3143 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Initial values of the next-allocated leading-byte fields.
   NOTE(review): presumably the single field and the 1-byte/2-byte pair
   belong to alternative build configurations -- confirm.  */
3147 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3149 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3150 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* leading_code_private_11 is assigned PRE_LEADING_BYTE_PRIVATE_1 both
   before and after its DEFVAR_INT in the lines that follow.  */
3154 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3155 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3156 Leading-code of private TYPE9N charset of column-width 1.
3158 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3162 Vdefault_coded_charset_priority_list = Qnil;
3163 DEFVAR_LISP ("default-coded-charset-priority-list",
3164 &Vdefault_coded_charset_priority_list /*
3165 Default order of preferred coded-character-sets.
3167 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3168 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3169 &Vdisplay_coded_charset_priority_use_inheritance /*
3170 If non-nil, use character inheritance.
3172 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3173 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3174 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3175 If non-nil, prefer nearest character in hierarchy order.
3181 complex_vars_of_mule_charset (void)
3183 staticpro (&Vcharset_hash_table);
3184 Vcharset_hash_table =
3185 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3187 /* Predefined character sets. We store them into variables for
3191 staticpro (&Vcharset_system_char_id);
3192 Vcharset_system_char_id =
3193 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3194 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3195 build_string ("SCID"),
3196 build_string ("CHAR-ID"),
3197 build_string ("System char-id"),
3199 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3201 staticpro (&Vcharset_ucs);
3203 make_charset (LEADING_BYTE_UCS, Qrep_ucs, 256, 4,
3204 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3205 build_string ("UCS"),
3206 build_string ("UCS"),
3207 build_string ("ISO/IEC 10646"),
3209 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3211 staticpro (&Vcharset_ucs_bmp);
3213 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3214 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3215 build_string ("BMP"),
3216 build_string ("UCS-BMP"),
3217 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3219 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3220 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL,
3222 staticpro (&Vcharset_ucs_smp);
3224 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3225 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3226 build_string ("SMP"),
3227 build_string ("UCS-SMP"),
3228 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3229 build_string ("UCS00-1"),
3230 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3231 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL,
3233 staticpro (&Vcharset_ucs_sip);
3235 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3236 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3237 build_string ("SIP"),
3238 build_string ("UCS-SIP"),
3239 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3240 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3241 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3242 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL,
3245 # define MIN_CHAR_THAI 0
3246 # define MAX_CHAR_THAI 0
3247 /* # define MIN_CHAR_HEBREW 0 */
3248 /* # define MAX_CHAR_HEBREW 0 */
3249 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3250 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3252 staticpro (&Vcharset_ascii);
3254 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3255 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3256 build_string ("ASCII"),
3257 build_string ("ASCII)"),
3258 build_string ("ASCII (ISO646 IRV)"),
3259 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3260 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL,
3262 staticpro (&Vcharset_control_1);
3263 Vcharset_control_1 =
3264 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3265 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3266 build_string ("C1"),
3267 build_string ("Control characters"),
3268 build_string ("Control characters 128-191"),
3270 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL,
3272 staticpro (&Vcharset_latin_iso8859_1);
3273 Vcharset_latin_iso8859_1 =
3274 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3275 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3276 build_string ("Latin-1"),
3277 build_string ("ISO8859-1 (Latin-1)"),
3278 build_string ("ISO8859-1 (Latin-1)"),
3279 build_string ("iso8859-1"),
3280 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3282 staticpro (&Vcharset_latin_iso8859_2);
3283 Vcharset_latin_iso8859_2 =
3284 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3285 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3286 build_string ("Latin-2"),
3287 build_string ("ISO8859-2 (Latin-2)"),
3288 build_string ("ISO8859-2 (Latin-2)"),
3289 build_string ("iso8859-2"),
3290 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3292 staticpro (&Vcharset_latin_iso8859_3);
3293 Vcharset_latin_iso8859_3 =
3294 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3295 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3296 build_string ("Latin-3"),
3297 build_string ("ISO8859-3 (Latin-3)"),
3298 build_string ("ISO8859-3 (Latin-3)"),
3299 build_string ("iso8859-3"),
3300 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3302 staticpro (&Vcharset_latin_iso8859_4);
3303 Vcharset_latin_iso8859_4 =
3304 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3305 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3306 build_string ("Latin-4"),
3307 build_string ("ISO8859-4 (Latin-4)"),
3308 build_string ("ISO8859-4 (Latin-4)"),
3309 build_string ("iso8859-4"),
3310 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3312 staticpro (&Vcharset_thai_tis620);
3313 Vcharset_thai_tis620 =
3314 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3315 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3316 build_string ("TIS620"),
3317 build_string ("TIS620 (Thai)"),
3318 build_string ("TIS620.2529 (Thai)"),
3319 build_string ("tis620"),
3320 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3322 staticpro (&Vcharset_greek_iso8859_7);
3323 Vcharset_greek_iso8859_7 =
3324 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3325 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3326 build_string ("ISO8859-7"),
3327 build_string ("ISO8859-7 (Greek)"),
3328 build_string ("ISO8859-7 (Greek)"),
3329 build_string ("iso8859-7"),
3330 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3332 staticpro (&Vcharset_arabic_iso8859_6);
3333 Vcharset_arabic_iso8859_6 =
3334 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3335 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3336 build_string ("ISO8859-6"),
3337 build_string ("ISO8859-6 (Arabic)"),
3338 build_string ("ISO8859-6 (Arabic)"),
3339 build_string ("iso8859-6"),
3340 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3342 staticpro (&Vcharset_hebrew_iso8859_8);
3343 Vcharset_hebrew_iso8859_8 =
3344 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3345 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3346 build_string ("ISO8859-8"),
3347 build_string ("ISO8859-8 (Hebrew)"),
3348 build_string ("ISO8859-8 (Hebrew)"),
3349 build_string ("iso8859-8"),
3351 0 /* MIN_CHAR_HEBREW */,
3352 0 /* MAX_CHAR_HEBREW */, 0, 32,
3353 Qnil, CONVERSION_IDENTICAL,
3355 staticpro (&Vcharset_katakana_jisx0201);
3356 Vcharset_katakana_jisx0201 =
3357 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3358 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3359 build_string ("JISX0201 Kana"),
3360 build_string ("JISX0201.1976 (Japanese Kana)"),
3361 build_string ("JISX0201.1976 Japanese Kana"),
3362 build_string ("jisx0201\\.1976"),
3363 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3365 staticpro (&Vcharset_latin_jisx0201);
3366 Vcharset_latin_jisx0201 =
3367 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3368 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3369 build_string ("JISX0201 Roman"),
3370 build_string ("JISX0201.1976 (Japanese Roman)"),
3371 build_string ("JISX0201.1976 Japanese Roman"),
3372 build_string ("jisx0201\\.1976"),
3373 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3375 staticpro (&Vcharset_cyrillic_iso8859_5);
3376 Vcharset_cyrillic_iso8859_5 =
3377 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3378 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3379 build_string ("ISO8859-5"),
3380 build_string ("ISO8859-5 (Cyrillic)"),
3381 build_string ("ISO8859-5 (Cyrillic)"),
3382 build_string ("iso8859-5"),
3383 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3385 staticpro (&Vcharset_latin_iso8859_9);
3386 Vcharset_latin_iso8859_9 =
3387 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3388 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3389 build_string ("Latin-5"),
3390 build_string ("ISO8859-9 (Latin-5)"),
3391 build_string ("ISO8859-9 (Latin-5)"),
3392 build_string ("iso8859-9"),
3393 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3396 staticpro (&Vcharset_jis_x0208);
3397 Vcharset_jis_x0208 =
3398 make_charset (LEADING_BYTE_JIS_X0208,
3399 Qrep_jis_x0208, 94, 2,
3400 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3401 build_string ("JIS X0208"),
3402 build_string ("JIS X0208 Common"),
3403 build_string ("JIS X0208 Common part"),
3404 build_string ("jisx0208\\.1990"),
3406 MIN_CHAR_JIS_X0208_1990,
3407 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3408 Qnil, CONVERSION_94x94,
3412 staticpro (&Vcharset_japanese_jisx0208_1978);
3413 Vcharset_japanese_jisx0208_1978 =
3414 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3415 Qrep_jis_x0208_1978, 94, 2,
3416 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3417 build_string ("JIS X0208:1978"),
3418 build_string ("JIS X0208:1978 (Japanese)"),
3420 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3421 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3428 CONVERSION_IDENTICAL,
3431 staticpro (&Vcharset_chinese_gb2312);
3432 Vcharset_chinese_gb2312 =
3433 make_charset (LEADING_BYTE_CHINESE_GB2312, Qrep_gb2312, 94, 2,
3434 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3435 build_string ("GB2312"),
3436 build_string ("GB2312)"),
3437 build_string ("GB2312 Chinese simplified"),
3438 build_string ("gb2312"),
3439 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3441 staticpro (&Vcharset_chinese_gb12345);
3442 Vcharset_chinese_gb12345 =
3443 make_charset (LEADING_BYTE_CHINESE_GB12345, Qrep_gb12345, 94, 2,
3444 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3445 build_string ("G1"),
3446 build_string ("GB 12345)"),
3447 build_string ("GB 12345-1990"),
3448 build_string ("GB12345\\(\\.1990\\)?-0"),
3449 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3451 staticpro (&Vcharset_japanese_jisx0208);
3452 Vcharset_japanese_jisx0208 =
3453 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qrep_jis_x0208_1983, 94, 2,
3454 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3455 build_string ("JISX0208"),
3456 build_string ("JIS X0208:1983 (Japanese)"),
3457 build_string ("JIS X0208:1983 Japanese Kanji"),
3458 build_string ("jisx0208\\.1983"),
3465 CONVERSION_IDENTICAL,
3468 staticpro (&Vcharset_japanese_jisx0208_1990);
3469 Vcharset_japanese_jisx0208_1990 =
3470 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3471 Qrep_jis_x0208_1990, 94, 2,
3472 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3473 build_string ("JISX0208-1990"),
3474 build_string ("JIS X0208:1990 (Japanese)"),
3475 build_string ("JIS X0208:1990 Japanese Kanji"),
3476 build_string ("jisx0208\\.1990"),
3478 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3479 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3480 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3481 Vcharset_jis_x0208 /* Qnil */,
3482 CONVERSION_IDENTICAL /* CONVERSION_94x94 */,
3485 staticpro (&Vcharset_korean_ksc5601);
3486 Vcharset_korean_ksc5601 =
3487 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qrep_ks_x1001, 94, 2,
3488 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3489 build_string ("KSC5601"),
3490 build_string ("KSC5601 (Korean"),
3491 build_string ("KSC5601 Korean Hangul and Hanja"),
3492 build_string ("ksc5601"),
3493 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3495 staticpro (&Vcharset_japanese_jisx0212);
3496 Vcharset_japanese_jisx0212 =
3497 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qrep_jis_x0212, 94, 2,
3498 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3499 build_string ("JISX0212"),
3500 build_string ("JISX0212 (Japanese)"),
3501 build_string ("JISX0212 Japanese Supplement"),
3502 build_string ("jisx0212"),
3503 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3506 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3507 staticpro (&Vcharset_chinese_cns11643_1);
3508 Vcharset_chinese_cns11643_1 =
3509 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qrep_cns11643_1, 94, 2,
3510 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3511 build_string ("CNS11643-1"),
3512 build_string ("CNS11643-1 (Chinese traditional)"),
3514 ("CNS 11643 Plane 1 Chinese traditional"),
3515 build_string (CHINESE_CNS_PLANE_RE("1")),
3516 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3518 staticpro (&Vcharset_chinese_cns11643_2);
3519 Vcharset_chinese_cns11643_2 =
3520 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qrep_cns11643_2, 94, 2,
3521 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3522 build_string ("CNS11643-2"),
3523 build_string ("CNS11643-2 (Chinese traditional)"),
3525 ("CNS 11643 Plane 2 Chinese traditional"),
3526 build_string (CHINESE_CNS_PLANE_RE("2")),
3527 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL,
3530 staticpro (&Vcharset_latin_tcvn5712);
3531 Vcharset_latin_tcvn5712 =
3532 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3533 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3534 build_string ("TCVN 5712"),
3535 build_string ("TCVN 5712 (VSCII-2)"),
3536 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3537 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3538 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3540 staticpro (&Vcharset_latin_viscii_lower);
3541 Vcharset_latin_viscii_lower =
3542 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3543 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3544 build_string ("VISCII lower"),
3545 build_string ("VISCII lower (Vietnamese)"),
3546 build_string ("VISCII lower (Vietnamese)"),
3547 build_string ("MULEVISCII-LOWER"),
3548 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3550 staticpro (&Vcharset_latin_viscii_upper);
3551 Vcharset_latin_viscii_upper =
3552 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3553 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3554 build_string ("VISCII upper"),
3555 build_string ("VISCII upper (Vietnamese)"),
3556 build_string ("VISCII upper (Vietnamese)"),
3557 build_string ("MULEVISCII-UPPER"),
3558 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL,
3560 staticpro (&Vcharset_latin_viscii);
3561 Vcharset_latin_viscii =
3562 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3563 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3564 build_string ("VISCII"),
3565 build_string ("VISCII 1.1 (Vietnamese)"),
3566 build_string ("VISCII 1.1 (Vietnamese)"),
3567 build_string ("VISCII1\\.1"),
3568 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL,
3570 staticpro (&Vcharset_chinese_big5);
3571 Vcharset_chinese_big5 =
3572 make_charset (LEADING_BYTE_CHINESE_BIG5, Qrep_big5, 256, 2,
3573 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3574 build_string ("Big5"),
3575 build_string ("Big5"),
3576 build_string ("Big5 Chinese traditional"),
3577 build_string ("big5-0"),
3579 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3580 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL,
3583 staticpro (&Vcharset_ethiopic_ucs);
3584 Vcharset_ethiopic_ucs =
3585 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3586 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3587 build_string ("Ethiopic (UCS)"),
3588 build_string ("Ethiopic (UCS)"),
3589 build_string ("Ethiopic of UCS"),
3590 build_string ("Ethiopic-Unicode"),
3591 Qnil, 0x1200, 0x137F, 0, 0,
3592 Qnil, CONVERSION_IDENTICAL,
3595 staticpro (&Vcharset_chinese_big5_1);
3596 Vcharset_chinese_big5_1 =
3597 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3598 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3599 build_string ("Big5"),
3600 build_string ("Big5 (Level-1)"),
3602 ("Big5 Level-1 Chinese traditional"),
3603 build_string ("big5"),
3604 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3605 Vcharset_chinese_big5, CONVERSION_BIG5_1,
3607 staticpro (&Vcharset_chinese_big5_2);
3608 Vcharset_chinese_big5_2 =
3609 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3610 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3611 build_string ("Big5"),
3612 build_string ("Big5 (Level-2)"),
3614 ("Big5 Level-2 Chinese traditional"),
3615 build_string ("big5"),
3616 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3617 Vcharset_chinese_big5, CONVERSION_BIG5_2,
3620 #ifdef ENABLE_COMPOSITE_CHARS
3621 /* #### For simplicity, we put composite chars into a 96x96 charset.
3622 This is going to lead to problems because you can run out of
3623 room, esp. as we don't yet recycle numbers. */
3624 staticpro (&Vcharset_composite);
3625 Vcharset_composite =
3626 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3627 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3628 build_string ("Composite"),
3629 build_string ("Composite characters"),
3630 build_string ("Composite characters"),
3633 /* #### not dumped properly */
3634 composite_char_row_next = 32;
3635 composite_char_col_next = 32;
3637 Vcomposite_char_string2char_hash_table =
3638 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3639 Vcomposite_char_char2string_hash_table =
3640 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3641 staticpro (&Vcomposite_char_string2char_hash_table);
3642 staticpro (&Vcomposite_char_char2string_hash_table);
3643 #endif /* ENABLE_COMPOSITE_CHARS */