1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_japanese_jef_china3;
79 Lisp_Object Vcharset_ideograph_gt;
80 Lisp_Object Vcharset_ideograph_gt_pj_1;
81 Lisp_Object Vcharset_ideograph_gt_pj_2;
82 Lisp_Object Vcharset_ideograph_gt_pj_3;
83 Lisp_Object Vcharset_ideograph_gt_pj_4;
84 Lisp_Object Vcharset_ideograph_gt_pj_5;
85 Lisp_Object Vcharset_ideograph_gt_pj_6;
86 Lisp_Object Vcharset_ideograph_gt_pj_7;
87 Lisp_Object Vcharset_ideograph_gt_pj_8;
88 Lisp_Object Vcharset_ideograph_gt_pj_9;
89 Lisp_Object Vcharset_ideograph_gt_pj_10;
90 Lisp_Object Vcharset_ideograph_gt_pj_11;
91 Lisp_Object Vcharset_ideograph_daikanwa;
92 Lisp_Object Vcharset_mojikyo;
93 Lisp_Object Vcharset_mojikyo_2022_1;
94 Lisp_Object Vcharset_mojikyo_pj_1;
95 Lisp_Object Vcharset_mojikyo_pj_2;
96 Lisp_Object Vcharset_mojikyo_pj_3;
97 Lisp_Object Vcharset_mojikyo_pj_4;
98 Lisp_Object Vcharset_mojikyo_pj_5;
99 Lisp_Object Vcharset_mojikyo_pj_6;
100 Lisp_Object Vcharset_mojikyo_pj_7;
101 Lisp_Object Vcharset_mojikyo_pj_8;
102 Lisp_Object Vcharset_mojikyo_pj_9;
103 Lisp_Object Vcharset_mojikyo_pj_10;
104 Lisp_Object Vcharset_mojikyo_pj_11;
105 Lisp_Object Vcharset_mojikyo_pj_12;
106 Lisp_Object Vcharset_mojikyo_pj_13;
107 Lisp_Object Vcharset_mojikyo_pj_14;
108 Lisp_Object Vcharset_mojikyo_pj_15;
109 Lisp_Object Vcharset_mojikyo_pj_16;
110 Lisp_Object Vcharset_mojikyo_pj_17;
111 Lisp_Object Vcharset_mojikyo_pj_18;
112 Lisp_Object Vcharset_mojikyo_pj_19;
113 Lisp_Object Vcharset_mojikyo_pj_20;
114 Lisp_Object Vcharset_mojikyo_pj_21;
115 Lisp_Object Vcharset_ethiopic_ucs;
117 Lisp_Object Vcharset_chinese_big5_1;
118 Lisp_Object Vcharset_chinese_big5_2;
120 #ifdef ENABLE_COMPOSITE_CHARS
121 Lisp_Object Vcharset_composite;
123 /* Hash tables for composite chars. One maps strings representing
124 composed chars to their equivalent chars; one goes the other way. */
126 Lisp_Object Vcomposite_char_char2string_hash_table;
127 Lisp_Object Vcomposite_char_string2char_hash_table;
129 static int composite_char_row_next;
130 static int composite_char_col_next;
132 #endif /* ENABLE_COMPOSITE_CHARS */
134 struct charset_lookup *chlook;
136 static const struct lrecord_description charset_lookup_description_1[] = {
137 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
146 static const struct struct_description charset_lookup_description = {
147 sizeof (struct charset_lookup),
148 charset_lookup_description_1
152 /* Table of number of bytes in the string representation of a character
153 indexed by the first byte of that representation.
155 rep_bytes_by_first_byte(c) is more efficient than the equivalent
156 canonical computation:
158 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
160 const Bytecount rep_bytes_by_first_byte[0xA0] =
161 { /* 0x00 - 0x7f are for straight ASCII */
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 /* 0x80 - 0x8f are for Dimension-1 official charsets */
172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
174 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
176 /* 0x90 - 0x9d are for Dimension-2 official charsets */
177 /* 0x9e is for Dimension-1 private charsets */
178 /* 0x9f is for Dimension-2 private charsets */
179 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the number of slots per dimension that a table indexed by the
   bytes of charset CS needs.  The result is used in this file (through
   XCHARSET_BYTE_SIZE) as the length of the decoding-table vectors.
   Normally this is CHARSET_CHARS (cs); a 94-character set whose byte
   offset is not 33 gets 128 - CHARSET_BYTE_OFFSET (cs) slots instead.  */
185 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
187 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
189 /* ad-hoc method for `ascii' */
190 if ((CHARSET_CHARS (cs) == 94) &&
191 (CHARSET_BYTE_OFFSET (cs) != 33))
192 return 128 - CHARSET_BYTE_OFFSET (cs);
194 return CHARSET_CHARS (cs);
/* Same as CHARSET_BYTE_SIZE, but takes the charset as a Lisp_Object.  */
197 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the vector V, a (possibly nested) decoding table, for a charset
   of dimension DIM whose per-dimension size is CCS_LEN.  The visible code
   compares XVECTOR_LENGTH (v) against CCS_LEN, walks every element, and
   for elements that are neither nil nor a character makes a recursive
   call with DIM - 1.
   NOTE(review): the return statements are not visible in this listing,
   so the meaning of the individual result codes is not documented here.  */
199 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
201 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
205 if (XVECTOR_LENGTH (v) > ccs_len)
208 for (i = 0; i < XVECTOR_LENGTH (v); i++)
210 Lisp_Object c = XVECTOR_DATA(v)[i];
212 if (!NILP (c) && !CHARP (c))
216 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for CODE_POINT from the decoding table V.  The index
   used at a level is the byte ((code_point >> (8 * dim)) & 255) minus
   BYTE_OFFSET; the slot that is finally reached is overwritten with Qnil.
   NOTE(review): the loop and the tests between the visible statements are
   not shown; presumably DIM is decremented while descending through the
   nested vectors -- confirm against the full source.  */
228 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
231 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
241 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
242 nv = XVECTOR_DATA(v)[i];
248 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER under CODE_POINT in the decoding table V.  Each level
   is indexed by the byte ((code_point >> (8 * dim)) & 255) minus
   BYTE_OFFSET.  Sub-vectors are created with make_older_vector, using
   the length of the vector V passed in and initialised to Qnil.
   NOTE(review): the condition guarding that creation and the loop over
   DIM are not visible in this listing.  */
252 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
253 int code_point, Lisp_Object character);
255 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
256 int code_point, Lisp_Object character)
260 int ccs_len = XVECTOR_LENGTH (v);
265 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
266 nv = XVECTOR_DATA(v)[i];
270 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
276 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in the coded charset CCS.

   The decoding-table work is done when CCS is not named Qucs or when
   XCHAR (character) differs from XINT (value).  VALUE may be an integer
   or, in the obsolete representation, a list of bytes which is folded
   into one integer (code_point << 8 | byte).  Anything else signals
   "Invalid value for coded-charset".  For an integer VALUE with
   XCHARSET_GRAPHIC (ccs) == 1 the code point is masked with 0x7F7F7F7F.

   A code point previously obtained through Fget_char_attribute is removed
   from the decoding table, CHARACTER is then stored under the new code
   point, and the encoding table (created on demand with
   make_char_id_table) is updated with put_char_id_table.
   NOTE(review): several guarding conditions and the return statement are
   not visible in this listing; the description covers only the visible
   statements.  */
280 put_char_ccs_code_point (Lisp_Object character,
281 Lisp_Object ccs, Lisp_Object value)
283 Lisp_Object encoding_table;
285 if (!EQ (XCHARSET_NAME (ccs), Qucs)
286 || (XCHAR (character) != XINT (value)))
288 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
289 int dim = XCHARSET_DIMENSION (ccs);
290 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
291 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
295 { /* obsolete representation: value must be a list of bytes */
296 Lisp_Object ret = Fcar (value);
300 signal_simple_error ("Invalid value for coded-charset", value);
301 code_point = XINT (ret);
302 if (XCHARSET_GRAPHIC (ccs) == 1)
310 signal_simple_error ("Invalid value for coded-charset",
314 signal_simple_error ("Invalid value for coded-charset",
317 if (XCHARSET_GRAPHIC (ccs) == 1)
319 code_point = (code_point << 8) | j;
322 value = make_int (code_point);
324 else if (INTP (value))
326 code_point = XINT (value);
327 if (XCHARSET_GRAPHIC (ccs) == 1)
329 code_point &= 0x7F7F7F7F;
330 value = make_int (code_point);
334 signal_simple_error ("Invalid value for coded-charset", value);
338 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
341 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
346 XCHARSET_DECODING_TABLE (ccs)
347 = v = make_older_vector (ccs_len, Qnil);
350 decoding_table_put_char (v, dim, byte_offset, code_point, character);
352 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
354 XCHARSET_ENCODING_TABLE (ccs)
355 = encoding_table = make_char_id_table (Qnil);
357 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
/* Forget the mapping between CHARACTER and the coded charset CCS.
   When the decoding table of CCS is a vector, the code point currently
   returned by Fget_char_attribute is removed from it; when the encoding
   table is a char table, the entry for CHARACTER is reset to Qnil.
   NOTE(review): the test applied to CPOS and the return value are not
   visible in this listing.  */
362 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
364 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
365 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
367 if (VECTORP (decoding_table))
369 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
373 decoding_table_remove_char (decoding_table,
374 XCHARSET_DIMENSION (ccs),
375 XCHARSET_BYTE_OFFSET (ccs),
379 if (CHAR_TABLEP (encoding_table))
381 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
389 int leading_code_private_11;
392 Lisp_Object Qcharsetp;
394 /* Qdoc_string, Qdimension, Qchars defined in general.c */
395 Lisp_Object Qregistry, Qfinal, Qgraphic;
396 Lisp_Object Qdirection;
397 Lisp_Object Qreverse_direction_charset;
398 Lisp_Object Qleading_byte;
399 Lisp_Object Qshort_name, Qlong_name;
415 Qjapanese_jisx0208_1978,
419 Qjapanese_jisx0208_1990,
434 Qvietnamese_viscii_lower,
435 Qvietnamese_viscii_upper,
438 Qjapanese_jef_china3,
481 Lisp_Object Ql2r, Qr2l;
483 Lisp_Object Vcharset_hash_table;
485 /* Composite characters are characters constructed by overstriking two
486 or more regular characters.
488 1) The old Mule implementation involves storing composite characters
489 in a buffer as a tag followed by all of the actual characters
490 used to make up the composite character. I think this is a bad
491 idea; it greatly complicates code that wants to handle strings
492 one character at a time because it has to deal with the possibility
493 of great big ungainly characters. It's much more reasonable to
494 simply store an index into a table of composite characters.
496 2) The current implementation only allows for 16,384 separate
497 composite characters over the lifetime of the XEmacs process.
498 This could become a potential problem if the user
499 edited lots of different files that use composite characters.
500 Due to FSF bogosity, increasing the number of allowable
501 composite characters under Mule would decrease the number
502 of possible faces that can exist. Mule already has shrunk
503 this to 2048, and further shrinkage would become uncomfortable.
504 No such problems exist in XEmacs.
506 Composite characters could be represented as 0x80 C1 C2 C3,
507 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
508 for slightly under 2^20 (one million) composite characters
509 over the XEmacs process lifetime, and you only need to
510 increase the size of a Mule character from 19 to 21 bits.
511 Or you could use 0x80 C1 C2 C3 C4, allowing for about
512 85 million (slightly over 2^26) composite characters. */
515 /************************************************************************/
516 /* Basic Emchar functions */
517 /************************************************************************/
519 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
520 string in STR. Returns the number of bytes stored.
521 Do not call this directly. Use the macro set_charptr_emchar() instead.

   Two encoders are visible below.  The first is UTF-8 style: the value
   of C selects a 2-, 3-, 4-, 5- or 6-byte form whose lead byte carries
   the marker 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc and whose remaining bytes
   each carry six bits of C or-ed with 0x80.  The second splits C with
   BREAKUP_CHAR and emits leading bytes.
   NOTE(review): presumably a preprocessor conditional that is not
   visible in this listing selects between the two -- confirm.  */
525 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
540 else if ( c <= 0x7ff )
542 *p++ = (c >> 6) | 0xc0;
543 *p++ = (c & 0x3f) | 0x80;
545 else if ( c <= 0xffff )
547 *p++ = (c >> 12) | 0xe0;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
551 else if ( c <= 0x1fffff )
553 *p++ = (c >> 18) | 0xf0;
554 *p++ = ((c >> 12) & 0x3f) | 0x80;
555 *p++ = ((c >> 6) & 0x3f) | 0x80;
556 *p++ = (c & 0x3f) | 0x80;
558 else if ( c <= 0x3ffffff )
560 *p++ = (c >> 24) | 0xf8;
561 *p++ = ((c >> 18) & 0x3f) | 0x80;
562 *p++ = ((c >> 12) & 0x3f) | 0x80;
563 *p++ = ((c >> 6) & 0x3f) | 0x80;
564 *p++ = (c & 0x3f) | 0x80;
568 *p++ = (c >> 30) | 0xfc;
569 *p++ = ((c >> 24) & 0x3f) | 0x80;
570 *p++ = ((c >> 18) & 0x3f) | 0x80;
571 *p++ = ((c >> 12) & 0x3f) | 0x80;
572 *p++ = ((c >> 6) & 0x3f) | 0x80;
573 *p++ = (c & 0x3f) | 0x80;
576 BREAKUP_CHAR (c, charset, c1, c2);
577 lb = CHAR_LEADING_BYTE (c);
578 if (LEADING_BYTE_PRIVATE_P (lb))
579 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
581 if (EQ (charset, Vcharset_control_1))
590 /* Return the first character from a Mule-encoded string in STR,
591 assuming it's non-ASCII. Do not call this directly.
592 Use the macro charptr_emchar() instead. */
/* Two decoders are visible below.  One classifies the first byte against
   0xf8, 0xf0, 0xe0 and 0xc0 and then folds six bits of each following
   byte into CH.  The other works on leading bytes: a
   LEADING_BYTE_CONTROL_1 prefix yields the next byte minus 0x20,
   otherwise the charset is looked up with CHARSET_BY_LEADING_BYTE and
   the character is built with MAKE_CHAR.
   NOTE(review): the bodies of most branches are not visible in this
   listing, and presumably a preprocessor conditional selects between the
   two decoders -- confirm.  */
595 non_ascii_charptr_emchar (const Bufbyte *str)
608 else if ( b >= 0xf8 )
613 else if ( b >= 0xf0 )
618 else if ( b >= 0xe0 )
623 else if ( b >= 0xc0 )
633 for( ; len > 0; len-- )
636 ch = ( ch << 6 ) | ( b & 0x3f );
640 Bufbyte i0 = *str, i1, i2 = 0;
643 if (i0 == LEADING_BYTE_CONTROL_1)
644 return (Emchar) (*++str - 0x20);
646 if (LEADING_BYTE_PREFIX_P (i0))
651 charset = CHARSET_BY_LEADING_BYTE (i0);
652 if (XCHARSET_DIMENSION (charset) == 2)
655 return MAKE_CHAR (charset, i1, i2);
659 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
660 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3.  The visible tests
   range-check F2 (and, further down, F1) against the official and
   private field bounds, then look up the charset with
   CHARSET_BY_LEADING_BYTE.  Positions 0x20 and 0x7F are examined
   separately; for them the visible returns are
   XCHARSET_CHARS (charset) == 96.  With ENABLE_COMPOSITE_CHARS, composite
   characters are looked up in Vcomposite_char_char2string_hash_table.
   NOTE(review): the condition choosing between the F2-based and the
   F1-based part, and most return statements, are not visible in this
   listing.  */
664 non_ascii_valid_char_p (Emchar ch)
668 /* Must have only lowest 19 bits set */
672 f1 = CHAR_FIELD1 (ch);
673 f2 = CHAR_FIELD2 (ch);
674 f3 = CHAR_FIELD3 (ch);
680 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
681 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
682 f2 > MAX_CHAR_FIELD2_PRIVATE)
687 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
688 f2 <= MAX_CHAR_FIELD2_PRIVATE))
692 NOTE: This takes advantage of the fact that
693 FIELD2_TO_OFFICIAL_LEADING_BYTE and
694 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
696 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
697 if (EQ (charset, Qnil))
699 return (XCHARSET_CHARS (charset) == 96);
705 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
706 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
707 f1 > MAX_CHAR_FIELD1_PRIVATE)
709 if (f2 < 0x20 || f3 < 0x20)
712 #ifdef ENABLE_COMPOSITE_CHARS
713 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
715 if (UNBOUNDP (Fgethash (make_int (ch),
716 Vcomposite_char_char2string_hash_table,
721 #endif /* ENABLE_COMPOSITE_CHARS */
723 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
724 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
727 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
729 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
732 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
734 if (EQ (charset, Qnil))
736 return (XCHARSET_CHARS (charset) == 96);
742 /************************************************************************/
743 /* Basic string functions */
744 /************************************************************************/
746 /* Copy the character pointed to by SRC into DST. Do not call this
747 directly. Use the macro charptr_copy_char() instead.
748 Return the number of bytes copied. */
/* The length of the character is taken from REP_BYTES_BY_FIRST_BYTE
   applied to the first byte of SRC; the loop then runs that many times,
   advancing SRC and DST together.  */
751 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
753 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
755 for (i = bytes; i; i--, dst++, src++)
761 /************************************************************************/
762 /* streams of Emchars */
763 /************************************************************************/
765 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
766 The functions below are not meant to be called directly; use
767 the macros in insdel.h. */
/* Finish reading one character from STREAM whose first byte CH has
   already been fetched.  CH is stored as the first byte of a local
   buffer, REP_BYTES_BY_FIRST_BYTE (ch) - 1 further bytes are read with
   Lstream_getc, and the assembled bytes are decoded with charptr_emchar.
   Each extra byte is asserted to be >= 0.  */
770 Lstream_get_emchar_1 (Lstream *stream, int ch)
772 Bufbyte str[MAX_EMCHAR_LEN];
773 Bufbyte *strptr = str;
776 str[0] = (Bufbyte) ch;
778 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
780 int c = Lstream_getc (stream);
781 bufpos_checking_assert (c >= 0);
782 *++strptr = (Bufbyte) c;
784 return charptr_emchar (str);
/* Write the character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar and pass the resulting bytes to Lstream_write,
   whose result is returned.  */
788 Lstream_fput_emchar (Lstream *stream, Emchar ch)
790 Bufbyte str[MAX_EMCHAR_LEN];
791 Bytecount len = set_charptr_emchar (str, ch);
792 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar and hand the bytes to Lstream_unread.  */
796 Lstream_funget_emchar (Lstream *stream, Emchar ch)
798 Bufbyte str[MAX_EMCHAR_LEN];
799 Bytecount len = set_charptr_emchar (str, ch);
800 Lstream_unread (stream, str, len);
804 /************************************************************************/
806 /************************************************************************/
/* Garbage-collector mark method for charset objects.  Marks the Lisp
   objects held in the Lisp_Charset: short_name, long_name, doc_string,
   registry, ccl_program and encoding_table.
   NOTE(review): marking of decoding_table is commented out below.
   Elsewhere in this file decoding tables are allocated with
   make_older_vector, which is presumably why they are not marked --
   confirm.  The value returned by this function is not visible in this
   listing.  */
809 mark_charset (Lisp_Object obj)
811 Lisp_Charset *cs = XCHARSET (obj);
813 mark_object (cs->short_name);
814 mark_object (cs->long_name);
815 mark_object (cs->doc_string);
816 mark_object (cs->registry);
817 mark_object (cs->ccl_program);
819 mark_object (cs->encoding_table);
820 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  The printed form is
   "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING ...>": the three
   strings are followed by the dimension, the direction ("l2r" or "r2l"),
   the column count, the graphic number and the final byte, then
   "reg=" with the registry, and finally the header uid in hexadecimal.
   NOTE(review): the condition guarding the "printing unreadable object"
   error and the declaration of BUF are not visible in this listing.  */
826 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
828 Lisp_Charset *cs = XCHARSET (obj);
832 error ("printing unreadable object #<charset %s 0x%x>",
833 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
836 write_c_string ("#<charset ", printcharfun);
837 print_internal (CHARSET_NAME (cs), printcharfun, 0);
838 write_c_string (" ", printcharfun);
839 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
840 write_c_string (" ", printcharfun);
841 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
842 write_c_string (" ", printcharfun);
843 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
844 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
846 CHARSET_DIMENSION (cs),
847 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
848 CHARSET_COLUMNS (cs),
849 CHARSET_GRAPHIC (cs),
851 write_c_string (buf, printcharfun);
852 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
853 sprintf (buf, " 0x%x>", cs->header.uid);
854 write_c_string (buf, printcharfun);
/* Description of the Lisp_Object slots of a Lisp_Charset.  Each entry
   gives the offset of one slot that holds a Lisp object: name,
   doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table and
   encoding_table.
   NOTE(review): the terminating entry of the array is not visible in
   this listing.  */
857 static const struct lrecord_description charset_description[] = {
858 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
859 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
860 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
861 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
862 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
863 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
864 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
866 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
867 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
872 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
873 mark_charset, print_charset, 0, 0, 0,
877 /* Make a new charset. */
878 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset lcrecord, copies the arguments into its
   fields and registers the new object in three places:
   chlook->charset_by_attributes (keyed by ISO 2022 type, final byte and,
   in one of the two visible variants, direction),
   chlook->charset_by_leading_byte (indexed by ID - MIN_LEADING_BYTE) and
   Vcharset_hash_table (keyed by NAME).  The ccl program and the
   reverse-direction charset start out as Qnil.

   NOTE(review): the visible code stores Qnil into both the decoding and
   the encoding table, and no visible line reads the DECODING_TABLE
   argument -- confirm whether it is used on a line missing from this
   listing.  The conditions selecting among the CHARSET_REP_BYTES
   assignments and between the two charset_by_attributes variants are
   likewise not visible.  */
880 make_charset (Charset_ID id, Lisp_Object name,
881 unsigned short chars, unsigned char dimension,
882 unsigned char columns, unsigned char graphic,
883 Bufbyte final, unsigned char direction, Lisp_Object short_name,
884 Lisp_Object long_name, Lisp_Object doc,
886 Lisp_Object decoding_table,
887 Emchar ucs_min, Emchar ucs_max,
888 Emchar code_offset, unsigned char byte_offset)
891 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
895 XSETCHARSET (obj, cs);
897 CHARSET_ID (cs) = id;
898 CHARSET_NAME (cs) = name;
899 CHARSET_SHORT_NAME (cs) = short_name;
900 CHARSET_LONG_NAME (cs) = long_name;
901 CHARSET_CHARS (cs) = chars;
902 CHARSET_DIMENSION (cs) = dimension;
903 CHARSET_DIRECTION (cs) = direction;
904 CHARSET_COLUMNS (cs) = columns;
905 CHARSET_GRAPHIC (cs) = graphic;
906 CHARSET_FINAL (cs) = final;
907 CHARSET_DOC_STRING (cs) = doc;
908 CHARSET_REGISTRY (cs) = reg;
909 CHARSET_CCL_PROGRAM (cs) = Qnil;
910 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
912 CHARSET_DECODING_TABLE(cs) = Qnil;
913 CHARSET_ENCODING_TABLE(cs) = Qnil;
914 CHARSET_UCS_MIN(cs) = ucs_min;
915 CHARSET_UCS_MAX(cs) = ucs_max;
916 CHARSET_CODE_OFFSET(cs) = code_offset;
917 CHARSET_BYTE_OFFSET(cs) = byte_offset;
921 if (id == LEADING_BYTE_ASCII)
922 CHARSET_REP_BYTES (cs) = 1;
924 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
926 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
931 /* some charsets do not have final characters. This includes
932 ASCII, Control-1, Composite, and the two faux private charsets. */
934 unsigned char iso2022_type
935 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
937 if (code_offset == 0)
939 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
940 chlook->charset_by_attributes[iso2022_type][final] = obj;
944 (chlook->charset_by_attributes[iso2022_type][final][direction]));
945 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
949 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
950 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
952 /* Some charsets are "faux" and don't have names or really exist at
953 all except in the leading-byte table. */
955 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next free leading byte for a charset of dimension
   DIMENSION.  Three counters in chlook are visible: a general one
   bounded by MAX_LEADING_BYTE_PRIVATE, and separate 1-byte and 2-byte
   ones bounded by MAX_LEADING_BYTE_PRIVATE_1 and
   MAX_LEADING_BYTE_PRIVATE_2.  The chosen counter is post-incremented
   after its value has been taken.  The only error call visible reports
   "No more character sets free for this dimension" with DIMENSION as
   its argument.
   NOTE(review): which counter is used when, and what happens when a
   bound is exceeded, depends on lines not visible in this listing.  */
960 get_unallocated_leading_byte (int dimension)
965 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
968 lb = chlook->next_allocated_leading_byte++;
972 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
975 lb = chlook->next_allocated_1_byte_leading_byte++;
979 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
982 lb = chlook->next_allocated_2_byte_leading_byte++;
988 ("No more character sets free for this dimension",
989 make_int (dimension));
995 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: the 63 second bytes 0x40-0x7E plus the 94 second
   bytes 0xA1-0xFE accepted by decode_builtin_char.  */
997 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character that CODE_POINT denotes in CHARSET by arithmetic
   alone.

   A code point of Vcharset_chinese_big5 with first byte 0xA1-0xFE and
   second byte 0x40-0x7E or 0xA1-0xFE is first renumbered linearly,
   reassigned to Vcharset_chinese_big5_1 or Vcharset_chinese_big5_2 and
   repacked as a 94x94 code point.  Charsets whose final byte is >= '0'
   map into the blocks starting at MIN_CHAR_94x94 and MIN_CHAR_96x96 (and
   into the corresponding 1-dimensional blocks) at an offset derived from
   the final byte.  Otherwise, charsets with a non-zero UCS_MAX are mapped
   linearly from UCS_MIN, and the result is checked against
   [UCS_MIN, UCS_MAX].
   NOTE(review): the value returned when no rule applies or the range
   check fails is not visible in this listing.  */
1000 decode_builtin_char (Lisp_Object charset, int code_point)
1004 if (EQ (charset, Vcharset_chinese_big5))
1006 int c1 = code_point >> 8;
1007 int c2 = code_point & 0xFF;
1010 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1012 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1013 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1015 I = (c1 - 0xA1) * BIG5_SAME_ROW
1016 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1020 charset = Vcharset_chinese_big5_1;
1024 charset = Vcharset_chinese_big5_2;
1025 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1027 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1030 if ((final = XCHARSET_FINAL (charset)) >= '0')
1032 if (XCHARSET_DIMENSION (charset) == 1)
1034 switch (XCHARSET_CHARS (charset))
1038 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1041 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1049 switch (XCHARSET_CHARS (charset))
1052 return MIN_CHAR_94x94
1053 + (final - '0') * 94 * 94
1054 + (((code_point >> 8) & 0x7F) - 33) * 94
1055 + ((code_point & 0x7F) - 33);
1057 return MIN_CHAR_96x96
1058 + (final - '0') * 96 * 96
1059 + (((code_point >> 8) & 0x7F) - 32) * 96
1060 + ((code_point & 0x7F) - 32);
1067 else if (XCHARSET_UCS_MAX (charset))
1070 = (XCHARSET_DIMENSION (charset) == 1
1072 code_point - XCHARSET_BYTE_OFFSET (charset)
1074 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1075 * XCHARSET_CHARS (charset)
1076 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1077 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1078 if ((cid < XCHARSET_UCS_MIN (charset))
1079 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET from the charset's
   range parameters.

   When CH lies in [UCS_MIN, UCS_MAX] its distance D from UCS_MIN (plus
   CODE_OFFSET) is converted to one byte per dimension: each byte is a
   digit of D in base XCHARSET_CHARS plus XCHARSET_BYTE_OFFSET, with the
   most significant digit in the highest byte.  Dimensions 1 to 4 are
   handled; 256-character sets are treated separately.

   Otherwise, when CODE_OFFSET is 0, CH is compared with the block that
   belongs to the charset's final byte, starting at MIN_CHAR_94,
   MIN_CHAR_96, MIN_CHAR_94x94 or MIN_CHAR_96x96, and converted to bytes
   based at 33 (94-sets) or 32 (96-sets).  Vcharset_mojikyo_2022_1 has a
   dedicated 3-byte conversion.
   NOTE(review): the upper-bound halves of the block tests, the result
   for 256-character sets and the failure return value are not visible
   in this listing.  */
1088 range_charset_code_point (Lisp_Object charset, Emchar ch)
1092 if ((XCHARSET_UCS_MIN (charset) <= ch)
1093 && (ch <= XCHARSET_UCS_MAX (charset)))
1095 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1097 if (XCHARSET_CHARS (charset) == 256)
1099 else if (XCHARSET_DIMENSION (charset) == 1)
1100 return d + XCHARSET_BYTE_OFFSET (charset);
1101 else if (XCHARSET_DIMENSION (charset) == 2)
1103 ((d / XCHARSET_CHARS (charset)
1104 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1105 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1106 else if (XCHARSET_DIMENSION (charset) == 3)
1108 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1109 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1110 | ((d / XCHARSET_CHARS (charset)
1111 % XCHARSET_CHARS (charset)
1112 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1113 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1114 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1116 ((d / (XCHARSET_CHARS (charset)
1117 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1118 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1119 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1120 % XCHARSET_CHARS (charset)
1121 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1122 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1123 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1124 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1126 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1128 if (XCHARSET_DIMENSION (charset) == 1)
1130 if (XCHARSET_CHARS (charset) == 94)
1132 if (((d = ch - (MIN_CHAR_94
1133 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1137 else if (XCHARSET_CHARS (charset) == 96)
1139 if (((d = ch - (MIN_CHAR_96
1140 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1147 else if (XCHARSET_DIMENSION (charset) == 2)
1149 if (XCHARSET_CHARS (charset) == 94)
1151 if (((d = ch - (MIN_CHAR_94x94
1152 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1155 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1157 else if (XCHARSET_CHARS (charset) == 96)
1159 if (((d = ch - (MIN_CHAR_96x96
1160 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1163 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1169 if (EQ (charset, Vcharset_mojikyo_2022_1)
1170 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1172 int m = ch - MIN_CHAR_MOJIKYO - 1;
1173 int byte1 = m / (94 * 60) + 33;
1174 int byte2 = (m % (94 * 60)) / 94;
1175 int byte3 = m % 94 + 33;
1181 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Find the built-in charset that character C belongs to, using only the
   fixed character ranges, store it in *CHARSET and return the code point
   of C within that charset.

   The ranges are tested in ascending order.  Characters inside one of
   the ISO 2022 blocks (MIN_CHAR_94 ... MAX_CHAR_96x96) are resolved with
   CHARSET_BY_ATTRIBUTES from the final byte implied by their position;
   when no such charset is registered, and for every gap between the
   named ranges, *CHARSET is set to Vcharset_ucs.

   Fix: the JEF-CHINA3 branch used to return C - MAX_CHAR_JEF_CHINA3,
   which is zero or negative for every character of the range.  Like the
   Daikanwa and Mojikyo branches it now returns the offset from the start
   of the range, C - MIN_CHAR_JEF_CHINA3.

   NOTE(review): the halfwidth-katakana branch returns a list2 () value
   although every other branch returns an integer.  Presumably it, and
   possibly the Hebrew branch, sit inside a comment or a disabled
   preprocessor region whose delimiters are not visible in this listing
   -- confirm against the full source.  */
1187 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1189 if (c <= MAX_CHAR_BASIC_LATIN)
1191 *charset = Vcharset_ascii;
1196 *charset = Vcharset_control_1;
1201 *charset = Vcharset_latin_iso8859_1;
1205 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1207 *charset = Vcharset_hebrew_iso8859_8;
1208 return c - MIN_CHAR_HEBREW + 0x20;
1211 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1213 *charset = Vcharset_thai_tis620;
1214 return c - MIN_CHAR_THAI + 0x20;
1217 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1218 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1220 return list2 (Vcharset_katakana_jisx0201,
1221 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1224 else if (c <= MAX_CHAR_BMP)
1226 *charset = Vcharset_ucs_bmp;
1229 else if (c < MIN_CHAR_DAIKANWA)
1231 *charset = Vcharset_ucs;
1234 else if (c <= MAX_CHAR_DAIKANWA)
1236 *charset = Vcharset_ideograph_daikanwa;
1237 return c - MIN_CHAR_DAIKANWA;
1239 else if (c <= MAX_CHAR_MOJIKYO_0)
1241 *charset = Vcharset_mojikyo;
1242 return c - MIN_CHAR_MOJIKYO_0;
1244 else if (c < MIN_CHAR_94)
1246 *charset = Vcharset_ucs;
1249 else if (c <= MAX_CHAR_94)
1251 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1252 ((c - MIN_CHAR_94) / 94) + '0',
1253 CHARSET_LEFT_TO_RIGHT);
1254 if (!NILP (*charset))
1255 return ((c - MIN_CHAR_94) % 94) + 33;
1258 *charset = Vcharset_ucs;
1262 else if (c <= MAX_CHAR_96)
1264 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1265 ((c - MIN_CHAR_96) / 96) + '0',
1266 CHARSET_LEFT_TO_RIGHT);
1267 if (!NILP (*charset))
1268 return ((c - MIN_CHAR_96) % 96) + 32;
1271 *charset = Vcharset_ucs;
1275 else if (c <= MAX_CHAR_94x94)
1278 = CHARSET_BY_ATTRIBUTES (94, 2,
1279 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1280 CHARSET_LEFT_TO_RIGHT);
1281 if (!NILP (*charset))
1282 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1283 | (((c - MIN_CHAR_94x94) % 94) + 33);
1286 *charset = Vcharset_ucs;
1290 else if (c <= MAX_CHAR_96x96)
1293 = CHARSET_BY_ATTRIBUTES (96, 2,
1294 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1295 CHARSET_LEFT_TO_RIGHT);
1296 if (!NILP (*charset))
1297 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1298 | (((c - MIN_CHAR_96x96) % 96) + 32);
1301 *charset = Vcharset_ucs;
1305 else if (c < MIN_CHAR_MOJIKYO)
1307 *charset = Vcharset_ucs;
1310 else if (c <= MAX_CHAR_MOJIKYO)
1312 *charset = Vcharset_mojikyo;
1313 return c - MIN_CHAR_MOJIKYO;
1315 else if (c < MIN_CHAR_JEF_CHINA3)
1317 *charset = Vcharset_ucs;
1320 else if (c <= MAX_CHAR_JEF_CHINA3)
1322 *charset = Vcharset_japanese_jef_china3;
1323 return c - MIN_CHAR_JEF_CHINA3;
1327 *charset = Vcharset_ucs;
1332 Lisp_Object Vdefault_coded_charset_priority_list;
1336 /************************************************************************/
1337 /* Basic charset Lisp functions */
1338 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when OBJECT satisfies CHARSETP
   and Qnil otherwise.  */
1340 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1341 Return non-nil if OBJECT is a charset.
1345 return CHARSETP (object) ? Qt : Qnil;
/* Lisp function `find-charset'.  A charset object is returned unchanged;
   any other argument must be a symbol (checked with CHECK_SYMBOL) and is
   looked up in Vcharset_hash_table, with Qnil as the result when there
   is no entry.  */
1348 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1349 Retrieve the charset of the given name.
1350 If CHARSET-OR-NAME is a charset object, it is simply returned.
1351 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1352 nil is returned. Otherwise the associated charset object is returned.
1356 if (CHARSETP (charset_or_name))
1357 return charset_or_name;
1359 CHECK_SYMBOL (charset_or_name);
1360 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp function `get-charset'.  Calls Ffind_charset on NAME; the visible
   error path signals "No such charset" with NAME as its argument.
   NOTE(review): the test guarding the error and the return statement are
   not visible in this listing.  */
1363 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 Same as `find-charset' except an error is signalled if there is no such
1366 charset instead of returning nil.
1370 Lisp_Object charset = Ffind_charset (name);
1373 signal_simple_error ("No such charset", name);
1377 /* We store the charsets in hash tables with the names as the key and the
1378 actual charset object as the value. Occasionally we need to use them
1379 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: it carries a pointer to the
   list that is being accumulated.  */
1380 struct charset_list_closure
1382 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.  The third argument
   is a struct charset_list_closure; KEY (the charset name under which
   the entry is stored) is consed onto the front of the list the closure
   points to.  VALUE is not used by the visible code.  */
1386 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1387 void *charset_list_closure)
1389 /* This function can GC */
1390 struct charset_list_closure *chcl =
1391 (struct charset_list_closure*) charset_list_closure;
1392 Lisp_Object *charset_list = chcl->charset_list;
1394 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp function `charset-list'.  Starts from an empty list protected
   with GCPRO1, lets elisp_maphash run add_charset_to_list_mapper over
   Vcharset_hash_table so that every key is collected, and returns the
   resulting list.  */
1398 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1399 Return a list of the names of all defined charsets.
1403 Lisp_Object charset_list = Qnil;
1404 struct gcpro gcpro1;
1405 struct charset_list_closure charset_list_closure;
1407 GCPRO1 (charset_list);
1408 charset_list_closure.charset_list = &charset_list;
1409 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1410 &charset_list_closure);
1413 return charset_list;
/* Lisp function `charset-name'.  Resolves CHARSET through Fget_charset
   and returns the XCHARSET_NAME of the result.  */
1416 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1417 Return the name of charset CHARSET.
1421 return XCHARSET_NAME (Fget_charset (charset));
1424 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.  Checks that NAME is a symbol and that
   DOC-STRING, when non-nil, is a string; validates each entry of PROPS;
   refuses to redefine an existing charset or to reuse a
   DIMENSION/CHARS/FINAL combination; then builds the charset with
   make_charset and attaches the CCL program if one was given.

   The property dispatch is a single if / else-if chain.  The 'long-name
   test must be chained with `else': as a free-standing `if' it detaches
   the 'short-name branch from the chain, so a valid 'short-name keyword
   falls through to the trailing "Unrecognized property" error. */
1425 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1426 Define a new character set.
1427 This function is for use with Mule support.
1428 NAME is a symbol, the name by which the character set is normally referred.
1429 DOC-STRING is a string describing the character set.
1430 PROPS is a property list, describing the specific nature of the
1431 character set. Recognized properties are:
1433 'short-name Short version of the charset name (ex: Latin-1)
1434 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1435 'registry A regular expression matching the font registry field for
1437 'dimension Number of octets used to index a character in this charset.
1438 Either 1 or 2. Defaults to 1.
1439 'columns Number of columns used to display a character in this charset.
1440 Only used in TTY mode. (Under X, the actual width of a
1441 character can be derived from the font used to display the
1442 characters.) If unspecified, defaults to the dimension
1443 (this is almost always the correct value).
1444 'chars Number of characters in each dimension (94 or 96).
1445 Defaults to 94. Note that if the dimension is 2, the
1446 character set thus described is 94x94 or 96x96.
1447 'final Final byte of ISO 2022 escape sequence. Must be
1448 supplied. Each combination of (DIMENSION, CHARS) defines a
1449 separate namespace for final bytes. Note that ISO
1450 2022 restricts the final byte to the range
1451 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1452 dimension == 2. Note also that final bytes in the range
1453 0x30 - 0x3F are reserved for user-defined (not official)
1455 'graphic 0 (use left half of font on output) or 1 (use right half
1456 of font on output). Defaults to 0. For example, for
1457 a font whose registry is ISO8859-1, the left half
1458 (octets 0x20 - 0x7F) is the `ascii' character set, while
1459 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1460 character set. With 'graphic set to 0, the octets
1461 will have their high bit cleared; with it set to 1,
1462 the octets will have their high bit set.
1463 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1465 'ccl-program A compiled CCL program used to convert a character in
1466 this charset into an index into the font. This is in
1467 addition to the 'graphic property. The CCL program
1468 is passed the octets of the character, with the high
1469 bit cleared and set depending upon whether the value
1470 of the 'graphic property is 0 or 1.
1472 (name, doc_string, props))
1474 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1475 int direction = CHARSET_LEFT_TO_RIGHT;
1476 Lisp_Object registry = Qnil;
1477 Lisp_Object charset;
1478 Lisp_Object ccl_program = Qnil;
1479 Lisp_Object short_name = Qnil, long_name = Qnil;
1480 int byte_offset = -1;
1482 CHECK_SYMBOL (name);
1483 if (!NILP (doc_string))
1484 CHECK_STRING (doc_string);
1486 charset = Ffind_charset (name);
1487 if (!NILP (charset))
1488 signal_simple_error ("Cannot redefine existing charset", name);
1491 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1493 if (EQ (keyword, Qshort_name))
1495 CHECK_STRING (value);
1499 else if (EQ (keyword, Qlong_name))
1501 CHECK_STRING (value);
1505 else if (EQ (keyword, Qdimension))
1508 dimension = XINT (value);
1509 if (dimension < 1 || dimension > 2)
1510 signal_simple_error ("Invalid value for 'dimension", value);
1513 else if (EQ (keyword, Qchars))
1516 chars = XINT (value);
1517 if (chars != 94 && chars != 96)
1518 signal_simple_error ("Invalid value for 'chars", value);
1521 else if (EQ (keyword, Qcolumns))
1524 columns = XINT (value);
1525 if (columns != 1 && columns != 2)
1526 signal_simple_error ("Invalid value for 'columns", value);
1529 else if (EQ (keyword, Qgraphic))
1532 graphic = XINT (value);
1534 if (graphic < 0 || graphic > 2)
1536 if (graphic < 0 || graphic > 1)
1538 signal_simple_error ("Invalid value for 'graphic", value);
1541 else if (EQ (keyword, Qregistry))
1543 CHECK_STRING (value);
1547 else if (EQ (keyword, Qdirection))
1549 if (EQ (value, Ql2r))
1550 direction = CHARSET_LEFT_TO_RIGHT;
1551 else if (EQ (value, Qr2l))
1552 direction = CHARSET_RIGHT_TO_LEFT;
1554 signal_simple_error ("Invalid value for 'direction", value);
1557 else if (EQ (keyword, Qfinal))
1559 CHECK_CHAR_COERCE_INT (value);
1560 final = XCHAR (value);
1561 if (final < '0' || final > '~')
1562 signal_simple_error ("Invalid value for 'final", value);
1565 else if (EQ (keyword, Qccl_program))
1567 struct ccl_program test_ccl;
1569 if (setup_ccl_program (&test_ccl, value) < 0)
1570 signal_simple_error ("Invalid value for 'ccl-program", value);
1571 ccl_program = value;
1575 signal_simple_error ("Unrecognized property", keyword);
1580 error ("'final must be specified");
1581 if (dimension == 2 && final > 0x5F)
1583 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1586 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1587 CHARSET_LEFT_TO_RIGHT)) ||
1588 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1589 CHARSET_RIGHT_TO_LEFT)))
1591 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1593 id = get_unallocated_leading_byte (dimension);
1595 if (NILP (doc_string))
1596 doc_string = build_string ("");
1598 if (NILP (registry))
1599 registry = build_string ("");
1601 if (NILP (short_name))
1602 XSETSTRING (short_name, XSYMBOL (name)->name);
1604 if (NILP (long_name))
1605 long_name = doc_string;
1608 columns = dimension;
1610 if (byte_offset < 0)
1614 else if (chars == 96)
1620 charset = make_charset (id, name, chars, dimension, columns, graphic,
1621 final, direction, short_name, long_name,
1622 doc_string, registry,
1623 Qnil, 0, 0, 0, byte_offset);
1624 if (!NILP (ccl_program))
1625 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Signals an error if
   CHARSET already has a reverse-direction charset or NEW-NAME already
   names a charset.  Otherwise copies CHARSET's chars, dimension, columns,
   graphic, final byte, names, doc string and registry into a new charset
   with the opposite direction, and links the two charsets through their
   reverse-direction-charset slots. */
1629 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1631 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1632 NEW-NAME is the name of the new charset. Return the new charset.
1634 (charset, new_name))
1636 Lisp_Object new_charset = Qnil;
1637 int id, chars, dimension, columns, graphic, final;
1639 Lisp_Object registry, doc_string, short_name, long_name;
1642 charset = Fget_charset (charset);
1643 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1644 signal_simple_error ("Charset already has reverse-direction charset",
1647 CHECK_SYMBOL (new_name);
1648 if (!NILP (Ffind_charset (new_name)))
1649 signal_simple_error ("Cannot redefine existing charset", new_name);
1651 cs = XCHARSET (charset);
1653 chars = CHARSET_CHARS (cs);
1654 dimension = CHARSET_DIMENSION (cs);
1655 columns = CHARSET_COLUMNS (cs);
1656 id = get_unallocated_leading_byte (dimension);
1658 graphic = CHARSET_GRAPHIC (cs);
1659 final = CHARSET_FINAL (cs);
1660 direction = CHARSET_RIGHT_TO_LEFT;
1661 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1662 direction = CHARSET_LEFT_TO_RIGHT;
1663 doc_string = CHARSET_DOC_STRING (cs);
1664 short_name = CHARSET_SHORT_NAME (cs);
1665 long_name = CHARSET_LONG_NAME (cs);
1666 registry = CHARSET_REGISTRY (cs);
1668 new_charset = make_charset (id, new_name, chars, dimension, columns,
1669 graphic, final, direction, short_name, long_name,
1670 doc_string, registry,
1672 CHARSET_DECODING_TABLE(cs),
1673 CHARSET_UCS_MIN(cs),
1674 CHARSET_UCS_MAX(cs),
1675 CHARSET_CODE_OFFSET(cs),
1676 CHARSET_BYTE_OFFSET(cs)
1682 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1683 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias': checks that ALIAS is a symbol,
   resolves CHARSET with Fget_charset, stores the charset under ALIAS in
   Vcharset_hash_table and returns whatever Fputhash returns. */
1688 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1689 Define symbol ALIAS as an alias for CHARSET.
1693 CHECK_SYMBOL (alias);
1694 charset = Fget_charset (charset);
1695 return Fputhash (alias, charset, Vcharset_hash_table);
1698 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   with Fget_charset and returns the contents of its
   reverse-direction-charset slot. */
1700 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1702 Return the reverse-direction charset parallel to CHARSET, if any.
1703 This is the charset with the same properties (in particular, the same
1704 dimension, number of characters per dimension, and final byte) as
1705 CHARSET but whose characters are displayed in the opposite direction.
1709 charset = Fget_charset (charset);
1710 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION (1 or 2),
   CHARS (94 or 96), FINAL ('0' through '~', and no greater than 0x5F when
   DIMENSION is 2) and DIRECTION ('l2r, 'r2l or nil), then looks the
   charset up with CHARSET_BY_ATTRIBUTES.  The return statement visible
   here yields the name of the charset that was found. */
1714 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1715 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1716 If DIRECTION is omitted, both directions will be checked (left-to-right
1717 will be returned if character sets exist for both directions).
1719 (dimension, chars, final, direction))
1721 int dm, ch, fi, di = -1;
1722 Lisp_Object obj = Qnil;
1724 CHECK_INT (dimension);
1725 dm = XINT (dimension);
1726 if (dm < 1 || dm > 2)
1727 signal_simple_error ("Invalid value for DIMENSION", dimension);
1731 if (ch != 94 && ch != 96)
1732 signal_simple_error ("Invalid value for CHARS", chars);
1734 CHECK_CHAR_COERCE_INT (final);
1736 if (fi < '0' || fi > '~')
1737 signal_simple_error ("Invalid value for FINAL", final);
1739 if (EQ (direction, Ql2r))
1740 di = CHARSET_LEFT_TO_RIGHT;
1741 else if (EQ (direction, Qr2l))
1742 di = CHARSET_RIGHT_TO_LEFT;
1743 else if (!NILP (direction))
1744 signal_simple_error ("Invalid value for DIRECTION", direction);
1746 if (dm == 2 && fi > 0x5F)
1748 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1752 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1754 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1757 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1760 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with Fget_charset
   and returns its XCHARSET_SHORT_NAME. */
1764 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1765 Return short name of CHARSET.
1769 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with Fget_charset
   and returns its XCHARSET_LONG_NAME. */
1772 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1773 Return long name of CHARSET.
1777 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its XCHARSET_DOC_STRING. */
1780 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1781 Return description of CHARSET.
1785 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with Fget_charset
   and returns its XCHARSET_DIMENSION wrapped as a Lisp integer. */
1788 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1789 Return dimension of CHARSET.
1793 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  Resolves CHARSET, checks that PROP
   is a symbol and returns the matching charset field.  Integer fields are
   wrapped with make_int, the final byte with make_char, the direction is
   reported as 'l2r or 'r2l, and a reverse-direction charset is reported
   by name.  An unrecognized PROP signals an error. */
1796 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1797 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1798 Recognized properties are those listed in `make-charset', as well as
1799 'name and 'doc-string.
1805 charset = Fget_charset (charset);
1806 cs = XCHARSET (charset);
1808 CHECK_SYMBOL (prop);
1809 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1810 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1811 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1812 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1813 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1814 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1815 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1816 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1817 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1818 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1819 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1820 if (EQ (prop, Qdirection))
1821 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1822 if (EQ (prop, Qreverse_direction_charset))
1824 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1825 /* #### Is this translation OK? If so, error checking sufficient? */
1826 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1828 signal_simple_error ("Unrecognized charset property name", prop);
1829 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its XCHARSET_LEADING_BYTE wrapped as a Lisp integer. */
1832 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1833 Return charset identification number of CHARSET.
1837 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1840 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program': resolves CHARSET, validates
   CCL-PROGRAM by running setup_ccl_program on a scratch struct (signalling
   an error on a negative result), then stores CCL-PROGRAM in the charset's
   CCL-program slot. */
1843 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1844 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1846 (charset, ccl_program))
1848 struct ccl_program test_ccl;
1850 charset = Fget_charset (charset);
1851 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1852 signal_simple_error ("Invalid ccl-program", ccl_program);
1853 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For each device, look CHARSET up in that device's charset_font_cache
   and, when an entry is present, empty the hash table stored there. */
1858 invalidate_charset_font_caches (Lisp_Object charset)
1860 /* Invalidate font cache entries for charset on all devices. */
1861 Lisp_Object devcons, concons, hash_table;
1862 DEVICE_LOOP_NO_BREAK (devcons, concons)
1864 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is passed as the Fgethash default, so it marks a miss. */
1865 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1866 if (!UNBOUNDP (hash_table))
1867 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry': resolves CHARSET, checks that
   REGISTRY is a string, stores it in the charset, invalidates the
   per-device font caches for the charset and reports a Qfont change on
   Vdefault_face. */
1871 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1872 Set the 'registry property of CHARSET to REGISTRY.
1874 (charset, registry))
1876 charset = Fget_charset (charset);
1877 CHECK_STRING (registry);
1878 XCHARSET_REGISTRY (charset) = registry;
1879 invalidate_charset_font_caches (charset);
1880 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its XCHARSET_DECODING_TABLE. */
1885 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1886 Return mapping-table of CHARSET.
1890 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  A vector TABLE is checked
   with decoding_table_check_elements; its entries are then registered
   through put_char_ccs_code_point, with code points offset by the
   charset's byte offset.  For two-dimensional charsets an entry may itself
   be a vector, in which case the outer index (plus byte offset) is shifted
   left by 8 to form the high part of the code point.  A TABLE of any other
   unsupported type signals wrong-type-argument.
   NOTE(review): the error text names "vector-or-nil-p", so nil is
   presumably the other accepted value and clears the existing table --
   confirm against the complete source. */
1893 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1894 Set mapping-table of CHARSET to TABLE.
1898 struct Lisp_Charset *cs;
1902 charset = Fget_charset (charset);
1903 cs = XCHARSET (charset);
1907 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1908 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1909 CHARSET_DECODING_TABLE(cs) = Qnil;
1912 else if (VECTORP (table))
1914 int ccs_len = CHARSET_BYTE_SIZE (cs);
1915 int ret = decoding_table_check_elements (table,
1916 CHARSET_DIMENSION (cs),
1921 signal_simple_error ("Too big table", table);
1923 signal_simple_error ("Invalid element is found", table);
1925 signal_simple_error ("Something wrong", table);
1927 CHARSET_DECODING_TABLE(cs) = Qnil;
1930 signal_error (Qwrong_type_argument,
1931 list2 (build_translated_string ("vector-or-nil-p"),
1934 byte_offset = CHARSET_BYTE_OFFSET (cs);
1935 switch (CHARSET_DIMENSION (cs))
1938 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1940 Lisp_Object c = XVECTOR_DATA(table)[i];
1943 put_char_ccs_code_point (c, charset,
1944 make_int (i + byte_offset));
1948 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1950 Lisp_Object v = XVECTOR_DATA(table)[i];
1956 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1958 Lisp_Object c = XVECTOR_DATA(v)[j];
1961 put_char_ccs_code_point
1963 make_int ( ( (i + byte_offset) << 8 )
1969 put_char_ccs_code_point (v, charset,
1970 make_int (i + byte_offset));
1979 /************************************************************************/
1980 /* Lisp primitives for working with characters */
1981 /************************************************************************/
/* Lisp primitive `decode-char': resolves CHARSET, decodes the code point
   with DECODE_CHAR and returns the resulting character, or nil when the
   decoded value is negative. */
1984 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
1985 Make a character from CHARSET and code-point CODE.
1991 charset = Fget_charset (charset);
1994 if (XCHARSET_GRAPHIC (charset) == 1)
1996 c = DECODE_CHAR (charset, c);
1997 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char': resolves CHARSET, decodes the code
   point with decode_builtin_char and returns the resulting character; when
   that result is negative it falls back to Fdecode_char.
   For `latin-viscii' the character is first decoded with Fdecode_char and
   its attributes in the viscii-lower / viscii-upper charsets are consulted;
   NOTE(review): presumably a hit redirects the decode to that charset --
   the guarding conditions are not visible here, confirm. */
2000 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2001 Make a builtin character from CHARSET and code-point CODE.
2007 charset = Fget_charset (charset);
2009 if (EQ (charset, Vcharset_latin_viscii))
2011 Lisp_Object chr = Fdecode_char (charset, code);
2017 (ret = Fget_char_attribute (chr,
2018 Vcharset_latin_viscii_lower,
2021 charset = Vcharset_latin_viscii_lower;
2025 (ret = Fget_char_attribute (chr,
2026 Vcharset_latin_viscii_upper,
2029 charset = Vcharset_latin_viscii_upper;
2036 if (XCHARSET_GRAPHIC (charset) == 1)
2039 c = decode_builtin_char (charset, c);
2040 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char'.  Chooses the legal octet range from the
   charset (0-127 for ascii, 0-31 for control-1, 0-255 for 256-character
   sets, 33-126 for 94-character sets, 32-127 otherwise), range-checks the
   octets against it and combines them with MAKE_CHAR.  For a
   one-dimensional charset the second octet passed to MAKE_CHAR is 0. */
2044 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2045 Make a character from CHARSET and octets ARG1 and ARG2.
2046 ARG2 is required only for characters from two-dimensional charsets.
2047 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2048 character s with caron.
2050 (charset, arg1, arg2))
2054 int lowlim, highlim;
2056 charset = Fget_charset (charset);
2057 cs = XCHARSET (charset);
2059 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2060 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2062 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2064 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2065 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2068 /* It is useful (and safe, according to Olivier Galibert) to strip
2069 the 8th bit off ARG1 and ARG2 because it allows programmers to
2070 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2071 Latin 2 code of the character. */
2079 if (a1 < lowlim || a1 > highlim)
2080 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2082 if (CHARSET_DIMENSION (cs) == 1)
2086 ("Charset is of dimension one; second octet must be nil", arg2);
2087 return make_char (MAKE_CHAR (charset, a1, 0));
2096 a2 = XINT (arg2) & 0x7f;
2098 if (a2 < lowlim || a2 > highlim)
2099 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2101 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': coerces CHARACTER to a character and
   returns the name of the charset that CHAR_CHARSET reports for it. */
2104 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2105 Return the character set of CHARACTER.
2109 CHECK_CHAR_COERCE_INT (character);
2111 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet': splits CHARACTER with BREAKUP_CHAR and
   returns octet 0 when N is nil or 0, octet 1 when N is 1; any other N
   signals an error. */
2114 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2115 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2116 N defaults to 0 if omitted.
2120 Lisp_Object charset;
2123 CHECK_CHAR_COERCE_INT (character);
2125 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2127 if (NILP (n) || EQ (n, Qzero))
2128 return make_int (octet0);
2129 else if (EQ (n, make_int (1)))
2130 return make_int (octet1);
2132 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  Two result-building sequences are visible
   here: one peels the low byte off an ENCODE_CHAR code point once per
   charset dimension and then conses the charset name on the front; the
   other uses BREAKUP_CHAR and builds the list with list2 / list3.
   NOTE(review): these are presumably alternatives selected by a
   preprocessor conditional -- confirm against the complete source. */
2135 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2136 Return list of charset and one or two position-codes of CHARACTER.
2140 /* This function can GC */
2141 struct gcpro gcpro1, gcpro2;
2142 Lisp_Object charset = Qnil;
2143 Lisp_Object rc = Qnil;
2151 GCPRO2 (charset, rc);
2152 CHECK_CHAR_COERCE_INT (character);
2155 code_point = ENCODE_CHAR (XCHAR (character), charset);
2156 dimension = XCHARSET_DIMENSION (charset);
2157 while (dimension > 0)
2159 rc = Fcons (make_int (code_point & 255), rc);
2163 rc = Fcons (XCHARSET_NAME (charset), rc);
2165 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2167 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2169 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2173 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2182 #ifdef ENABLE_COMPOSITE_CHARS
2183 /************************************************************************/
2184 /* composite character functions */
2185 /************************************************************************/
/* Map the LEN-byte string STR to a composite character.  The string is
   looked up in Vcomposite_char_string2char_hash_table.  On the allocation
   path a new character is built from the next free row/column of
   Vcharset_composite, recorded in both the char->string and string->char
   hash tables, and the column counter is advanced, wrapping to column 32
   of the next row once it reaches 128.  An error is signalled when the
   row counter has reached 128. */
2188 lookup_composite_char (Bufbyte *str, int len)
2190 Lisp_Object lispstr = make_string (str, len);
2191 Lisp_Object ch = Fgethash (lispstr,
2192 Vcomposite_char_string2char_hash_table,
2198 if (composite_char_row_next >= 128)
2199 signal_simple_error ("No more composite chars available", lispstr);
2200 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2201 composite_char_col_next);
2202 Fputhash (make_char (emch), lispstr,
2203 Vcomposite_char_char2string_hash_table);
2204 Fputhash (lispstr, make_char (emch),
2205 Vcomposite_char_string2char_hash_table);
2206 composite_char_col_next++;
2207 if (composite_char_col_next >= 128)
2209 composite_char_col_next = 32;
2210 composite_char_row_next++;
/* Reverse lookup: fetch the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table, asserting that an entry exists. */
2219 composite_char_string (Emchar ch)
2221 Lisp_Object str = Fgethash (make_char (ch),
2222 Vcomposite_char_char2string_hash_table,
2224 assert (!UNBOUNDP (str));
/* `make-composite-char': checks that STRING is a string and returns the
   character that lookup_composite_char yields for its data and length. */
2228 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2229 Convert a string into a single composite character.
2230 The character is the result of overstriking all the characters in
2235 CHECK_STRING (string);
2236 return make_char (lookup_composite_char (XSTRING_DATA (string),
2237 XSTRING_LENGTH (string)));
/* `composite-char-string': signals an error unless the character's leading
   byte is LEADING_BYTE_COMPOSITE, then returns the string obtained from
   composite_char_string. */
2240 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2241 Return a string of the characters comprising a composite character.
2249 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2250 signal_simple_error ("Must be composite char", ch);
2251 return composite_char_string (emch);
2253 #endif /* ENABLE_COMPOSITE_CHARS */
2256 /************************************************************************/
2257 /* initialization */
2258 /************************************************************************/
/* Symbol-level initialization for this file: registers the charset lrecord
   implementation, exposes the Lisp primitives defined here via DEFSUBR,
   and interns (defsymbol) the symbols used as charset property keywords,
   direction values and predefined charset names. */
2261 syms_of_mule_charset (void)
2263 INIT_LRECORD_IMPLEMENTATION (charset);
2265 DEFSUBR (Fcharsetp);
2266 DEFSUBR (Ffind_charset);
2267 DEFSUBR (Fget_charset);
2268 DEFSUBR (Fcharset_list);
2269 DEFSUBR (Fcharset_name);
2270 DEFSUBR (Fmake_charset);
2271 DEFSUBR (Fmake_reverse_direction_charset);
2272 /* DEFSUBR (Freverse_direction_charset); */
2273 DEFSUBR (Fdefine_charset_alias);
2274 DEFSUBR (Fcharset_from_attributes);
2275 DEFSUBR (Fcharset_short_name);
2276 DEFSUBR (Fcharset_long_name);
2277 DEFSUBR (Fcharset_description);
2278 DEFSUBR (Fcharset_dimension);
2279 DEFSUBR (Fcharset_property);
2280 DEFSUBR (Fcharset_id);
2281 DEFSUBR (Fset_charset_ccl_program);
2282 DEFSUBR (Fset_charset_registry);
2284 DEFSUBR (Fcharset_mapping_table);
2285 DEFSUBR (Fset_charset_mapping_table);
2289 DEFSUBR (Fdecode_char);
2290 DEFSUBR (Fdecode_builtin_char);
2292 DEFSUBR (Fmake_char);
2293 DEFSUBR (Fchar_charset);
2294 DEFSUBR (Fchar_octet);
2295 DEFSUBR (Fsplit_char);
2297 #ifdef ENABLE_COMPOSITE_CHARS
2298 DEFSUBR (Fmake_composite_char);
2299 DEFSUBR (Fcomposite_char_string);
2302 defsymbol (&Qcharsetp, "charsetp");
2303 defsymbol (&Qregistry, "registry");
2304 defsymbol (&Qfinal, "final");
2305 defsymbol (&Qgraphic, "graphic");
2306 defsymbol (&Qdirection, "direction");
2307 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2308 defsymbol (&Qshort_name, "short-name");
2309 defsymbol (&Qlong_name, "long-name");
2311 defsymbol (&Ql2r, "l2r");
2312 defsymbol (&Qr2l, "r2l");
2314 /* Charsets, compatible with FSF 20.3
2315 Naming convention is Script-Charset[-Edition] */
2316 defsymbol (&Qascii, "ascii");
2317 defsymbol (&Qcontrol_1, "control-1");
2318 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2319 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2320 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2321 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2322 defsymbol (&Qthai_tis620, "thai-tis620");
2323 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2324 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2325 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2326 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2327 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2328 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2329 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2330 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2331 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2332 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2333 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2334 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2335 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2336 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2337 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2338 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2340 defsymbol (&Qucs, "ucs");
2341 defsymbol (&Qucs_bmp, "ucs-bmp");
2342 defsymbol (&Qucs_cns, "ucs-cns");
2343 defsymbol (&Qucs_jis, "ucs-jis");
2344 defsymbol (&Qucs_big5, "ucs-big5");
2345 defsymbol (&Qlatin_viscii, "latin-viscii");
2346 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2347 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2348 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2349 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2350 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2351 defsymbol (&Qideograph_gt, "ideograph-gt");
2352 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2353 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2354 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2355 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2356 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2357 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2358 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2359 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2360 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2361 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2362 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2363 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2364 defsymbol (&Qchinese_big5, "chinese-big5");
2365 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2366 defsymbol (&Qjapanese_jef_china3, "japanese-jef-china3");
2367 defsymbol (&Qmojikyo, "mojikyo");
2368 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2369 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2370 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2371 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2372 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2373 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2374 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2375 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2376 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2377 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2378 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2379 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2380 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2381 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2382 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2383 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2384 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2385 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2386 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2387 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2388 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2389 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2390 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2392 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2393 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2395 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the zeroed
   charset_lookup structure and registers it as a dump root, fills the
   leading-byte and attribute lookup tables with Qnil, seeds the
   next-leading-byte allocation counters, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'. */
2399 vars_of_mule_charset (void)
2406 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2407 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2409 /* Table of charsets indexed by leading byte. */
2410 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2411 chlook->charset_by_leading_byte[i] = Qnil;
2414 /* Table of charsets indexed by type/final-byte. */
2415 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2416 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2417 chlook->charset_by_attributes[i][j] = Qnil;
2419 /* Table of charsets indexed by type/final-byte/direction. */
2420 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2421 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2422 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2423 chlook->charset_by_attributes[i][j][k] = Qnil;
2427 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2429 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2430 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2434 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2435 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2436 Leading-code of private TYPE9N charset of column-width 1.
2438 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2442 Vdefault_coded_charset_priority_list = Qnil;
2443 DEFVAR_LISP ("default-coded-charset-priority-list",
2444 &Vdefault_coded_charset_priority_list /*
2445 Default order of preferred coded-character-sets.
2451 complex_vars_of_mule_charset (void)
2453 staticpro (&Vcharset_hash_table);
2454 Vcharset_hash_table =
2455 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2457 /* Predefined character sets. We store them into variables for
2461 staticpro (&Vcharset_ucs);
2463 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2464 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2465 build_string ("UCS"),
2466 build_string ("UCS"),
2467 build_string ("ISO/IEC 10646"),
2469 Qnil, 0, 0xFFFFFFF, 0, 0);
2470 staticpro (&Vcharset_ucs_bmp);
2472 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2473 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2474 build_string ("BMP"),
2475 build_string ("BMP"),
2476 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2477 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2478 Qnil, 0, 0xFFFF, 0, 0);
2479 staticpro (&Vcharset_ucs_cns);
2481 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2482 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2483 build_string ("UCS for CNS"),
2484 build_string ("UCS for CNS 11643"),
2485 build_string ("ISO/IEC 10646 for CNS 11643"),
2488 staticpro (&Vcharset_ucs_jis);
2490 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2491 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2492 build_string ("UCS for JIS"),
2493 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2494 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2497 staticpro (&Vcharset_ucs_big5);
2499 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2500 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2501 build_string ("UCS for Big5"),
2502 build_string ("UCS for Big5"),
2503 build_string ("ISO/IEC 10646 for Big5"),
2507 # define MIN_CHAR_THAI 0
2508 # define MAX_CHAR_THAI 0
2509 /* # define MIN_CHAR_HEBREW 0 */
2510 /* # define MAX_CHAR_HEBREW 0 */
2511 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2512 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2514 staticpro (&Vcharset_ascii);
2516 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2517 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2518 build_string ("ASCII"),
2519 build_string ("ASCII)"),
2520 build_string ("ASCII (ISO646 IRV)"),
2521 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2522 Qnil, 0, 0x7F, 0, 0);
2523 staticpro (&Vcharset_control_1);
2524 Vcharset_control_1 =
2525 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2526 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2527 build_string ("C1"),
2528 build_string ("Control characters"),
2529 build_string ("Control characters 128-191"),
2531 Qnil, 0x80, 0x9F, 0, 0);
2532 staticpro (&Vcharset_latin_iso8859_1);
2533 Vcharset_latin_iso8859_1 =
2534 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2535 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2536 build_string ("Latin-1"),
2537 build_string ("ISO8859-1 (Latin-1)"),
2538 build_string ("ISO8859-1 (Latin-1)"),
2539 build_string ("iso8859-1"),
2540 Qnil, 0xA0, 0xFF, 0, 32);
2541 staticpro (&Vcharset_latin_iso8859_2);
2542 Vcharset_latin_iso8859_2 =
2543 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2544 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2545 build_string ("Latin-2"),
2546 build_string ("ISO8859-2 (Latin-2)"),
2547 build_string ("ISO8859-2 (Latin-2)"),
2548 build_string ("iso8859-2"),
2550 staticpro (&Vcharset_latin_iso8859_3);
2551 Vcharset_latin_iso8859_3 =
2552 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2553 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2554 build_string ("Latin-3"),
2555 build_string ("ISO8859-3 (Latin-3)"),
2556 build_string ("ISO8859-3 (Latin-3)"),
2557 build_string ("iso8859-3"),
2559 staticpro (&Vcharset_latin_iso8859_4);
2560 Vcharset_latin_iso8859_4 =
2561 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2562 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2563 build_string ("Latin-4"),
2564 build_string ("ISO8859-4 (Latin-4)"),
2565 build_string ("ISO8859-4 (Latin-4)"),
2566 build_string ("iso8859-4"),
2568 staticpro (&Vcharset_thai_tis620);
2569 Vcharset_thai_tis620 =
2570 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2571 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2572 build_string ("TIS620"),
2573 build_string ("TIS620 (Thai)"),
2574 build_string ("TIS620.2529 (Thai)"),
2575 build_string ("tis620"),
2576 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
/* ISO 8859-7 (Greek): staticpro the variable, then create the charset
   with the character 'F', CHARSET_LEFT_TO_RIGHT and the names
   "ISO8859-7" / "ISO8859-7 (Greek)"; "iso8859-7" is the fourth string
   (presumably the registry pattern -- confirm). */
2577 staticpro (&Vcharset_greek_iso8859_7);
2578 Vcharset_greek_iso8859_7 =
2579 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2580 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2581 build_string ("ISO8859-7"),
2582 build_string ("ISO8859-7 (Greek)"),
2583 build_string ("ISO8859-7 (Greek)"),
2584 build_string ("iso8859-7"),
/* ISO 8859-6 (Arabic): staticpro the variable, then create the charset
   with the character 'G'.  Unlike the Latin sets this one is created
   with CHARSET_RIGHT_TO_LEFT.  Names are "ISO8859-6" /
   "ISO8859-6 (Arabic)"; "iso8859-6" is the fourth string. */
2586 staticpro (&Vcharset_arabic_iso8859_6);
2587 Vcharset_arabic_iso8859_6 =
2588 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2589 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2590 build_string ("ISO8859-6"),
2591 build_string ("ISO8859-6 (Arabic)"),
2592 build_string ("ISO8859-6 (Arabic)"),
2593 build_string ("iso8859-6"),
/* ISO 8859-8 (Hebrew): staticpro the variable, then create the charset
   with the character 'H' and CHARSET_RIGHT_TO_LEFT.  Both range
   arguments are the literal 0; the MIN_CHAR_HEBREW / MAX_CHAR_HEBREW
   names survive only as inline comments.
   NOTE(review): a 0..0 range presumably means no direct code-point
   mapping is set up here -- confirm. */
2595 staticpro (&Vcharset_hebrew_iso8859_8);
2596 Vcharset_hebrew_iso8859_8 =
2597 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2598 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2599 build_string ("ISO8859-8"),
2600 build_string ("ISO8859-8 (Hebrew)"),
2601 build_string ("ISO8859-8 (Hebrew)"),
2602 build_string ("iso8859-8"),
2604 0 /* MIN_CHAR_HEBREW */,
2605 0 /* MAX_CHAR_HEBREW */, 0, 32);
/* JIS X0201 Katakana: staticpro the variable, then create a charset
   declared with 94 and 1 and the arguments 1, 1, 'I'.  It shares the
   string "jisx0201\\.1976" with the JIS X0201 Roman charset.
   NOTE(review): the second 1 presumably selects the right-hand
   (high-bit) graphic half -- confirm against make_charset. */
2606 staticpro (&Vcharset_katakana_jisx0201);
2607 Vcharset_katakana_jisx0201 =
2608 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2609 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2610 build_string ("JISX0201 Kana"),
2611 build_string ("JISX0201.1976 (Japanese Kana)"),
2612 build_string ("JISX0201.1976 Japanese Kana"),
2613 build_string ("jisx0201\\.1976"),
/* JIS X0201 Roman: staticpro the variable, then create a charset
   declared with 94 and 1 and the arguments 1, 0, 'J'.  It shares the
   string "jisx0201\\.1976" with the JIS X0201 Katakana charset.
   NOTE(review): the 0 presumably selects the left-hand (7-bit) graphic
   half, in contrast to the Katakana set -- confirm. */
2615 staticpro (&Vcharset_latin_jisx0201);
2616 Vcharset_latin_jisx0201 =
2617 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2618 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2619 build_string ("JISX0201 Roman"),
2620 build_string ("JISX0201.1976 (Japanese Roman)"),
2621 build_string ("JISX0201.1976 Japanese Roman"),
2622 build_string ("jisx0201\\.1976"),
/* ISO 8859-5 (Cyrillic): staticpro the variable, then create the
   charset with the character 'L', CHARSET_LEFT_TO_RIGHT and the names
   "ISO8859-5" / "ISO8859-5 (Cyrillic)"; "iso8859-5" is the fourth
   string (presumably the registry pattern -- confirm). */
2624 staticpro (&Vcharset_cyrillic_iso8859_5);
2625 Vcharset_cyrillic_iso8859_5 =
2626 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2627 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2628 build_string ("ISO8859-5"),
2629 build_string ("ISO8859-5 (Cyrillic)"),
2630 build_string ("ISO8859-5 (Cyrillic)"),
2631 build_string ("iso8859-5"),
/* ISO 8859-9 (Latin-5): staticpro the variable, then create the charset
   with the character 'M', CHARSET_LEFT_TO_RIGHT and the names
   "Latin-5" / "ISO8859-9 (Latin-5)"; "iso8859-9" is the fourth string
   (presumably the registry pattern -- confirm). */
2633 staticpro (&Vcharset_latin_iso8859_9);
2634 Vcharset_latin_iso8859_9 =
2635 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2636 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2637 build_string ("Latin-5"),
2638 build_string ("ISO8859-9 (Latin-5)"),
2639 build_string ("ISO8859-9 (Latin-5)"),
2640 build_string ("iso8859-9"),
/* JIS X0208:1978 ("old JIS"): staticpro the variable, then create a
   charset declared with 94 and 2 and the arguments 2, 0, '@'.  The
   last string is a regexp alternation accepting either a jisx0208 or
   a jisc6226 prefix followed by ".1978".
   NOTE(review): 94 and 2 presumably mean a 94x94 two-byte set --
   confirm against make_charset. */
2642 staticpro (&Vcharset_japanese_jisx0208_1978);
2643 Vcharset_japanese_jisx0208_1978 =
2644 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2645 Qjapanese_jisx0208_1978, 94, 2,
2646 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2647 build_string ("JIS X0208:1978"),
2648 build_string ("JIS X0208:1978 (Japanese)"),
2650 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2651 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB2312 (simplified Chinese): staticpro the variable, then create a
   charset declared with 94 and 2 and the arguments 2, 0, 'A'.  Names
   are "GB2312" / "GB2312" / "GB2312 Chinese simplified"; "gb2312" is
   the fourth string (presumably the registry pattern -- confirm). */
2653 staticpro (&Vcharset_chinese_gb2312);
2654 Vcharset_chinese_gb2312 =
2655 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2656 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2657 build_string ("GB2312"),
2658 build_string ("GB2312"),
2659 build_string ("GB2312 Chinese simplified"),
2660 build_string ("gb2312"),
/* GB 12345: staticpro the variable, then create a charset declared with
   94 and 2 and the arguments 2, 0, 0 (a literal 0 where sibling
   charsets pass a character).  Names are "G1" / "GB 12345" /
   "GB 12345-1990"; the last string is a pattern in which the ".1990"
   part is optional. */
2662 staticpro (&Vcharset_chinese_gb12345);
2663 Vcharset_chinese_gb12345 =
2664 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2665 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2666 build_string ("G1"),
2667 build_string ("GB 12345"),
2668 build_string ("GB 12345-1990"),
2669 build_string ("GB12345\\(\\.1990\\)?-0"),
/* JIS X0208:1983: staticpro the variable, then create a charset
   declared with 94 and 2 and the arguments 2, 0, 'B'.  Names are
   "JISX0208" / "JIS X0208:1983 (Japanese)" /
   "JIS X0208:1983 Japanese Kanji"; the fourth string matches
   "jisx0208.1983" with the dot escaped. */
2671 staticpro (&Vcharset_japanese_jisx0208);
2672 Vcharset_japanese_jisx0208 =
2673 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2674 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2675 build_string ("JISX0208"),
2676 build_string ("JIS X0208:1983 (Japanese)"),
2677 build_string ("JIS X0208:1983 Japanese Kanji"),
2678 build_string ("jisx0208\\.1983"),
/* JIS X0208:1990: staticpro the variable, then create a charset
   declared with 94 and 2 and the arguments 2, 0, 0.  The call ends
   with MIN_CHAR_JIS_X0208_1990, MAX_CHAR_JIS_X0208_1990, 0, 33.
   NOTE(review): the MIN/MAX pair presumably bounds a private
   code-point range and 33 is presumably a byte offset -- confirm
   against make_charset. */
2681 staticpro (&Vcharset_japanese_jisx0208_1990);
2682 Vcharset_japanese_jisx0208_1990 =
2683 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2684 Qjapanese_jisx0208_1990, 94, 2,
2685 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2686 build_string ("JISX0208-1990"),
2687 build_string ("JIS X0208:1990 (Japanese)"),
2688 build_string ("JIS X0208:1990 Japanese Kanji"),
2689 build_string ("jisx0208\\.1990"),
2691 MIN_CHAR_JIS_X0208_1990,
2692 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* KS C 5601 (Korean): staticpro the variable, then create a charset
   declared with 94 and 2 and the arguments 2, 0, 'C'.  Names are
   "KSC5601" / "KSC5601 (Korean)" / "KSC5601 Korean Hangul and Hanja";
   "ksc5601" is the fourth string (presumably the registry pattern --
   confirm). */
2694 staticpro (&Vcharset_korean_ksc5601);
2695 Vcharset_korean_ksc5601 =
2696 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2697 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("KSC5601"),
2699 build_string ("KSC5601 (Korean)"),
2700 build_string ("KSC5601 Korean Hangul and Hanja"),
2701 build_string ("ksc5601"),
/* JIS X0212 (Japanese supplement): staticpro the variable, then create
   a charset declared with 94 and 2 and the arguments 2, 0, 'D'.
   Names are "JISX0212" / "JISX0212 (Japanese)" /
   "JISX0212 Japanese Supplement"; "jisx0212" is the fourth string. */
2703 staticpro (&Vcharset_japanese_jisx0212);
2704 Vcharset_japanese_jisx0212 =
2705 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2706 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("JISX0212"),
2708 build_string ("JISX0212 (Japanese)"),
2709 build_string ("JISX0212 Japanese Supplement"),
2710 build_string ("jisx0212"),
/* Build the pattern string for one CNS 11643 plane by string-literal
   concatenation: a fixed "cns11643" prefix, then n, then an end-of-string
   anchor.  n must itself be a string literal (callers pass "1", "2"). */
2713 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
/* CNS 11643 plane 1 (traditional Chinese): staticpro the variable, then
   create a charset declared with 94 and 2 and the arguments 2, 0, 'G'.
   The pattern string comes from CHINESE_CNS_PLANE_RE("1"). */
2714 staticpro (&Vcharset_chinese_cns11643_1);
2715 Vcharset_chinese_cns11643_1 =
2716 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2717 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2718 build_string ("CNS11643-1"),
2719 build_string ("CNS11643-1 (Chinese traditional)"),
2721 ("CNS 11643 Plane 1 Chinese traditional"),
2722 build_string (CHINESE_CNS_PLANE_RE("1")),
/* CNS 11643 plane 2 (traditional Chinese): staticpro the variable, then
   create a charset declared with 94 and 2 and the arguments 2, 0, 'H'.
   The pattern string comes from CHINESE_CNS_PLANE_RE("2"). */
2724 staticpro (&Vcharset_chinese_cns11643_2);
2725 Vcharset_chinese_cns11643_2 =
2726 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2727 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("CNS11643-2"),
2729 build_string ("CNS11643-2 (Chinese traditional)"),
2731 ("CNS 11643 Plane 2 Chinese traditional"),
2732 build_string (CHINESE_CNS_PLANE_RE("2")),
/* TCVN 5712 (Vietnamese, VSCII-2): staticpro the variable, then create
   a charset declared with 96 and 1 and the arguments 1, 1, 'Z'.  The
   description names the 1993 edition, matching the pattern string,
   whose ".1993" part is optional. */
2735 staticpro (&Vcharset_latin_tcvn5712);
2736 Vcharset_latin_tcvn5712 =
2737 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2738 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2739 build_string ("TCVN 5712"),
2740 build_string ("TCVN 5712 (VSCII-2)"),
2741 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
2742 build_string ("tcvn5712\\(\\.1993\\)?-1"),
/* VISCII lower half (Vietnamese): staticpro the variable, then create a
   charset declared with 96 and 1 and the arguments 1, 1, '1'.  Names
   are "VISCII lower" / "VISCII lower (Vietnamese)"; the fourth string
   is "MULEVISCII-LOWER". */
2744 staticpro (&Vcharset_latin_viscii_lower);
2745 Vcharset_latin_viscii_lower =
2746 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2747 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2748 build_string ("VISCII lower"),
2749 build_string ("VISCII lower (Vietnamese)"),
2750 build_string ("VISCII lower (Vietnamese)"),
2751 build_string ("MULEVISCII-LOWER"),
/* VISCII upper half (Vietnamese): staticpro the variable, then create a
   charset declared with 96 and 1 and the arguments 1, 1, '2'.  Names
   are "VISCII upper" / "VISCII upper (Vietnamese)"; the fourth string
   is "MULEVISCII-UPPER". */
2753 staticpro (&Vcharset_latin_viscii_upper);
2754 Vcharset_latin_viscii_upper =
2755 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2756 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2757 build_string ("VISCII upper"),
2758 build_string ("VISCII upper (Vietnamese)"),
2759 build_string ("VISCII upper (Vietnamese)"),
2760 build_string ("MULEVISCII-UPPER"),
/* Full VISCII 1.1 (Vietnamese): staticpro the variable, then create a
   charset declared with 256 and 1 and the arguments 1, 2, 0.
   NOTE(review): 256 presumably means all byte values are used, and the
   literal 0 in the character slot presumably means no ISO 2022 final
   byte -- confirm against make_charset. */
2762 staticpro (&Vcharset_latin_viscii);
2763 Vcharset_latin_viscii =
2764 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2765 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2766 build_string ("VISCII"),
2767 build_string ("VISCII 1.1 (Vietnamese)"),
2768 build_string ("VISCII 1.1 (Vietnamese)"),
2769 build_string ("VISCII1\\.1"),
/* Big5 (traditional Chinese): staticpro the variable, then create a
   charset declared with 256 and 2 and the arguments 2, 2, 0.  Names
   are "Big5" / "Big5" / "Big5 Chinese traditional"; "big5" is the
   fourth string (presumably the registry pattern -- confirm). */
2771 staticpro (&Vcharset_chinese_big5);
2772 Vcharset_chinese_big5 =
2773 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2774 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2775 build_string ("Big5"),
2776 build_string ("Big5"),
2777 build_string ("Big5 Chinese traditional"),
2778 build_string ("big5"),
/* Big5 with CDP extension: staticpro the variable, then create a
   charset declared with 256 and 2 and the arguments 2, 2, 0.  Names
   are "Big5-CDP" / "Big5 + CDP extension" / "Big5 with CDP extension";
   the fourth string matches "big5.cdp-0" with the dot escaped. */
2780 staticpro (&Vcharset_chinese_big5_cdp);
2781 Vcharset_chinese_big5_cdp =
2782 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2783 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2784 build_string ("Big5-CDP"),
2785 build_string ("Big5 + CDP extension"),
2786 build_string ("Big5 with CDP extension"),
2787 build_string ("big5\\.cdp-0"),
/* JEF + CHINA3 private characters: staticpro the variable, then create
   a charset declared with 256 and 2 and the arguments 2, 2, 0.  The
   call ends with Qnil, MIN_CHAR_JEF_CHINA3, MAX_CHAR_JEF_CHINA3, 0, 0.
   NOTE(review): the MIN/MAX pair presumably bounds the code-point
   range reserved for this set -- confirm against make_charset. */
2789 staticpro (&Vcharset_japanese_jef_china3);
2790 Vcharset_japanese_jef_china3 =
2791 make_charset (LEADING_BYTE_JEF_CHINA3, Qjapanese_jef_china3, 256, 2,
2792 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2793 build_string ("JC3"),
2794 build_string ("JEF + CHINA3"),
2795 build_string ("JEF + CHINA3 private characters"),
2796 build_string ("china3jef-0"),
2797 Qnil, MIN_CHAR_JEF_CHINA3, MAX_CHAR_JEF_CHINA3, 0, 0);
/* GT ideographs: staticpro the variable, then create a charset declared
   with 256 and 3 and the arguments 2, 2, 0.  All visible name strings
   are "GT", and the call ends with Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0.
   NOTE(review): 3 is presumably the dimension (bytes per character) --
   confirm against make_charset. */
2798 staticpro (&Vcharset_ideograph_gt);
2799 Vcharset_ideograph_gt =
2800 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2801 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2802 build_string ("GT"),
2803 build_string ("GT"),
2804 build_string ("GT"),
2806 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
/* DEF_GT_PJ(n): statement macro that registers part n of the GT
   "pseudo JIS encoding" charsets.  n is token-pasted into the variable
   (Vcharset_ideograph_gt_pj_<n>), the LEADING_BYTE_GT_PJ_<n> constant
   and the Qideograph_gt_pj_<n> symbol, and stringized into the name and
   pattern strings.  Each part is declared with 94 and 2. */
2807 #define DEF_GT_PJ(n) \
2808 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2809 Vcharset_ideograph_gt_pj_##n = \
2810 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2811 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2812 build_string ("GT-PJ-"#n), \
2813 build_string ("GT (pseudo JIS encoding) part "#n), \
2814 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2816 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2830 staticpro (&Vcharset_ideograph_daikanwa);
/* Daikanwa (Morohashi) ideographs: create a charset declared with 256
   and 2 and the arguments 2, 2, 0.  The call ends with Qnil,
   MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0.
   NOTE(review): the MIN/MAX pair presumably bounds the code-point
   range reserved for this set -- confirm against make_charset. */
2831 Vcharset_ideograph_daikanwa =
2832 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2833 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2834 build_string ("Daikanwa"),
2835 build_string ("Morohashi's Daikanwa"),
2836 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2837 build_string ("Daikanwa"),
2838 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
/* Konjaku-Mojikyo: staticpro the variable, then call make_charset with
   256 and 3 and the arguments 2, 2, 0.  The call ends with Qnil,
   MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0.
   NOTE(review): 3 is presumably the dimension (bytes per character) --
   confirm against make_charset. */
2839 staticpro (&Vcharset_mojikyo);
2841 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2842 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2843 build_string ("Mojikyo"),
2844 build_string ("Mojikyo"),
2845 build_string ("Konjaku-Mojikyo"),
2847 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
/* Mojikyo for ISO/IEC 2022, part 1: staticpro the variable, then create
   a charset declared with 94 and 3 and the arguments 2, 2, ':'.
   NOTE(review): ':' is presumably the ISO 2022 final byte -- confirm
   against make_charset. */
2848 staticpro (&Vcharset_mojikyo_2022_1);
2849 Vcharset_mojikyo_2022_1 =
2850 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2851 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Mojikyo-2022-1"),
2853 build_string ("Mojikyo ISO-2022 Part 1"),
2854 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
/* DEF_MOJIKYO_PJ(n): statement macro that registers part n of the
   Mojikyo "pseudo JIS encoding" charsets.  n is token-pasted into the
   variable (Vcharset_mojikyo_pj_<n>), the LEADING_BYTE_MOJIKYO_PJ_<n>
   constant and the Qmojikyo_pj_<n> symbol, and stringized into the name
   and pattern strings.  Each part is declared with 94 and 2.  The macro
   is then invoked once per part; parts 10 through 21 are visible
   below. */
2858 #define DEF_MOJIKYO_PJ(n) \
2859 staticpro (&Vcharset_mojikyo_pj_##n); \
2860 Vcharset_mojikyo_pj_##n = \
2861 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2862 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2863 build_string ("Mojikyo-PJ-"#n), \
2864 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2866 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2868 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2880 DEF_MOJIKYO_PJ (10);
2881 DEF_MOJIKYO_PJ (11);
2882 DEF_MOJIKYO_PJ (12);
2883 DEF_MOJIKYO_PJ (13);
2884 DEF_MOJIKYO_PJ (14);
2885 DEF_MOJIKYO_PJ (15);
2886 DEF_MOJIKYO_PJ (16);
2887 DEF_MOJIKYO_PJ (17);
2888 DEF_MOJIKYO_PJ (18);
2889 DEF_MOJIKYO_PJ (19);
2890 DEF_MOJIKYO_PJ (20);
2891 DEF_MOJIKYO_PJ (21);
/* Ethiopic (UCS): staticpro the variable, then create a charset
   declared with 256 and 2 and the arguments 2, 2, 0.  The call ends
   with Qnil, 0x1200, 0x137F, 0x1200, 0.
   NOTE(review): 0x1200..0x137F is presumably the UCS range covered and
   the third 0x1200 the code offset subtracted when mapping -- confirm
   against make_charset. */
2893 staticpro (&Vcharset_ethiopic_ucs);
2894 Vcharset_ethiopic_ucs =
2895 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2896 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2897 build_string ("Ethiopic (UCS)"),
2898 build_string ("Ethiopic (UCS)"),
2899 build_string ("Ethiopic of UCS"),
2900 build_string ("Ethiopic-Unicode"),
2901 Qnil, 0x1200, 0x137F, 0x1200, 0);
/* Big5 Level-1 (traditional Chinese): staticpro the variable, then
   create a charset declared with 94 and 2 and the arguments 2, 0, '0'.
   Its first name string ("Big5") and fourth string ("big5") are the
   same as those passed for the Level-2 charset. */
2903 staticpro (&Vcharset_chinese_big5_1);
2904 Vcharset_chinese_big5_1 =
2905 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2906 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2907 build_string ("Big5"),
2908 build_string ("Big5 (Level-1)"),
2910 ("Big5 Level-1 Chinese traditional"),
2911 build_string ("big5"),
/* Big5 Level-2 (traditional Chinese): staticpro the variable, then
   create a charset declared with 94 and 2 and the arguments 2, 0, '1'.
   Its first name string ("Big5") and fourth string ("big5") are the
   same as those passed for the Level-1 charset. */
2913 staticpro (&Vcharset_chinese_big5_2);
2914 Vcharset_chinese_big5_2 =
2915 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2916 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2917 build_string ("Big5"),
2918 build_string ("Big5 (Level-2)"),
2920 ("Big5 Level-2 Chinese traditional"),
2921 build_string ("big5"),
2924 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite-character support, compiled only when
   ENABLE_COMPOSITE_CHARS is defined: create the composite charset,
   reset the next-row / next-column counters, and create the two lookup
   tables between composite strings and characters. */
2925 /* #### For simplicity, we put composite chars into a 96x96 charset.
2926 This is going to lead to problems because you can run out of
2927 room, esp. as we don't yet recycle numbers. */
2928 staticpro (&Vcharset_composite);
2929 Vcharset_composite =
2930 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2931 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2932 build_string ("Composite"),
2933 build_string ("Composite characters"),
2934 build_string ("Composite characters"),
2937 /* #### not dumped properly */
/* Both counters start at 32.
   NOTE(review): 32 is presumably the first usable position code in a
   96-character row -- confirm. */
2938 composite_char_row_next = 32;
2939 composite_char_col_next = 32;
/* The string-to-char table is created with HASH_TABLE_EQUAL and the
   char-to-string table with HASH_TABLE_EQ; both are non-weak and get
   500 as their first argument (presumably an initial size -- confirm).
   Each table variable is passed to staticpro after it is assigned. */
2941 Vcomposite_char_string2char_hash_table =
2942 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2943 Vcomposite_char_char2string_hash_table =
2944 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2945 staticpro (&Vcomposite_char_string2char_hash_table);
2946 staticpro (&Vcomposite_char_char2string_hash_table);
2947 #endif /* ENABLE_COMPOSITE_CHARS */