1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
/* One global Lisp_Object per built-in charset.  Only the declarations
   are visible in this excerpt; presumably each is assigned when the
   corresponding charset is created elsewhere in the file -- TODO
   confirm. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
/* Vcharset_ucs and Vcharset_ucs_bmp are the fallback charsets chosen
   by encode_builtin_char_1(). */
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
/* decode_builtin_char() re-maps code points of Vcharset_chinese_big5
   onto Vcharset_chinese_big5_1 / Vcharset_chinese_big5_2 (declared at
   the end of this list). */
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_japanese_jef_china3;
79 Lisp_Object Vcharset_ideograph_cbeta;
/* Vcharset_ideograph_gt and its eleven `pj' companions. */
80 Lisp_Object Vcharset_ideograph_gt;
81 Lisp_Object Vcharset_ideograph_gt_pj_1;
82 Lisp_Object Vcharset_ideograph_gt_pj_2;
83 Lisp_Object Vcharset_ideograph_gt_pj_3;
84 Lisp_Object Vcharset_ideograph_gt_pj_4;
85 Lisp_Object Vcharset_ideograph_gt_pj_5;
86 Lisp_Object Vcharset_ideograph_gt_pj_6;
87 Lisp_Object Vcharset_ideograph_gt_pj_7;
88 Lisp_Object Vcharset_ideograph_gt_pj_8;
89 Lisp_Object Vcharset_ideograph_gt_pj_9;
90 Lisp_Object Vcharset_ideograph_gt_pj_10;
91 Lisp_Object Vcharset_ideograph_gt_pj_11;
92 Lisp_Object Vcharset_ideograph_daikanwa;
/* Vcharset_mojikyo, its 3-byte `2022_1' form (see
   range_charset_code_point()) and the twenty-one `pj' companions. */
93 Lisp_Object Vcharset_mojikyo;
94 Lisp_Object Vcharset_mojikyo_2022_1;
95 Lisp_Object Vcharset_mojikyo_pj_1;
96 Lisp_Object Vcharset_mojikyo_pj_2;
97 Lisp_Object Vcharset_mojikyo_pj_3;
98 Lisp_Object Vcharset_mojikyo_pj_4;
99 Lisp_Object Vcharset_mojikyo_pj_5;
100 Lisp_Object Vcharset_mojikyo_pj_6;
101 Lisp_Object Vcharset_mojikyo_pj_7;
102 Lisp_Object Vcharset_mojikyo_pj_8;
103 Lisp_Object Vcharset_mojikyo_pj_9;
104 Lisp_Object Vcharset_mojikyo_pj_10;
105 Lisp_Object Vcharset_mojikyo_pj_11;
106 Lisp_Object Vcharset_mojikyo_pj_12;
107 Lisp_Object Vcharset_mojikyo_pj_13;
108 Lisp_Object Vcharset_mojikyo_pj_14;
109 Lisp_Object Vcharset_mojikyo_pj_15;
110 Lisp_Object Vcharset_mojikyo_pj_16;
111 Lisp_Object Vcharset_mojikyo_pj_17;
112 Lisp_Object Vcharset_mojikyo_pj_18;
113 Lisp_Object Vcharset_mojikyo_pj_19;
114 Lisp_Object Vcharset_mojikyo_pj_20;
115 Lisp_Object Vcharset_mojikyo_pj_21;
116 Lisp_Object Vcharset_ethiopic_ucs;
/* Targets of the Big5 re-mapping done in decode_builtin_char(). */
118 Lisp_Object Vcharset_chinese_big5_1;
119 Lisp_Object Vcharset_chinese_big5_2;
121 #ifdef ENABLE_COMPOSITE_CHARS
122 Lisp_Object Vcharset_composite;
124 /* Hash tables for composite chars. One maps string representing
125 composed chars to their equivalent chars; one goes the other way. */
/* Vcomposite_char_char2string_hash_table is keyed by the character as
   a Lisp integer (see the Fgethash call in non_ascii_valid_char_p());
   the string2char table is, per its name, the inverse mapping. */
127 Lisp_Object Vcomposite_char_char2string_hash_table;
128 Lisp_Object Vcomposite_char_string2char_hash_table;
/* NOTE(review): presumably the row/column of the next composite
   character to hand out; the code that uses these counters is not
   visible in this excerpt -- confirm. */
130 static int composite_char_row_next;
131 static int composite_char_col_next;
133 #endif /* ENABLE_COMPOSITE_CHARS */
/* Global charset lookup state.  Code in this file reads and writes
   chlook->charset_by_leading_byte, chlook->charset_by_attributes and
   the chlook->next_allocated_*leading_byte counters (see
   make_charset() and get_unallocated_leading_byte()). */
135 struct charset_lookup *chlook;
/* Layout description of struct charset_lookup: the Lisp_Object array
   members, by offset.
   NOTE(review): the remaining entries and the closing braces of both
   initializers are not visible in this excerpt. */
137 static const struct lrecord_description charset_lookup_description_1[] = {
138 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Pairs the structure size with the member table above. */
147 static const struct struct_description charset_lookup_description = {
148 sizeof (struct charset_lookup),
149 charset_lookup_description_1
153 /* Table of number of bytes in the string representation of a character
154 indexed by the first byte of that representation.
156 rep_bytes_by_first_byte(c) is more efficient than the equivalent
157 canonical computation:
159 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The table has 0xA0 entries, so only first bytes 0x00 - 0x9f may be
   used as an index. */
161 const Bytecount rep_bytes_by_first_byte[0xA0] =
162 { /* 0x00 - 0x7f are for straight ASCII */
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two alternative rows for 0x80 - 0x8f follow (they
   differ only in the entry for 0x8f: 3 versus 2).  Presumably one of
   them is selected by a preprocessor conditional that is not visible
   in this excerpt -- confirm. */
173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
175 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
177 /* 0x90 - 0x9d are for Dimension-2 official charsets */
178 /* 0x9e is for Dimension-1 private charsets */
179 /* 0x9f is for Dimension-2 private charsets */
180 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the number of slots per dimension used for charset CS; this
   value is used as the length of each decoding-table vector (see
   put_char_ccs_code_point()).

   Normally this is CHARSET_CHARS (cs).  A 94-character charset whose
   byte offset is not 33 instead gets 128 - CHARSET_BYTE_OFFSET (cs),
   i.e. enough slots for every byte value from the offset up to 127. */
186 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
188 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
190 /* ad-hoc method for `ascii' */
191 if ((CHARSET_CHARS (cs) == 94) &&
192 (CHARSET_BYTE_OFFSET (cs) != 33))
193 return 128 - CHARSET_BYTE_OFFSET (cs);
195 return CHARSET_CHARS (cs);
/* Same as CHARSET_BYTE_SIZE, but takes the charset as a Lisp_Object. */
198 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Validate a (possibly nested) decoding-table vector V for a charset
   of dimension DIM whose per-dimension size is CCS_LEN.

   The visible checks are: V must not be longer than CCS_LEN; each
   element is inspected for being nil or a character; and nested
   elements are checked recursively with DIM - 1.

   NOTE(review): the return statements are not visible in this
   excerpt, so the meaning of the int result (and of `ret') could not
   be confirmed here. */
200 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
202 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
206 if (XVECTOR_LENGTH (v) > ccs_len)
209 for (i = 0; i < XVECTOR_LENGTH (v); i++)
211 Lisp_Object c = XVECTOR_DATA(v)[i];
213 if (!NILP (c) && !CHARP (c))
217 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for CODE_POINT from decoding table V.

   The table is indexed one byte of CODE_POINT at a time: the slot
   used is byte number DIM of CODE_POINT (shifted down by 8 * DIM
   bits and masked to 8 bits) minus BYTE_OFFSET.  The slot finally
   reached is set to Qnil.

   NOTE(review): the parameter list is truncated and the code that
   descends into the nested vector `nv' is not visible in this
   excerpt; no bounds check on `i' is visible either. */
229 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
232 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
242 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
243 nv = XVECTOR_DATA(v)[i];
249 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER as the decoding of CODE_POINT in decoding table V.

   Indexing is the same as in decoding_table_remove_char(): byte
   number DIM of CODE_POINT minus BYTE_OFFSET selects the slot.
   Missing sub-vectors are created with make_older_vector(), using the
   length of V itself as the length of every level.

   NOTE(review): the loop/condition structure around these statements
   is not visible in this excerpt. */
253 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
254 int code_point, Lisp_Object character);
256 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
257 int code_point, Lisp_Object character)
261 int ccs_len = XVECTOR_LENGTH (v);
266 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
267 nv = XVECTOR_DATA(v)[i];
/* Create the next level on demand, remembering it in the parent. */
271 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
277 XVECTOR_DATA(v)[i] = character;
/* Record VALUE as the code point of CHARACTER in coded charset CCS.

   VALUE is either an integer code point or, in the obsolete form, a
   list of bytes which is folded into one integer
   (code_point = (code_point << 8) | j).  Other values signal
   "Invalid value for coded-charset".  In the integer form, when
   XCHARSET_GRAPHIC (ccs) is 1 the high bit of every byte is cleared
   (code_point &= 0x7F7F7F7F).  In both forms VALUE is replaced by the
   normalized integer.

   The code point currently registered for CHARACTER (looked up with
   Fget_char_attribute) is removed from the decoding table, the new
   code point -> CHARACTER entry is stored there, and CHARACTER ->
   VALUE is stored in the encoding table.  Both tables are created on
   demand.

   NOTE(review): the braces, several conditions and the return
   statement are not visible in this excerpt, so the exact extent of
   the `if' on the charset name and the return value could not be
   confirmed. */
281 put_char_ccs_code_point (Lisp_Object character,
282 Lisp_Object ccs, Lisp_Object value)
284 Lisp_Object encoding_table;
/* The block below is entered unless CCS is named `ucs' and CHARACTER's
   own code already equals VALUE. */
286 if (!EQ (XCHARSET_NAME (ccs), Qucs)
287 || (XCHAR (character) != XINT (value)))
289 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
290 int dim = XCHARSET_DIMENSION (ccs);
291 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
292 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
296 { /* obsolete representation: value must be a list of bytes */
297 Lisp_Object ret = Fcar (value);
301 signal_simple_error ("Invalid value for coded-charset", value);
302 code_point = XINT (ret);
303 if (XCHARSET_GRAPHIC (ccs) == 1)
311 signal_simple_error ("Invalid value for coded-charset",
315 signal_simple_error ("Invalid value for coded-charset",
318 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J of the list to the accumulated code point. */
320 code_point = (code_point << 8) | j;
323 value = make_int (code_point);
325 else if (INTP (value))
327 code_point = XINT (value);
328 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear bit 7 of each of the (up to four) bytes. */
330 code_point &= 0x7F7F7F7F;
331 value = make_int (code_point);
335 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the mapping from CHARACTER's previous code point, if any. */
339 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
342 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
/* Create the top-level decoding vector on first use. */
347 XCHARSET_DECODING_TABLE (ccs)
348 = v = make_older_vector (ccs_len, Qnil);
351 decoding_table_put_char (v, dim, byte_offset, code_point, character);
/* Create the encoding table on first use, then record the mapping. */
353 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
355 XCHARSET_ENCODING_TABLE (ccs)
356 = encoding_table = make_char_id_table (Qnil);
358 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
/* Remove CHARACTER from coded charset CCS.

   If the charset has a vector decoding table, the code point currently
   registered for CHARACTER (looked up with Fget_char_attribute) is
   removed from it; if it has a char-table encoding table, CHARACTER's
   entry there is reset to Qnil.

   NOTE(review): the test on `cpos', the last argument of the
   decoding_table_remove_char() call and the return statement are not
   visible in this excerpt. */
363 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
365 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
366 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
368 if (VECTORP (decoding_table))
370 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
374 decoding_table_remove_char (decoding_table,
375 XCHARSET_DIMENSION (ccs),
376 XCHARSET_BYTE_OFFSET (ccs),
380 if (CHAR_TABLEP (encoding_table))
382 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
390 int leading_code_private_11;
/* Symbols used by the charset code. */
393 Lisp_Object Qcharsetp;
395 /* Qdoc_string, Qdimension, Qchars defined in general.c */
396 Lisp_Object Qregistry, Qfinal, Qgraphic;
397 Lisp_Object Qdirection;
398 Lisp_Object Qreverse_direction_charset;
399 Lisp_Object Qleading_byte;
400 Lisp_Object Qshort_name, Qlong_name;
/* NOTE(review): the following five names are fragments of a longer
   declaration list; its opening `Lisp_Object' line, the other names
   and the terminating `;' are not visible in this excerpt. */
416 Qjapanese_jisx0208_1978,
420 Qjapanese_jisx0208_1990,
435 Qvietnamese_viscii_lower,
436 Qvietnamese_viscii_upper,
439 Qjapanese_jef_china3,
/* Direction symbols; the make-charset docstring names them
   left-to-right and right-to-left. */
483 Lisp_Object Ql2r, Qr2l;
/* Maps a charset's name to the charset object: filled by Fputhash in
   make_charset(), read by Fgethash in Ffind_charset(). */
485 Lisp_Object Vcharset_hash_table;
487 /* Composite characters are characters constructed by overstriking two
488 or more regular characters.
490 1) The old Mule implementation involves storing composite characters
491 in a buffer as a tag followed by all of the actual characters
492 used to make up the composite character. I think this is a bad
493 idea; it greatly complicates code that wants to handle strings
494 one character at a time because it has to deal with the possibility
495 of great big ungainly characters. It's much more reasonable to
496 simply store an index into a table of composite characters.
498 2) The current implementation only allows for 16,384 separate
499 composite characters over the lifetime of the XEmacs process.
500 This could become a potential problem if the user
501 edited lots of different files that use composite characters.
502 Due to FSF bogosity, increasing the number of allowable
503 composite characters under Mule would decrease the number
504 of possible faces that can exist. Mule already has shrunk
505 this to 2048, and further shrinkage would become uncomfortable.
506 No such problems exist in XEmacs.
508 Composite characters could be represented as 0x80 C1 C2 C3,
509 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
510 for slightly under 2^20 (one million) composite characters
511 over the XEmacs process lifetime, and you only need to
512 increase the size of a Mule character from 19 to 21 bits.
513 Or you could use 0x80 C1 C2 C3 C4, allowing for about
514 85 million (slightly over 2^26) composite characters. */
517 /************************************************************************/
518 /* Basic Emchar functions */
519 /************************************************************************/
521 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
522 string in STR. Returns the number of bytes stored.
523 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encoder for one character C into the buffer at STR.

   NOTE(review): two different encoders appear below -- a UTF-8 style
   one (lead bytes 0xc0 ... 0xfc followed by 0x80-tagged 6-bit groups)
   and a leading-byte one built on BREAKUP_CHAR.  They are presumably
   alternatives selected by a preprocessor conditional that is not
   visible in this excerpt; the first branch of the `else if' chain,
   the local declarations and the return statement are not visible
   either. */
527 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: values up to 11 bits. */
542 else if ( c <= 0x7ff )
544 *p++ = (c >> 6) | 0xc0;
545 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: values up to 16 bits. */
547 else if ( c <= 0xffff )
549 *p++ = (c >> 12) | 0xe0;
550 *p++ = ((c >> 6) & 0x3f) | 0x80;
551 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: values up to 21 bits. */
553 else if ( c <= 0x1fffff )
555 *p++ = (c >> 18) | 0xf0;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: values up to 26 bits. */
560 else if ( c <= 0x3ffffff )
562 *p++ = (c >> 24) | 0xf8;
563 *p++ = ((c >> 18) & 0x3f) | 0x80;
564 *p++ = ((c >> 12) & 0x3f) | 0x80;
565 *p++ = ((c >> 6) & 0x3f) | 0x80;
566 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
570 *p++ = (c >> 30) | 0xfc;
571 *p++ = ((c >> 24) & 0x3f) | 0x80;
572 *p++ = ((c >> 18) & 0x3f) | 0x80;
573 *p++ = ((c >> 12) & 0x3f) | 0x80;
574 *p++ = ((c >> 6) & 0x3f) | 0x80;
575 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into its charset and position bytes;
   private leading bytes are preceded by their prefix byte. */
578 BREAKUP_CHAR (c, charset, c1, c2);
579 lb = CHAR_LEADING_BYTE (c);
580 if (LEADING_BYTE_PRIVATE_P (lb))
581 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
583 if (EQ (charset, Vcharset_control_1))
592 /* Return the first character from a Mule-encoded string in STR,
593 assuming it's non-ASCII. Do not call this directly.
594 Use the macro charptr_emchar() instead. */
/* NOTE(review): as in non_ascii_set_charptr_emchar(), two decoders are
   visible -- a UTF-8 style one and a leading-byte one -- presumably
   selected by a preprocessor conditional not visible in this excerpt.
   The bodies of the lead-byte tests below are not visible either. */
597 non_ascii_charptr_emchar (const Bufbyte *str)
/* Classify the lead byte B, highest threshold first. */
610 else if ( b >= 0xf8 )
615 else if ( b >= 0xf0 )
620 else if ( b >= 0xe0 )
625 else if ( b >= 0xc0 )
/* Fold in LEN further bytes, six payload bits from each. */
635 for( ; len > 0; len-- )
638 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte decoder. */
642 Bufbyte i0 = *str, i1, i2 = 0;
/* For Control-1 the byte after the leading byte holds the character
   code plus 0x20. */
645 if (i0 == LEADING_BYTE_CONTROL_1)
646 return (Emchar) (*++str - 0x20);
648 if (LEADING_BYTE_PREFIX_P (i0))
653 charset = CHARSET_BY_LEADING_BYTE (i0);
654 if (XCHARSET_DIMENSION (charset) == 2)
657 return MAKE_CHAR (charset, i1, i2);
661 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
662 Do not call this directly. Use the macro valid_char_p() instead. */
/* NOTE(review): only the conditions of most tests are visible in this
   excerpt; the statements they guard (presumably the early returns),
   the braces and the local declarations are missing. */
666 non_ascii_valid_char_p (Emchar ch)
670 /* Must have only lowest 19 bits set */
/* Split CH into its three fields. */
674 f1 = CHAR_FIELD1 (ch);
675 f2 = CHAR_FIELD2 (ch);
676 f3 = CHAR_FIELD3 (ch);
/* Test: FIELD2 lies outside both the official and private ranges. */
682 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
683 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
684 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Test: FIELD3 is neither 0x20 nor 0x7F and FIELD2 is not private. */
689 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
690 f2 <= MAX_CHAR_FIELD2_PRIVATE))
694 NOTE: This takes advantage of the fact that
695 FIELD2_TO_OFFICIAL_LEADING_BYTE and
696 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
698 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
699 if (EQ (charset, Qnil))
/* Valid only if the charset has 96 characters per dimension. */
701 return (XCHARSET_CHARS (charset) == 96);
/* Test: FIELD1 lies outside both the official and private ranges. */
707 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
708 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
709 f1 > MAX_CHAR_FIELD1_PRIVATE)
711 if (f2 < 0x20 || f3 < 0x20)
714 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char -> string table. */
715 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
717 if (UNBOUNDP (Fgethash (make_int (ch),
718 Vcomposite_char_char2string_hash_table,
723 #endif /* ENABLE_COMPOSITE_CHARS */
/* Test: neither position byte is 0x20 or 0x7F and FIELD1 is not
   private. */
725 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
726 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private FIELD1 values use different offsets to reach
   the leading byte. */
729 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
731 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
734 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
736 if (EQ (charset, Qnil))
/* Valid only if the charset has 96 characters per dimension. */
738 return (XCHARSET_CHARS (charset) == 96);
744 /************************************************************************/
745 /* Basic string functions */
746 /************************************************************************/
748 /* Copy the character pointed to by SRC into DST. Do not call this
749 directly. Use the macro charptr_copy_char() instead.
750 Return the number of bytes copied. */
753 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The length of the character is determined from its first byte. */
755 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Advance both pointers once per byte; the loop body (presumably the
   byte copy itself) is not visible in this excerpt. */
757 for (i = bytes; i; i--, dst++, src++)
763 /************************************************************************/
764 /* streams of Emchars */
765 /************************************************************************/
767 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
768 The functions below are not meant to be called directly; use
769 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is the
   first byte, already read by the caller; the remaining
   REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are fetched with
   Lstream_getc() and the assembled bytes are decoded with
   charptr_emchar(). */
772 Lstream_get_emchar_1 (Lstream *stream, int ch)
774 Bufbyte str[MAX_EMCHAR_LEN];
775 Bufbyte *strptr = str;
778 str[0] = (Bufbyte) ch;
780 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
782 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc() in mid-character is only
   checked by this assertion. */
783 bufpos_checking_assert (c >= 0);
784 *++strptr = (Bufbyte) c;
786 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar() and pass the resulting bytes to
   Lstream_write(), whose result is returned. */
790 Lstream_fput_emchar (Lstream *stream, Emchar ch)
792 Bufbyte str[MAX_EMCHAR_LEN];
793 Bytecount len = set_charptr_emchar (str, ch);
794 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar() and hand the bytes to Lstream_unread(). */
798 Lstream_funget_emchar (Lstream *stream, Emchar ch)
800 Bufbyte str[MAX_EMCHAR_LEN];
801 Bytecount len = set_charptr_emchar (str, ch);
802 Lstream_unread (stream, str, len);
806 /************************************************************************/
808 /************************************************************************/
/* Mark method for charset objects: marks the Lisp_Object slots of
   the charset OBJ so that they are kept alive.

   NOTE(review): the call marking decoding_table is commented out and
   no mark of `name' or `reverse_direction_charset' is visible here;
   the return statement is not visible in this excerpt either, so
   whether any of these is handled another way could not be
   confirmed. */
811 mark_charset (Lisp_Object obj)
813 Lisp_Charset *cs = XCHARSET (obj);
815 mark_object (cs->short_name);
816 mark_object (cs->long_name);
817 mark_object (cs->doc_string);
818 mark_object (cs->registry);
819 mark_object (cs->ccl_program);
821 mark_object (cs->encoding_table);
822 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Output has the form

     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING N^DIM DIR cols=C
       gG final='F' reg=REGISTRY 0xUID>

   where DIR is "l2r" or "r2l".  The name and registry are printed
   with an escape flag of 0, the other Lisp values with 1.

   NOTE(review): the condition guarding the error() call, the
   declaration of `buf' and some sprintf arguments are not visible in
   this excerpt. */
828 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
830 Lisp_Charset *cs = XCHARSET (obj);
/* Charsets have no readable printed representation. */
834 error ("printing unreadable object #<charset %s 0x%x>",
835 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
838 write_c_string ("#<charset ", printcharfun);
839 print_internal (CHARSET_NAME (cs), printcharfun, 0);
840 write_c_string (" ", printcharfun);
841 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
842 write_c_string (" ", printcharfun);
843 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
844 write_c_string (" ", printcharfun);
845 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Numeric properties are formatted into `buf' first. */
846 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
848 CHARSET_DIMENSION (cs),
849 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
850 CHARSET_COLUMNS (cs),
851 CHARSET_GRAPHIC (cs),
853 write_c_string (buf, printcharfun);
854 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
855 sprintf (buf, " 0x%x>", cs->header.uid);
856 write_c_string (buf, printcharfun);
/* Lists, by offset, the Lisp_Object slots of Lisp_Charset.
   NOTE(review): the terminating entry and closing brace are not
   visible in this excerpt. */
859 static const struct lrecord_description charset_description[] = {
860 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
861 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
862 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
863 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
864 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
865 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
866 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
868 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
869 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Defines the `charset' lrecord type with mark_charset() and
   print_charset() as its mark and print methods; the remaining
   arguments are not visible in this excerpt. */
874 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
875 mark_charset, print_charset, 0, 0, 0,
879 /* Make a new charset. */
880 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset, fills in its fields from the arguments
   and registers it in the global lookup structures:

     - chlook->charset_by_attributes, indexed by ISO 2022 type and
       FINAL (one visible variant also indexes by DIRECTION);
     - chlook->charset_by_leading_byte, indexed by
       ID - MIN_LEADING_BYTE;
     - Vcharset_hash_table, keyed by NAME.

   The ISO 2022 type computed below is 0 for a 94-character set, 1 for
   96, 2 for 94x94 and 3 for 96x96.  The asserts require the slots
   being filled to be nil beforehand.

   The CCL program and the reverse-direction charset start out as
   Qnil, as do the decoding and encoding tables.  CHARSET_REP_BYTES is
   1 for ASCII and otherwise DIMENSION + 1 or DIMENSION + 2.

   NOTE(review): part of the parameter list (the `reg' parameter is
   used but its declaration is not visible), the conditions choosing
   between the alternatives above, and the return statement are not
   visible in this excerpt.  The DECODING_TABLE parameter is not used
   in any visible line. */
882 make_charset (Charset_ID id, Lisp_Object name,
883 unsigned short chars, unsigned char dimension,
884 unsigned char columns, unsigned char graphic,
885 Bufbyte final, unsigned char direction, Lisp_Object short_name,
886 Lisp_Object long_name, Lisp_Object doc,
888 Lisp_Object decoding_table,
889 Emchar ucs_min, Emchar ucs_max,
890 Emchar code_offset, unsigned char byte_offset)
893 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
897 XSETCHARSET (obj, cs);
899 CHARSET_ID (cs) = id;
900 CHARSET_NAME (cs) = name;
901 CHARSET_SHORT_NAME (cs) = short_name;
902 CHARSET_LONG_NAME (cs) = long_name;
903 CHARSET_CHARS (cs) = chars;
904 CHARSET_DIMENSION (cs) = dimension;
905 CHARSET_DIRECTION (cs) = direction;
906 CHARSET_COLUMNS (cs) = columns;
907 CHARSET_GRAPHIC (cs) = graphic;
908 CHARSET_FINAL (cs) = final;
909 CHARSET_DOC_STRING (cs) = doc;
910 CHARSET_REGISTRY (cs) = reg;
911 CHARSET_CCL_PROGRAM (cs) = Qnil;
912 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
914 CHARSET_DECODING_TABLE(cs) = Qnil;
915 CHARSET_ENCODING_TABLE(cs) = Qnil;
916 CHARSET_UCS_MIN(cs) = ucs_min;
917 CHARSET_UCS_MAX(cs) = ucs_max;
918 CHARSET_CODE_OFFSET(cs) = code_offset;
919 CHARSET_BYTE_OFFSET(cs) = byte_offset;
923 if (id == LEADING_BYTE_ASCII)
924 CHARSET_REP_BYTES (cs) = 1;
926 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
928 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
933 /* some charsets do not have final characters. This includes
934 ASCII, Control-1, Composite, and the two faux private
936 unsigned char iso2022_type
937 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
939 if (code_offset == 0)
941 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
942 chlook->charset_by_attributes[iso2022_type][final] = obj;
946 (chlook->charset_by_attributes[iso2022_type][final][direction]));
947 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
951 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
952 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
954 /* Some charsets are "faux" and don't have names or really exist at
955 all except in the leading-byte table. */
957 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of
   DIMENSION by post-incrementing the matching counter in chlook.
   Each counter is first compared against the maximum of its range.
   Failure is reported by signalling "No more character sets free for
   this dimension" with DIMENSION as the datum.

   NOTE(review): three counters are visible (a generic one plus
   separate 1-byte and 2-byte ones); the conditions selecting among
   them, what happens when a range is exhausted, and the return
   statement are not visible in this excerpt. */
962 get_unallocated_leading_byte (int dimension)
967 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
970 lb = chlook->next_allocated_leading_byte++;
974 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
977 lb = chlook->next_allocated_1_byte_leading_byte++;
981 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
984 lb = chlook->next_allocated_2_byte_leading_byte++;
990 ("No more character sets free for this dimension",
991 make_int (dimension));
997 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 94 + 63 = 157. */
999 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character for CODE_POINT of CHARSET arithmetically,
   from the charset's parameters.

   Three cases are visible:
     - Vcharset_chinese_big5: a code point with first byte 0xA1-0xFE
       and second byte 0x40-0x7E or 0xA1-0xFE is converted to a linear
       index I, assigned to Vcharset_chinese_big5_1 or
       Vcharset_chinese_big5_2 (for the latter, I is rebased to first
       byte 0xC9), and re-expressed as a 94x94 code point;
     - a charset whose final byte is >= '0': the result lies in the
       MIN_CHAR_94 / 96 / 94x94 / 96x96 block selected by dimension
       and size, at an offset derived from FINAL - '0' and the low 7
       bits of each byte;
     - a charset with a non-zero UCS_MAX: the code point is linearized
       using BYTE_OFFSET and CHARS, shifted by CODE_OFFSET and UCS_MIN,
       and range-checked against [UCS_MIN, UCS_MAX].

   NOTE(review): the `case' labels, several return statements and the
   result for unmatched input are not visible in this excerpt. */
1002 decode_builtin_char (Lisp_Object charset, int code_point)
1006 if (EQ (charset, Vcharset_chinese_big5))
1008 int c1 = code_point >> 8;
1009 int c2 = code_point & 0xFF;
1012 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1014 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1015 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Second bytes 0x40.. map to 0.., and 0xA1.. continue at 63
   (0xA1 - 0x62), closing the gap between the two ranges. */
1017 I = (c1 - 0xA1) * BIG5_SAME_ROW
1018 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1022 charset = Vcharset_chinese_big5_1;
1026 charset = Vcharset_chinese_big5_2;
1027 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express the linear index as two bytes in 33..126. */
1029 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1032 if ((final = XCHARSET_FINAL (charset)) >= '0')
1034 if (XCHARSET_DIMENSION (charset) == 1)
1036 switch (XCHARSET_CHARS (charset))
1040 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1043 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1051 switch (XCHARSET_CHARS (charset))
1054 return MIN_CHAR_94x94
1055 + (final - '0') * 94 * 94
1056 + (((code_point >> 8) & 0x7F) - 33) * 94
1057 + ((code_point & 0x7F) - 33);
1059 return MIN_CHAR_96x96
1060 + (final - '0') * 96 * 96
1061 + (((code_point >> 8) & 0x7F) - 32) * 96
1062 + ((code_point & 0x7F) - 32);
1069 else if (XCHARSET_UCS_MAX (charset))
1072 = (XCHARSET_DIMENSION (charset) == 1
1074 code_point - XCHARSET_BYTE_OFFSET (charset)
1076 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1077 * XCHARSET_CHARS (charset)
1078 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1079 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1080 if ((cid < XCHARSET_UCS_MIN (charset))
1081 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET arithmetically,
   from the charset's parameters.

   Visible cases:
     - CH within [UCS_MIN, UCS_MAX]: d = CH - UCS_MIN + CODE_OFFSET is
       split into DIMENSION digits in base XCHARSET_CHARS; each digit
       has BYTE_OFFSET added and occupies one byte of the result, most
       significant first.  256-character charsets are tested for
       separately;
     - CODE_OFFSET == 0: CH is located relative to the MIN_CHAR_94 /
       96 / 94x94 / 96x96 block selected by FINAL - '0', and the offset
       is converted to bytes based at 33 (94-sets) or 32 (96-sets);
     - Vcharset_mojikyo_2022_1: CH strictly between MIN_CHAR_MOJIKYO
       and MIN_CHAR_MOJIKYO + 94 * 60 * 94 yields a three-byte code
       point.

   NOTE(review): several return statements, the upper-bound halves of
   the range tests and the result for a character outside the charset
   are not visible in this excerpt. */
1090 range_charset_code_point (Lisp_Object charset, Emchar ch)
1094 if ((XCHARSET_UCS_MIN (charset) <= ch)
1095 && (ch <= XCHARSET_UCS_MAX (charset)))
1097 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1099 if (XCHARSET_CHARS (charset) == 256)
1101 else if (XCHARSET_DIMENSION (charset) == 1)
1102 return d + XCHARSET_BYTE_OFFSET (charset);
1103 else if (XCHARSET_DIMENSION (charset) == 2)
1105 ((d / XCHARSET_CHARS (charset)
1106 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1107 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1108 else if (XCHARSET_DIMENSION (charset) == 3)
1110 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1111 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1112 | ((d / XCHARSET_CHARS (charset)
1113 % XCHARSET_CHARS (charset)
1114 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1115 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1116 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1118 ((d / (XCHARSET_CHARS (charset)
1119 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1120 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1121 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1122 % XCHARSET_CHARS (charset)
1123 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1124 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1125 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1126 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* ISO 2022 style charsets located by their final byte. */
1128 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1130 if (XCHARSET_DIMENSION (charset) == 1)
1132 if (XCHARSET_CHARS (charset) == 94)
1134 if (((d = ch - (MIN_CHAR_94
1135 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1139 else if (XCHARSET_CHARS (charset) == 96)
1141 if (((d = ch - (MIN_CHAR_96
1142 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1149 else if (XCHARSET_DIMENSION (charset) == 2)
1151 if (XCHARSET_CHARS (charset) == 94)
1153 if (((d = ch - (MIN_CHAR_94x94
1154 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1157 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1159 else if (XCHARSET_CHARS (charset) == 96)
1161 if (((d = ch - (MIN_CHAR_96x96
1162 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1165 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1171 if (EQ (charset, Vcharset_mojikyo_2022_1)
1172 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
/* m is the zero-based index; it is split into a first byte (blocks of
   94 * 60), a middle value 0..59 and a last byte 33..126.
   NOTE(review): byte2 is presumably adjusted by lines not visible
   here before being packed. */
1174 int m = ch - MIN_CHAR_MOJIKYO - 1;
1175 int byte1 = m / (94 * 60) + 33;
1176 int byte2 = (m % (94 * 60)) / 94;
1177 int byte3 = m % 94 + 33;
1183 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Choose a built-in charset for character C from its numeric range
   alone.  The charset is stored through CHARSET and the code point in
   that charset is returned.

   The ranges are tested in ascending order.  Characters in one of
   the MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks are mapped to the
   left-to-right charset with the matching size, dimension and final
   byte (found with CHARSET_BY_ATTRIBUTES); if there is none,
   Vcharset_ucs is used.  Vcharset_ucs is also the result for the gaps
   between ranges and for anything above the last range.

   NOTE(review): several conditions and most return statements for
   the Vcharset_ucs / Vcharset_ascii / Vcharset_control_1 /
   Vcharset_latin_iso8859_1 branches are not visible in this excerpt.
   Some of the visible branches (e.g. the one returning list2 (...),
   which does not fit the integer results of its neighbours) are
   presumably inside preprocessor conditionals that are also not
   visible -- confirm. */
1189 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1191 if (c <= MAX_CHAR_BASIC_LATIN)
1193 *charset = Vcharset_ascii;
1198 *charset = Vcharset_control_1;
1203 *charset = Vcharset_latin_iso8859_1;
1207 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1209 *charset = Vcharset_hebrew_iso8859_8;
1210 return c - MIN_CHAR_HEBREW + 0x20;
1213 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1215 *charset = Vcharset_thai_tis620;
1216 return c - MIN_CHAR_THAI + 0x20;
1219 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1220 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1222 return list2 (Vcharset_katakana_jisx0201,
1223 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1226 else if (c <= MAX_CHAR_BMP)
1228 *charset = Vcharset_ucs_bmp;
1231 else if (c < MIN_CHAR_DAIKANWA)
1233 *charset = Vcharset_ucs;
1236 else if (c <= MAX_CHAR_DAIKANWA)
1238 *charset = Vcharset_ideograph_daikanwa;
1239 return c - MIN_CHAR_DAIKANWA;
1241 else if (c <= MAX_CHAR_MOJIKYO_0)
1243 *charset = Vcharset_mojikyo;
1244 return c - MIN_CHAR_MOJIKYO_0;
1246 else if (c < MIN_CHAR_94)
1248 *charset = Vcharset_ucs;
/* 94-character sets: the final byte is the block number plus '0';
   code points start at 33. */
1251 else if (c <= MAX_CHAR_94)
1253 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1254 ((c - MIN_CHAR_94) / 94) + '0',
1255 CHARSET_LEFT_TO_RIGHT);
1256 if (!NILP (*charset))
1257 return ((c - MIN_CHAR_94) % 94) + 33;
1260 *charset = Vcharset_ucs;
/* 96-character sets: code points start at 32. */
1264 else if (c <= MAX_CHAR_96)
1266 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1267 ((c - MIN_CHAR_96) / 96) + '0',
1268 CHARSET_LEFT_TO_RIGHT);
1269 if (!NILP (*charset))
1270 return ((c - MIN_CHAR_96) % 96) + 32;
1273 *charset = Vcharset_ucs;
/* 94x94 sets: two bytes, each in 33..126. */
1277 else if (c <= MAX_CHAR_94x94)
1280 = CHARSET_BY_ATTRIBUTES (94, 2,
1281 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1282 CHARSET_LEFT_TO_RIGHT);
1283 if (!NILP (*charset))
1284 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1285 | (((c - MIN_CHAR_94x94) % 94) + 33);
1288 *charset = Vcharset_ucs;
/* 96x96 sets: two bytes, each in 32..127. */
1292 else if (c <= MAX_CHAR_96x96)
1295 = CHARSET_BY_ATTRIBUTES (96, 2,
1296 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1297 CHARSET_LEFT_TO_RIGHT);
1298 if (!NILP (*charset))
1299 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1300 | (((c - MIN_CHAR_96x96) % 96) + 32);
1303 *charset = Vcharset_ucs;
1308 else if (c < MIN_CHAR_MOJIKYO)
1310 *charset = Vcharset_ucs;
1313 else if (c <= MAX_CHAR_MOJIKYO)
1315 *charset = Vcharset_mojikyo;
1316 return c - MIN_CHAR_MOJIKYO;
1318 else if (c < MIN_CHAR_JEF_CHINA3)
1320 *charset = Vcharset_ucs;
1323 else if (c <= MAX_CHAR_JEF_CHINA3)
1325 *charset = Vcharset_japanese_jef_china3;
1326 return c - MIN_CHAR_JEF_CHINA3;
1328 else if (c <= MAX_CHAR_CBETA)
1330 *charset = Vcharset_ideograph_cbeta;
1331 return c - MIN_CHAR_CBETA;
1336 *charset = Vcharset_ucs;
1341 Lisp_Object Vdefault_coded_charset_priority_list;
1345 /************************************************************************/
1346 /* Basic charset Lisp functions */
1347 /************************************************************************/
/* Lisp primitive `charsetp': a type predicate that returns Qt when
   its argument satisfies CHARSETP and Qnil otherwise.
   NOTE(review): the end of the docstring and the argument list are
   not visible in this excerpt. */
1349 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1350 Return non-nil if OBJECT is a charset.
1354 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': charset objects are returned
   unchanged; any other argument must be a symbol (CHECK_SYMBOL) and
   is looked up in Vcharset_hash_table, with Qnil as the result when
   it is not found.
   NOTE(review): the end of the docstring and the argument list are
   not visible in this excerpt. */
1357 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1358 Retrieve the charset of the given name.
1359 If CHARSET-OR-NAME is a charset object, it is simply returned.
1360 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1361 nil is returned. Otherwise the associated charset object is returned.
1365 if (CHARSETP (charset_or_name))
1366 return charset_or_name;
1368 CHECK_SYMBOL (charset_or_name);
1369 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': wraps Ffind_charset() and signals
   "No such charset" with NAME as the datum instead of returning nil.
   NOTE(review): the end of the docstring, the argument list, the test
   guarding the error and the return statement are not visible in
   this excerpt. */
1372 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1373 Retrieve the charset of the given name.
1374 Same as `find-charset' except an error is signalled if there is no such
1375 charset instead of returning nil.
1379 Lisp_Object charset = Ffind_charset (name);
1382 signal_simple_error ("No such charset", name);
1386 /* We store the charsets in hash tables with the names as the key and the
1387 actual charset object as the value. Occasionally we need to use them
1388 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash(): points at the list being
   accumulated. */
1389 struct charset_list_closure
1391 Lisp_Object *charset_list;
/* Hash-table mapping callback: conses KEY (the charset name) onto the
   front of the list referenced by the closure.  VALUE is not used in
   the visible code.
   NOTE(review): the return statement is not visible in this
   excerpt. */
1395 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1396 void *charset_list_closure)
1398 /* This function can GC */
1399 struct charset_list_closure *chcl =
1400 (struct charset_list_closure*) charset_list_closure;
1401 Lisp_Object *charset_list = chcl->charset_list;
1403 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list': walks Vcharset_hash_table with
   elisp_maphash(), letting add_charset_to_list_mapper() cons each
   charset name onto a list, and returns that list.  The list variable
   is GC-protected (GCPRO1) during the walk.
   NOTE(review): the end of the docstring and the matching UNGCPRO are
   not visible in this excerpt. */
1407 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1408 Return a list of the names of all defined charsets.
1412 Lisp_Object charset_list = Qnil;
1413 struct gcpro gcpro1;
1414 struct charset_list_closure charset_list_closure;
1416 GCPRO1 (charset_list);
1417 charset_list_closure.charset_list = &charset_list;
1418 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1419 &charset_list_closure);
1422 return charset_list;
/* Lisp primitive `charset-name': resolves its argument with
   Fget_charset() and returns the charset's name.
   NOTE(review): the end of the docstring and the argument list are
   not visible in this excerpt. */
1425 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1426 Return the name of charset CHARSET.
1430 return XCHARSET_NAME (Fget_charset (charset));
1433 /* #### SJT Should generic properties be allowed? */
1434 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1435 Define a new character set.
1436 This function is for use with Mule support.
1437 NAME is a symbol, the name by which the character set is normally referred.
1438 DOC-STRING is a string describing the character set.
1439 PROPS is a property list, describing the specific nature of the
1440 character set. Recognized properties are:
1442 'short-name Short version of the charset name (ex: Latin-1)
1443 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1444 'registry A regular expression matching the font registry field for
1446 'dimension Number of octets used to index a character in this charset.
1447 Either 1 or 2. Defaults to 1.
1448 'columns Number of columns used to display a character in this charset.
1449 Only used in TTY mode. (Under X, the actual width of a
1450 character can be derived from the font used to display the
1451 characters.) If unspecified, defaults to the dimension
1452 (this is almost always the correct value).
1453 'chars Number of characters in each dimension (94 or 96).
1454 Defaults to 94. Note that if the dimension is 2, the
1455 character set thus described is 94x94 or 96x96.
1456 'final Final byte of ISO 2022 escape sequence. Must be
1457 supplied. Each combination of (DIMENSION, CHARS) defines a
1458 separate namespace for final bytes. Note that ISO
1459 2022 restricts the final byte to the range
1460 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1461 dimension == 2. Note also that final bytes in the range
1462 0x30 - 0x3F are reserved for user-defined (not official)
1464 'graphic 0 (use left half of font on output) or 1 (use right half
1465 of font on output). Defaults to 0. For example, for
1466 a font whose registry is ISO8859-1, the left half
1467 (octets 0x20 - 0x7F) is the `ascii' character set, while
1468 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1469 character set. With 'graphic set to 0, the octets
1470 will have their high bit cleared; with it set to 1,
1471 the octets will have their high bit set.
1472 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1474 'ccl-program A compiled CCL program used to convert a character in
1475 this charset into an index into the font. This is in
1476 addition to the 'graphic property. The CCL program
1477 is passed the octets of the character, with the high
1478 bit cleared and set depending upon whether the value
1479 of the 'graphic property is 0 or 1.
1481 (name, doc_string, props))
1483 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1484 int direction = CHARSET_LEFT_TO_RIGHT;
1485 Lisp_Object registry = Qnil;
1486 Lisp_Object charset;
1487 Lisp_Object ccl_program = Qnil;
1488 Lisp_Object short_name = Qnil, long_name = Qnil;
1489 int byte_offset = -1;
1491 CHECK_SYMBOL (name);
1492 if (!NILP (doc_string))
1493 CHECK_STRING (doc_string);
1495 charset = Ffind_charset (name);
1496 if (!NILP (charset))
1497 signal_simple_error ("Cannot redefine existing charset", name);
1500 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1502 if (EQ (keyword, Qshort_name))
1504 CHECK_STRING (value);
1508 if (EQ (keyword, Qlong_name))
1510 CHECK_STRING (value);
1514 else if (EQ (keyword, Qdimension))
1517 dimension = XINT (value);
1518 if (dimension < 1 || dimension > 2)
1519 signal_simple_error ("Invalid value for 'dimension", value);
1522 else if (EQ (keyword, Qchars))
1525 chars = XINT (value);
1526 if (chars != 94 && chars != 96)
1527 signal_simple_error ("Invalid value for 'chars", value);
1530 else if (EQ (keyword, Qcolumns))
1533 columns = XINT (value);
1534 if (columns != 1 && columns != 2)
1535 signal_simple_error ("Invalid value for 'columns", value);
1538 else if (EQ (keyword, Qgraphic))
1541 graphic = XINT (value);
1543 if (graphic < 0 || graphic > 2)
1545 if (graphic < 0 || graphic > 1)
1547 signal_simple_error ("Invalid value for 'graphic", value);
1550 else if (EQ (keyword, Qregistry))
1552 CHECK_STRING (value);
1556 else if (EQ (keyword, Qdirection))
1558 if (EQ (value, Ql2r))
1559 direction = CHARSET_LEFT_TO_RIGHT;
1560 else if (EQ (value, Qr2l))
1561 direction = CHARSET_RIGHT_TO_LEFT;
1563 signal_simple_error ("Invalid value for 'direction", value);
1566 else if (EQ (keyword, Qfinal))
1568 CHECK_CHAR_COERCE_INT (value);
1569 final = XCHAR (value);
1570 if (final < '0' || final > '~')
1571 signal_simple_error ("Invalid value for 'final", value);
1574 else if (EQ (keyword, Qccl_program))
1576 struct ccl_program test_ccl;
1578 if (setup_ccl_program (&test_ccl, value) < 0)
1579 signal_simple_error ("Invalid value for 'ccl-program", value);
1580 ccl_program = value;
1584 signal_simple_error ("Unrecognized property", keyword);
1589 error ("'final must be specified");
1590 if (dimension == 2 && final > 0x5F)
1592 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1595 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1596 CHARSET_LEFT_TO_RIGHT)) ||
1597 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1598 CHARSET_RIGHT_TO_LEFT)))
1600 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1602 id = get_unallocated_leading_byte (dimension);
1604 if (NILP (doc_string))
1605 doc_string = build_string ("");
1607 if (NILP (registry))
1608 registry = build_string ("");
1610 if (NILP (short_name))
1611 XSETSTRING (short_name, XSYMBOL (name)->name);
1613 if (NILP (long_name))
1614 long_name = doc_string;
1617 columns = dimension;
1619 if (byte_offset < 0)
1623 else if (chars == 96)
1629 charset = make_charset (id, name, chars, dimension, columns, graphic,
1630 final, direction, short_name, long_name,
1631 doc_string, registry,
1632 Qnil, 0, 0, 0, byte_offset);
1633 if (!NILP (ccl_program))
1634 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   CHARSET is resolved through Fget_charset.  An error is signalled when it
   already has a reverse-direction charset, and NEW-NAME must be a symbol
   that does not yet resolve through Ffind_charset.
   The new charset copies chars, dimension, columns, graphic, final,
   doc string, short/long name and registry from the original (the visible
   argument list also passes its decoding table, UCS min/max, code offset
   and byte offset), receives a leading byte from
   get_unallocated_leading_byte and gets the opposite direction.
   Finally the two charsets are linked to each other through their
   REVERSE_DIRECTION_CHARSET slots.
   NOTE(review): the declarations of `cs' and `direction', part of the
   make_charset argument list and the return are not visible in this
   listing. */
1638 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1640 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1641 NEW-NAME is the name of the new charset. Return the new charset.
1643 (charset, new_name))
1645 Lisp_Object new_charset = Qnil;
1646 int id, chars, dimension, columns, graphic, final;
1648 Lisp_Object registry, doc_string, short_name, long_name;
1651 charset = Fget_charset (charset);
1652 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1653 signal_simple_error ("Charset already has reverse-direction charset",
1656 CHECK_SYMBOL (new_name);
1657 if (!NILP (Ffind_charset (new_name)))
1658 signal_simple_error ("Cannot redefine existing charset", new_name);
1660 cs = XCHARSET (charset);
1662 chars = CHARSET_CHARS (cs);
1663 dimension = CHARSET_DIMENSION (cs);
1664 columns = CHARSET_COLUMNS (cs);
1665 id = get_unallocated_leading_byte (dimension);
1667 graphic = CHARSET_GRAPHIC (cs);
1668 final = CHARSET_FINAL (cs);
1669 direction = CHARSET_RIGHT_TO_LEFT;
1670 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1671 direction = CHARSET_LEFT_TO_RIGHT;
1672 doc_string = CHARSET_DOC_STRING (cs);
1673 short_name = CHARSET_SHORT_NAME (cs);
1674 long_name = CHARSET_LONG_NAME (cs);
1675 registry = CHARSET_REGISTRY (cs);
1677 new_charset = make_charset (id, new_name, chars, dimension, columns,
1678 graphic, final, direction, short_name, long_name,
1679 doc_string, registry,
1681 CHARSET_DECODING_TABLE(cs),
1682 CHARSET_UCS_MIN(cs),
1683 CHARSET_UCS_MAX(cs),
1684 CHARSET_CODE_OFFSET(cs),
1685 CHARSET_BYTE_OFFSET(cs)
1691 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1692 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.
   ALIAS must be a symbol; CHARSET is resolved through Fget_charset.
   The alias is stored as an additional key of Vcharset_hash_table whose
   value is the charset object, and the result of Fputhash is returned.
   The visible code does not check whether ALIAS is already a key of the
   table.
   NOTE(review): an existing entry is presumably overwritten -- confirm
   that this is intended. */
1697 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1698 Define symbol ALIAS as an alias for CHARSET.
1702 CHECK_SYMBOL (alias);
1703 charset = Fget_charset (charset);
1704 return Fputhash (alias, charset, Vcharset_hash_table);
1707 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   CHARSET is resolved through Fget_charset and the content of its
   REVERSE_DIRECTION_CHARSET slot is returned as is.
   NOTE(review): in syms_of_mule_charset the only matching registration is
   the commented-out `DEFSUBR (Freverse_direction_charset)' -- confirm
   whether this primitive is actually compiled in and registered. */
1709 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1711 Return the reverse-direction charset parallel to CHARSET, if any.
1712 This is the charset with the same properties (in particular, the same
1713 dimension, number of characters per dimension, and final byte) as
1714 CHARSET but whose characters are displayed in the opposite direction.
1718 charset = Fget_charset (charset);
1719 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Validation performed by the visible code: DIMENSION must be 1 or 2,
   CHARS must be 94 or 96, FINAL must lie in '0' .. '~' (and must not
   exceed 0x5F when the dimension is 2), and DIRECTION must be l2r, r2l or
   nil.  `di' stays at -1 when DIRECTION is nil.
   The lookup goes through CHARSET_BY_ATTRIBUTES, either for both
   directions or for the requested one, and the value returned is the
   XCHARSET_NAME of the charset found, i.e. its name rather than the
   charset object.
   NOTE(review): the conditions selecting between the three lookups and
   the handling of an unsuccessful lookup are not visible in this
   listing. */
1723 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1724 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1725 If DIRECTION is omitted, both directions will be checked (left-to-right
1726 will be returned if character sets exist for both directions).
1728 (dimension, chars, final, direction))
1730 int dm, ch, fi, di = -1;
1731 Lisp_Object obj = Qnil;
1733 CHECK_INT (dimension);
1734 dm = XINT (dimension);
1735 if (dm < 1 || dm > 2)
1736 signal_simple_error ("Invalid value for DIMENSION", dimension);
1740 if (ch != 94 && ch != 96)
1741 signal_simple_error ("Invalid value for CHARS", chars);
1743 CHECK_CHAR_COERCE_INT (final);
1745 if (fi < '0' || fi > '~')
1746 signal_simple_error ("Invalid value for FINAL", final);
1748 if (EQ (direction, Ql2r))
1749 di = CHARSET_LEFT_TO_RIGHT;
1750 else if (EQ (direction, Qr2l))
1751 di = CHARSET_RIGHT_TO_LEFT;
1752 else if (!NILP (direction))
1753 signal_simple_error ("Invalid value for DIRECTION", direction);
1755 if (dm == 2 && fi > 0x5F)
1757 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1761 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1763 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1766 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1769 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_SHORT_NAME slot. */
1773 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1774 Return short name of CHARSET.
1778 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_LONG_NAME slot. */
1781 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1782 Return long name of CHARSET.
1786 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET through
   Fget_charset and returns its XCHARSET_DOC_STRING slot, i.e. the
   description is the charset's doc string. */
1789 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1790 Return description of CHARSET.
1794 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET through
   Fget_charset and returns XCHARSET_DIMENSION wrapped with make_int. */
1797 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1798 Return dimension of CHARSET.
1802 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   CHARSET is resolved through Fget_charset and PROP must be a symbol.
   PROP is then compared against the known property symbols one by one:
   - name, short-name, long-name, doc-string, registry and ccl-program
     return the corresponding slot unchanged;
   - dimension, columns, graphic and chars are wrapped with make_int,
     final with make_char;
   - direction is reported as the symbol l2r or r2l;
   - reverse-direction-charset yields the NAME of the linked charset when
     the slot holds a charset, otherwise the slot value itself.
   Any other symbol signals "Unrecognized charset property name". */
1805 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1806 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1807 Recognized properties are those listed in `make-charset', as well as
1808 'name and 'doc-string.
1814 charset = Fget_charset (charset);
1815 cs = XCHARSET (charset);
1817 CHECK_SYMBOL (prop);
1818 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1819 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1820 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1821 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1822 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1823 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1824 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1825 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1826 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1827 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1828 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1829 if (EQ (prop, Qdirection))
1830 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1831 if (EQ (prop, Qreverse_direction_charset))
1833 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1834 /* #### Is this translation OK? If so, error checking sufficient? */
1835 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1837 signal_simple_error ("Unrecognized charset property name", prop);
1838 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET through Fget_charset and
   returns XCHARSET_LEADING_BYTE wrapped with make_int, i.e. the
   identification number reported here is the charset's leading byte. */
1841 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1842 Return charset identification number of CHARSET.
1846 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1849 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   CHARSET is resolved through Fget_charset.  CCL-PROGRAM is validated by
   running setup_ccl_program on a scratch `struct ccl_program'; a negative
   result signals "Invalid ccl-program".  Only then is the value stored in
   the charset's CCL_PROGRAM slot, so an invalid program never replaces the
   current one.
   Unlike `set-charset-registry', the visible code neither calls
   invalidate_charset_font_caches nor face_property_was_changed.
   NOTE(review): the return statement is not visible in this listing. */
1852 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1853 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1855 (charset, ccl_program))
1857 struct ccl_program test_ccl;
1859 charset = Fget_charset (charset);
1860 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1861 signal_simple_error ("Invalid ccl-program", ccl_program);
1862 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop the cached font information kept for CHARSET.
   Every device visited by DEVICE_LOOP_NO_BREAK is examined: CHARSET is
   looked up in the device's charset_font_cache table and, when an entry
   exists, that per-charset hash table is emptied with Fclrhash.  The entry
   itself stays in the device table. */
1867 invalidate_charset_font_caches (Lisp_Object charset)
1869 /* Invalidate font cache entries for charset on all devices. */
1870 Lisp_Object devcons, concons, hash_table;
1871 DEVICE_LOOP_NO_BREAK (devcons, concons)
1873 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound serves as the "no entry" marker for the lookup. */
1874 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1875 if (!UNBOUNDP (hash_table))
1876 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.
   CHARSET is resolved through Fget_charset and REGISTRY must be a string.
   After the new registry is stored, the per-device font caches for the
   charset are cleared with invalidate_charset_font_caches and
   face_property_was_changed is called for the font property of
   Vdefault_face with the global locale.
   NOTE(review): the return statement is not visible in this listing. */
1880 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1881 Set the 'registry property of CHARSET to REGISTRY.
1883 (charset, registry))
1885 charset = Fget_charset (charset);
1886 CHECK_STRING (registry);
1887 XCHARSET_REGISTRY (charset) = registry;
1888 invalidate_charset_font_caches (charset);
1889 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET through
   Fget_charset and returns its XCHARSET_DECODING_TABLE slot, i.e. the
   mapping table exposed to Lisp is the charset's decoding table. */
1894 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1895 Return mapping-table of CHARSET.
1899 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   CHARSET is resolved through Fget_charset; the visible code then handles
   TABLE by type:
   - in the first visible branch the current decoding table, when it is a
     vector, is passed to make_vector_newer and the slot is reset to Qnil;
   - when TABLE is a vector it is validated with
     decoding_table_check_elements against the charset's dimension;
     failures are reported as "Too big table", "Invalid element is found"
     or "Something wrong", and the slot is reset to Qnil;
   - any other type signals wrong-type-argument with "vector-or-nil-p".
   Afterwards the table is walked according to CHARSET_DIMENSION and its
   entries are registered through put_char_ccs_code_point.  The code point
   is i + byte_offset for a flat entry; for entries that are themselves
   vectors it starts from (i + byte_offset) << 8.
   NOTE(review): the branch conditions, the remainder of the two-level code
   point expression, the per-element guards and the return value are not
   visible in this listing -- confirm before relying on this summary. */
1902 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1903 Set mapping-table of CHARSET to TABLE.
1907 struct Lisp_Charset *cs;
1911 charset = Fget_charset (charset);
1912 cs = XCHARSET (charset);
1916 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1917 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1918 CHARSET_DECODING_TABLE(cs) = Qnil;
1921 else if (VECTORP (table))
1923 int ccs_len = CHARSET_BYTE_SIZE (cs);
1924 int ret = decoding_table_check_elements (table,
1925 CHARSET_DIMENSION (cs),
1930 signal_simple_error ("Too big table", table);
1932 signal_simple_error ("Invalid element is found", table);
1934 signal_simple_error ("Something wrong", table);
1936 CHARSET_DECODING_TABLE(cs) = Qnil;
1939 signal_error (Qwrong_type_argument,
1940 list2 (build_translated_string ("vector-or-nil-p"),
1943 byte_offset = CHARSET_BYTE_OFFSET (cs);
1944 switch (CHARSET_DIMENSION (cs))
1947 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1949 Lisp_Object c = XVECTOR_DATA(table)[i];
1952 put_char_ccs_code_point (c, charset,
1953 make_int (i + byte_offset));
1957 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1959 Lisp_Object v = XVECTOR_DATA(table)[i];
1965 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1967 Lisp_Object c = XVECTOR_DATA(v)[j];
1970 put_char_ccs_code_point
1972 make_int ( ( (i + byte_offset) << 8 )
1978 put_char_ccs_code_point (v, charset,
1979 make_int (i + byte_offset));
1988 /************************************************************************/
1989 /* Lisp primitives for working with characters */
1990 /************************************************************************/
/* Lisp primitive `decode-char'.
   CHARSET is resolved through Fget_charset, the code point is run through
   DECODE_CHAR, and a non-negative result is returned as a character while
   a negative one yields nil.
   NOTE(review): the extraction of the integer `c' from CODE and the
   adjustment made when XCHARSET_GRAPHIC is 1 are not visible in this
   listing. */
1993 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
1994 Make a character from CHARSET and code-point CODE.
2000 charset = Fget_charset (charset);
2003 if (XCHARSET_GRAPHIC (charset) == 1)
2005 c = DECODE_CHAR (charset, c);
2006 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   CHARSET is resolved through Fget_charset.  Vcharset_latin_viscii gets
   special treatment: the code point is first decoded with Fdecode_char and
   the resulting character's attributes for Vcharset_latin_viscii_lower and
   Vcharset_latin_viscii_upper are consulted; in the visible code CHARSET is
   then replaced by whichever of the two applies.
   The code point is finally passed to decode_builtin_char; when that
   yields a negative value the function falls back to
   Fdecode_char (charset, code).
   NOTE(review): the conditions around the two attribute lookups, the way
   the code point is updated after a hit and the adjustment made when
   XCHARSET_GRAPHIC is 1 are not visible in this listing. */
2009 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2010 Make a builtin character from CHARSET and code-point CODE.
2016 charset = Fget_charset (charset);
2018 if (EQ (charset, Vcharset_latin_viscii))
2020 Lisp_Object chr = Fdecode_char (charset, code);
2026 (ret = Fget_char_attribute (chr,
2027 Vcharset_latin_viscii_lower,
2030 charset = Vcharset_latin_viscii_lower;
2034 (ret = Fget_char_attribute (chr,
2035 Vcharset_latin_viscii_upper,
2038 charset = Vcharset_latin_viscii_upper;
2045 if (XCHARSET_GRAPHIC (charset) == 1)
2048 c = decode_builtin_char (charset, c);
2049 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char'.
   CHARSET is resolved through Fget_charset and the permitted octet range
   is chosen from it: 0..127 for Vcharset_ascii, 0..31 for
   Vcharset_control_1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.
   The first octet is range-checked with args_out_of_range_3.  For a
   charset of dimension one the character is built with
   MAKE_CHAR (charset, a1, 0), and the visible error message requires the
   second octet to be nil.  Otherwise ARG2 is masked with 0x7f,
   range-checked against the same limits, and the character is built from
   both octets.
   NOTE(review): the extraction of `a1' from ARG1 (presumably with the same
   0x7f mask, as the comment in the body suggests) and the type checks on
   the arguments are not visible in this listing. */
2053 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2054 Make a character from CHARSET and octets ARG1 and ARG2.
2055 ARG2 is required only for characters from two-dimensional charsets.
2056 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2057 character s with caron.
2059 (charset, arg1, arg2))
2063 int lowlim, highlim;
2065 charset = Fget_charset (charset);
2066 cs = XCHARSET (charset);
2068 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2069 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2071 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2073 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2074 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2077 /* It is useful (and safe, according to Olivier Galibert) to strip
2078 the 8th bit off ARG1 and ARG2 because it allows programmers to
2079 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2080 Latin 2 code of the character. */
2088 if (a1 < lowlim || a1 > highlim)
2089 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2091 if (CHARSET_DIMENSION (cs) == 1)
2095 ("Charset is of dimension one; second octet must be nil", arg2);
2096 return make_char (MAKE_CHAR (charset, a1, 0));
2105 a2 = XINT (arg2) & 0x7f;
2107 if (a2 < lowlim || a2 > highlim)
2108 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2110 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT; the charset found by
   CHAR_CHARSET is then reported by NAME (XCHARSET_NAME), not as a charset
   object. */
2113 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2114 Return the character set of CHARACTER.
2118 CHECK_CHAR_COERCE_INT (character);
2120 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT and split by
   BREAKUP_CHAR into its charset and two octets.  N selects the result:
   nil or 0 returns the first octet, 1 the second, and any other value
   signals "Octet number must be 0 or 1". */
2123 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2124 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2125 N defaults to 0 if omitted.
2129 Lisp_Object charset;
2132 CHECK_CHAR_COERCE_INT (character);
2134 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2136 if (NILP (n) || EQ (n, Qzero))
2137 return make_int (octet0);
2138 else if (EQ (n, make_int (1)))
2139 return make_int (octet1);
2141 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT while `charset' and `rc'
   are protected with GCPRO2.  Two ways of building the result are visible:
   - ENCODE_CHAR yields a code point whose low 8 bits (`code_point & 255')
     are consed onto the result once per dimension of the charset, after
     which the charset name is consed on the front;
   - BREAKUP_CHAR yields two position codes, and the result is built with
     list3 for a charset of dimension 2 or list2 otherwise.
   In both cases the first list element is the charset NAME.
   NOTE(review): the two variants are presumably selected by a preprocessor
   conditional; that conditional, the loop's update of `code_point' and
   `dimension', UNGCPRO and the return are not visible in this listing. */
2144 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2145 Return list of charset and one or two position-codes of CHARACTER.
2149 /* This function can GC */
2150 struct gcpro gcpro1, gcpro2;
2151 Lisp_Object charset = Qnil;
2152 Lisp_Object rc = Qnil;
2160 GCPRO2 (charset, rc);
2161 CHECK_CHAR_COERCE_INT (character);
2164 code_point = ENCODE_CHAR (XCHAR (character), charset);
2165 dimension = XCHARSET_DIMENSION (charset);
2166 while (dimension > 0)
2168 rc = Fcons (make_int (code_point & 255), rc);
2172 rc = Fcons (XCHARSET_NAME (charset), rc);
2174 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2176 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2178 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2182 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2191 #ifdef ENABLE_COMPOSITE_CHARS
2192 /************************************************************************/
2193 /* composite character functions */
2194 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string that is looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below signals "No more composite chars available" once
   composite_char_row_next has reached 128; otherwise it builds a character
   in Vcharset_composite from the next free row/column pair, records the
   mapping in both directions (char -> string and string -> char) and
   advances the column counter, wrapping to column 32 of the next row when
   the column reaches 128.
   NOTE(review): the test that skips allocation when the lookup succeeds,
   the declaration of `emch' and the return statement are not visible in
   this listing. */
2197 lookup_composite_char (Bufbyte *str, int len)
2199 Lisp_Object lispstr = make_string (str, len);
2200 Lisp_Object ch = Fgethash (lispstr,
2201 Vcomposite_char_string2char_hash_table,
2207 if (composite_char_row_next >= 128)
2208 signal_simple_error ("No more composite chars available", lispstr);
2209 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2210 composite_char_col_next);
2211 Fputhash (make_char (emch), lispstr,
2212 Vcomposite_char_char2string_hash_table);
2213 Fputhash (lispstr, make_char (emch),
2214 Vcomposite_char_string2char_hash_table);
2215 composite_char_col_next++;
2216 if (composite_char_col_next >= 128)
2218 composite_char_col_next = 32;
2219 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert documents the
   expectation that CH was registered before (lookup_composite_char stores
   entries in this table).
   NOTE(review): the return statement is not visible in this listing;
   presumably `str' is returned -- confirm. */
2228 composite_char_string (Emchar ch)
2230 Lisp_Object str = Fgethash (make_char (ch),
2231 Vcomposite_char_char2string_hash_table,
2233 assert (!UNBOUNDP (str));
/* Lisp primitive `make-composite-char'.
   STRING must be a string; its data and length are handed to
   lookup_composite_char and the resulting character is returned.
   NOTE(review): this primitive is declared with xxDEFUN rather than DEFUN;
   what that macro expands to is not visible here. */
2237 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2238 Convert a string into a single composite character.
2239 The character is the result of overstriking all the characters in
2244 CHECK_STRING (string);
2245 return make_char (lookup_composite_char (XSTRING_DATA (string),
2246 XSTRING_LENGTH (string)));
/* Lisp primitive `composite-char-string'.
   Signals "Must be composite char" unless the character's leading byte is
   LEADING_BYTE_COMPOSITE; otherwise returns the string obtained from
   composite_char_string.
   NOTE(review): the argument check and the extraction of `emch' from CH
   are not visible in this listing, and the primitive is declared with
   xxDEFUN rather than DEFUN. */
2249 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2250 Return a string of the characters comprising a composite character.
2258 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2259 signal_simple_error ("Must be composite char", ch);
2260 return composite_char_string (emch);
2262 #endif /* ENABLE_COMPOSITE_CHARS */
2265 /************************************************************************/
2266 /* initialization */
2267 /************************************************************************/
/* Symbol-level initialization for this file: sets up the lrecord
   implementation for charsets, registers the Lisp primitives with DEFSUBR
   and interns the symbols used for property names, direction values and
   charset names with defsymbol.
   NOTE(review): some registrations are presumably guarded by preprocessor
   conditionals whose directives are not all visible in this listing (only
   the opening `#ifdef ENABLE_COMPOSITE_CHARS' is). */
2270 syms_of_mule_charset (void)
2272 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives working on charsets. */
2274 DEFSUBR (Fcharsetp);
2275 DEFSUBR (Ffind_charset);
2276 DEFSUBR (Fget_charset);
2277 DEFSUBR (Fcharset_list);
2278 DEFSUBR (Fcharset_name);
2279 DEFSUBR (Fmake_charset);
2280 DEFSUBR (Fmake_reverse_direction_charset);
2281 /* DEFSUBR (Freverse_direction_charset); */
2282 DEFSUBR (Fdefine_charset_alias);
2283 DEFSUBR (Fcharset_from_attributes);
2284 DEFSUBR (Fcharset_short_name);
2285 DEFSUBR (Fcharset_long_name);
2286 DEFSUBR (Fcharset_description);
2287 DEFSUBR (Fcharset_dimension);
2288 DEFSUBR (Fcharset_property);
2289 DEFSUBR (Fcharset_id);
2290 DEFSUBR (Fset_charset_ccl_program);
2291 DEFSUBR (Fset_charset_registry);
2293 DEFSUBR (Fcharset_mapping_table);
2294 DEFSUBR (Fset_charset_mapping_table);
/* Lisp primitives working on characters. */
2298 DEFSUBR (Fdecode_char);
2299 DEFSUBR (Fdecode_builtin_char);
2301 DEFSUBR (Fmake_char);
2302 DEFSUBR (Fchar_charset);
2303 DEFSUBR (Fchar_octet);
2304 DEFSUBR (Fsplit_char);
2306 #ifdef ENABLE_COMPOSITE_CHARS
2307 DEFSUBR (Fmake_composite_char);
2308 DEFSUBR (Fcomposite_char_string);
/* Predicate, property-name and direction symbols. */
2311 defsymbol (&Qcharsetp, "charsetp");
2312 defsymbol (&Qregistry, "registry");
2313 defsymbol (&Qfinal, "final");
2314 defsymbol (&Qgraphic, "graphic");
2315 defsymbol (&Qdirection, "direction");
2316 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2317 defsymbol (&Qshort_name, "short-name");
2318 defsymbol (&Qlong_name, "long-name");
2320 defsymbol (&Ql2r, "l2r");
2321 defsymbol (&Qr2l, "r2l");
2323 /* Charsets, compatible with FSF 20.3
2324 Naming convention is Script-Charset[-Edition] */
2325 defsymbol (&Qascii, "ascii");
2326 defsymbol (&Qcontrol_1, "control-1");
2327 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2328 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2329 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2330 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2331 defsymbol (&Qthai_tis620, "thai-tis620");
2332 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2333 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2334 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2335 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2336 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2337 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2338 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2339 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2340 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2341 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2342 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2343 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2344 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2345 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2346 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2347 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* Names of further charsets (UCS variants, VISCII, ideograph and Mojikyo
   sets, Big5, ...). */
2349 defsymbol (&Qucs, "ucs");
2350 defsymbol (&Qucs_bmp, "ucs-bmp");
2351 defsymbol (&Qucs_cns, "ucs-cns");
2352 defsymbol (&Qucs_jis, "ucs-jis");
2353 defsymbol (&Qucs_big5, "ucs-big5");
2354 defsymbol (&Qlatin_viscii, "latin-viscii");
2355 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2356 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2357 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2358 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2359 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2360 defsymbol (&Qideograph_gt, "ideograph-gt");
2361 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2362 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2363 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2364 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2365 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2366 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2367 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2368 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2369 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2370 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2371 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2372 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2373 defsymbol (&Qchinese_big5, "chinese-big5");
2374 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2375 defsymbol (&Qjapanese_jef_china3, "japanese-jef-china3");
2376 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2377 defsymbol (&Qmojikyo, "mojikyo");
2378 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2379 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2380 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2381 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2382 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2383 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2384 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2385 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2386 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2387 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2388 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2389 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2390 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2391 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2392 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2393 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2394 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2395 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2396 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2397 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2398 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2399 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2400 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2402 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2403 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2405 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.
   Allocates the zero-filled `chlook' lookup structure and registers it
   with the dumper, fills the leading-byte and attribute lookup tables with
   Qnil, seeds the counters used to hand out private leading bytes, and
   defines the Lisp variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): the attribute table is cleared twice below, once indexed
   with two subscripts and once with three, and both a single
   next_allocated_leading_byte counter and separate 1-byte/2-byte counters
   are initialized; these are presumably alternative preprocessor branches
   whose directives are not visible in this listing -- confirm. */
2409 vars_of_mule_charset (void)
2416 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2417 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2419 /* Table of charsets indexed by leading byte. */
2420 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2421 chlook->charset_by_leading_byte[i] = Qnil;
2424 /* Table of charsets indexed by type/final-byte. */
2425 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2426 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2427 chlook->charset_by_attributes[i][j] = Qnil;
2429 /* Table of charsets indexed by type/final-byte/direction. */
2430 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2431 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2432 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2433 chlook->charset_by_attributes[i][j][k] = Qnil;
2437 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2439 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2440 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2444 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2445 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2446 Leading-code of private TYPE9N charset of column-width 1.
2448 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2452 Vdefault_coded_charset_priority_list = Qnil;
2453 DEFVAR_LISP ("default-coded-charset-priority-list",
2454 &Vdefault_coded_charset_priority_list /*
2455 Default order of preferred coded-character-sets.
2461 complex_vars_of_mule_charset (void)
2463 staticpro (&Vcharset_hash_table);
2464 Vcharset_hash_table =
2465 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2467 /* Predefined character sets. We store them into variables for
2471 staticpro (&Vcharset_ucs);
2473 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2474 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2475 build_string ("UCS"),
2476 build_string ("UCS"),
2477 build_string ("ISO/IEC 10646"),
2479 Qnil, 0, 0xFFFFFFF, 0, 0);
2480 staticpro (&Vcharset_ucs_bmp);
2482 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2483 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2484 build_string ("BMP"),
2485 build_string ("BMP"),
2486 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2487 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2488 Qnil, 0, 0xFFFF, 0, 0);
2489 staticpro (&Vcharset_ucs_cns);
2491 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2492 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2493 build_string ("UCS for CNS"),
2494 build_string ("UCS for CNS 11643"),
2495 build_string ("ISO/IEC 10646 for CNS 11643"),
2498 staticpro (&Vcharset_ucs_jis);
2500 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2501 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2502 build_string ("UCS for JIS"),
2503 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2504 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2507 staticpro (&Vcharset_ucs_big5);
2509 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2510 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2511 build_string ("UCS for Big5"),
2512 build_string ("UCS for Big5"),
2513 build_string ("ISO/IEC 10646 for Big5"),
2517 # define MIN_CHAR_THAI 0
2518 # define MAX_CHAR_THAI 0
2519 /* # define MIN_CHAR_HEBREW 0 */
2520 /* # define MAX_CHAR_HEBREW 0 */
2521 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2522 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2524 staticpro (&Vcharset_ascii);
2526 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2527 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2528 build_string ("ASCII"),
2529 build_string ("ASCII)"),
2530 build_string ("ASCII (ISO646 IRV)"),
2531 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2532 Qnil, 0, 0x7F, 0, 0);
2533 staticpro (&Vcharset_control_1);
2534 Vcharset_control_1 =
2535 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2536 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2537 build_string ("C1"),
2538 build_string ("Control characters"),
2539 build_string ("Control characters 128-191"),
2541 Qnil, 0x80, 0x9F, 0, 0);
2542 staticpro (&Vcharset_latin_iso8859_1);
2543 Vcharset_latin_iso8859_1 =
2544 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2545 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2546 build_string ("Latin-1"),
2547 build_string ("ISO8859-1 (Latin-1)"),
2548 build_string ("ISO8859-1 (Latin-1)"),
2549 build_string ("iso8859-1"),
2550 Qnil, 0xA0, 0xFF, 0, 32);
2551 staticpro (&Vcharset_latin_iso8859_2);
2552 Vcharset_latin_iso8859_2 =
2553 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2554 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2555 build_string ("Latin-2"),
2556 build_string ("ISO8859-2 (Latin-2)"),
2557 build_string ("ISO8859-2 (Latin-2)"),
2558 build_string ("iso8859-2"),
2560 staticpro (&Vcharset_latin_iso8859_3);
2561 Vcharset_latin_iso8859_3 =
2562 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2563 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2564 build_string ("Latin-3"),
2565 build_string ("ISO8859-3 (Latin-3)"),
2566 build_string ("ISO8859-3 (Latin-3)"),
2567 build_string ("iso8859-3"),
2569 staticpro (&Vcharset_latin_iso8859_4);
2570 Vcharset_latin_iso8859_4 =
2571 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2572 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2573 build_string ("Latin-4"),
2574 build_string ("ISO8859-4 (Latin-4)"),
2575 build_string ("ISO8859-4 (Latin-4)"),
2576 build_string ("iso8859-4"),
2578 staticpro (&Vcharset_thai_tis620);
2579 Vcharset_thai_tis620 =
2580 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2581 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2582 build_string ("TIS620"),
2583 build_string ("TIS620 (Thai)"),
2584 build_string ("TIS620.2529 (Thai)"),
2585 build_string ("tis620"),
2586 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2587 staticpro (&Vcharset_greek_iso8859_7);
2588 Vcharset_greek_iso8859_7 =
2589 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2590 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2591 build_string ("ISO8859-7"),
2592 build_string ("ISO8859-7 (Greek)"),
2593 build_string ("ISO8859-7 (Greek)"),
2594 build_string ("iso8859-7"),
2596 staticpro (&Vcharset_arabic_iso8859_6);
2597 Vcharset_arabic_iso8859_6 =
2598 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2599 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2600 build_string ("ISO8859-6"),
2601 build_string ("ISO8859-6 (Arabic)"),
2602 build_string ("ISO8859-6 (Arabic)"),
2603 build_string ("iso8859-6"),
2605 staticpro (&Vcharset_hebrew_iso8859_8);
2606 Vcharset_hebrew_iso8859_8 =
2607 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2608 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2609 build_string ("ISO8859-8"),
2610 build_string ("ISO8859-8 (Hebrew)"),
2611 build_string ("ISO8859-8 (Hebrew)"),
2612 build_string ("iso8859-8"),
2614 0 /* MIN_CHAR_HEBREW */,
2615 0 /* MAX_CHAR_HEBREW */, 0, 32);
2616 staticpro (&Vcharset_katakana_jisx0201);
2617 Vcharset_katakana_jisx0201 =
2618 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2619 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2620 build_string ("JISX0201 Kana"),
2621 build_string ("JISX0201.1976 (Japanese Kana)"),
2622 build_string ("JISX0201.1976 Japanese Kana"),
2623 build_string ("jisx0201\\.1976"),
2625 staticpro (&Vcharset_latin_jisx0201);
2626 Vcharset_latin_jisx0201 =
2627 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2628 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2629 build_string ("JISX0201 Roman"),
2630 build_string ("JISX0201.1976 (Japanese Roman)"),
2631 build_string ("JISX0201.1976 Japanese Roman"),
2632 build_string ("jisx0201\\.1976"),
2634 staticpro (&Vcharset_cyrillic_iso8859_5);
2635 Vcharset_cyrillic_iso8859_5 =
2636 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2637 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2638 build_string ("ISO8859-5"),
2639 build_string ("ISO8859-5 (Cyrillic)"),
2640 build_string ("ISO8859-5 (Cyrillic)"),
2641 build_string ("iso8859-5"),
2643 staticpro (&Vcharset_latin_iso8859_9);
2644 Vcharset_latin_iso8859_9 =
2645 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2646 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2647 build_string ("Latin-5"),
2648 build_string ("ISO8859-9 (Latin-5)"),
2649 build_string ("ISO8859-9 (Latin-5)"),
2650 build_string ("iso8859-9"),
2652 staticpro (&Vcharset_japanese_jisx0208_1978);
2653 Vcharset_japanese_jisx0208_1978 =
2654 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2655 Qjapanese_jisx0208_1978, 94, 2,
2656 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2657 build_string ("JIS X0208:1978"),
2658 build_string ("JIS X0208:1978 (Japanese)"),
2660 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2661 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2663 staticpro (&Vcharset_chinese_gb2312);
2664 Vcharset_chinese_gb2312 =
2665 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2666 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2667 build_string ("GB2312"),
2668 build_string ("GB2312)"),
2669 build_string ("GB2312 Chinese simplified"),
2670 build_string ("gb2312"),
2672 staticpro (&Vcharset_chinese_gb12345);
2673 Vcharset_chinese_gb12345 =
2674 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2675 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2676 build_string ("G1"),
2677 build_string ("GB 12345)"),
2678 build_string ("GB 12345-1990"),
2679 build_string ("GB12345\\(\\.1990\\)?-0"),
2681 staticpro (&Vcharset_japanese_jisx0208);
2682 Vcharset_japanese_jisx0208 =
2683 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2684 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2685 build_string ("JISX0208"),
2686 build_string ("JIS X0208:1983 (Japanese)"),
2687 build_string ("JIS X0208:1983 Japanese Kanji"),
2688 build_string ("jisx0208\\.1983"),
2691 staticpro (&Vcharset_japanese_jisx0208_1990);
2692 Vcharset_japanese_jisx0208_1990 =
2693 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2694 Qjapanese_jisx0208_1990, 94, 2,
2695 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2696 build_string ("JISX0208-1990"),
2697 build_string ("JIS X0208:1990 (Japanese)"),
2698 build_string ("JIS X0208:1990 Japanese Kanji"),
2699 build_string ("jisx0208\\.1990"),
2701 MIN_CHAR_JIS_X0208_1990,
2702 MAX_CHAR_JIS_X0208_1990, 0, 33);
2704 staticpro (&Vcharset_korean_ksc5601);
2705 Vcharset_korean_ksc5601 =
2706 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2707 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2708 build_string ("KSC5601"),
2709 build_string ("KSC5601 (Korean"),
2710 build_string ("KSC5601 Korean Hangul and Hanja"),
2711 build_string ("ksc5601"),
2713 staticpro (&Vcharset_japanese_jisx0212);
2714 Vcharset_japanese_jisx0212 =
2715 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2716 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2717 build_string ("JISX0212"),
2718 build_string ("JISX0212 (Japanese)"),
2719 build_string ("JISX0212 Japanese Supplement"),
2720 build_string ("jisx0212"),
/* CHINESE_CNS_PLANE_RE(n): expands to one string literal, formed by
   adjacent-literal concatenation of a fixed pattern beginning with
   "cns11643", the caller-supplied N, and a trailing "$".
   N must itself be a string literal (e.g. "1"), because it is placed
   between two other literals and relies on the compiler joining them.
   The result is passed to build_string () by the CNS 11643 plane 1 and
   plane 2 charset definitions that follow.
   NOTE(review): presumably a font-registry regexp, like the strings the
   other charsets pass in the same argument slot -- confirm against the
   make_charset () parameter list. */
2723 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2724 staticpro (&Vcharset_chinese_cns11643_1);
2725 Vcharset_chinese_cns11643_1 =
2726 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2727 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("CNS11643-1"),
2729 build_string ("CNS11643-1 (Chinese traditional)"),
2731 ("CNS 11643 Plane 1 Chinese traditional"),
2732 build_string (CHINESE_CNS_PLANE_RE("1")),
2734 staticpro (&Vcharset_chinese_cns11643_2);
2735 Vcharset_chinese_cns11643_2 =
2736 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2737 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2738 build_string ("CNS11643-2"),
2739 build_string ("CNS11643-2 (Chinese traditional)"),
2741 ("CNS 11643 Plane 2 Chinese traditional"),
2742 build_string (CHINESE_CNS_PLANE_RE("2")),
2745 staticpro (&Vcharset_latin_tcvn5712);
2746 Vcharset_latin_tcvn5712 =
2747 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2748 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2749 build_string ("TCVN 5712"),
2750 build_string ("TCVN 5712 (VSCII-2)"),
2751 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2752 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2754 staticpro (&Vcharset_latin_viscii_lower);
2755 Vcharset_latin_viscii_lower =
2756 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2757 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2758 build_string ("VISCII lower"),
2759 build_string ("VISCII lower (Vietnamese)"),
2760 build_string ("VISCII lower (Vietnamese)"),
2761 build_string ("MULEVISCII-LOWER"),
2763 staticpro (&Vcharset_latin_viscii_upper);
2764 Vcharset_latin_viscii_upper =
2765 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2766 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2767 build_string ("VISCII upper"),
2768 build_string ("VISCII upper (Vietnamese)"),
2769 build_string ("VISCII upper (Vietnamese)"),
2770 build_string ("MULEVISCII-UPPER"),
2772 staticpro (&Vcharset_latin_viscii);
2773 Vcharset_latin_viscii =
2774 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2775 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2776 build_string ("VISCII"),
2777 build_string ("VISCII 1.1 (Vietnamese)"),
2778 build_string ("VISCII 1.1 (Vietnamese)"),
2779 build_string ("VISCII1\\.1"),
2781 staticpro (&Vcharset_chinese_big5);
2782 Vcharset_chinese_big5 =
2783 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2784 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2785 build_string ("Big5"),
2786 build_string ("Big5"),
2787 build_string ("Big5 Chinese traditional"),
2788 build_string ("big5"),
2790 staticpro (&Vcharset_chinese_big5_cdp);
2791 Vcharset_chinese_big5_cdp =
2792 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2793 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2794 build_string ("Big5-CDP"),
2795 build_string ("Big5 + CDP extension"),
2796 build_string ("Big5 with CDP extension"),
2797 build_string ("big5\\.cdp-0"),
2799 staticpro (&Vcharset_japanese_jef_china3);
2800 Vcharset_japanese_jef_china3 =
2801 make_charset (LEADING_BYTE_JEF_CHINA3, Qjapanese_jef_china3, 256, 2,
2802 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2803 build_string ("JC3"),
2804 build_string ("JEF + CHINA3"),
2805 build_string ("JEF + CHINA3 private characters"),
2806 build_string ("china3jef-0"),
2807 Qnil, MIN_CHAR_JEF_CHINA3, MAX_CHAR_JEF_CHINA3, 0, 0);
2808 staticpro (&Vcharset_ideograph_cbeta);
2809 Vcharset_ideograph_cbeta =
2810 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2811 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("CB"),
2813 build_string ("CBETA"),
2814 build_string ("CBETA private characters"),
2815 build_string ("cbeta-0"),
2816 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2817 staticpro (&Vcharset_ideograph_gt);
2818 Vcharset_ideograph_gt =
2819 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2820 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2821 build_string ("GT"),
2822 build_string ("GT"),
2823 build_string ("GT"),
2825 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2826 #define DEF_GT_PJ(n) \
2827 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2828 Vcharset_ideograph_gt_pj_##n = \
2829 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2830 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2831 build_string ("GT-PJ-"#n), \
2832 build_string ("GT (pseudo JIS encoding) part "#n), \
2833 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2835 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2849 staticpro (&Vcharset_ideograph_daikanwa);
2850 Vcharset_ideograph_daikanwa =
2851 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2852 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2853 build_string ("Daikanwa"),
2854 build_string ("Morohashi's Daikanwa"),
2855 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2856 build_string ("Daikanwa"),
2857 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2858 staticpro (&Vcharset_mojikyo);
2860 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2861 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2862 build_string ("Mojikyo"),
2863 build_string ("Mojikyo"),
2864 build_string ("Konjaku-Mojikyo"),
2866 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2867 staticpro (&Vcharset_mojikyo_2022_1);
2868 Vcharset_mojikyo_2022_1 =
2869 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2870 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2871 build_string ("Mojikyo-2022-1"),
2872 build_string ("Mojikyo ISO-2022 Part 1"),
2873 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2877 #define DEF_MOJIKYO_PJ(n) \
2878 staticpro (&Vcharset_mojikyo_pj_##n); \
2879 Vcharset_mojikyo_pj_##n = \
2880 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2881 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2882 build_string ("Mojikyo-PJ-"#n), \
2883 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2885 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2887 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2899 DEF_MOJIKYO_PJ (10);
2900 DEF_MOJIKYO_PJ (11);
2901 DEF_MOJIKYO_PJ (12);
2902 DEF_MOJIKYO_PJ (13);
2903 DEF_MOJIKYO_PJ (14);
2904 DEF_MOJIKYO_PJ (15);
2905 DEF_MOJIKYO_PJ (16);
2906 DEF_MOJIKYO_PJ (17);
2907 DEF_MOJIKYO_PJ (18);
2908 DEF_MOJIKYO_PJ (19);
2909 DEF_MOJIKYO_PJ (20);
2910 DEF_MOJIKYO_PJ (21);
2912 staticpro (&Vcharset_ethiopic_ucs);
2913 Vcharset_ethiopic_ucs =
2914 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2915 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2916 build_string ("Ethiopic (UCS)"),
2917 build_string ("Ethiopic (UCS)"),
2918 build_string ("Ethiopic of UCS"),
2919 build_string ("Ethiopic-Unicode"),
2920 Qnil, 0x1200, 0x137F, 0x1200, 0);
2922 staticpro (&Vcharset_chinese_big5_1);
2923 Vcharset_chinese_big5_1 =
2924 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2925 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2926 build_string ("Big5"),
2927 build_string ("Big5 (Level-1)"),
2929 ("Big5 Level-1 Chinese traditional"),
2930 build_string ("big5"),
2932 staticpro (&Vcharset_chinese_big5_2);
2933 Vcharset_chinese_big5_2 =
2934 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2935 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2936 build_string ("Big5"),
2937 build_string ("Big5 (Level-2)"),
2939 ("Big5 Level-2 Chinese traditional"),
2940 build_string ("big5"),
2943 #ifdef ENABLE_COMPOSITE_CHARS
2944 /* #### For simplicity, we put composite chars into a 96x96 charset.
2945 This is going to lead to problems because you can run out of
2946 room, esp. as we don't yet recycle numbers. */
2947 staticpro (&Vcharset_composite);
2948 Vcharset_composite =
2949 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2950 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2951 build_string ("Composite"),
2952 build_string ("Composite characters"),
2953 build_string ("Composite characters"),
2956 /* #### not dumped properly */
2957 composite_char_row_next = 32;
2958 composite_char_col_next = 32;
2960 Vcomposite_char_string2char_hash_table =
2961 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2962 Vcomposite_char_char2string_hash_table =
2963 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2964 staticpro (&Vcomposite_char_string2char_hash_table);
2965 staticpro (&Vcomposite_char_char2string_hash_table);
2966 #endif /* ENABLE_COMPOSITE_CHARS */