1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
/* Each Vcharset_* variable below is a file-scope Lisp_Object with external
   linkage, one per pre-defined charset.  None of them is assigned in this
   part of the file.
   NOTE(review): presumably each is set to its charset object by
   initialization code elsewhere -- confirm.  Vcharset_composite exists only
   when ENABLE_COMPOSITE_CHARS is defined.  */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_ideograph_gt;
79 Lisp_Object Vcharset_ideograph_gt_pj_1;
80 Lisp_Object Vcharset_ideograph_gt_pj_2;
81 Lisp_Object Vcharset_ideograph_gt_pj_3;
82 Lisp_Object Vcharset_ideograph_gt_pj_4;
83 Lisp_Object Vcharset_ideograph_gt_pj_5;
84 Lisp_Object Vcharset_ideograph_gt_pj_6;
85 Lisp_Object Vcharset_ideograph_gt_pj_7;
86 Lisp_Object Vcharset_ideograph_gt_pj_8;
87 Lisp_Object Vcharset_ideograph_gt_pj_9;
88 Lisp_Object Vcharset_ideograph_gt_pj_10;
89 Lisp_Object Vcharset_ideograph_gt_pj_11;
90 Lisp_Object Vcharset_ideograph_daikanwa;
91 Lisp_Object Vcharset_mojikyo;
92 Lisp_Object Vcharset_mojikyo_2022_1;
93 Lisp_Object Vcharset_mojikyo_pj_1;
94 Lisp_Object Vcharset_mojikyo_pj_2;
95 Lisp_Object Vcharset_mojikyo_pj_3;
96 Lisp_Object Vcharset_mojikyo_pj_4;
97 Lisp_Object Vcharset_mojikyo_pj_5;
98 Lisp_Object Vcharset_mojikyo_pj_6;
99 Lisp_Object Vcharset_mojikyo_pj_7;
100 Lisp_Object Vcharset_mojikyo_pj_8;
101 Lisp_Object Vcharset_mojikyo_pj_9;
102 Lisp_Object Vcharset_mojikyo_pj_10;
103 Lisp_Object Vcharset_mojikyo_pj_11;
104 Lisp_Object Vcharset_mojikyo_pj_12;
105 Lisp_Object Vcharset_mojikyo_pj_13;
106 Lisp_Object Vcharset_mojikyo_pj_14;
107 Lisp_Object Vcharset_mojikyo_pj_15;
108 Lisp_Object Vcharset_mojikyo_pj_16;
109 Lisp_Object Vcharset_mojikyo_pj_17;
110 Lisp_Object Vcharset_mojikyo_pj_18;
111 Lisp_Object Vcharset_mojikyo_pj_19;
112 Lisp_Object Vcharset_mojikyo_pj_20;
113 Lisp_Object Vcharset_mojikyo_pj_21;
114 Lisp_Object Vcharset_ethiopic_ucs;
116 Lisp_Object Vcharset_chinese_big5_1;
117 Lisp_Object Vcharset_chinese_big5_2;
119 #ifdef ENABLE_COMPOSITE_CHARS
120 Lisp_Object Vcharset_composite;
122 /* Hash tables for composite chars. One maps strings representing
123 composed chars to their equivalent chars; the other goes the
124 other way. */
/* Two global hash tables for composite characters, followed by two
   file-static int counters.  Vcomposite_char_char2string_hash_table is
   looked up with make_int (ch) as key in non_ascii_valid_char_p.
   NOTE(review): the counters' names suggest they hold the next free
   row/column for a newly created composite character; their users are not
   visible in this excerpt -- confirm.  */
125 Lisp_Object Vcomposite_char_char2string_hash_table;
126 Lisp_Object Vcomposite_char_string2char_hash_table;
128 static int composite_char_row_next;
129 static int composite_char_col_next;
131 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points to the charset lookup tables.  Elsewhere in this file its
   charset_by_leading_byte and charset_by_attributes tables and its
   next_allocated_* counters are read and written through this pointer.
   The two description objects below describe struct charset_lookup (its
   size and its Lisp_Object array member).
   NOTE(review): presumably they are consumed by the dumper/GC description
   machinery -- confirm.  The initializers look truncated in this excerpt
   (entries and closing braces are not present).  */
133 struct charset_lookup *chlook;
135 static const struct lrecord_description charset_lookup_description_1[] = {
136 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
145 static const struct struct_description charset_lookup_description = {
146 sizeof (struct charset_lookup),
147 charset_lookup_description_1
151 /* Table of number of bytes in the string representation of a character
152 indexed by the first byte of that representation.
154 rep_bytes_by_first_byte(c) is more efficient than the equivalent
155 canonical computation:
157 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The array is declared with 0xA0 entries, so it can only be indexed with
   first bytes in the range 0x00 - 0x9f.  */
159 const Bytecount rep_bytes_by_first_byte[0xA0] =
160 { /* 0x00 - 0x7f are for straight ASCII */
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the next two rows both look like the 0x80 - 0x8f row and
   differ only in their last entry (3 vs. 2).  Presumably they are
   alternatives selected by a preprocessor conditional whose lines are not
   present here; taken literally the initializer would hold 176 values,
   more than the declared 0xA0 -- confirm against the full file.  */
171 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
175 /* 0x90 - 0x9d are for Dimension-2 official charsets */
176 /* 0x9e is for Dimension-1 private charsets */
177 /* 0x9f is for Dimension-2 private charsets */
178 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the number of distinct byte values one position of a code point
   in CS may take.  This is CHARSET_CHARS (cs), except for a 94-character
   set whose byte offset is not 33, where it is 128 minus the byte offset.
   In this file the result is used as the length of decoding-table vectors
   (see put_char_ccs_code_point).  XCHARSET_BYTE_SIZE is the same operation
   on a Lisp_Object charset.  */
184 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
186 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
188 /* ad-hoc method for `ascii' */
189 if ((CHARSET_CHARS (cs) == 94) &&
190 (CHARSET_BYTE_OFFSET (cs) != 33))
191 return 128 - CHARSET_BYTE_OFFSET (cs);
193 return CHARSET_CHARS (cs);
196 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the shape of a (possibly nested) decoding-table vector V.
   V must not have more than CCS_LEN elements.  Each element is then
   inspected: elements that are neither nil nor a character are treated
   specially, and the function recurses with DIM - 1.
   NOTE(review): the return statements and the conditions guarding the
   recursion are on lines not present in this excerpt, so the exact return
   codes cannot be documented from here.  */
198 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
200 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
204 if (XVECTOR_LENGTH (v) > ccs_len)
207 for (i = 0; i < XVECTOR_LENGTH (v); i++)
209 Lisp_Object c = XVECTOR_DATA(v)[i];
211 if (!NILP (c) && !CHARP (c))
215 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear the entry for a code point in decoding table V by storing Qnil.
   At each level the index is byte number DIM of the code point (bits
   8*DIM and up, masked to 8 bits) minus BYTE_OFFSET.
   NOTE(review): the two header lines below are the prototype and the
   definition; the remaining parameter (code_point, used below) and the
   loop over DIM are on lines not present here.  No range check on `i' is
   visible before it indexes the vector -- confirm that callers guarantee
   the code point fits the table.  */
227 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
230 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
240 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
241 nv = XVECTOR_DATA(v)[i];
247 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER as the decoding of CODE_POINT in decoding table V.
   The index at each level is computed as in decoding_table_remove_char
   (byte number DIM of the code point minus BYTE_OFFSET).  Sub-vectors are
   created on demand with make_older_vector; they get the same length as V
   (ccs_len is taken from XVECTOR_LENGTH (v)) and are filled with Qnil.
   NOTE(review): the loop over DIM and the test that decides when a
   sub-vector must be created are on lines not present here; no range check
   on `i' is visible -- confirm.  */
251 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
252 int code_point, Lisp_Object character);
254 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
255 int code_point, Lisp_Object character)
259 int ccs_len = XVECTOR_LENGTH (v);
264 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
265 nv = XVECTOR_DATA(v)[i];
269 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
275 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in coded charset CCS.

   Visible steps:
   - The table work is done unless CCS is named `ucs' and VALUE equals the
     character itself.
   - VALUE may be a list of bytes (obsolete form) or an integer; anything
     else signals "Invalid value for coded-charset".  The list form is
     folded into one integer, 8 bits per byte, and VALUE is replaced by
     that integer.  For the integer form with graphic == 1 the code point
     is masked with 0x7F7F7F7F.
   - Any code point previously recorded for CHARACTER (found with
     Fget_char_attribute) is removed from the decoding table.
   - The decoding table is created on demand with XCHARSET_BYTE_SIZE (ccs)
     slots, and CHARACTER is stored at the new code point.
   - The encoding table is created on demand and VALUE is stored under
     CHARACTER.

   NOTE(review): in the first test XINT (value) is evaluated before VALUE
   has been checked with INTP; when CCS is `ucs' and VALUE is the obsolete
   list form this reads a non-integer as an integer -- confirm.  The extent
   of that `if' and the return value are on lines not present here.  */
279 put_char_ccs_code_point (Lisp_Object character,
280 Lisp_Object ccs, Lisp_Object value)
282 Lisp_Object encoding_table;
284 if (!EQ (XCHARSET_NAME (ccs), Qucs)
285 || (XCHAR (character) != XINT (value)))
287 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
288 int dim = XCHARSET_DIMENSION (ccs);
289 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
290 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
294 { /* obsolete representation: value must be a list of bytes */
295 Lisp_Object ret = Fcar (value);
299 signal_simple_error ("Invalid value for coded-charset", value);
300 code_point = XINT (ret);
301 if (XCHARSET_GRAPHIC (ccs) == 1)
309 signal_simple_error ("Invalid value for coded-charset",
313 signal_simple_error ("Invalid value for coded-charset",
316 if (XCHARSET_GRAPHIC (ccs) == 1)
318 code_point = (code_point << 8) | j;
321 value = make_int (code_point);
323 else if (INTP (value))
325 code_point = XINT (value);
326 if (XCHARSET_GRAPHIC (ccs) == 1)
328 code_point &= 0x7F7F7F7F;
329 value = make_int (code_point);
333 signal_simple_error ("Invalid value for coded-charset", value);
337 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
340 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
345 XCHARSET_DECODING_TABLE (ccs)
346 = v = make_older_vector (ccs_len, Qnil);
349 decoding_table_put_char (v, dim, byte_offset, code_point, character);
351 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
353 XCHARSET_ENCODING_TABLE (ccs)
354 = encoding_table = make_char_id_table (Qnil);
356 put_char_id_table (XCHAR (character), value, encoding_table);
/* Remove the mapping between CHARACTER and coded charset CCS.
   If CCS has a vector decoding table, the code point currently recorded
   for CHARACTER (looked up with Fget_char_attribute) is cleared from it.
   If CCS has a char-id encoding table, CHARACTER's entry is set to Qnil.
   NOTE(review): the test on `cpos' before the removal and the return
   value are on lines not present in this excerpt.  */
361 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
363 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
364 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
366 if (VECTORP (decoding_table))
368 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
372 decoding_table_remove_char (decoding_table,
373 XCHARSET_DIMENSION (ccs),
374 XCHARSET_BYTE_OFFSET (ccs),
378 if (CHAR_ID_TABLE_P (encoding_table))
380 put_char_id_table (XCHAR (character), Qnil, encoding_table);
/* File-scope symbols and tables used by the charset code.
   The Q* objects are Lisp symbols (property names such as registry, final,
   graphic, direction, and charset names).  Vcharset_hash_table maps a
   charset's name symbol to its charset object: make_charset stores into it
   with Fputhash and Ffind_charset reads it with Fgethash.
   NOTE(review): the four Q* lines ending in a comma are fragments of a
   longer declaration list whose other lines are not present here; the use
   of leading_code_private_11 is not visible in this excerpt.  */
388 int leading_code_private_11;
391 Lisp_Object Qcharsetp;
393 /* Qdoc_string, Qdimension, Qchars defined in general.c */
394 Lisp_Object Qregistry, Qfinal, Qgraphic;
395 Lisp_Object Qdirection;
396 Lisp_Object Qreverse_direction_charset;
397 Lisp_Object Qleading_byte;
398 Lisp_Object Qshort_name, Qlong_name;
414 Qjapanese_jisx0208_1978,
418 Qjapanese_jisx0208_1990,
433 Qvietnamese_viscii_lower,
434 Qvietnamese_viscii_upper,
479 Lisp_Object Ql2r, Qr2l;
481 Lisp_Object Vcharset_hash_table;
483 /* Composite characters are characters constructed by overstriking two
484 or more regular characters.
486 1) The old Mule implementation involves storing composite characters
487 in a buffer as a tag followed by all of the actual characters
488 used to make up the composite character. I think this is a bad
489 idea; it greatly complicates code that wants to handle strings
490 one character at a time because it has to deal with the possibility
491 of great big ungainly characters. It's much more reasonable to
492 simply store an index into a table of composite characters.
494 2) The current implementation only allows for 16,384 separate
495 composite characters over the lifetime of the XEmacs process.
496 This could become a potential problem if the user
497 edited lots of different files that use composite characters.
498 Due to FSF bogosity, increasing the number of allowable
499 composite characters under Mule would decrease the number
500 of possible faces that can exist. Mule already has shrunk
501 this to 2048, and further shrinkage would become uncomfortable.
502 No such problems exist in XEmacs.
504 Composite characters could be represented as 0x80 C1 C2 C3,
505 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
506 for slightly under 2^20 (one million) composite characters
507 over the XEmacs process lifetime, and you only need to
508 increase the size of a Mule character from 19 to 21 bits.
509 Or you could use 0x80 C1 C2 C3 C4, allowing for about
510 85 million (slightly over 2^26) composite characters. */
513 /************************************************************************/
514 /* Basic Emchar functions */
515 /************************************************************************/
517 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
518 string in STR. Returns the number of bytes stored.
519 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multi-byte representation of C at STR.

   Two encodings are visible below:
   - A UTF-8 style encoding of 2 to 6 bytes, chosen by the magnitude of C
     (limits 0x7ff, 0xffff, 0x1fffff, 0x3ffffff): a lead byte 0xc0 / 0xe0 /
     0xf0 / 0xf8 / 0xfc carrying the top bits, then continuation bytes
     0x80 | 6 bits each.
   - A leading-byte encoding: C is split with BREAKUP_CHAR into charset and
     position codes, and a private leading byte is preceded by its prefix
     byte.

   NOTE(review): presumably the two are alternatives selected by a
   preprocessor conditional whose lines are not present here; the first
   branch of the magnitude chain, the emission of the position bytes and
   the return statement are also missing from this excerpt.  */
523 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
538 else if ( c <= 0x7ff )
540 *p++ = (c >> 6) | 0xc0;
541 *p++ = (c & 0x3f) | 0x80;
543 else if ( c <= 0xffff )
545 *p++ = (c >> 12) | 0xe0;
546 *p++ = ((c >> 6) & 0x3f) | 0x80;
547 *p++ = (c & 0x3f) | 0x80;
549 else if ( c <= 0x1fffff )
551 *p++ = (c >> 18) | 0xf0;
552 *p++ = ((c >> 12) & 0x3f) | 0x80;
553 *p++ = ((c >> 6) & 0x3f) | 0x80;
554 *p++ = (c & 0x3f) | 0x80;
556 else if ( c <= 0x3ffffff )
558 *p++ = (c >> 24) | 0xf8;
559 *p++ = ((c >> 18) & 0x3f) | 0x80;
560 *p++ = ((c >> 12) & 0x3f) | 0x80;
561 *p++ = ((c >> 6) & 0x3f) | 0x80;
562 *p++ = (c & 0x3f) | 0x80;
566 *p++ = (c >> 30) | 0xfc;
567 *p++ = ((c >> 24) & 0x3f) | 0x80;
568 *p++ = ((c >> 18) & 0x3f) | 0x80;
569 *p++ = ((c >> 12) & 0x3f) | 0x80;
570 *p++ = ((c >> 6) & 0x3f) | 0x80;
571 *p++ = (c & 0x3f) | 0x80;
574 BREAKUP_CHAR (c, charset, c1, c2);
575 lb = CHAR_LEADING_BYTE (c);
576 if (LEADING_BYTE_PRIVATE_P (lb))
577 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
579 if (EQ (charset, Vcharset_control_1))
588 /* Return the first character from a Mule-encoded string in STR,
589 assuming it's non-ASCII. Do not call this directly.
590 Use the macro charptr_emchar() instead. */
/* Two decoders are visible below:
   - A lead-byte classification (>= 0xf8, 0xf0, 0xe0, 0xc0) followed by a
     loop that shifts the accumulator left by 6 and ors in the low 6 bits
     of each following byte, `len' times.
   - A leading-byte decoder: LEADING_BYTE_CONTROL_1 yields the next byte
     minus 0x20; otherwise the charset is found with
     CHARSET_BY_LEADING_BYTE and the character is built with MAKE_CHAR
     from one or two position bytes, depending on the charset dimension.
   NOTE(review): presumably these are alternatives selected by a
   preprocessor conditional not present in this excerpt; how `len' and the
   initial `ch' are set, and how prefix bytes are skipped, is on missing
   lines.  */
593 non_ascii_charptr_emchar (const Bufbyte *str)
606 else if ( b >= 0xf8 )
611 else if ( b >= 0xf0 )
616 else if ( b >= 0xe0 )
621 else if ( b >= 0xc0 )
631 for( ; len > 0; len-- )
634 ch = ( ch << 6 ) | ( b & 0x3f );
638 Bufbyte i0 = *str, i1, i2 = 0;
641 if (i0 == LEADING_BYTE_CONTROL_1)
642 return (Emchar) (*++str - 0x20);
644 if (LEADING_BYTE_PREFIX_P (i0))
649 charset = CHARSET_BY_LEADING_BYTE (i0);
650 if (XCHARSET_DIMENSION (charset) == 2)
653 return MAKE_CHAR (charset, i1, i2);
657 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
658 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3.  Two groups of
   checks are visible:
   - one driven by f2: f2 must lie in the official or private FIELD2
     range; the charset is then looked up by leading byte
     (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
   - one driven by f1: f1 must lie in the official or private FIELD1
     range, f2 and f3 must not be below 0x20, composite characters (when
     enabled) are checked against Vcomposite_char_char2string_hash_table,
     and the charset is looked up by leading byte using the official or
     private FIELD1 offset.
   In both groups the last visible return is
   `XCHARSET_CHARS (charset) == 96', after the tests on position values
   0x20 and 0x7F.
   NOTE(review): presumably the f2 group handles dimension-1 characters
   (f1 == 0) and the f1 group dimension-2 characters, and the `== 96' test
   is reached only for positions 0x20 / 0x7F -- the lines establishing
   this (branches, early returns) are not present here; confirm.  */
662 non_ascii_valid_char_p (Emchar ch)
666 /* Must have only lowest 19 bits set */
670 f1 = CHAR_FIELD1 (ch);
671 f2 = CHAR_FIELD2 (ch);
672 f3 = CHAR_FIELD3 (ch);
678 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
679 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
680 f2 > MAX_CHAR_FIELD2_PRIVATE)
685 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
686 f2 <= MAX_CHAR_FIELD2_PRIVATE))
690 NOTE: This takes advantage of the fact that
691 FIELD2_TO_OFFICIAL_LEADING_BYTE and
692 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
694 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
695 if (EQ (charset, Qnil))
697 return (XCHARSET_CHARS (charset) == 96);
703 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
704 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
705 f1 > MAX_CHAR_FIELD1_PRIVATE)
707 if (f2 < 0x20 || f3 < 0x20)
710 #ifdef ENABLE_COMPOSITE_CHARS
711 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
713 if (UNBOUNDP (Fgethash (make_int (ch),
714 Vcomposite_char_char2string_hash_table,
719 #endif /* ENABLE_COMPOSITE_CHARS */
721 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
722 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
725 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
727 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
730 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
732 if (EQ (charset, Qnil))
734 return (XCHARSET_CHARS (charset) == 96);
740 /************************************************************************/
741 /* Basic string functions */
742 /************************************************************************/
744 /* Copy the character pointed to by SRC into DST. Do not call this
745 directly. Use the macro charptr_copy_char() instead.
746 Return the number of bytes copied. */
/* The byte count comes from REP_BYTES_BY_FIRST_BYTE applied to the first
   byte of SRC; the loop then advances SRC and DST together that many
   times.  No destination size is passed in.
   NOTE(review): the caller presumably must provide room for a full
   character at DST -- confirm.  The loop body and the return statement are
   on lines not present in this excerpt.  */
749 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
751 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
753 for (i = bytes; i; i--, dst++, src++)
759 /************************************************************************/
760 /* streams of Emchars */
761 /************************************************************************/
763 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
764 The functions below are not meant to be called directly; use
765 the macros in insdel.h. */
/* Lstream_get_emchar_1: finish reading one character from STREAM whose
   first byte CH has already been read.  CH is stored as the first byte of
   a local buffer of MAX_EMCHAR_LEN bytes, REP_BYTES_BY_FIRST_BYTE (ch) - 1
   further bytes are read with Lstream_getc, and the buffer is decoded with
   charptr_emchar.
   A failed read (negative value from Lstream_getc) is only caught by
   bufpos_checking_assert; there is no other handling in the visible code.
   NOTE(review): if that assertion is compiled out, a negative value would
   be stored as a byte -- confirm.  */
768 Lstream_get_emchar_1 (Lstream *stream, int ch)
770 Bufbyte str[MAX_EMCHAR_LEN];
771 Bufbyte *strptr = str;
774 str[0] = (Bufbyte) ch;
776 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
778 int c = Lstream_getc (stream);
779 bufpos_checking_assert (c >= 0);
780 *++strptr = (Bufbyte) c;
782 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer with
   set_charptr_emchar and the resulting bytes are passed to Lstream_write,
   whose result is returned unchanged.  */
786 Lstream_fput_emchar (Lstream *stream, Emchar ch)
788 Bufbyte str[MAX_EMCHAR_LEN];
789 Bytecount len = set_charptr_emchar (str, ch);
790 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local buffer
   with set_charptr_emchar and the resulting bytes are handed to
   Lstream_unread.  No value from Lstream_unread is used here.  */
794 Lstream_funget_emchar (Lstream *stream, Emchar ch)
796 Bufbyte str[MAX_EMCHAR_LEN];
797 Bytecount len = set_charptr_emchar (str, ch);
798 Lstream_unread (stream, str, len);
802 /************************************************************************/
804 /************************************************************************/
/* GC mark method for charset objects: marks the Lisp_Object fields
   short_name, long_name, doc_string, registry, ccl_program and
   encoding_table of OBJ.
   NOTE(review): decoding_table is deliberately not marked (the call is
   commented out) although charset_description lists that field.
   Decoding tables are created with make_older_vector in this file;
   presumably such vectors need no marking -- confirm.  The handling of
   `name' and the return value are on lines not present here.  */
807 mark_charset (Lisp_Object obj)
809 Lisp_Charset *cs = XCHARSET (obj);
811 mark_object (cs->short_name);
812 mark_object (cs->long_name);
813 mark_object (cs->doc_string);
814 mark_object (cs->registry);
815 mark_object (cs->ccl_program);
817 mark_object (cs->encoding_table);
818 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Output has the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the middle part is formatted by the first sprintf below
   (dimension, direction as "l2r"/"r2l", columns, graphic, final byte).
   An error "printing unreadable object" is raised on one path.
   NOTE(review): presumably that path is taken when readable printing is
   requested; the condition is on a line not present here.  Both sprintf
   calls write into `buf', whose declaration and size are not visible, and
   neither call is bounded -- confirm the buffer is large enough for the
   longest formatted text.  ESCAPEFLAG is not used in the visible lines.  */
824 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
826 Lisp_Charset *cs = XCHARSET (obj);
830 error ("printing unreadable object #<charset %s 0x%x>",
831 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
834 write_c_string ("#<charset ", printcharfun);
835 print_internal (CHARSET_NAME (cs), printcharfun, 0);
836 write_c_string (" ", printcharfun);
837 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
838 write_c_string (" ", printcharfun);
839 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
840 write_c_string (" ", printcharfun);
841 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
842 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
844 CHARSET_DIMENSION (cs),
845 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
846 CHARSET_COLUMNS (cs),
847 CHARSET_GRAPHIC (cs),
849 write_c_string (buf, printcharfun);
850 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
851 sprintf (buf, " 0x%x>", cs->header.uid);
852 write_c_string (buf, printcharfun);
/* Description of the Lisp_Object fields of Lisp_Charset (name, doc_string,
   registry, short_name, long_name, reverse_direction_charset, ccl_program,
   decoding_table, encoding_table), followed by the lrecord implementation
   of the "charset" type, which uses mark_charset and print_charset as its
   mark and print methods.
   NOTE(review): the array terminator and the remaining arguments of
   DEFINE_LRECORD_IMPLEMENTATION are on lines not present here.  */
855 static const struct lrecord_description charset_description[] = {
856 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
857 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
858 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
859 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
860 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
861 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
862 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
864 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
865 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
870 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
871 mark_charset, print_charset, 0, 0, 0,
875 /* Make a new charset. */
876 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset record, copies the arguments into its fields,
   sets the CCL program and reverse-direction charset to Qnil, computes
   CHARSET_REP_BYTES, and registers the new object in three places:
   chlook->charset_by_attributes (indexed by an ISO 2022 type derived from
   DIMENSION and CHARS, and by FINAL), chlook->charset_by_leading_byte
   (indexed by ID - MIN_LEADING_BYTE), and Vcharset_hash_table under NAME.
   Each table slot is asserted to be empty (nil) before it is filled.

   NOTE(review):
   - In the visible lines the DECODING_TABLE argument is not stored;
     CHARSET_DECODING_TABLE (cs) is set to Qnil.  Confirm whether a line
     not present here uses it.
   - `reg' is assigned to the registry field, but its parameter
     declaration is on a line not present here.
   - Several assignments appear in alternative forms (REP_BYTES as
     dimension + 1 or + 2; charset_by_attributes with and without a
     direction index).  Presumably they are selected by preprocessor
     conditionals whose lines are missing -- confirm.  */
878 make_charset (Charset_ID id, Lisp_Object name,
879 unsigned short chars, unsigned char dimension,
880 unsigned char columns, unsigned char graphic,
881 Bufbyte final, unsigned char direction, Lisp_Object short_name,
882 Lisp_Object long_name, Lisp_Object doc,
884 Lisp_Object decoding_table,
885 Emchar ucs_min, Emchar ucs_max,
886 Emchar code_offset, unsigned char byte_offset)
889 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
893 XSETCHARSET (obj, cs);
895 CHARSET_ID (cs) = id;
896 CHARSET_NAME (cs) = name;
897 CHARSET_SHORT_NAME (cs) = short_name;
898 CHARSET_LONG_NAME (cs) = long_name;
899 CHARSET_CHARS (cs) = chars;
900 CHARSET_DIMENSION (cs) = dimension;
901 CHARSET_DIRECTION (cs) = direction;
902 CHARSET_COLUMNS (cs) = columns;
903 CHARSET_GRAPHIC (cs) = graphic;
904 CHARSET_FINAL (cs) = final;
905 CHARSET_DOC_STRING (cs) = doc;
906 CHARSET_REGISTRY (cs) = reg;
907 CHARSET_CCL_PROGRAM (cs) = Qnil;
908 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
910 CHARSET_DECODING_TABLE(cs) = Qnil;
911 CHARSET_ENCODING_TABLE(cs) = Qnil;
912 CHARSET_UCS_MIN(cs) = ucs_min;
913 CHARSET_UCS_MAX(cs) = ucs_max;
914 CHARSET_CODE_OFFSET(cs) = code_offset;
915 CHARSET_BYTE_OFFSET(cs) = byte_offset;
919 if (id == LEADING_BYTE_ASCII)
920 CHARSET_REP_BYTES (cs) = 1;
922 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
924 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
929 /* some charsets do not have final characters. This includes
930 ASCII, Control-1, Composite, and the two faux private
932 unsigned char iso2022_type
933 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
935 if (code_offset == 0)
937 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
938 chlook->charset_by_attributes[iso2022_type][final] = obj;
942 (chlook->charset_by_attributes[iso2022_type][final][direction]));
943 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
947 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
948 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
950 /* Some charsets are "faux" and don't have names or really exist at
951 all except in the leading-byte table. */
953 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next free leading byte for a new charset of the given
   DIMENSION.  Each allocation reads a counter in chlook and
   post-increments it; when the counter has passed its maximum
   (MAX_LEADING_BYTE_PRIVATE, or MAX_LEADING_BYTE_PRIVATE_1 / _2 for the
   per-dimension counters) no leading byte is allocated, and the error
   "No more character sets free for this dimension" is signalled with
   DIMENSION as datum.
   NOTE(review): the single-counter form and the per-dimension form are
   presumably alternatives selected by a preprocessor conditional not
   present here; the tests on DIMENSION and the return statement are also
   missing from this excerpt.  */
958 get_unallocated_leading_byte (int dimension)
963 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
966 lb = chlook->next_allocated_leading_byte++;
970 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
973 lb = chlook->next_allocated_1_byte_leading_byte++;
977 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
980 lb = chlook->next_allocated_2_byte_leading_byte++;
986 ("No more character sets free for this dimension",
987 make_int (dimension));
993 /* Number of Big5 characters which have the same code in 1st byte. */
/* BIG5_SAME_ROW evaluates to 157: (0xFF - 0xA1) = 94 plus
   (0x7F - 0x40) = 63.  */
995 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* decode_builtin_char: compute the character for CODE_POINT in CHARSET by
   arithmetic, without consulting a decoding table.

   - Vcharset_chinese_big5: the code point is split into c1 (high byte)
     and c2 (low byte).  When c1 is in 0xA1..0xFE and c2 is in 0x40..0x7E
     or 0xA1..0xFE, a linear index I is computed, CHARSET is replaced by
     Vcharset_chinese_big5_1 or Vcharset_chinese_big5_2 (with I rebased by
     BIG5_SAME_ROW * (0xC9 - 0xA1) for the latter), and CODE_POINT is
     rewritten as a 94x94 position (each byte I/94 + 33, I%94 + 33).
   - Charsets whose final byte is >= '0': the result is an offset into
     one of the blocks starting at MIN_CHAR_94x94 / MIN_CHAR_96x96 (and
     the dimension-1 counterparts), indexed by (final - '0') and by the
     position bytes with their high bit masked off.
   - Otherwise, when XCHARSET_UCS_MAX is non-zero: cid is the linearised
     code point minus CODE_OFFSET plus UCS_MIN, and is then compared
     against the [UCS_MIN, UCS_MAX] range.

   NOTE(review): the test choosing between big5_1 and big5_2, the
   dimension-1 base constants, and every failure return are on lines not
   present in this excerpt.  */
998 decode_builtin_char (Lisp_Object charset, int code_point)
1002 if (EQ (charset, Vcharset_chinese_big5))
1004 int c1 = code_point >> 8;
1005 int c2 = code_point & 0xFF;
1008 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1010 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1011 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1013 I = (c1 - 0xA1) * BIG5_SAME_ROW
1014 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1018 charset = Vcharset_chinese_big5_1;
1022 charset = Vcharset_chinese_big5_2;
1023 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1025 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1028 if ((final = XCHARSET_FINAL (charset)) >= '0')
1030 if (XCHARSET_DIMENSION (charset) == 1)
1032 switch (XCHARSET_CHARS (charset))
1036 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1039 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1047 switch (XCHARSET_CHARS (charset))
1050 return MIN_CHAR_94x94
1051 + (final - '0') * 94 * 94
1052 + (((code_point >> 8) & 0x7F) - 33) * 94
1053 + ((code_point & 0x7F) - 33);
1055 return MIN_CHAR_96x96
1056 + (final - '0') * 96 * 96
1057 + (((code_point >> 8) & 0x7F) - 32) * 96
1058 + ((code_point & 0x7F) - 32);
1065 else if (XCHARSET_UCS_MAX (charset))
1068 = (XCHARSET_DIMENSION (charset) == 1
1070 code_point - XCHARSET_BYTE_OFFSET (charset)
1072 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1073 * XCHARSET_CHARS (charset)
1074 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1075 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1076 if ((cid < XCHARSET_UCS_MIN (charset))
1077 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET by arithmetic, for
   charsets that cover a contiguous range of characters.

   - CH within [UCS_MIN, UCS_MAX] of CHARSET: d = ch - UCS_MIN +
     CODE_OFFSET.  For dimension 1 the result is d + BYTE_OFFSET; for
     dimensions 2, 3 and 4, d is written as digits in base
     XCHARSET_CHARS (charset), BYTE_OFFSET is added to each digit, and the
     digits are packed 8 bits apart with the most significant digit in the
     highest byte.  A charset with 256 chars is handled by its own branch.
   - CODE_OFFSET == 0: CH is compared against the block for this charset's
     final byte (MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94, MIN_CHAR_96x96
     plus (final - '0') times the block size); for the two-byte blocks the
     result is ((d / N) + base) << 8 | (d % N + base), with N/base being
     94/33 or 96/32.
   - Vcharset_mojikyo_2022_1 with CH strictly between MIN_CHAR_MOJIKYO and
     MIN_CHAR_MOJIKYO + 94 * 60 * 94: the index m = ch - MIN_CHAR_MOJIKYO
     - 1 is split into three bytes and packed as
     (byte1 << 16) | (byte2 << 8) | byte3.

   NOTE(review): the upper-bound halves of the block tests, an adjustment
   of byte2 between its computation and its use, the value returned for
   256-char sets, and the not-found return are on lines not present in
   this excerpt.  */
1086 range_charset_code_point (Lisp_Object charset, Emchar ch)
1090 if ((XCHARSET_UCS_MIN (charset) <= ch)
1091 && (ch <= XCHARSET_UCS_MAX (charset)))
1093 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1095 if (XCHARSET_CHARS (charset) == 256)
1097 else if (XCHARSET_DIMENSION (charset) == 1)
1098 return d + XCHARSET_BYTE_OFFSET (charset);
1099 else if (XCHARSET_DIMENSION (charset) == 2)
1101 ((d / XCHARSET_CHARS (charset)
1102 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1103 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1104 else if (XCHARSET_DIMENSION (charset) == 3)
1106 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1107 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1108 | ((d / XCHARSET_CHARS (charset)
1109 % XCHARSET_CHARS (charset)
1110 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1111 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1112 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1114 ((d / (XCHARSET_CHARS (charset)
1115 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1116 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1117 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1118 % XCHARSET_CHARS (charset)
1119 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1120 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1121 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1122 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1124 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1126 if (XCHARSET_DIMENSION (charset) == 1)
1128 if (XCHARSET_CHARS (charset) == 94)
1130 if (((d = ch - (MIN_CHAR_94
1131 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1135 else if (XCHARSET_CHARS (charset) == 96)
1137 if (((d = ch - (MIN_CHAR_96
1138 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1145 else if (XCHARSET_DIMENSION (charset) == 2)
1147 if (XCHARSET_CHARS (charset) == 94)
1149 if (((d = ch - (MIN_CHAR_94x94
1150 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1153 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1155 else if (XCHARSET_CHARS (charset) == 96)
1157 if (((d = ch - (MIN_CHAR_96x96
1158 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1161 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1167 if (EQ (charset, Vcharset_mojikyo_2022_1)
1168 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1170 int m = ch - MIN_CHAR_MOJIKYO - 1;
1171 int byte1 = m / (94 * 60) + 33;
1172 int byte2 = (m % (94 * 60)) / 94;
1173 int byte3 = m % 94 + 33;
1179 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Pick a built-in charset for character C, store it in *CHARSET, and
   return C's code point in that charset.  The ranges are tested in the
   order written below, so the first matching range wins.

   For the MIN_CHAR_94 / _96 / _94x94 / _96x96 blocks the charset is looked
   up with CHARSET_BY_ATTRIBUTES, using a final byte derived from C's block
   index ('0' + index) and left-to-right direction.  If no such charset
   exists (the lookup yields nil), *CHARSET falls back to Vcharset_ucs.
   Gaps between the named ranges also map to Vcharset_ucs.

   NOTE(review):
   - The half-width katakana branch returns list2 (...), a Lisp_Object,
     while every other visible return is an integer expression.
     Presumably that branch is disabled by a preprocessor conditional or
     comment on lines not present here -- confirm.
   - The conditions selecting Vcharset_control_1 and
     Vcharset_latin_iso8859_1, and the values returned with Vcharset_ascii,
     Vcharset_ucs_bmp and Vcharset_ucs, are on lines not present here.  */
1185 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1187 if (c <= MAX_CHAR_BASIC_LATIN)
1189 *charset = Vcharset_ascii;
1194 *charset = Vcharset_control_1;
1199 *charset = Vcharset_latin_iso8859_1;
1203 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1205 *charset = Vcharset_hebrew_iso8859_8;
1206 return c - MIN_CHAR_HEBREW + 0x20;
1209 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1211 *charset = Vcharset_thai_tis620;
1212 return c - MIN_CHAR_THAI + 0x20;
1215 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1216 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1218 return list2 (Vcharset_katakana_jisx0201,
1219 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1222 else if (c <= MAX_CHAR_BMP)
1224 *charset = Vcharset_ucs_bmp;
1227 else if (c < MIN_CHAR_DAIKANWA)
1229 *charset = Vcharset_ucs;
1232 else if (c <= MAX_CHAR_DAIKANWA)
1234 *charset = Vcharset_ideograph_daikanwa;
1235 return c - MIN_CHAR_DAIKANWA;
1237 else if (c <= MAX_CHAR_MOJIKYO_0)
1239 *charset = Vcharset_mojikyo;
1240 return c - MIN_CHAR_MOJIKYO_0;
1242 else if (c < MIN_CHAR_94)
1244 *charset = Vcharset_ucs;
1247 else if (c <= MAX_CHAR_94)
1249 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1250 ((c - MIN_CHAR_94) / 94) + '0',
1251 CHARSET_LEFT_TO_RIGHT);
1252 if (!NILP (*charset))
1253 return ((c - MIN_CHAR_94) % 94) + 33;
1256 *charset = Vcharset_ucs;
1260 else if (c <= MAX_CHAR_96)
1262 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1263 ((c - MIN_CHAR_96) / 96) + '0',
1264 CHARSET_LEFT_TO_RIGHT);
1265 if (!NILP (*charset))
1266 return ((c - MIN_CHAR_96) % 96) + 32;
1269 *charset = Vcharset_ucs;
1273 else if (c <= MAX_CHAR_94x94)
1276 = CHARSET_BY_ATTRIBUTES (94, 2,
1277 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1278 CHARSET_LEFT_TO_RIGHT);
1279 if (!NILP (*charset))
1280 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1281 | (((c - MIN_CHAR_94x94) % 94) + 33);
1284 *charset = Vcharset_ucs;
1288 else if (c <= MAX_CHAR_96x96)
1291 = CHARSET_BY_ATTRIBUTES (96, 2,
1292 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1293 CHARSET_LEFT_TO_RIGHT);
1294 if (!NILP (*charset))
1295 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1296 | (((c - MIN_CHAR_96x96) % 96) + 32);
1299 *charset = Vcharset_ucs;
1303 else if (c < MIN_CHAR_MOJIKYO)
1305 *charset = Vcharset_ucs;
1308 else if (c <= MAX_CHAR_MOJIKYO)
1310 *charset = Vcharset_mojikyo;
1311 return c - MIN_CHAR_MOJIKYO;
1315 *charset = Vcharset_ucs;
1320 Lisp_Object Vdefault_coded_charset_priority_list;
1324 /************************************************************************/
1325 /* Basic charset Lisp functions */
1326 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): yields Qt when OBJECT
   satisfies CHARSETP and Qnil otherwise.  */
1328 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1329 Return non-nil if OBJECT is a charset.
1333 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument).  A charset object
   is returned as is.  Any other argument must be a symbol (CHECK_SYMBOL)
   and is looked up in Vcharset_hash_table, with Qnil as the result when
   there is no entry.  */
1336 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1337 Retrieve the charset of the given name.
1338 If CHARSET-OR-NAME is a charset object, it is simply returned.
1339 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1340 nil is returned. Otherwise the associated charset object is returned.
1344 if (CHARSETP (charset_or_name))
1345 return charset_or_name;
1347 CHECK_SYMBOL (charset_or_name);
1348 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument).  Delegates to
   Ffind_charset and signals "No such charset" with NAME as datum on
   failure.
   NOTE(review): the test that triggers the error and the return statement
   are on lines not present in this excerpt.  */
1351 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1352 Retrieve the charset of the given name.
1353 Same as `find-charset' except an error is signalled if there is no such
1354 charset instead of returning nil.
1358 Lisp_Object charset = Ffind_charset (name);
1361 signal_simple_error ("No such charset", name);
1365 /* We store the charsets in hash tables with the names as the key and the
1366 actual charset object as the value. Occasionally we need to use them
1367 in a list format. These routines provide us with that. */
/* struct charset_list_closure carries a pointer to the list being built.
   add_charset_to_list_mapper is the per-entry callback passed to
   elisp_maphash by Fcharset_list: it conses the entry's KEY (the charset
   name) onto the front of the list the closure points to.  VALUE is not
   used in the visible lines.
   NOTE(review): the return statement is on a line not present here.  */
1368 struct charset_list_closure
1370 Lisp_Object *charset_list;
1374 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1375 void *charset_list_closure)
1377 /* This function can GC */
1378 struct charset_list_closure *chcl =
1379 (struct charset_list_closure*) charset_list_closure;
1380 Lisp_Object *charset_list = chcl->charset_list;
1382 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list, protects it from GC with GCPRO1, and walks Vcharset_hash_table
   with elisp_maphash, letting add_charset_to_list_mapper cons each key
   onto the list; the list is then returned.
   NOTE(review): the matching UNGCPRO is presumably on a line not present
   in this excerpt -- confirm.  */
1386 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1387 Return a list of the names of all defined charsets.
1391 Lisp_Object charset_list = Qnil;
1392 struct gcpro gcpro1;
1393 struct charset_list_closure charset_list_closure;
1395 GCPRO1 (charset_list);
1396 charset_list_closure.charset_list = &charset_list;
1397 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1398 &charset_list_closure);
1401 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument).  The argument is
   first resolved with Fget_charset, so a charset object or a charset name
   is accepted; the result is that charset's name field.  */
1404 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1405 Return the name of charset CHARSET.
1409 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset' (NAME DOC-STRING PROPS).

   What the visible lines do:
     - NAME must be a symbol; DOC-STRING must be a string unless nil.
       If Ffind_charset already finds a charset for NAME, the error
       "Cannot redefine existing charset" is signalled.
     - PROPS is walked with EXTERNAL_PROPERTY_LIST_LOOP_3.  Range checks:
       dimension 1..2, chars 94 or 96, columns 1 or 2, final '0'..'~',
       direction l2r or r2l; a ccl-program must be accepted by
       setup_ccl_program.  "Unrecognized property" is signalled for a
       keyword that matches nothing.
     - For dimension 2 the final byte may not exceed 0x5F, and the
       (chars, dimension, final) slot of CHARSET_BY_ATTRIBUTES must be
       empty for both directions.
     - The id comes from get_unallocated_leading_byte (dimension).
       Defaults: "" for a nil DOC-STRING and a nil registry, the symbol
       name for a nil short name, DOC-STRING for a nil long name.
     - The object is built by make_charset; a non-nil ccl-program is
       stored into it afterwards.

   NOTE(review): the `long-name' test is a plain `if', while every other
   keyword test after `short-name' is `else if'.  If the chain ends in an
   `else' in front of the "Unrecognized property" error (that line is not
   visible in this extract), a 'short-name entry is accepted by the first
   `if' and then rejected by the second chain.  Suggested fix: change the
   test to `else if (EQ (keyword, Qlong_name))'.
   NOTE(review): two adjacent `graphic' range checks are visible (upper
   bound 2 and upper bound 1); presumably they are preprocessor
   alternatives -- confirm.  The doc string only documents 0 and 1.
   NOTE(review): many short lines (CHECK_INT calls, assignments, braces,
   the final return) are missing from this extract. */
1412 /* #### SJT Should generic properties be allowed? */
1413 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1414 Define a new character set.
1415 This function is for use with Mule support.
1416 NAME is a symbol, the name by which the character set is normally referred.
1417 DOC-STRING is a string describing the character set.
1418 PROPS is a property list, describing the specific nature of the
1419 character set. Recognized properties are:
1421 'short-name Short version of the charset name (ex: Latin-1)
1422 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1423 'registry A regular expression matching the font registry field for
1425 'dimension Number of octets used to index a character in this charset.
1426 Either 1 or 2. Defaults to 1.
1427 'columns Number of columns used to display a character in this charset.
1428 Only used in TTY mode. (Under X, the actual width of a
1429 character can be derived from the font used to display the
1430 characters.) If unspecified, defaults to the dimension
1431 (this is almost always the correct value).
1432 'chars Number of characters in each dimension (94 or 96).
1433 Defaults to 94. Note that if the dimension is 2, the
1434 character set thus described is 94x94 or 96x96.
1435 'final Final byte of ISO 2022 escape sequence. Must be
1436 supplied. Each combination of (DIMENSION, CHARS) defines a
1437 separate namespace for final bytes. Note that ISO
1438 2022 restricts the final byte to the range
1439 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1440 dimension == 2. Note also that final bytes in the range
1441 0x30 - 0x3F are reserved for user-defined (not official)
1443 'graphic 0 (use left half of font on output) or 1 (use right half
1444 of font on output). Defaults to 0. For example, for
1445 a font whose registry is ISO8859-1, the left half
1446 (octets 0x20 - 0x7F) is the `ascii' character set, while
1447 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1448 character set. With 'graphic set to 0, the octets
1449 will have their high bit cleared; with it set to 1,
1450 the octets will have their high bit set.
1451 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1453 'ccl-program A compiled CCL program used to convert a character in
1454 this charset into an index into the font. This is in
1455 addition to the 'graphic property. The CCL program
1456 is passed the octets of the character, with the high
1457 bit cleared and set depending upon whether the value
1458 of the 'graphic property is 0 or 1.
1460 (name, doc_string, props))
1462 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1463 int direction = CHARSET_LEFT_TO_RIGHT;
1464 Lisp_Object registry = Qnil;
1465 Lisp_Object charset;
1466 Lisp_Object ccl_program = Qnil;
1467 Lisp_Object short_name = Qnil, long_name = Qnil;
1468 int byte_offset = -1;
1470 CHECK_SYMBOL (name);
1471 if (!NILP (doc_string))
1472 CHECK_STRING (doc_string);
1474 charset = Ffind_charset (name);
1475 if (!NILP (charset))
1476 signal_simple_error ("Cannot redefine existing charset", name);
1479 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1481 if (EQ (keyword, Qshort_name))
1483 CHECK_STRING (value);
1487 if (EQ (keyword, Qlong_name))
1489 CHECK_STRING (value);
1493 else if (EQ (keyword, Qdimension))
1496 dimension = XINT (value);
1497 if (dimension < 1 || dimension > 2)
1498 signal_simple_error ("Invalid value for 'dimension", value);
1501 else if (EQ (keyword, Qchars))
1504 chars = XINT (value);
1505 if (chars != 94 && chars != 96)
1506 signal_simple_error ("Invalid value for 'chars", value);
1509 else if (EQ (keyword, Qcolumns))
1512 columns = XINT (value);
1513 if (columns != 1 && columns != 2)
1514 signal_simple_error ("Invalid value for 'columns", value);
1517 else if (EQ (keyword, Qgraphic))
1520 graphic = XINT (value);
1522 if (graphic < 0 || graphic > 2)
1524 if (graphic < 0 || graphic > 1)
1526 signal_simple_error ("Invalid value for 'graphic", value);
1529 else if (EQ (keyword, Qregistry))
1531 CHECK_STRING (value);
1535 else if (EQ (keyword, Qdirection))
1537 if (EQ (value, Ql2r))
1538 direction = CHARSET_LEFT_TO_RIGHT;
1539 else if (EQ (value, Qr2l))
1540 direction = CHARSET_RIGHT_TO_LEFT;
1542 signal_simple_error ("Invalid value for 'direction", value);
1545 else if (EQ (keyword, Qfinal))
1547 CHECK_CHAR_COERCE_INT (value);
1548 final = XCHAR (value);
1549 if (final < '0' || final > '~')
1550 signal_simple_error ("Invalid value for 'final", value);
1553 else if (EQ (keyword, Qccl_program))
1555 struct ccl_program test_ccl;
1557 if (setup_ccl_program (&test_ccl, value) < 0)
1558 signal_simple_error ("Invalid value for 'ccl-program", value);
1559 ccl_program = value;
1563 signal_simple_error ("Unrecognized property", keyword);
1568 error ("'final must be specified");
1569 if (dimension == 2 && final > 0x5F)
1571 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1574 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1575 CHARSET_LEFT_TO_RIGHT)) ||
1576 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1577 CHARSET_RIGHT_TO_LEFT)))
1579 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1581 id = get_unallocated_leading_byte (dimension);
1583 if (NILP (doc_string))
1584 doc_string = build_string ("");
1586 if (NILP (registry))
1587 registry = build_string ("");
1589 if (NILP (short_name))
1590 XSETSTRING (short_name, XSYMBOL (name)->name);
1592 if (NILP (long_name))
1593 long_name = doc_string;
1596 columns = dimension;
1598 if (byte_offset < 0)
1602 else if (chars == 96)
1608 charset = make_charset (id, name, chars, dimension, columns, graphic,
1609 final, direction, short_name, long_name,
1610 doc_string, registry,
1611 Qnil, 0, 0, 0, byte_offset);
1612 if (!NILP (ccl_program))
1613 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset' (CHARSET NEW-NAME).
   Resolves CHARSET through Fget_charset.  Signals an error if it
   already has a reverse-direction charset, or if NEW-NAME (which must be
   a symbol) already names a charset.
   The new charset copies chars, dimension, columns, graphic, final, doc
   string, short and long names and registry from the source.  Its
   direction is the opposite of the source's, and its id is a fresh one
   from get_unallocated_leading_byte (dimension).  The make_charset call
   is also passed the source's decoding table, UCS min/max, code offset
   and byte offset.
   Finally the two charsets are linked to each other through their
   reverse-direction-charset slots.
   NOTE(review): the declarations of `cs' and `direction', the argument
   counts of the DEFUN and the return statement are not visible in this
   extract. */
1617 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1619 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1620 NEW-NAME is the name of the new charset. Return the new charset.
1622 (charset, new_name))
1624 Lisp_Object new_charset = Qnil;
1625 int id, chars, dimension, columns, graphic, final;
1627 Lisp_Object registry, doc_string, short_name, long_name;
1630 charset = Fget_charset (charset);
1631 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1632 signal_simple_error ("Charset already has reverse-direction charset",
1635 CHECK_SYMBOL (new_name);
1636 if (!NILP (Ffind_charset (new_name)))
1637 signal_simple_error ("Cannot redefine existing charset", new_name);
1639 cs = XCHARSET (charset);
1641 chars = CHARSET_CHARS (cs);
1642 dimension = CHARSET_DIMENSION (cs);
1643 columns = CHARSET_COLUMNS (cs);
1644 id = get_unallocated_leading_byte (dimension);
1646 graphic = CHARSET_GRAPHIC (cs);
1647 final = CHARSET_FINAL (cs);
1648 direction = CHARSET_RIGHT_TO_LEFT;
1649 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1650 direction = CHARSET_LEFT_TO_RIGHT;
1651 doc_string = CHARSET_DOC_STRING (cs);
1652 short_name = CHARSET_SHORT_NAME (cs);
1653 long_name = CHARSET_LONG_NAME (cs);
1654 registry = CHARSET_REGISTRY (cs);
1656 new_charset = make_charset (id, new_name, chars, dimension, columns,
1657 graphic, final, direction, short_name, long_name,
1658 doc_string, registry,
1660 CHARSET_DECODING_TABLE(cs),
1661 CHARSET_UCS_MIN(cs),
1662 CHARSET_UCS_MAX(cs),
1663 CHARSET_CODE_OFFSET(cs),
1664 CHARSET_BYTE_OFFSET(cs)
1670 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1671 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias' (two arguments).
   ALIAS must be a symbol.  CHARSET is resolved through Fget_charset, and
   the resolved charset is stored in Vcharset_hash_table under ALIAS.
   The value returned is whatever Fputhash returns. */
1676 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1677 Define symbol ALIAS as an alias for CHARSET.
1681 CHECK_SYMBOL (alias);
1682 charset = Fget_charset (charset);
1683 return Fputhash (alias, charset, Vcharset_hash_table);
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET through Fget_charset and returns the content of its
   reverse-direction-charset slot.
   NOTE(review): syms_of_mule_charset has no DEFSUBR for this function
   (only a commented-out DEFSUBR (Freverse_direction_charset)), so it is
   presumably compiled out or not exposed to Lisp -- confirm. */
1686 /* #### Reverse direction charsets not yet implemented. */
1688 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1690 Return the reverse-direction charset parallel to CHARSET, if any.
1691 This is the charset with the same properties (in particular, the same
1692 dimension, number of characters per dimension, and final byte) as
1693 CHARSET but whose characters are displayed in the opposite direction.
1697 charset = Fget_charset (charset);
1698 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'
   (DIMENSION CHARS FINAL &optional DIRECTION).
   Validation visible here: DIMENSION is an integer in 1..2, CHARS is 94
   or 96, FINAL is a character in '0'..'~' (and at most 0x5F when
   DIMENSION is 2), and DIRECTION is 'l2r, 'r2l or nil.  Anything else
   signals an error.
   The lookup goes through CHARSET_BY_ATTRIBUTES: with the requested
   direction when one was given, otherwise left-to-right and then
   right-to-left.  The result is turned into the charset's name.
   NOTE(review): the statements assigning `ch' and `fi' and the
   conditions around the three lookups and the final return are not
   visible in this extract. */
1702 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1703 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1704 If DIRECTION is omitted, both directions will be checked (left-to-right
1705 will be returned if character sets exist for both directions).
1707 (dimension, chars, final, direction))
1709 int dm, ch, fi, di = -1;
1710 Lisp_Object obj = Qnil;
1712 CHECK_INT (dimension);
1713 dm = XINT (dimension);
1714 if (dm < 1 || dm > 2)
1715 signal_simple_error ("Invalid value for DIMENSION", dimension);
1719 if (ch != 94 && ch != 96)
1720 signal_simple_error ("Invalid value for CHARS", chars);
1722 CHECK_CHAR_COERCE_INT (final);
1724 if (fi < '0' || fi > '~')
1725 signal_simple_error ("Invalid value for FINAL", final);
1727 if (EQ (direction, Ql2r))
1728 di = CHARSET_LEFT_TO_RIGHT;
1729 else if (EQ (direction, Qr2l))
1730 di = CHARSET_RIGHT_TO_LEFT;
1731 else if (!NILP (direction))
1732 signal_simple_error ("Invalid value for DIRECTION", direction);
1734 if (dm == 2 && fi > 0x5F)
1736 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1740 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1742 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1745 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1748 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_SHORT_NAME. */
1752 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1753 Return short name of CHARSET.
1757 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_LONG_NAME. */
1760 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1761 Return long name of CHARSET.
1765 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET through
   Fget_charset and returns its XCHARSET_DOC_STRING. */
1768 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1769 Return description of CHARSET.
1773 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET through
   Fget_charset and returns its XCHARSET_DIMENSION as a Lisp integer. */
1776 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1777 Return dimension of CHARSET.
1781 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property' (CHARSET PROP).
   CHARSET is resolved through Fget_charset and PROP must be a symbol.
   PROP is compared against the known property symbols one by one and
   the matching slot is returned: integer slots are wrapped with
   make_int, the final byte with make_char, and direction is mapped to
   the symbol l2r or r2l.  For reverse-direction-charset the name of the
   linked charset is returned when the slot holds a charset, otherwise
   the slot value itself.
   An unmatched PROP signals "Unrecognized charset property name". */
1784 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1785 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1786 Recognized properties are those listed in `make-charset', as well as
1787 'name and 'doc-string.
1793 charset = Fget_charset (charset);
1794 cs = XCHARSET (charset);
1796 CHECK_SYMBOL (prop);
1797 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1798 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1799 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1800 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1801 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1802 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1803 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1804 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1805 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1806 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1807 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1808 if (EQ (prop, Qdirection))
1809 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1810 if (EQ (prop, Qreverse_direction_charset))
1812 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1813 /* #### Is this translation OK? If so, error checking sufficient? */
1814 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1816 signal_simple_error ("Unrecognized charset property name", prop);
1817 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET through Fget_charset
   and returns its XCHARSET_LEADING_BYTE as a Lisp integer. */
1820 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1821 Return charset identification number of CHARSET.
1825 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* Lisp primitive `set-charset-ccl-program' (CHARSET CCL-PROGRAM).
   CHARSET is resolved through Fget_charset.  CCL-PROGRAM is validated by
   running setup_ccl_program on a scratch struct ccl_program; a negative
   result signals "Invalid ccl-program".  On success the program is
   stored in the charset's ccl-program slot.
   NOTE(review): the return statement, and the rest of the #### comment
   that opens below, are not visible in this extract. */
1828 /* #### We need to figure out which properties we really want to
1831 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1832 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1834 (charset, ccl_program))
1836 struct ccl_program test_ccl;
1838 charset = Fget_charset (charset);
1839 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1840 signal_simple_error ("Invalid ccl-program", ccl_program);
1841 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Clear the cached font lookups for CHARSET on every device.
   For each device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked up
   in that device's charset_font_cache (Qunbound is the not-found
   marker); when an entry exists, the hash table stored there is emptied
   with Fclrhash. */
1846 invalidate_charset_font_caches (Lisp_Object charset)
1848 /* Invalidate font cache entries for charset on all devices. */
1849 Lisp_Object devcons, concons, hash_table;
1850 DEVICE_LOOP_NO_BREAK (devcons, concons)
1852 struct device *d = XDEVICE (XCAR (devcons));
1853 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1854 if (!UNBOUNDP (hash_table))
1855 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry' (CHARSET REGISTRY).
   CHARSET is resolved through Fget_charset and REGISTRY must be a
   string.  After the registry slot is replaced, the per-device font
   caches for the charset are cleared with
   invalidate_charset_font_caches, and face_property_was_changed is
   called for the `font' property of Vdefault_face with locale Qglobal.
   NOTE(review): the return statement is not visible in this extract. */
1859 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1860 Set the 'registry property of CHARSET to REGISTRY.
1862 (charset, registry))
1864 charset = Fget_charset (charset);
1865 CHECK_STRING (registry);
1866 XCHARSET_REGISTRY (charset) = registry;
1867 invalidate_charset_font_caches (charset);
1868 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET through
   Fget_charset and returns its XCHARSET_DECODING_TABLE. */
1873 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1874 Return mapping-table of CHARSET.
1878 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table' (CHARSET TABLE).
   CHARSET is resolved through Fget_charset.  Visible handling of TABLE:
     - one branch resets the charset's decoding table to Qnil, first
       calling make_vector_newer on the old table when it is a vector
       (presumably the TABLE-is-nil case -- the guarding condition is not
       visible in this extract);
     - a vector TABLE is checked by decoding_table_check_elements; the
       errors "Too big table", "Invalid element is found" and "Something
       wrong" belong to that check, and the decoding table is set to
       Qnil afterwards;
     - any other TABLE signals wrong-type-argument.
   After that the function switches on the charset's dimension and walks
   TABLE, registering entries with put_char_ccs_code_point.  The code
   point is the index plus CHARSET_BYTE_OFFSET (cs); where an entry is
   itself a vector, the outer value is shifted left by 8 to form the
   high part of the code point.
   NOTE(review): the case labels, the tests selecting which elements are
   registered, and the return value are not visible here -- confirm
   against the full file before relying on this description. */
1881 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1882 Set mapping-table of CHARSET to TABLE.
1886 struct Lisp_Charset *cs;
1890 charset = Fget_charset (charset);
1891 cs = XCHARSET (charset);
1895 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1896 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1897 CHARSET_DECODING_TABLE(cs) = Qnil;
1900 else if (VECTORP (table))
1902 int ccs_len = CHARSET_BYTE_SIZE (cs);
1903 int ret = decoding_table_check_elements (table,
1904 CHARSET_DIMENSION (cs),
1909 signal_simple_error ("Too big table", table);
1911 signal_simple_error ("Invalid element is found", table);
1913 signal_simple_error ("Something wrong", table);
1915 CHARSET_DECODING_TABLE(cs) = Qnil;
1918 signal_error (Qwrong_type_argument,
1919 list2 (build_translated_string ("vector-or-nil-p"),
1922 byte_offset = CHARSET_BYTE_OFFSET (cs);
1923 switch (CHARSET_DIMENSION (cs))
1926 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1928 Lisp_Object c = XVECTOR_DATA(table)[i];
1931 put_char_ccs_code_point (c, charset,
1932 make_int (i + byte_offset));
1936 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1938 Lisp_Object v = XVECTOR_DATA(table)[i];
1944 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1946 Lisp_Object c = XVECTOR_DATA(v)[j];
1949 put_char_ccs_code_point
1951 make_int ( ( (i + byte_offset) << 8 )
1957 put_char_ccs_code_point (v, charset,
1958 make_int (i + byte_offset));
1967 /************************************************************************/
1968 /* Lisp primitives for working with characters */
1969 /************************************************************************/
/* Lisp primitive `decode-char' (CHARSET CODE).
   CHARSET is resolved through Fget_charset.  The code point held in `c'
   is converted with DECODE_CHAR; a non-negative result is returned as a
   Lisp character, a negative one yields nil.
   NOTE(review): how `c' is obtained from CODE, and what is done when
   XCHARSET_GRAPHIC (charset) is 1, are not visible in this extract. */
1972 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
1973 Make a character from CHARSET and code-point CODE.
1979 charset = Fget_charset (charset);
1982 if (XCHARSET_GRAPHIC (charset) == 1)
1984 c = DECODE_CHAR (charset, c);
1985 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char' (CHARSET CODE).
   CHARSET is resolved through Fget_charset.  When it is
   Vcharset_latin_viscii, the character is first obtained with
   Fdecode_char; its attributes under Vcharset_latin_viscii_lower and
   Vcharset_latin_viscii_upper are then queried with Fget_char_attribute,
   and CHARSET is switched to whichever of the two applies.
   The code point is finally converted by decode_builtin_char.  A
   non-negative result is returned as a Lisp character; otherwise the
   function falls back to Fdecode_char (charset, code).
   NOTE(review): the conditions around the two attribute lookups, the
   way `c' is derived from CODE, and the action taken when
   XCHARSET_GRAPHIC (charset) is 1 are not visible in this extract. */
1988 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
1989 Make a builtin character from CHARSET and code-point CODE.
1995 charset = Fget_charset (charset);
1997 if (EQ (charset, Vcharset_latin_viscii))
1999 Lisp_Object chr = Fdecode_char (charset, code);
2005 (ret = Fget_char_attribute (chr,
2006 Vcharset_latin_viscii_lower,
2009 charset = Vcharset_latin_viscii_lower;
2013 (ret = Fget_char_attribute (chr,
2014 Vcharset_latin_viscii_upper,
2017 charset = Vcharset_latin_viscii_upper;
2024 if (XCHARSET_GRAPHIC (charset) == 1)
2027 c = decode_builtin_char (charset, c);
2028 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char' (CHARSET ARG1 &optional ARG2).
   CHARSET is resolved through Fget_charset.  The permitted octet range
   depends on the charset: 0..127 for ascii, 0..31 for control-1, 0..255
   when CHARSET_CHARS is 256, 33..126 when it is 94, and 32..127
   otherwise.
   The first octet `a1' is range-checked (args_out_of_range_3 on
   failure).  For a dimension-1 charset ARG2 must be nil and the
   character is MAKE_CHAR (charset, a1, 0).  Otherwise the second octet
   is ARG2 with the 8th bit masked off, range-checked the same way, and
   the character is MAKE_CHAR (charset, a1, a2).
   NOTE(review): the statement computing `a1' and the type checks on
   ARG1/ARG2 are not visible in this extract. */
2032 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2033 Make a character from CHARSET and octets ARG1 and ARG2.
2034 ARG2 is required only for characters from two-dimensional charsets.
2035 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2036 character s with caron.
2038 (charset, arg1, arg2))
2042 int lowlim, highlim;
2044 charset = Fget_charset (charset);
2045 cs = XCHARSET (charset);
2047 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2048 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2050 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2052 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2053 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2056 /* It is useful (and safe, according to Olivier Galibert) to strip
2057 the 8th bit off ARG1 and ARG2 because it allows programmers to
2058 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2059 Latin 2 code of the character. */
2067 if (a1 < lowlim || a1 > highlim)
2068 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2070 if (CHARSET_DIMENSION (cs) == 1)
2074 ("Charset is of dimension one; second octet must be nil", arg2);
2075 return make_char (MAKE_CHAR (charset, a1, 0));
2084 a2 = XINT (arg2) & 0x7f;
2086 if (a2 < lowlim || a2 > highlim)
2087 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2089 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset' (one argument).
   CHARACTER is checked with CHECK_CHAR_COERCE_INT.  The charset
   computed by CHAR_CHARSET for it is returned by name
   (XCHARSET_NAME). */
2092 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2093 Return the character set of CHARACTER.
2097 CHECK_CHAR_COERCE_INT (character);
2099 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet' (CHARACTER &optional N).
   CHARACTER is split by BREAKUP_CHAR into its charset and two octets.
   N nil or 0 selects the first octet and N 1 the second; any other N
   signals "Octet number must be 0 or 1". */
2102 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2103 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2104 N defaults to 0 if omitted.
2108 Lisp_Object charset;
2111 CHECK_CHAR_COERCE_INT (character);
2113 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2115 if (NILP (n) || EQ (n, Qzero))
2116 return make_int (octet0);
2117 else if (EQ (n, make_int (1)))
2118 return make_int (octet1);
2120 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char' (one argument).
   Two ways of building the result are visible:
     - ENCODE_CHAR yields a code point and the charset; the low 8 bits of
       the code point are consed onto the result while the dimension
       counter is positive, and the charset name is then consed on front;
     - BREAKUP_CHAR yields the charset and two octets; the result is
       list3 (name, c1, c2) for a dimension-2 charset and
       list2 (name, c1) otherwise.
   NOTE(review): presumably these are preprocessor alternatives, but the
   directives are not visible in this extract.  The loop's update of
   `code_point' and `dimension', the UNGCPRO and the return are not
   visible either. */
2123 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2124 Return list of charset and one or two position-codes of CHARACTER.
2128 /* This function can GC */
2129 struct gcpro gcpro1, gcpro2;
2130 Lisp_Object charset = Qnil;
2131 Lisp_Object rc = Qnil;
2139 GCPRO2 (charset, rc);
2140 CHECK_CHAR_COERCE_INT (character);
2143 code_point = ENCODE_CHAR (XCHAR (character), charset);
2144 dimension = XCHARSET_DIMENSION (charset);
2145 while (dimension > 0)
2147 rc = Fcons (make_int (code_point & 255), rc);
2151 rc = Fcons (XCHARSET_NAME (charset), rc);
2153 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2155 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2157 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2161 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2170 #ifdef ENABLE_COMPOSITE_CHARS
2171 /************************************************************************/
2172 /* composite character functions */
2173 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   here signals "No more composite chars available" once
   composite_char_row_next has reached 128.  Otherwise it builds a
   character in Vcharset_composite from the next free row/column,
   records it in both the char-to-string and string-to-char hash
   tables, and advances the column.  When the column reaches 128 it
   wraps to 32 and the row is incremented.
   NOTE(review): the test deciding between "already known" and
   "allocate", the return type and the return statement are not visible
   in this extract. */
2176 lookup_composite_char (Bufbyte *str, int len)
2178 Lisp_Object lispstr = make_string (str, len);
2179 Lisp_Object ch = Fgethash (lispstr,
2180 Vcomposite_char_string2char_hash_table,
2186 if (composite_char_row_next >= 128)
2187 signal_simple_error ("No more composite chars available", lispstr);
2188 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2189 composite_char_col_next);
2190 Fputhash (make_char (emch), lispstr,
2191 Vcomposite_char_char2string_hash_table);
2192 Fputhash (lispstr, make_char (emch),
2193 Vcomposite_char_string2char_hash_table);
2194 composite_char_col_next++;
2195 if (composite_char_col_next >= 128)
2197 composite_char_col_next = 32;
2198 composite_char_row_next++;
/* Look up composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires the
   lookup to succeed, i.e. CH must have been registered in that table.
   NOTE(review): the return type and return statement are not visible in
   this extract. */
2207 composite_char_string (Emchar ch)
2209 Lisp_Object str = Fgethash (make_char (ch),
2210 Vcomposite_char_char2string_hash_table,
2212 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN).
   STRING must be a Lisp string.  Its data and length are passed to
   lookup_composite_char, and the resulting character is returned as a
   Lisp character. */
2216 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2217 Convert a string into a single composite character.
2218 The character is the result of overstriking all the characters in
2223 CHECK_STRING (string);
2224 return make_char (lookup_composite_char (XSTRING_DATA (string),
2225 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN).
   Signals "Must be composite char" unless the leading byte of `emch' is
   LEADING_BYTE_COMPOSITE; otherwise returns the string recorded for it
   by calling composite_char_string.
   NOTE(review): the statements that validate CH and derive `emch' from
   it are not visible in this extract. */
2228 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2229 Return a string of the characters comprising a composite character.
2237 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2238 signal_simple_error ("Must be composite char", ch);
2239 return composite_char_string (emch);
2241 #endif /* ENABLE_COMPOSITE_CHARS */
2244 /************************************************************************/
2245 /* initialization */
2246 /************************************************************************/
/* Symbol-level initialization for this file.
   Calls INIT_LRECORD_IMPLEMENTATION for the charset type, registers the
   Lisp primitives of this file with DEFSUBR, and creates with defsymbol
   the symbols for the charset property names, the two direction values
   and the predefined charset names.
   NOTE(review): some DEFSUBR/defsymbol groups are presumably guarded by
   preprocessor conditionals (an #ifdef ENABLE_COMPOSITE_CHARS is
   visible); the other directives are not visible in this extract. */
2249 syms_of_mule_charset (void)
2251 INIT_LRECORD_IMPLEMENTATION (charset);
2253 DEFSUBR (Fcharsetp);
2254 DEFSUBR (Ffind_charset);
2255 DEFSUBR (Fget_charset);
2256 DEFSUBR (Fcharset_list);
2257 DEFSUBR (Fcharset_name);
2258 DEFSUBR (Fmake_charset);
2259 DEFSUBR (Fmake_reverse_direction_charset);
2260 /* DEFSUBR (Freverse_direction_charset); */
2261 DEFSUBR (Fdefine_charset_alias);
2262 DEFSUBR (Fcharset_from_attributes);
2263 DEFSUBR (Fcharset_short_name);
2264 DEFSUBR (Fcharset_long_name);
2265 DEFSUBR (Fcharset_description);
2266 DEFSUBR (Fcharset_dimension);
2267 DEFSUBR (Fcharset_property);
2268 DEFSUBR (Fcharset_id);
2269 DEFSUBR (Fset_charset_ccl_program);
2270 DEFSUBR (Fset_charset_registry);
2272 DEFSUBR (Fcharset_mapping_table);
2273 DEFSUBR (Fset_charset_mapping_table);
2277 DEFSUBR (Fdecode_char);
2278 DEFSUBR (Fdecode_builtin_char);
2280 DEFSUBR (Fmake_char);
2281 DEFSUBR (Fchar_charset);
2282 DEFSUBR (Fchar_octet);
2283 DEFSUBR (Fsplit_char);
2285 #ifdef ENABLE_COMPOSITE_CHARS
2286 DEFSUBR (Fmake_composite_char);
2287 DEFSUBR (Fcomposite_char_string);
2290 defsymbol (&Qcharsetp, "charsetp");
2291 defsymbol (&Qregistry, "registry");
2292 defsymbol (&Qfinal, "final");
2293 defsymbol (&Qgraphic, "graphic");
2294 defsymbol (&Qdirection, "direction");
2295 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2296 defsymbol (&Qshort_name, "short-name");
2297 defsymbol (&Qlong_name, "long-name");
2299 defsymbol (&Ql2r, "l2r");
2300 defsymbol (&Qr2l, "r2l");
2302 /* Charsets, compatible with FSF 20.3
2303 Naming convention is Script-Charset[-Edition] */
2304 defsymbol (&Qascii, "ascii");
2305 defsymbol (&Qcontrol_1, "control-1");
2306 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2307 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2308 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2309 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2310 defsymbol (&Qthai_tis620, "thai-tis620");
2311 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2312 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2313 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2314 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2315 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2316 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2317 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2318 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2319 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2320 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2321 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2322 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2323 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2324 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2325 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2326 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2328 defsymbol (&Qucs, "ucs");
2329 defsymbol (&Qucs_bmp, "ucs-bmp");
2330 defsymbol (&Qucs_cns, "ucs-cns");
2331 defsymbol (&Qucs_jis, "ucs-jis");
2332 defsymbol (&Qucs_big5, "ucs-big5");
2333 defsymbol (&Qlatin_viscii, "latin-viscii");
2334 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2335 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2336 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2337 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2338 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2339 defsymbol (&Qideograph_gt, "ideograph-gt");
2340 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2341 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2342 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2343 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2344 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2345 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2346 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2347 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2348 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2349 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2350 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2351 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2352 defsymbol (&Qchinese_big5, "chinese-big5");
2353 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2354 defsymbol (&Qmojikyo, "mojikyo");
2355 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2356 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2357 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2358 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2359 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2360 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2361 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2362 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2363 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2364 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2365 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2366 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2367 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2368 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2369 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2370 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2371 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2372 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2373 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2374 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2375 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2376 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2377 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2379 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2380 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2382 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.
   Allocates the zero-filled charset_lookup structure `chlook' and
   registers it with dump_add_root_struct_ptr.  Every entry of the
   by-leading-byte and by-attributes tables is set to Qnil, and the
   next-allocated leading-byte counters are set to their
   MIN_LEADING_BYTE_PRIVATE* starting values.  Two Lisp variables are
   then defined: `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): charset_by_attributes is filled once with two indices
   and once with three, and both a single and a 1-byte/2-byte pair of
   leading-byte counters are initialized; presumably these are
   preprocessor alternatives -- the directives are not visible here.
   NOTE(review): leading_code_private_11 is assigned the same value both
   before and after its DEFVAR_INT; the second assignment looks
   redundant -- confirm before removing. */
2386 vars_of_mule_charset (void)
2393 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2394 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2396 /* Table of charsets indexed by leading byte. */
2397 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2398 chlook->charset_by_leading_byte[i] = Qnil;
2401 /* Table of charsets indexed by type/final-byte. */
2402 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2403 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2404 chlook->charset_by_attributes[i][j] = Qnil;
2406 /* Table of charsets indexed by type/final-byte/direction. */
2407 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2408 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2409 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2410 chlook->charset_by_attributes[i][j][k] = Qnil;
2414 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2416 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2417 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2421 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2422 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2423 Leading-code of private TYPE9N charset of column-width 1.
2425 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2429 Vdefault_coded_charset_priority_list = Qnil;
2430 DEFVAR_LISP ("default-coded-charset-priority-list",
2431 &Vdefault_coded_charset_priority_list /*
2432 Default order of preferred coded-character-sets.
2438 complex_vars_of_mule_charset (void)
2438 complex_vars_of_mule_charset (void)
2440 staticpro (&Vcharset_hash_table);
2441 Vcharset_hash_table =
2442 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2444 /* Predefined character sets. We store them into variables for
2448 staticpro (&Vcharset_ucs);
2450 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2451 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2452 build_string ("UCS"),
2453 build_string ("UCS"),
2454 build_string ("ISO/IEC 10646"),
2456 Qnil, 0, 0xFFFFFFF, 0, 0);
2457 staticpro (&Vcharset_ucs_bmp);
2459 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2460 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2461 build_string ("BMP"),
2462 build_string ("BMP"),
2463 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2464 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2465 Qnil, 0, 0xFFFF, 0, 0);
2466 staticpro (&Vcharset_ucs_cns);
2468 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2469 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2470 build_string ("UCS for CNS"),
2471 build_string ("UCS for CNS 11643"),
2472 build_string ("ISO/IEC 10646 for CNS 11643"),
2475 staticpro (&Vcharset_ucs_jis);
2477 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2478 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2479 build_string ("UCS for JIS"),
2480 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2481 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2484 staticpro (&Vcharset_ucs_big5);
2486 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2487 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2488 build_string ("UCS for Big5"),
2489 build_string ("UCS for Big5"),
2490 build_string ("ISO/IEC 10646 for Big5"),
2494 # define MIN_CHAR_THAI 0
2495 # define MAX_CHAR_THAI 0
2496 /* # define MIN_CHAR_HEBREW 0 */
2497 /* # define MAX_CHAR_HEBREW 0 */
2498 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2499 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2501 staticpro (&Vcharset_ascii);
2503 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2504 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2505 build_string ("ASCII"),
2506 build_string ("ASCII)"),
2507 build_string ("ASCII (ISO646 IRV)"),
2508 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2509 Qnil, 0, 0x7F, 0, 0);
2510 staticpro (&Vcharset_control_1);
2511 Vcharset_control_1 =
2512 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2513 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2514 build_string ("C1"),
2515 build_string ("Control characters"),
2516 build_string ("Control characters 128-191"),
2518 Qnil, 0x80, 0x9F, 0, 0);
2519 staticpro (&Vcharset_latin_iso8859_1);
2520 Vcharset_latin_iso8859_1 =
2521 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2522 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2523 build_string ("Latin-1"),
2524 build_string ("ISO8859-1 (Latin-1)"),
2525 build_string ("ISO8859-1 (Latin-1)"),
2526 build_string ("iso8859-1"),
2527 Qnil, 0xA0, 0xFF, 0, 32);
2528 staticpro (&Vcharset_latin_iso8859_2);
2529 Vcharset_latin_iso8859_2 =
2530 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2531 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2532 build_string ("Latin-2"),
2533 build_string ("ISO8859-2 (Latin-2)"),
2534 build_string ("ISO8859-2 (Latin-2)"),
2535 build_string ("iso8859-2"),
2537 staticpro (&Vcharset_latin_iso8859_3);
2538 Vcharset_latin_iso8859_3 =
2539 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2540 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2541 build_string ("Latin-3"),
2542 build_string ("ISO8859-3 (Latin-3)"),
2543 build_string ("ISO8859-3 (Latin-3)"),
2544 build_string ("iso8859-3"),
2546 staticpro (&Vcharset_latin_iso8859_4);
2547 Vcharset_latin_iso8859_4 =
2548 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2549 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2550 build_string ("Latin-4"),
2551 build_string ("ISO8859-4 (Latin-4)"),
2552 build_string ("ISO8859-4 (Latin-4)"),
2553 build_string ("iso8859-4"),
2555 staticpro (&Vcharset_thai_tis620);
2556 Vcharset_thai_tis620 =
2557 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2558 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2559 build_string ("TIS620"),
2560 build_string ("TIS620 (Thai)"),
2561 build_string ("TIS620.2529 (Thai)"),
2562 build_string ("tis620"),
2563 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2564 staticpro (&Vcharset_greek_iso8859_7);
2565 Vcharset_greek_iso8859_7 =
2566 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2567 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2568 build_string ("ISO8859-7"),
2569 build_string ("ISO8859-7 (Greek)"),
2570 build_string ("ISO8859-7 (Greek)"),
2571 build_string ("iso8859-7"),
2573 staticpro (&Vcharset_arabic_iso8859_6);
2574 Vcharset_arabic_iso8859_6 =
2575 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2576 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2577 build_string ("ISO8859-6"),
2578 build_string ("ISO8859-6 (Arabic)"),
2579 build_string ("ISO8859-6 (Arabic)"),
2580 build_string ("iso8859-6"),
2582 staticpro (&Vcharset_hebrew_iso8859_8);
2583 Vcharset_hebrew_iso8859_8 =
2584 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2585 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2586 build_string ("ISO8859-8"),
2587 build_string ("ISO8859-8 (Hebrew)"),
2588 build_string ("ISO8859-8 (Hebrew)"),
2589 build_string ("iso8859-8"),
2591 0 /* MIN_CHAR_HEBREW */,
2592 0 /* MAX_CHAR_HEBREW */, 0, 32);
2593 staticpro (&Vcharset_katakana_jisx0201);
2594 Vcharset_katakana_jisx0201 =
2595 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2596 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2597 build_string ("JISX0201 Kana"),
2598 build_string ("JISX0201.1976 (Japanese Kana)"),
2599 build_string ("JISX0201.1976 Japanese Kana"),
2600 build_string ("jisx0201\\.1976"),
2602 staticpro (&Vcharset_latin_jisx0201);
2603 Vcharset_latin_jisx0201 =
2604 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2605 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2606 build_string ("JISX0201 Roman"),
2607 build_string ("JISX0201.1976 (Japanese Roman)"),
2608 build_string ("JISX0201.1976 Japanese Roman"),
2609 build_string ("jisx0201\\.1976"),
2611 staticpro (&Vcharset_cyrillic_iso8859_5);
2612 Vcharset_cyrillic_iso8859_5 =
2613 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2614 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2615 build_string ("ISO8859-5"),
2616 build_string ("ISO8859-5 (Cyrillic)"),
2617 build_string ("ISO8859-5 (Cyrillic)"),
2618 build_string ("iso8859-5"),
2620 staticpro (&Vcharset_latin_iso8859_9);
2621 Vcharset_latin_iso8859_9 =
2622 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2623 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2624 build_string ("Latin-5"),
2625 build_string ("ISO8859-9 (Latin-5)"),
2626 build_string ("ISO8859-9 (Latin-5)"),
2627 build_string ("iso8859-9"),
2629 staticpro (&Vcharset_japanese_jisx0208_1978);
2630 Vcharset_japanese_jisx0208_1978 =
2631 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2632 Qjapanese_jisx0208_1978, 94, 2,
2633 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2634 build_string ("JIS X0208:1978"),
2635 build_string ("JIS X0208:1978 (Japanese)"),
2637 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2638 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2640 staticpro (&Vcharset_chinese_gb2312);
2641 Vcharset_chinese_gb2312 =
2642 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2643 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2644 build_string ("GB2312"),
2645 build_string ("GB2312)"),
2646 build_string ("GB2312 Chinese simplified"),
2647 build_string ("gb2312"),
2649 staticpro (&Vcharset_chinese_gb12345);
2650 Vcharset_chinese_gb12345 =
2651 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2652 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2653 build_string ("G1"),
2654 build_string ("GB 12345)"),
2655 build_string ("GB 12345-1990"),
2656 build_string ("GB12345\\(\\.1990\\)?-0"),
2658 staticpro (&Vcharset_japanese_jisx0208);
2659 Vcharset_japanese_jisx0208 =
2660 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2661 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2662 build_string ("JISX0208"),
2663 build_string ("JIS X0208:1983 (Japanese)"),
2664 build_string ("JIS X0208:1983 Japanese Kanji"),
2665 build_string ("jisx0208\\.1983"),
2668 staticpro (&Vcharset_japanese_jisx0208_1990);
2669 Vcharset_japanese_jisx0208_1990 =
2670 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2671 Qjapanese_jisx0208_1990, 94, 2,
2672 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2673 build_string ("JISX0208-1990"),
2674 build_string ("JIS X0208:1990 (Japanese)"),
2675 build_string ("JIS X0208:1990 Japanese Kanji"),
2676 build_string ("jisx0208\\.1990"),
2678 MIN_CHAR_JIS_X0208_1990,
2679 MAX_CHAR_JIS_X0208_1990, 0, 33);
2681 staticpro (&Vcharset_korean_ksc5601);
2682 Vcharset_korean_ksc5601 =
2683 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2684 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2685 build_string ("KSC5601"),
2686 build_string ("KSC5601 (Korean"),
2687 build_string ("KSC5601 Korean Hangul and Hanja"),
2688 build_string ("ksc5601"),
2690 staticpro (&Vcharset_japanese_jisx0212);
2691 Vcharset_japanese_jisx0212 =
2692 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2693 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2694 build_string ("JISX0212"),
2695 build_string ("JISX0212 (Japanese)"),
2696 build_string ("JISX0212 Japanese Supplement"),
2697 build_string ("jisx0212"),
/* Expands to a string literal holding the regexp for CNS 11643 plane N:
   "cns11643", then '.' or '-', then an optional run of characters
   ending in '.' or '-', then N, anchored at the end by "$".  N must
   itself be a string literal (e.g. "1"), because the pieces are joined
   by adjacent-string-literal concatenation.
   NOTE(review): presumably matched against font registry names, like
   the strings passed in the same argument position for the other
   charsets -- confirm against make_charset. */
2700 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2701 staticpro (&Vcharset_chinese_cns11643_1);
2702 Vcharset_chinese_cns11643_1 =
2703 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2704 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2705 build_string ("CNS11643-1"),
2706 build_string ("CNS11643-1 (Chinese traditional)"),
2708 ("CNS 11643 Plane 1 Chinese traditional"),
2709 build_string (CHINESE_CNS_PLANE_RE("1")),
2711 staticpro (&Vcharset_chinese_cns11643_2);
2712 Vcharset_chinese_cns11643_2 =
2713 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2714 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2715 build_string ("CNS11643-2"),
2716 build_string ("CNS11643-2 (Chinese traditional)"),
2718 ("CNS 11643 Plane 2 Chinese traditional"),
2719 build_string (CHINESE_CNS_PLANE_RE("2")),
2722 staticpro (&Vcharset_latin_tcvn5712);
2723 Vcharset_latin_tcvn5712 =
2724 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2725 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2726 build_string ("TCVN 5712"),
2727 build_string ("TCVN 5712 (VSCII-2)"),
2728 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2729 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2731 staticpro (&Vcharset_latin_viscii_lower);
2732 Vcharset_latin_viscii_lower =
2733 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2734 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2735 build_string ("VISCII lower"),
2736 build_string ("VISCII lower (Vietnamese)"),
2737 build_string ("VISCII lower (Vietnamese)"),
2738 build_string ("MULEVISCII-LOWER"),
2740 staticpro (&Vcharset_latin_viscii_upper);
2741 Vcharset_latin_viscii_upper =
2742 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2743 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2744 build_string ("VISCII upper"),
2745 build_string ("VISCII upper (Vietnamese)"),
2746 build_string ("VISCII upper (Vietnamese)"),
2747 build_string ("MULEVISCII-UPPER"),
2749 staticpro (&Vcharset_latin_viscii);
2750 Vcharset_latin_viscii =
2751 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2752 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2753 build_string ("VISCII"),
2754 build_string ("VISCII 1.1 (Vietnamese)"),
2755 build_string ("VISCII 1.1 (Vietnamese)"),
2756 build_string ("VISCII1\\.1"),
2758 staticpro (&Vcharset_chinese_big5);
2759 Vcharset_chinese_big5 =
2760 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2761 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2762 build_string ("Big5"),
2763 build_string ("Big5"),
2764 build_string ("Big5 Chinese traditional"),
2765 build_string ("big5"),
2767 staticpro (&Vcharset_chinese_big5_cdp);
2768 Vcharset_chinese_big5_cdp =
2769 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2770 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2771 build_string ("Big5-CDP"),
2772 build_string ("Big5 + CDP extension"),
2773 build_string ("Big5 with CDP extension"),
2774 build_string ("big5\\.cdp-0"),
2776 staticpro (&Vcharset_ideograph_gt);
2777 Vcharset_ideograph_gt =
2778 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2779 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2780 build_string ("GT"),
2781 build_string ("GT"),
2782 build_string ("GT"),
2784 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2785 #define DEF_GT_PJ(n) \
2786 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2787 Vcharset_ideograph_gt_pj_##n = \
2788 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2789 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2790 build_string ("GT-PJ-"#n), \
2791 build_string ("GT (pseudo JIS encoding) part "#n), \
2792 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2794 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2808 staticpro (&Vcharset_ideograph_daikanwa);
2809 Vcharset_ideograph_daikanwa =
2810 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2811 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("Daikanwa"),
2813 build_string ("Morohashi's Daikanwa"),
2814 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2815 build_string ("Daikanwa"),
2816 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2817 staticpro (&Vcharset_mojikyo);
2819 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2820 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2821 build_string ("Mojikyo"),
2822 build_string ("Mojikyo"),
2823 build_string ("Konjaku-Mojikyo"),
2825 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2826 staticpro (&Vcharset_mojikyo_2022_1);
2827 Vcharset_mojikyo_2022_1 =
2828 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2829 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("Mojikyo-2022-1"),
2831 build_string ("Mojikyo ISO-2022 Part 1"),
2832 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2836 #define DEF_MOJIKYO_PJ(n) \
2837 staticpro (&Vcharset_mojikyo_pj_##n); \
2838 Vcharset_mojikyo_pj_##n = \
2839 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2840 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2841 build_string ("Mojikyo-PJ-"#n), \
2842 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2844 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2846 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2858 DEF_MOJIKYO_PJ (10);
2859 DEF_MOJIKYO_PJ (11);
2860 DEF_MOJIKYO_PJ (12);
2861 DEF_MOJIKYO_PJ (13);
2862 DEF_MOJIKYO_PJ (14);
2863 DEF_MOJIKYO_PJ (15);
2864 DEF_MOJIKYO_PJ (16);
2865 DEF_MOJIKYO_PJ (17);
2866 DEF_MOJIKYO_PJ (18);
2867 DEF_MOJIKYO_PJ (19);
2868 DEF_MOJIKYO_PJ (20);
2869 DEF_MOJIKYO_PJ (21);
2871 staticpro (&Vcharset_ethiopic_ucs);
2872 Vcharset_ethiopic_ucs =
2873 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2874 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2875 build_string ("Ethiopic (UCS)"),
2876 build_string ("Ethiopic (UCS)"),
2877 build_string ("Ethiopic of UCS"),
2878 build_string ("Ethiopic-Unicode"),
2879 Qnil, 0x1200, 0x137F, 0x1200, 0);
2881 staticpro (&Vcharset_chinese_big5_1);
2882 Vcharset_chinese_big5_1 =
2883 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2884 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("Big5"),
2886 build_string ("Big5 (Level-1)"),
2888 ("Big5 Level-1 Chinese traditional"),
2889 build_string ("big5"),
2891 staticpro (&Vcharset_chinese_big5_2);
2892 Vcharset_chinese_big5_2 =
2893 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2894 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("Big5"),
2896 build_string ("Big5 (Level-2)"),
2898 ("Big5 Level-2 Chinese traditional"),
2899 build_string ("big5"),
2902 #ifdef ENABLE_COMPOSITE_CHARS
2903 /* #### For simplicity, we put composite chars into a 96x96 charset.
2904 This is going to lead to problems because you can run out of
2905 room, esp. as we don't yet recycle numbers. */
2906 staticpro (&Vcharset_composite);
2907 Vcharset_composite =
2908 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2909 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2910 build_string ("Composite"),
2911 build_string ("Composite characters"),
2912 build_string ("Composite characters"),
2915 /* #### not dumped properly */
2916 composite_char_row_next = 32;
2917 composite_char_col_next = 32;
2919 Vcomposite_char_string2char_hash_table =
2920 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2921 Vcomposite_char_char2string_hash_table =
2922 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2923 staticpro (&Vcomposite_char_string2char_hash_table);
2924 staticpro (&Vcomposite_char_char2string_hash_table);
2925 #endif /* ENABLE_COMPOSITE_CHARS */