1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
353 /* Qchinese_big5_cdp, */
354 Qideograph_hanziku_1,
355 Qideograph_hanziku_2,
356 Qideograph_hanziku_3,
357 Qideograph_hanziku_4,
358 Qideograph_hanziku_5,
359 Qideograph_hanziku_6,
360 Qideograph_hanziku_7,
361 Qideograph_hanziku_8,
362 Qideograph_hanziku_9,
363 Qideograph_hanziku_10,
364 Qideograph_hanziku_11,
365 Qideograph_hanziku_12,
368 Qideograph_daikanwa_2,
388 Lisp_Object Ql2r, Qr2l;
390 Lisp_Object Vcharset_hash_table;
392 /* Composite characters are characters constructed by overstriking two
393 or more regular characters.
395 1) The old Mule implementation involves storing composite characters
396 in a buffer as a tag followed by all of the actual characters
397 used to make up the composite character. I think this is a bad
398 idea; it greatly complicates code that wants to handle strings
399 one character at a time because it has to deal with the possibility
400 of great big ungainly characters. It's much more reasonable to
401 simply store an index into a table of composite characters.
403 2) The current implementation only allows for 16,384 separate
404 composite characters over the lifetime of the XEmacs process.
405 This could become a potential problem if the user
406 edited lots of different files that use composite characters.
407 Due to FSF bogosity, increasing the number of allowable
408 composite characters under Mule would decrease the number
409 of possible faces that can exist. Mule already has shrunk
410 this to 2048, and further shrinkage would become uncomfortable.
411 No such problems exist in XEmacs.
413 Composite characters could be represented as 0x80 C1 C2 C3,
414 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
415 for slightly under 2^20 (one million) composite characters
416 over the XEmacs process lifetime, and you only need to
417 increase the size of a Mule character from 19 to 21 bits.
418 Or you could use 0x80 C1 C2 C3 C4, allowing for about
419 85 million (slightly over 2^26) composite characters. */
422 /************************************************************************/
423 /* Basic Emchar functions */
424 /************************************************************************/
426 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
427 string in STR. Returns the number of bytes stored.
428 Do not call this directly. Use the macro set_charptr_emchar() instead.
432 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
447 else if ( c <= 0x7ff )
449 *p++ = (c >> 6) | 0xc0;
450 *p++ = (c & 0x3f) | 0x80;
452 else if ( c <= 0xffff )
454 *p++ = (c >> 12) | 0xe0;
455 *p++ = ((c >> 6) & 0x3f) | 0x80;
456 *p++ = (c & 0x3f) | 0x80;
458 else if ( c <= 0x1fffff )
460 *p++ = (c >> 18) | 0xf0;
461 *p++ = ((c >> 12) & 0x3f) | 0x80;
462 *p++ = ((c >> 6) & 0x3f) | 0x80;
463 *p++ = (c & 0x3f) | 0x80;
465 else if ( c <= 0x3ffffff )
467 *p++ = (c >> 24) | 0xf8;
468 *p++ = ((c >> 18) & 0x3f) | 0x80;
469 *p++ = ((c >> 12) & 0x3f) | 0x80;
470 *p++ = ((c >> 6) & 0x3f) | 0x80;
471 *p++ = (c & 0x3f) | 0x80;
475 *p++ = (c >> 30) | 0xfc;
476 *p++ = ((c >> 24) & 0x3f) | 0x80;
477 *p++ = ((c >> 18) & 0x3f) | 0x80;
478 *p++ = ((c >> 12) & 0x3f) | 0x80;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
483 BREAKUP_CHAR (c, charset, c1, c2);
484 lb = CHAR_LEADING_BYTE (c);
485 if (LEADING_BYTE_PRIVATE_P (lb))
486 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
488 if (EQ (charset, Vcharset_control_1))
497 /* Return the first character from a Mule-encoded string in STR,
498 assuming it's non-ASCII. Do not call this directly.
499 Use the macro charptr_emchar() instead. */
502 non_ascii_charptr_emchar (const Bufbyte *str)
515 else if ( b >= 0xf8 )
520 else if ( b >= 0xf0 )
525 else if ( b >= 0xe0 )
530 else if ( b >= 0xc0 )
540 for( ; len > 0; len-- )
543 ch = ( ch << 6 ) | ( b & 0x3f );
547 Bufbyte i0 = *str, i1, i2 = 0;
550 if (i0 == LEADING_BYTE_CONTROL_1)
551 return (Emchar) (*++str - 0x20);
553 if (LEADING_BYTE_PREFIX_P (i0))
558 charset = CHARSET_BY_LEADING_BYTE (i0);
559 if (XCHARSET_DIMENSION (charset) == 2)
562 return MAKE_CHAR (charset, i1, i2);
566 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
567 Do not call this directly. Use the macro valid_char_p() instead. */
571 non_ascii_valid_char_p (Emchar ch)
575 /* Must have only lowest 19 bits set */
579 f1 = CHAR_FIELD1 (ch);
580 f2 = CHAR_FIELD2 (ch);
581 f3 = CHAR_FIELD3 (ch);
587 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
588 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
589 f2 > MAX_CHAR_FIELD2_PRIVATE)
594 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
595 f2 <= MAX_CHAR_FIELD2_PRIVATE))
599 NOTE: This takes advantage of the fact that
600 FIELD2_TO_OFFICIAL_LEADING_BYTE and
601 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
603 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
604 if (EQ (charset, Qnil))
606 return (XCHARSET_CHARS (charset) == 96);
612 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
613 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
614 f1 > MAX_CHAR_FIELD1_PRIVATE)
616 if (f2 < 0x20 || f3 < 0x20)
619 #ifdef ENABLE_COMPOSITE_CHARS
620 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
622 if (UNBOUNDP (Fgethash (make_int (ch),
623 Vcomposite_char_char2string_hash_table,
628 #endif /* ENABLE_COMPOSITE_CHARS */
630 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
631 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
634 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
636 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
639 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
641 if (EQ (charset, Qnil))
643 return (XCHARSET_CHARS (charset) == 96);
649 /************************************************************************/
650 /* Basic string functions */
651 /************************************************************************/
653 /* Copy the character pointed to by SRC into DST. Do not call this
654 directly. Use the macro charptr_copy_char() instead.
655 Return the number of bytes copied. */
658 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
660 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
662 for (i = bytes; i; i--, dst++, src++)
668 /************************************************************************/
669 /* streams of Emchars */
670 /************************************************************************/
672 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
673 The functions below are not meant to be called directly; use
674 the macros in insdel.h. */
677 Lstream_get_emchar_1 (Lstream *stream, int ch)
679 Bufbyte str[MAX_EMCHAR_LEN];
680 Bufbyte *strptr = str;
683 str[0] = (Bufbyte) ch;
685 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
687 int c = Lstream_getc (stream);
688 bufpos_checking_assert (c >= 0);
689 *++strptr = (Bufbyte) c;
691 return charptr_emchar (str);
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
716 mark_charset (Lisp_Object obj)
718 Lisp_Charset *cs = XCHARSET (obj);
720 mark_object (cs->short_name);
721 mark_object (cs->long_name);
722 mark_object (cs->doc_string);
723 mark_object (cs->registry);
724 mark_object (cs->ccl_program);
726 mark_object (cs->decoding_table);
727 mark_object (cs->mother);
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 Lisp_Charset *cs = XCHARSET (obj);
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
751 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
753 CHARSET_DIMENSION (cs),
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
779 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
780 mark_charset, print_charset, 0, 0, 0,
784 /* Make a new charset. */
785 /* #### SJT Should generic properties be allowed? */
787 make_charset (Charset_ID id, Lisp_Object name,
788 unsigned short chars, unsigned char dimension,
789 unsigned char columns, unsigned char graphic,
790 Bufbyte final, unsigned char direction, Lisp_Object short_name,
791 Lisp_Object long_name, Lisp_Object doc,
793 Lisp_Object decoding_table,
794 Emchar min_code, Emchar max_code,
795 Emchar code_offset, unsigned char byte_offset,
796 Lisp_Object mother, unsigned char conversion)
799 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
803 XSETCHARSET (obj, cs);
805 CHARSET_ID (cs) = id;
806 CHARSET_NAME (cs) = name;
807 CHARSET_SHORT_NAME (cs) = short_name;
808 CHARSET_LONG_NAME (cs) = long_name;
809 CHARSET_CHARS (cs) = chars;
810 CHARSET_DIMENSION (cs) = dimension;
811 CHARSET_DIRECTION (cs) = direction;
812 CHARSET_COLUMNS (cs) = columns;
813 CHARSET_GRAPHIC (cs) = graphic;
814 CHARSET_FINAL (cs) = final;
815 CHARSET_DOC_STRING (cs) = doc;
816 CHARSET_REGISTRY (cs) = reg;
817 CHARSET_CCL_PROGRAM (cs) = Qnil;
818 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
820 CHARSET_DECODING_TABLE(cs) = Qunbound;
821 CHARSET_MIN_CODE (cs) = min_code;
822 CHARSET_MAX_CODE (cs) = max_code;
823 CHARSET_CODE_OFFSET (cs) = code_offset;
824 CHARSET_BYTE_OFFSET (cs) = byte_offset;
825 CHARSET_MOTHER (cs) = mother;
826 CHARSET_CONVERSION (cs) = conversion;
830 if (id == LEADING_BYTE_ASCII)
831 CHARSET_REP_BYTES (cs) = 1;
833 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
840 /* some charsets do not have final characters. This includes
841 ASCII, Control-1, Composite, and the two faux private
843 unsigned char iso2022_type
844 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
846 if (code_offset == 0)
848 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
849 chlook->charset_by_attributes[iso2022_type][final] = obj;
853 (chlook->charset_by_attributes[iso2022_type][final][direction]));
854 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
858 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
859 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
861 /* Some charsets are "faux" and don't have names or really exist at
862 all except in the leading-byte table. */
864 Fputhash (name, obj, Vcharset_hash_table);
869 get_unallocated_leading_byte (int dimension)
874 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
877 lb = chlook->next_allocated_leading_byte++;
881 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
884 lb = chlook->next_allocated_1_byte_leading_byte++;
888 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
891 lb = chlook->next_allocated_2_byte_leading_byte++;
897 ("No more character sets free for this dimension",
898 make_int (dimension));
904 /* Number of Big5 characters which have the same code in 1st byte. */
906 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
909 decode_builtin_char (Lisp_Object charset, int code_point)
911 Lisp_Object mother = XCHARSET_MOTHER (charset);
914 if ( CHARSETP (mother) )
916 int code = code_point;
918 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
920 int row = code_point >> 8;
921 int cell = code_point & 255;
925 else if (row < 16 + 32 + 30)
926 code = (row - (16 + 32)) * 94 + cell - 33;
927 else if (row < 18 + 32 + 30)
929 else if (row < 18 + 32 + 60)
930 code = (row - (18 + 32)) * 94 + cell - 33;
932 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
934 int plane = code_point >> 16;
935 int row = (code_point >> 8) & 255;
936 int cell = code_point & 255;
940 else if (row < 16 + 32 + 30)
942 = (plane - 33) * 94 * 60
943 + (row - (16 + 32)) * 94
945 else if (row < 18 + 32 + 30)
947 else if (row < 18 + 32 + 60)
949 = (plane - 33) * 94 * 60
950 + (row - (18 + 32)) * 94
954 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
957 else if (EQ (charset, Vcharset_chinese_big5))
959 int c1 = code_point >> 8;
960 int c2 = code_point & 0xFF;
963 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
965 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
966 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
968 I = (c1 - 0xA1) * BIG5_SAME_ROW
969 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
973 charset = Vcharset_chinese_big5_1;
977 charset = Vcharset_chinese_big5_2;
978 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
980 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
984 if ((final = XCHARSET_FINAL (charset)) >= '0')
986 if (XCHARSET_DIMENSION (charset) == 1)
988 switch (XCHARSET_CHARS (charset))
992 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
995 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1003 switch (XCHARSET_CHARS (charset))
1006 return MIN_CHAR_94x94
1007 + (final - '0') * 94 * 94
1008 + (((code_point >> 8) & 0x7F) - 33) * 94
1009 + ((code_point & 0x7F) - 33);
1011 return MIN_CHAR_96x96
1012 + (final - '0') * 96 * 96
1013 + (((code_point >> 8) & 0x7F) - 32) * 96
1014 + ((code_point & 0x7F) - 32);
1021 else if (XCHARSET_MAX_CODE (charset))
1024 = (XCHARSET_DIMENSION (charset) == 1
1026 code_point - XCHARSET_BYTE_OFFSET (charset)
1028 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1029 * XCHARSET_CHARS (charset)
1030 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1031 + XCHARSET_CODE_OFFSET (charset);
1032 if ((cid < XCHARSET_MIN_CODE (charset))
1033 || (XCHARSET_MAX_CODE (charset) < cid))
1042 charset_code_point (Lisp_Object charset, Emchar ch)
1044 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1047 if ( CHAR_TABLEP (encoding_table)
1048 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1053 Lisp_Object mother = XCHARSET_MOTHER (charset);
1054 int min = XCHARSET_MIN_CODE (charset);
1055 int max = XCHARSET_MAX_CODE (charset);
1058 if ( CHARSETP (mother) )
1059 code = charset_code_point (mother, ch);
1062 if ( (min <= code) && (code <= max) )
1064 int d = code - XCHARSET_CODE_OFFSET (charset);
1066 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1069 int cell = d % 94 + 33;
1075 return (row << 8) | cell;
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1079 int plane = d / (94 * 60) + 33;
1080 int row = (d % (94 * 60)) / 94;
1081 int cell = d % 94 + 33;
1087 return (plane << 16) | (row << 8) | cell;
1089 else if (XCHARSET_CHARS (charset) == 94)
1091 if (XCHARSET_DIMENSION (charset) == 1)
1093 else if (XCHARSET_DIMENSION (charset) == 2)
1094 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1095 else if (XCHARSET_DIMENSION (charset) == 3)
1097 ( (d / (94 * 94) + 33) << 16)
1098 | ((d / 94 % 94 + 33) << 8)
1100 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1102 ( (d / (94 * 94 * 94) + 33) << 24)
1103 | ((d / (94 * 94) % 94 + 33) << 16)
1104 | ((d / 94 % 94 + 33) << 8)
1107 else if (XCHARSET_CHARS (charset) == 96)
1109 if (XCHARSET_DIMENSION (charset) == 1)
1111 else if (XCHARSET_DIMENSION (charset) == 2)
1112 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1113 else if (XCHARSET_DIMENSION (charset) == 3)
1115 ( (d / (96 * 96) + 32) << 16)
1116 | ((d / 96 % 96 + 32) << 8)
1118 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1120 ( (d / (96 * 96 * 96) + 32) << 24)
1121 | ((d / (96 * 96) % 96 + 32) << 16)
1122 | ((d / 96 % 96 + 32) << 8)
1126 return code - XCHARSET_CODE_OFFSET (charset);
1128 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1129 (XCHARSET_CODE_OFFSET (charset)
1130 == XCHARSET_MIN_CODE (charset)) )
1134 if (XCHARSET_DIMENSION (charset) == 1)
1136 if (XCHARSET_CHARS (charset) == 94)
1138 if (((d = ch - (MIN_CHAR_94
1139 + (XCHARSET_FINAL (charset) - '0') * 94))
1144 else if (XCHARSET_CHARS (charset) == 96)
1146 if (((d = ch - (MIN_CHAR_96
1147 + (XCHARSET_FINAL (charset) - '0') * 96))
1155 else if (XCHARSET_DIMENSION (charset) == 2)
1157 if (XCHARSET_CHARS (charset) == 94)
1159 if (((d = ch - (MIN_CHAR_94x94
1161 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1164 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1166 else if (XCHARSET_CHARS (charset) == 96)
1168 if (((d = ch - (MIN_CHAR_96x96
1170 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1173 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1184 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1186 if (c <= MAX_CHAR_BASIC_LATIN)
1188 *charset = Vcharset_ascii;
1193 *charset = Vcharset_control_1;
1198 *charset = Vcharset_latin_iso8859_1;
1202 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1204 *charset = Vcharset_hebrew_iso8859_8;
1205 return c - MIN_CHAR_HEBREW + 0x20;
1208 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1210 *charset = Vcharset_thai_tis620;
1211 return c - MIN_CHAR_THAI + 0x20;
1214 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1215 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1217 return list2 (Vcharset_katakana_jisx0201,
1218 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1221 else if (c <= MAX_CHAR_BMP)
1223 *charset = Vcharset_ucs_bmp;
1226 else if (c <= MAX_CHAR_SMP)
1228 *charset = Vcharset_ucs_smp;
1229 return c - MIN_CHAR_SMP;
1231 else if (c <= MAX_CHAR_SIP)
1233 *charset = Vcharset_ucs_sip;
1234 return c - MIN_CHAR_SIP;
1236 else if (c < MIN_CHAR_DAIKANWA)
1238 *charset = Vcharset_ucs;
1241 else if (c <= MAX_CHAR_DAIKANWA)
1243 *charset = Vcharset_ideograph_daikanwa;
1244 return c - MIN_CHAR_DAIKANWA;
1246 else if (c < MIN_CHAR_94)
1248 *charset = Vcharset_ucs;
1251 else if (c <= MAX_CHAR_94)
1253 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1254 ((c - MIN_CHAR_94) / 94) + '0',
1255 CHARSET_LEFT_TO_RIGHT);
1256 if (!NILP (*charset))
1257 return ((c - MIN_CHAR_94) % 94) + 33;
1260 *charset = Vcharset_ucs;
1264 else if (c <= MAX_CHAR_96)
1266 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1267 ((c - MIN_CHAR_96) / 96) + '0',
1268 CHARSET_LEFT_TO_RIGHT);
1269 if (!NILP (*charset))
1270 return ((c - MIN_CHAR_96) % 96) + 32;
1273 *charset = Vcharset_ucs;
1277 else if (c <= MAX_CHAR_94x94)
1280 = CHARSET_BY_ATTRIBUTES (94, 2,
1281 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1282 CHARSET_LEFT_TO_RIGHT);
1283 if (!NILP (*charset))
1284 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1285 | (((c - MIN_CHAR_94x94) % 94) + 33);
1288 *charset = Vcharset_ucs;
1292 else if (c <= MAX_CHAR_96x96)
1295 = CHARSET_BY_ATTRIBUTES (96, 2,
1296 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1297 CHARSET_LEFT_TO_RIGHT);
1298 if (!NILP (*charset))
1299 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1300 | (((c - MIN_CHAR_96x96) % 96) + 32);
1303 *charset = Vcharset_ucs;
1309 *charset = Vcharset_ucs;
1314 Lisp_Object Vdefault_coded_charset_priority_list;
1318 /************************************************************************/
1319 /* Basic charset Lisp functions */
1320 /************************************************************************/
1322 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1323 Return non-nil if OBJECT is a charset.
1327 return CHARSETP (object) ? Qt : Qnil;
1330 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1331 Retrieve the charset of the given name.
1332 If CHARSET-OR-NAME is a charset object, it is simply returned.
1333 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1334 nil is returned. Otherwise the associated charset object is returned.
1338 if (CHARSETP (charset_or_name))
1339 return charset_or_name;
1341 CHECK_SYMBOL (charset_or_name);
1342 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1345 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1346 Retrieve the charset of the given name.
1347 Same as `find-charset' except an error is signalled if there is no such
1348 charset instead of returning nil.
1352 Lisp_Object charset = Ffind_charset (name);
1355 signal_simple_error ("No such charset", name);
1359 /* We store the charsets in hash tables with the names as the key and the
1360 actual charset object as the value. Occasionally we need to use them
1361 in a list format. These routines provide us with that. */
1362 struct charset_list_closure
1364 Lisp_Object *charset_list;
1368 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1369 void *charset_list_closure)
1371 /* This function can GC */
1372 struct charset_list_closure *chcl =
1373 (struct charset_list_closure*) charset_list_closure;
1374 Lisp_Object *charset_list = chcl->charset_list;
1376 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1380 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1381 Return a list of the names of all defined charsets.
1385 Lisp_Object charset_list = Qnil;
1386 struct gcpro gcpro1;
1387 struct charset_list_closure charset_list_closure;
1389 GCPRO1 (charset_list);
1390 charset_list_closure.charset_list = &charset_list;
1391 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1392 &charset_list_closure);
1395 return charset_list;
1398 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1399 Return the name of charset CHARSET.
1403 return XCHARSET_NAME (Fget_charset (charset));
1406 /* #### SJT Should generic properties be allowed? */
1407 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1408 Define a new character set.
1409 This function is for use with Mule support.
1410 NAME is a symbol, the name by which the character set is normally referred.
1411 DOC-STRING is a string describing the character set.
1412 PROPS is a property list, describing the specific nature of the
1413 character set. Recognized properties are:
1415 'short-name Short version of the charset name (ex: Latin-1)
1416 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1417 'registry A regular expression matching the font registry field for
1419 'dimension Number of octets used to index a character in this charset.
1420 Either 1 or 2. Defaults to 1.
1421 If UTF-2000 feature is enabled, 3 or 4 are also available.
1422 'columns Number of columns used to display a character in this charset.
1423 Only used in TTY mode. (Under X, the actual width of a
1424 character can be derived from the font used to display the
1425 characters.) If unspecified, defaults to the dimension
1426 (this is almost always the correct value).
1427 'chars Number of characters in each dimension (94 or 96).
1428 Defaults to 94. Note that if the dimension is 2, the
1429 character set thus described is 94x94 or 96x96.
1430 If UTF-2000 feature is enabled, 128 or 256 are also available.
1431 'final Final byte of ISO 2022 escape sequence. Must be
1432 supplied. Each combination of (DIMENSION, CHARS) defines a
1433 separate namespace for final bytes. Note that ISO
1434 2022 restricts the final byte to the range
1435 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1436 dimension == 2. Note also that final bytes in the range
1437 0x30 - 0x3F are reserved for user-defined (not official)
1439 'graphic 0 (use left half of font on output) or 1 (use right half
1440 of font on output). Defaults to 0. For example, for
1441 a font whose registry is ISO8859-1, the left half
1442 (octets 0x20 - 0x7F) is the `ascii' character set, while
1443 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1444 character set. With 'graphic set to 0, the octets
1445 will have their high bit cleared; with it set to 1,
1446 the octets will have their high bit set.
1447 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1449 'ccl-program A compiled CCL program used to convert a character in
1450 this charset into an index into the font. This is in
1451 addition to the 'graphic property. The CCL program
1452 is passed the octets of the character, with the high
1453 bit cleared and set depending upon whether the value
1454 of the 'graphic property is 0 or 1.
1455 'mother [UTF-2000 only] Base coded-charset.
1456 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1457 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1458 'code-offset [UTF-2000 only] Offset for a code-point of a base
1460 'conversion [UTF-2000 only] Conversion for a code-point of a base
1461 coded-charset (94x60 or 94x94x60).
1463 (name, doc_string, props))
1465 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1466 int direction = CHARSET_LEFT_TO_RIGHT;
1467 Lisp_Object registry = Qnil;
1468 Lisp_Object charset;
1469 Lisp_Object ccl_program = Qnil;
1470 Lisp_Object short_name = Qnil, long_name = Qnil;
1471 Lisp_Object mother = Qnil;
1472 int min_code = 0, max_code = 0, code_offset = 0;
1473 int byte_offset = -1;
1476 CHECK_SYMBOL (name);
1477 if (!NILP (doc_string))
1478 CHECK_STRING (doc_string);
1480 charset = Ffind_charset (name);
1481 if (!NILP (charset))
1482 signal_simple_error ("Cannot redefine existing charset", name);
1485 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1487 if (EQ (keyword, Qshort_name))
1489 CHECK_STRING (value);
1493 if (EQ (keyword, Qlong_name))
1495 CHECK_STRING (value);
1499 else if (EQ (keyword, Qdimension))
1502 dimension = XINT (value);
1503 if (dimension < 1 ||
1510 signal_simple_error ("Invalid value for 'dimension", value);
1513 else if (EQ (keyword, Qchars))
1516 chars = XINT (value);
1517 if (chars != 94 && chars != 96
1519 && chars != 128 && chars != 256
1522 signal_simple_error ("Invalid value for 'chars", value);
1525 else if (EQ (keyword, Qcolumns))
1528 columns = XINT (value);
1529 if (columns != 1 && columns != 2)
1530 signal_simple_error ("Invalid value for 'columns", value);
1533 else if (EQ (keyword, Qgraphic))
1536 graphic = XINT (value);
1544 signal_simple_error ("Invalid value for 'graphic", value);
1547 else if (EQ (keyword, Qregistry))
1549 CHECK_STRING (value);
1553 else if (EQ (keyword, Qdirection))
1555 if (EQ (value, Ql2r))
1556 direction = CHARSET_LEFT_TO_RIGHT;
1557 else if (EQ (value, Qr2l))
1558 direction = CHARSET_RIGHT_TO_LEFT;
1560 signal_simple_error ("Invalid value for 'direction", value);
1563 else if (EQ (keyword, Qfinal))
1565 CHECK_CHAR_COERCE_INT (value);
1566 final = XCHAR (value);
1567 if (final < '0' || final > '~')
1568 signal_simple_error ("Invalid value for 'final", value);
1572 else if (EQ (keyword, Qmother))
1574 mother = Fget_charset (value);
1577 else if (EQ (keyword, Qmin_code))
1580 min_code = XUINT (value);
1583 else if (EQ (keyword, Qmax_code))
1586 max_code = XUINT (value);
1589 else if (EQ (keyword, Qcode_offset))
1592 code_offset = XUINT (value);
1595 else if (EQ (keyword, Qconversion))
1597 if (EQ (value, Q94x60))
1598 conversion = CONVERSION_94x60;
1599 else if (EQ (value, Q94x94x60))
1600 conversion = CONVERSION_94x94x60;
1602 signal_simple_error ("Unrecognized conversion", value);
1606 else if (EQ (keyword, Qccl_program))
1608 struct ccl_program test_ccl;
1610 if (setup_ccl_program (&test_ccl, value) < 0)
1611 signal_simple_error ("Invalid value for 'ccl-program", value);
1612 ccl_program = value;
1616 signal_simple_error ("Unrecognized property", keyword);
1622 error ("'final must be specified");
1624 if (dimension == 2 && final > 0x5F)
1626 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1629 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1630 CHARSET_LEFT_TO_RIGHT)) ||
1631 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1632 CHARSET_RIGHT_TO_LEFT)))
1634 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1636 id = get_unallocated_leading_byte (dimension);
1638 if (NILP (doc_string))
1639 doc_string = build_string ("");
1641 if (NILP (registry))
1642 registry = build_string ("");
1644 if (NILP (short_name))
1645 XSETSTRING (short_name, XSYMBOL (name)->name);
1647 if (NILP (long_name))
1648 long_name = doc_string;
1651 columns = dimension;
1653 if (byte_offset < 0)
1657 else if (chars == 96)
1663 charset = make_charset (id, name, chars, dimension, columns, graphic,
1664 final, direction, short_name, long_name,
1665 doc_string, registry,
1666 Qnil, min_code, max_code, code_offset, byte_offset,
1667 mother, conversion);
1668 if (!NILP (ccl_program))
1669 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1673 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1675 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1676 NEW-NAME is the name of the new charset. Return the new charset.
1678 (charset, new_name))
1680 Lisp_Object new_charset = Qnil;
1681 int id, chars, dimension, columns, graphic, final;
1683 Lisp_Object registry, doc_string, short_name, long_name;
1686 charset = Fget_charset (charset);
1687 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1688 signal_simple_error ("Charset already has reverse-direction charset",
1691 CHECK_SYMBOL (new_name);
1692 if (!NILP (Ffind_charset (new_name)))
1693 signal_simple_error ("Cannot redefine existing charset", new_name);
1695 cs = XCHARSET (charset);
1697 chars = CHARSET_CHARS (cs);
1698 dimension = CHARSET_DIMENSION (cs);
1699 columns = CHARSET_COLUMNS (cs);
1700 id = get_unallocated_leading_byte (dimension);
1702 graphic = CHARSET_GRAPHIC (cs);
1703 final = CHARSET_FINAL (cs);
1704 direction = CHARSET_RIGHT_TO_LEFT;
1705 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1706 direction = CHARSET_LEFT_TO_RIGHT;
1707 doc_string = CHARSET_DOC_STRING (cs);
1708 short_name = CHARSET_SHORT_NAME (cs);
1709 long_name = CHARSET_LONG_NAME (cs);
1710 registry = CHARSET_REGISTRY (cs);
1712 new_charset = make_charset (id, new_name, chars, dimension, columns,
1713 graphic, final, direction, short_name, long_name,
1714 doc_string, registry,
1716 CHARSET_DECODING_TABLE(cs),
1717 CHARSET_MIN_CODE(cs),
1718 CHARSET_MAX_CODE(cs),
1719 CHARSET_CODE_OFFSET(cs),
1720 CHARSET_BYTE_OFFSET(cs),
1722 CHARSET_CONVERSION (cs)
1724 Qnil, 0, 0, 0, 0, Qnil, 0
1728 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1729 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1734 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1735 Define symbol ALIAS as an alias for CHARSET.
1739 CHECK_SYMBOL (alias);
1740 charset = Fget_charset (charset);
1741 return Fputhash (alias, charset, Vcharset_hash_table);
1744 /* #### Reverse direction charsets not yet implemented. */
1746 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1748 Return the reverse-direction charset parallel to CHARSET, if any.
1749 This is the charset with the same properties (in particular, the same
1750 dimension, number of characters per dimension, and final byte) as
1751 CHARSET but whose characters are displayed in the opposite direction.
1755 charset = Fget_charset (charset);
1756 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1760 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1761 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1762 If DIRECTION is omitted, both directions will be checked (left-to-right
1763 will be returned if character sets exist for both directions).
1765 (dimension, chars, final, direction))
1767 int dm, ch, fi, di = -1;
1768 Lisp_Object obj = Qnil;
1770 CHECK_INT (dimension);
1771 dm = XINT (dimension);
1772 if (dm < 1 || dm > 2)
1773 signal_simple_error ("Invalid value for DIMENSION", dimension);
1777 if (ch != 94 && ch != 96)
1778 signal_simple_error ("Invalid value for CHARS", chars);
1780 CHECK_CHAR_COERCE_INT (final);
1782 if (fi < '0' || fi > '~')
1783 signal_simple_error ("Invalid value for FINAL", final);
1785 if (EQ (direction, Ql2r))
1786 di = CHARSET_LEFT_TO_RIGHT;
1787 else if (EQ (direction, Qr2l))
1788 di = CHARSET_RIGHT_TO_LEFT;
1789 else if (!NILP (direction))
1790 signal_simple_error ("Invalid value for DIRECTION", direction);
1792 if (dm == 2 && fi > 0x5F)
1794 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1798 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1800 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1803 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1806 return XCHARSET_NAME (obj);
1810 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1811 Return short name of CHARSET.
1815 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1818 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1819 Return long name of CHARSET.
1823 return XCHARSET_LONG_NAME (Fget_charset (charset));
1826 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1827 Return description of CHARSET.
1831 return XCHARSET_DOC_STRING (Fget_charset (charset));
1834 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1835 Return dimension of CHARSET.
1839 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1842 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1843 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1844 Recognized properties are those listed in `make-charset', as well as
1845 'name and 'doc-string.
1851 charset = Fget_charset (charset);
1852 cs = XCHARSET (charset);
1854 CHECK_SYMBOL (prop);
1855 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1856 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1857 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1858 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1859 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1860 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1861 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1862 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1863 Qnil : make_char (CHARSET_FINAL (cs));
1864 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1865 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1866 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1867 if (EQ (prop, Qdirection))
1868 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1869 if (EQ (prop, Qreverse_direction_charset))
1871 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1872 /* #### Is this translation OK? If so, error checking sufficient? */
1873 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1876 if (EQ (prop, Qmother))
1877 return CHARSET_MOTHER (cs);
1878 if (EQ (prop, Qmin_code))
1879 return make_int (CHARSET_MIN_CODE (cs));
1880 if (EQ (prop, Qmax_code))
1881 return make_int (CHARSET_MAX_CODE (cs));
1883 signal_simple_error ("Unrecognized charset property name", prop);
1884 return Qnil; /* not reached */
1887 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1888 Return charset identification number of CHARSET.
1892 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1895 /* #### We need to figure out which properties we really want to
1898 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1899 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1901 (charset, ccl_program))
1903 struct ccl_program test_ccl;
1905 charset = Fget_charset (charset);
1906 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1907 signal_simple_error ("Invalid ccl-program", ccl_program);
1908 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1913 invalidate_charset_font_caches (Lisp_Object charset)
1915 /* Invalidate font cache entries for charset on all devices. */
1916 Lisp_Object devcons, concons, hash_table;
1917 DEVICE_LOOP_NO_BREAK (devcons, concons)
1919 struct device *d = XDEVICE (XCAR (devcons));
1920 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1921 if (!UNBOUNDP (hash_table))
1922 Fclrhash (hash_table);
1926 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1927 Set the 'registry property of CHARSET to REGISTRY.
1929 (charset, registry))
1931 charset = Fget_charset (charset);
1932 CHECK_STRING (registry);
1933 XCHARSET_REGISTRY (charset) = registry;
1934 invalidate_charset_font_caches (charset);
1935 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1940 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1941 Return mapping-table of CHARSET.
1945 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1948 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1949 Set mapping-table of CHARSET to TABLE.
1953 struct Lisp_Charset *cs;
1957 charset = Fget_charset (charset);
1958 cs = XCHARSET (charset);
1962 CHARSET_DECODING_TABLE(cs) = Qnil;
1965 else if (VECTORP (table))
1967 int ccs_len = CHARSET_BYTE_SIZE (cs);
1968 int ret = decoding_table_check_elements (table,
1969 CHARSET_DIMENSION (cs),
1974 signal_simple_error ("Too big table", table);
1976 signal_simple_error ("Invalid element is found", table);
1978 signal_simple_error ("Something wrong", table);
1980 CHARSET_DECODING_TABLE(cs) = Qnil;
1983 signal_error (Qwrong_type_argument,
1984 list2 (build_translated_string ("vector-or-nil-p"),
1987 byte_offset = CHARSET_BYTE_OFFSET (cs);
1988 switch (CHARSET_DIMENSION (cs))
1991 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1993 Lisp_Object c = XVECTOR_DATA(table)[i];
1996 Fput_char_attribute (c, XCHARSET_NAME (charset),
1997 make_int (i + byte_offset));
2001 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2003 Lisp_Object v = XVECTOR_DATA(table)[i];
2009 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2011 Lisp_Object c = XVECTOR_DATA(v)[j];
2015 (c, XCHARSET_NAME (charset),
2016 make_int ( ( (i + byte_offset) << 8 )
2022 Fput_char_attribute (v, XCHARSET_NAME (charset),
2023 make_int (i + byte_offset));
2030 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2031 Save mapping-table of CHARSET.
2035 #ifdef HAVE_DATABASE
2036 struct Lisp_Charset *cs;
2037 int byte_min, byte_max;
2039 Lisp_Object db_file;
2041 charset = Fget_charset (charset);
2042 cs = XCHARSET (charset);
2044 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2045 Qsystem_char_id, 1);
2046 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2048 byte_min = CHARSET_BYTE_OFFSET (cs);
2049 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2050 switch (CHARSET_DIMENSION (cs))
2054 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2057 for (cell = byte_min; cell < byte_max; cell++)
2059 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2062 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2063 Fprin1_to_string (c, Qnil),
2070 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2073 for (row = byte_min; row < byte_max; row++)
2075 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2078 for (cell = byte_min; cell < byte_max; cell++)
2080 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2083 Fput_database (Fprin1_to_string (make_int ((row << 8)
2086 Fprin1_to_string (c, Qnil),
2094 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2097 for (plane = byte_min; plane < byte_max; plane++)
2100 = get_ccs_octet_table (table_p, charset, plane);
2103 for (row = byte_min; row < byte_max; row++)
2106 = get_ccs_octet_table (table_r, charset, row);
2109 for (cell = byte_min; cell < byte_max; cell++)
2111 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2115 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2119 Fprin1_to_string (c, Qnil),
2128 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2131 for (group = byte_min; group < byte_max; group++)
2134 = get_ccs_octet_table (table_g, charset, group);
2137 for (plane = byte_min; plane < byte_max; plane++)
2140 = get_ccs_octet_table (table_p, charset, plane);
2143 for (row = byte_min; row < byte_max; row++)
2146 = get_ccs_octet_table (table_r, charset, row);
2149 for (cell = byte_min; cell < byte_max; cell++)
2152 = get_ccs_octet_table (table_c, charset, cell);
2155 Fput_database (Fprin1_to_string
2156 (make_int (( group << 24)
2161 Fprin1_to_string (c, Qnil),
2169 return Fclose_database (db);
2175 #ifdef HAVE_CHISE_CLIENT
2177 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2181 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2184 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2188 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2195 decoding_table_put_char (ccs, code_point, ret);
2196 Fclose_database (db);
2200 Fclose_database (db);
2204 #endif /* HAVE_CHISE_CLIENT */
2205 #endif /* UTF2000 */
2208 /************************************************************************/
2209 /* Lisp primitives for working with characters */
2210 /************************************************************************/
2213 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2214 Make a character from CHARSET and code-point CODE.
2215 If DEFINED_ONLY is non-nil, builtin character is not returned.
2216 If corresponding character is not found, nil is returned.
2218 (charset, code, defined_only))
2222 charset = Fget_charset (charset);
2225 if (XCHARSET_GRAPHIC (charset) == 1)
2227 if (NILP (defined_only))
2228 c = DECODE_CHAR (charset, c);
2230 c = DECODE_DEFINED_CHAR (charset, c);
2231 return c >= 0 ? make_char (c) : Qnil;
2234 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2235 Make a builtin character from CHARSET and code-point CODE.
2241 charset = Fget_charset (charset);
2243 if (EQ (charset, Vcharset_latin_viscii))
2245 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2251 (ret = Fget_char_attribute (chr,
2252 Vcharset_latin_viscii_lower,
2255 charset = Vcharset_latin_viscii_lower;
2259 (ret = Fget_char_attribute (chr,
2260 Vcharset_latin_viscii_upper,
2263 charset = Vcharset_latin_viscii_upper;
2270 if (XCHARSET_GRAPHIC (charset) == 1)
2273 c = decode_builtin_char (charset, c);
2274 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2278 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2279 Make a character from CHARSET and octets ARG1 and ARG2.
2280 ARG2 is required only for characters from two-dimensional charsets.
2281 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2282 character s with caron.
2284 (charset, arg1, arg2))
2288 int lowlim, highlim;
2290 charset = Fget_charset (charset);
2291 cs = XCHARSET (charset);
2293 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2294 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2296 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2298 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2299 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2302 /* It is useful (and safe, according to Olivier Galibert) to strip
2303 the 8th bit off ARG1 and ARG2 because it allows programmers to
2304 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2305 Latin 2 code of the character. */
2313 if (a1 < lowlim || a1 > highlim)
2314 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2316 if (CHARSET_DIMENSION (cs) == 1)
2320 ("Charset is of dimension one; second octet must be nil", arg2);
2321 return make_char (MAKE_CHAR (charset, a1, 0));
2330 a2 = XINT (arg2) & 0x7f;
2332 if (a2 < lowlim || a2 > highlim)
2333 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2335 return make_char (MAKE_CHAR (charset, a1, a2));
2338 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2339 Return the character set of CHARACTER.
2343 CHECK_CHAR_COERCE_INT (character);
2345 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2348 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2349 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2350 N defaults to 0 if omitted.
2354 Lisp_Object charset;
2357 CHECK_CHAR_COERCE_INT (character);
2359 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2361 if (NILP (n) || EQ (n, Qzero))
2362 return make_int (octet0);
2363 else if (EQ (n, make_int (1)))
2364 return make_int (octet1);
2366 signal_simple_error ("Octet number must be 0 or 1", n);
2370 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2371 Return code-point of CHARACTER in specified CHARSET.
2373 (character, charset))
2377 CHECK_CHAR_COERCE_INT (character);
2378 charset = Fget_charset (charset);
2379 code_point = charset_code_point (charset, XCHAR (character));
2380 if (code_point >= 0)
2381 return make_int (code_point);
2387 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2388 Return list of charset and one or two position-codes of CHARACTER.
2392 /* This function can GC */
2393 struct gcpro gcpro1, gcpro2;
2394 Lisp_Object charset = Qnil;
2395 Lisp_Object rc = Qnil;
2403 GCPRO2 (charset, rc);
2404 CHECK_CHAR_COERCE_INT (character);
2407 code_point = ENCODE_CHAR (XCHAR (character), charset);
2408 dimension = XCHARSET_DIMENSION (charset);
2409 while (dimension > 0)
2411 rc = Fcons (make_int (code_point & 255), rc);
2415 rc = Fcons (XCHARSET_NAME (charset), rc);
2417 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2419 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2421 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2425 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2434 #ifdef ENABLE_COMPOSITE_CHARS
2435 /************************************************************************/
2436 /* composite character functions */
2437 /************************************************************************/
2440 lookup_composite_char (Bufbyte *str, int len)
2442 Lisp_Object lispstr = make_string (str, len);
2443 Lisp_Object ch = Fgethash (lispstr,
2444 Vcomposite_char_string2char_hash_table,
2450 if (composite_char_row_next >= 128)
2451 signal_simple_error ("No more composite chars available", lispstr);
2452 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2453 composite_char_col_next);
2454 Fputhash (make_char (emch), lispstr,
2455 Vcomposite_char_char2string_hash_table);
2456 Fputhash (lispstr, make_char (emch),
2457 Vcomposite_char_string2char_hash_table);
2458 composite_char_col_next++;
2459 if (composite_char_col_next >= 128)
2461 composite_char_col_next = 32;
2462 composite_char_row_next++;
2471 composite_char_string (Emchar ch)
2473 Lisp_Object str = Fgethash (make_char (ch),
2474 Vcomposite_char_char2string_hash_table,
2476 assert (!UNBOUNDP (str));
2480 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2481 Convert a string into a single composite character.
2482 The character is the result of overstriking all the characters in
2487 CHECK_STRING (string);
2488 return make_char (lookup_composite_char (XSTRING_DATA (string),
2489 XSTRING_LENGTH (string)));
2492 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2493 Return a string of the characters comprising a composite character.
2501 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2502 signal_simple_error ("Must be composite char", ch);
2503 return composite_char_string (emch);
2505 #endif /* ENABLE_COMPOSITE_CHARS */
2508 /************************************************************************/
2509 /* initialization */
2510 /************************************************************************/
2513 syms_of_mule_charset (void)
2515 INIT_LRECORD_IMPLEMENTATION (charset);
2517 DEFSUBR (Fcharsetp);
2518 DEFSUBR (Ffind_charset);
2519 DEFSUBR (Fget_charset);
2520 DEFSUBR (Fcharset_list);
2521 DEFSUBR (Fcharset_name);
2522 DEFSUBR (Fmake_charset);
2523 DEFSUBR (Fmake_reverse_direction_charset);
2524 /* DEFSUBR (Freverse_direction_charset); */
2525 DEFSUBR (Fdefine_charset_alias);
2526 DEFSUBR (Fcharset_from_attributes);
2527 DEFSUBR (Fcharset_short_name);
2528 DEFSUBR (Fcharset_long_name);
2529 DEFSUBR (Fcharset_description);
2530 DEFSUBR (Fcharset_dimension);
2531 DEFSUBR (Fcharset_property);
2532 DEFSUBR (Fcharset_id);
2533 DEFSUBR (Fset_charset_ccl_program);
2534 DEFSUBR (Fset_charset_registry);
2536 DEFSUBR (Fcharset_mapping_table);
2537 DEFSUBR (Fset_charset_mapping_table);
2541 DEFSUBR (Fdecode_char);
2542 DEFSUBR (Fdecode_builtin_char);
2543 DEFSUBR (Fencode_char);
2544 DEFSUBR (Fsave_charset_mapping_table);
2546 DEFSUBR (Fmake_char);
2547 DEFSUBR (Fchar_charset);
2548 DEFSUBR (Fchar_octet);
2549 DEFSUBR (Fsplit_char);
2551 #ifdef ENABLE_COMPOSITE_CHARS
2552 DEFSUBR (Fmake_composite_char);
2553 DEFSUBR (Fcomposite_char_string);
2556 defsymbol (&Qcharsetp, "charsetp");
2557 defsymbol (&Qregistry, "registry");
2558 defsymbol (&Qfinal, "final");
2559 defsymbol (&Qgraphic, "graphic");
2560 defsymbol (&Qdirection, "direction");
2561 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2562 defsymbol (&Qshort_name, "short-name");
2563 defsymbol (&Qlong_name, "long-name");
2565 defsymbol (&Qmother, "mother");
2566 defsymbol (&Qmin_code, "min-code");
2567 defsymbol (&Qmax_code, "max-code");
2568 defsymbol (&Qcode_offset, "code-offset");
2569 defsymbol (&Qconversion, "conversion");
2570 defsymbol (&Q94x60, "94x60");
2571 defsymbol (&Q94x94x60, "94x94x60");
2574 defsymbol (&Ql2r, "l2r");
2575 defsymbol (&Qr2l, "r2l");
2577 /* Charsets, compatible with FSF 20.3
2578 Naming convention is Script-Charset[-Edition] */
2579 defsymbol (&Qascii, "ascii");
2580 defsymbol (&Qcontrol_1, "control-1");
2581 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2582 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2583 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2584 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2585 defsymbol (&Qthai_tis620, "thai-tis620");
2586 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2587 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2588 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2589 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2590 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2591 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2592 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2593 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2594 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2595 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2596 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2597 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2598 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2599 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2600 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2601 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2603 defsymbol (&Qucs, "ucs");
2604 defsymbol (&Qucs_bmp, "ucs-bmp");
2605 defsymbol (&Qucs_smp, "ucs-smp");
2606 defsymbol (&Qucs_sip, "ucs-sip");
2607 defsymbol (&Qucs_cns, "ucs-cns");
2608 defsymbol (&Qucs_jis, "ucs-jis");
2609 defsymbol (&Qucs_ks, "ucs-ks");
2610 defsymbol (&Qucs_big5, "ucs-big5");
2611 defsymbol (&Qlatin_viscii, "latin-viscii");
2612 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2613 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2614 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2615 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2616 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2617 defsymbol (&Qideograph_gt, "ideograph-gt");
2618 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2619 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2620 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2621 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2622 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2623 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2624 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2625 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2626 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2627 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2628 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2629 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2630 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2631 defsymbol (&Qchinese_big5, "chinese-big5");
2632 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2633 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2634 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2635 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2636 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2637 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2638 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2639 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2640 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2641 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2642 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2643 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2644 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2645 defsymbol (&Qchina3_jef, "china3-jef");
2646 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2647 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2649 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2650 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2652 defsymbol (&Qcomposite, "composite");
2656 vars_of_mule_charset (void)
2663 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2664 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2666 /* Table of charsets indexed by leading byte. */
2667 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2668 chlook->charset_by_leading_byte[i] = Qnil;
2671 /* Table of charsets indexed by type/final-byte. */
2672 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2673 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2674 chlook->charset_by_attributes[i][j] = Qnil;
2676 /* Table of charsets indexed by type/final-byte/direction. */
2677 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2678 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2679 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2680 chlook->charset_by_attributes[i][j][k] = Qnil;
2684 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2686 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2687 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2691 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2692 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2693 Leading-code of private TYPE9N charset of column-width 1.
2695 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2699 Vdefault_coded_charset_priority_list = Qnil;
2700 DEFVAR_LISP ("default-coded-charset-priority-list",
2701 &Vdefault_coded_charset_priority_list /*
2702 Default order of preferred coded-character-sets.
2708 complex_vars_of_mule_charset (void)
2710 staticpro (&Vcharset_hash_table);
2711 Vcharset_hash_table =
2712 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2714 /* Predefined character sets. We store them into variables for
2718 staticpro (&Vcharset_ucs);
2720 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2721 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2722 build_string ("UCS"),
2723 build_string ("UCS"),
2724 build_string ("ISO/IEC 10646"),
2726 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2727 staticpro (&Vcharset_ucs_bmp);
2729 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2730 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2731 build_string ("BMP"),
2732 build_string ("UCS-BMP"),
2733 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2735 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2736 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2737 staticpro (&Vcharset_ucs_smp);
2739 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2740 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2741 build_string ("SMP"),
2742 build_string ("UCS-SMP"),
2743 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2744 build_string ("UCS00-1"),
2745 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2746 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2747 staticpro (&Vcharset_ucs_sip);
2749 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2750 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2751 build_string ("SIP"),
2752 build_string ("UCS-SIP"),
2753 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2754 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2755 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2756 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2757 staticpro (&Vcharset_ucs_cns);
2759 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2760 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2761 build_string ("UCS for CNS"),
2762 build_string ("UCS for CNS 11643"),
2763 build_string ("ISO/IEC 10646 for CNS 11643"),
2766 Qnil, CONVERSION_IDENTICAL);
2767 staticpro (&Vcharset_ucs_jis);
2769 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2770 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2771 build_string ("UCS for JIS"),
2772 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2773 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2775 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2776 staticpro (&Vcharset_ucs_ks);
2778 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2779 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2780 build_string ("UCS for KS"),
2781 build_string ("UCS for CCS defined by KS"),
2782 build_string ("ISO/IEC 10646 for Korean Standards"),
2784 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2785 staticpro (&Vcharset_ucs_big5);
2787 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2788 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2789 build_string ("UCS for Big5"),
2790 build_string ("UCS for Big5"),
2791 build_string ("ISO/IEC 10646 for Big5"),
2793 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2795 # define MIN_CHAR_THAI 0
2796 # define MAX_CHAR_THAI 0
2797 /* # define MIN_CHAR_HEBREW 0 */
2798 /* # define MAX_CHAR_HEBREW 0 */
2799 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2800 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2802 staticpro (&Vcharset_ascii);
2804 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2805 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2806 build_string ("ASCII"),
2807 build_string ("ASCII)"),
2808 build_string ("ASCII (ISO646 IRV)"),
2809 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2810 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2811 staticpro (&Vcharset_control_1);
2812 Vcharset_control_1 =
2813 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2814 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2815 build_string ("C1"),
2816 build_string ("Control characters"),
2817 build_string ("Control characters 128-191"),
2819 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2820 staticpro (&Vcharset_latin_iso8859_1);
2821 Vcharset_latin_iso8859_1 =
2822 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2823 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2824 build_string ("Latin-1"),
2825 build_string ("ISO8859-1 (Latin-1)"),
2826 build_string ("ISO8859-1 (Latin-1)"),
2827 build_string ("iso8859-1"),
2828 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2829 staticpro (&Vcharset_latin_iso8859_2);
2830 Vcharset_latin_iso8859_2 =
2831 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2832 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2833 build_string ("Latin-2"),
2834 build_string ("ISO8859-2 (Latin-2)"),
2835 build_string ("ISO8859-2 (Latin-2)"),
2836 build_string ("iso8859-2"),
2837 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2838 staticpro (&Vcharset_latin_iso8859_3);
2839 Vcharset_latin_iso8859_3 =
2840 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2841 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2842 build_string ("Latin-3"),
2843 build_string ("ISO8859-3 (Latin-3)"),
2844 build_string ("ISO8859-3 (Latin-3)"),
2845 build_string ("iso8859-3"),
2846 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2847 staticpro (&Vcharset_latin_iso8859_4);
2848 Vcharset_latin_iso8859_4 =
2849 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2850 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2851 build_string ("Latin-4"),
2852 build_string ("ISO8859-4 (Latin-4)"),
2853 build_string ("ISO8859-4 (Latin-4)"),
2854 build_string ("iso8859-4"),
2855 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2856 staticpro (&Vcharset_thai_tis620);
2857 Vcharset_thai_tis620 =
2858 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2859 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2860 build_string ("TIS620"),
2861 build_string ("TIS620 (Thai)"),
2862 build_string ("TIS620.2529 (Thai)"),
2863 build_string ("tis620"),
2864 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2865 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2866 staticpro (&Vcharset_greek_iso8859_7);
2867 Vcharset_greek_iso8859_7 =
2868 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2869 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2870 build_string ("ISO8859-7"),
2871 build_string ("ISO8859-7 (Greek)"),
2872 build_string ("ISO8859-7 (Greek)"),
2873 build_string ("iso8859-7"),
2874 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2875 staticpro (&Vcharset_arabic_iso8859_6);
2876 Vcharset_arabic_iso8859_6 =
2877 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2878 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2879 build_string ("ISO8859-6"),
2880 build_string ("ISO8859-6 (Arabic)"),
2881 build_string ("ISO8859-6 (Arabic)"),
2882 build_string ("iso8859-6"),
2883 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2884 staticpro (&Vcharset_hebrew_iso8859_8);
2885 Vcharset_hebrew_iso8859_8 =
2886 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2887 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2888 build_string ("ISO8859-8"),
2889 build_string ("ISO8859-8 (Hebrew)"),
2890 build_string ("ISO8859-8 (Hebrew)"),
2891 build_string ("iso8859-8"),
2893 0 /* MIN_CHAR_HEBREW */,
2894 0 /* MAX_CHAR_HEBREW */, 0, 32,
2895 Qnil, CONVERSION_IDENTICAL);
2896 staticpro (&Vcharset_katakana_jisx0201);
2897 Vcharset_katakana_jisx0201 =
2898 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2899 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2900 build_string ("JISX0201 Kana"),
2901 build_string ("JISX0201.1976 (Japanese Kana)"),
2902 build_string ("JISX0201.1976 Japanese Kana"),
2903 build_string ("jisx0201\\.1976"),
2904 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2905 staticpro (&Vcharset_latin_jisx0201);
2906 Vcharset_latin_jisx0201 =
2907 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2908 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2909 build_string ("JISX0201 Roman"),
2910 build_string ("JISX0201.1976 (Japanese Roman)"),
2911 build_string ("JISX0201.1976 Japanese Roman"),
2912 build_string ("jisx0201\\.1976"),
2913 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2914 staticpro (&Vcharset_cyrillic_iso8859_5);
2915 Vcharset_cyrillic_iso8859_5 =
2916 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2917 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2918 build_string ("ISO8859-5"),
2919 build_string ("ISO8859-5 (Cyrillic)"),
2920 build_string ("ISO8859-5 (Cyrillic)"),
2921 build_string ("iso8859-5"),
2922 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2923 staticpro (&Vcharset_latin_iso8859_9);
2924 Vcharset_latin_iso8859_9 =
2925 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2926 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2927 build_string ("Latin-5"),
2928 build_string ("ISO8859-9 (Latin-5)"),
2929 build_string ("ISO8859-9 (Latin-5)"),
2930 build_string ("iso8859-9"),
2931 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2932 staticpro (&Vcharset_japanese_jisx0208_1978);
2933 Vcharset_japanese_jisx0208_1978 =
2934 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2935 Qjapanese_jisx0208_1978, 94, 2,
2936 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2937 build_string ("JIS X0208:1978"),
2938 build_string ("JIS X0208:1978 (Japanese)"),
2940 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2941 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2942 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2943 staticpro (&Vcharset_chinese_gb2312);
2944 Vcharset_chinese_gb2312 =
2945 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2946 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2947 build_string ("GB2312"),
2948 build_string ("GB2312)"),
2949 build_string ("GB2312 Chinese simplified"),
2950 build_string ("gb2312"),
2951 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2952 staticpro (&Vcharset_chinese_gb12345);
2953 Vcharset_chinese_gb12345 =
2954 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2955 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2956 build_string ("G1"),
2957 build_string ("GB 12345)"),
2958 build_string ("GB 12345-1990"),
2959 build_string ("GB12345\\(\\.1990\\)?-0"),
2960 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2961 staticpro (&Vcharset_japanese_jisx0208);
2962 Vcharset_japanese_jisx0208 =
2963 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2964 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2965 build_string ("JISX0208"),
2966 build_string ("JIS X0208:1983 (Japanese)"),
2967 build_string ("JIS X0208:1983 Japanese Kanji"),
2968 build_string ("jisx0208\\.1983"),
2969 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2971 staticpro (&Vcharset_japanese_jisx0208_1990);
2972 Vcharset_japanese_jisx0208_1990 =
2973 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2974 Qjapanese_jisx0208_1990, 94, 2,
2975 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2976 build_string ("JISX0208-1990"),
2977 build_string ("JIS X0208:1990 (Japanese)"),
2978 build_string ("JIS X0208:1990 Japanese Kanji"),
2979 build_string ("jisx0208\\.1990"),
2981 MIN_CHAR_JIS_X0208_1990,
2982 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2983 Qnil, CONVERSION_IDENTICAL);
2985 staticpro (&Vcharset_korean_ksc5601);
2986 Vcharset_korean_ksc5601 =
2987 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2988 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2989 build_string ("KSC5601"),
2990 build_string ("KSC5601 (Korean"),
2991 build_string ("KSC5601 Korean Hangul and Hanja"),
2992 build_string ("ksc5601"),
2993 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2994 staticpro (&Vcharset_japanese_jisx0212);
2995 Vcharset_japanese_jisx0212 =
2996 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2997 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2998 build_string ("JISX0212"),
2999 build_string ("JISX0212 (Japanese)"),
3000 build_string ("JISX0212 Japanese Supplement"),
3001 build_string ("jisx0212"),
3002 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3004 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3005 staticpro (&Vcharset_chinese_cns11643_1);
3006 Vcharset_chinese_cns11643_1 =
3007 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3008 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3009 build_string ("CNS11643-1"),
3010 build_string ("CNS11643-1 (Chinese traditional)"),
3012 ("CNS 11643 Plane 1 Chinese traditional"),
3013 build_string (CHINESE_CNS_PLANE_RE("1")),
3014 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3015 staticpro (&Vcharset_chinese_cns11643_2);
3016 Vcharset_chinese_cns11643_2 =
3017 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3018 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3019 build_string ("CNS11643-2"),
3020 build_string ("CNS11643-2 (Chinese traditional)"),
3022 ("CNS 11643 Plane 2 Chinese traditional"),
3023 build_string (CHINESE_CNS_PLANE_RE("2")),
3024 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3026 staticpro (&Vcharset_latin_tcvn5712);
3027 Vcharset_latin_tcvn5712 =
3028 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3029 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3030 build_string ("TCVN 5712"),
3031 build_string ("TCVN 5712 (VSCII-2)"),
3032 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3033 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3034 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3035 staticpro (&Vcharset_latin_viscii_lower);
3036 Vcharset_latin_viscii_lower =
3037 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3038 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3039 build_string ("VISCII lower"),
3040 build_string ("VISCII lower (Vietnamese)"),
3041 build_string ("VISCII lower (Vietnamese)"),
3042 build_string ("MULEVISCII-LOWER"),
3043 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3044 staticpro (&Vcharset_latin_viscii_upper);
3045 Vcharset_latin_viscii_upper =
3046 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3047 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3048 build_string ("VISCII upper"),
3049 build_string ("VISCII upper (Vietnamese)"),
3050 build_string ("VISCII upper (Vietnamese)"),
3051 build_string ("MULEVISCII-UPPER"),
3052 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3053 staticpro (&Vcharset_latin_viscii);
3054 Vcharset_latin_viscii =
3055 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3056 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3057 build_string ("VISCII"),
3058 build_string ("VISCII 1.1 (Vietnamese)"),
3059 build_string ("VISCII 1.1 (Vietnamese)"),
3060 build_string ("VISCII1\\.1"),
3061 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3062 staticpro (&Vcharset_chinese_big5);
3063 Vcharset_chinese_big5 =
3064 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3065 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3066 build_string ("Big5"),
3067 build_string ("Big5"),
3068 build_string ("Big5 Chinese traditional"),
3069 build_string ("big5-0"),
3071 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3072 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3074 staticpro (&Vcharset_chinese_big5_cdp);
3075 Vcharset_chinese_big5_cdp =
3076 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3077 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3078 build_string ("Big5-CDP"),
3079 build_string ("Big5 + CDP extension"),
3080 build_string ("Big5 with CDP extension"),
3081 build_string ("big5\\.cdp-0"),
3082 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3083 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3085 #define DEF_HANZIKU(n) \
3086 staticpro (&Vcharset_ideograph_hanziku_##n); \
3087 Vcharset_ideograph_hanziku_##n = \
3088 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3089 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3090 build_string ("HZK-"#n), \
3091 build_string ("HANZIKU-"#n), \
3092 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3094 ("hanziku-"#n"$"), \
3095 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3096 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3109 staticpro (&Vcharset_china3_jef);
3110 Vcharset_china3_jef =
3111 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3112 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3113 build_string ("JC3"),
3114 build_string ("JEF + CHINA3"),
3115 build_string ("JEF + CHINA3 private characters"),
3116 build_string ("china3jef-0"),
3117 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3118 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3119 staticpro (&Vcharset_ideograph_cbeta);
3120 Vcharset_ideograph_cbeta =
3121 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3122 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3123 build_string ("CB"),
3124 build_string ("CBETA"),
3125 build_string ("CBETA private characters"),
3126 build_string ("cbeta-0"),
3127 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3128 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3129 staticpro (&Vcharset_ideograph_gt);
3130 Vcharset_ideograph_gt =
3131 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3132 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3133 build_string ("GT"),
3134 build_string ("GT"),
3135 build_string ("GT"),
3137 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3138 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3139 #define DEF_GT_PJ(n) \
3140 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3141 Vcharset_ideograph_gt_pj_##n = \
3142 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3143 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3144 build_string ("GT-PJ-"#n), \
3145 build_string ("GT (pseudo JIS encoding) part "#n), \
3146 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3148 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3149 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3162 staticpro (&Vcharset_ideograph_daikanwa_2);
3163 Vcharset_ideograph_daikanwa_2 =
3164 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3165 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3166 build_string ("Daikanwa Rev."),
3167 build_string ("Morohashi's Daikanwa Rev."),
3169 ("Daikanwa dictionary (revised version)"),
3170 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3171 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3172 staticpro (&Vcharset_ideograph_daikanwa);
3173 Vcharset_ideograph_daikanwa =
3174 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3175 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3176 build_string ("Daikanwa"),
3177 build_string ("Morohashi's Daikanwa Rev.2"),
3179 ("Daikanwa dictionary (second revised version)"),
3180 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3181 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3182 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3184 staticpro (&Vcharset_ethiopic_ucs);
3185 Vcharset_ethiopic_ucs =
3186 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3187 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3188 build_string ("Ethiopic (UCS)"),
3189 build_string ("Ethiopic (UCS)"),
3190 build_string ("Ethiopic of UCS"),
3191 build_string ("Ethiopic-Unicode"),
3192 Qnil, 0x1200, 0x137F, 0, 0,
3193 Qnil, CONVERSION_IDENTICAL);
3195 staticpro (&Vcharset_chinese_big5_1);
3196 Vcharset_chinese_big5_1 =
3197 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3198 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3199 build_string ("Big5"),
3200 build_string ("Big5 (Level-1)"),
3202 ("Big5 Level-1 Chinese traditional"),
3203 build_string ("big5"),
3204 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3205 staticpro (&Vcharset_chinese_big5_2);
3206 Vcharset_chinese_big5_2 =
3207 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3208 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3209 build_string ("Big5"),
3210 build_string ("Big5 (Level-2)"),
3212 ("Big5 Level-2 Chinese traditional"),
3213 build_string ("big5"),
3214 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3216 #ifdef ENABLE_COMPOSITE_CHARS
3217 /* #### For simplicity, we put composite chars into a 96x96 charset.
3218 This is going to lead to problems because you can run out of
3219 room, esp. as we don't yet recycle numbers. */
3220 staticpro (&Vcharset_composite);
3221 Vcharset_composite =
3222 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3223 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3224 build_string ("Composite"),
3225 build_string ("Composite characters"),
3226 build_string ("Composite characters"),
3229 /* #### not dumped properly */
3230 composite_char_row_next = 32;
3231 composite_char_col_next = 32;
3233 Vcomposite_char_string2char_hash_table =
3234 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3235 Vcomposite_char_char2string_hash_table =
3236 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3237 staticpro (&Vcomposite_char_string2char_hash_table);
3238 staticpro (&Vcomposite_char_char2string_hash_table);
3239 #endif /* ENABLE_COMPOSITE_CHARS */