1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 /* Lisp_Object Vcharset_chinese_big5_cdp; */
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_china3_jef;
95 Lisp_Object Vcharset_ideograph_cbeta;
96 Lisp_Object Vcharset_ideograph_gt;
97 Lisp_Object Vcharset_ideograph_gt_pj_1;
98 Lisp_Object Vcharset_ideograph_gt_pj_2;
99 Lisp_Object Vcharset_ideograph_gt_pj_3;
100 Lisp_Object Vcharset_ideograph_gt_pj_4;
101 Lisp_Object Vcharset_ideograph_gt_pj_5;
102 Lisp_Object Vcharset_ideograph_gt_pj_6;
103 Lisp_Object Vcharset_ideograph_gt_pj_7;
104 Lisp_Object Vcharset_ideograph_gt_pj_8;
105 Lisp_Object Vcharset_ideograph_gt_pj_9;
106 Lisp_Object Vcharset_ideograph_gt_pj_10;
107 Lisp_Object Vcharset_ideograph_gt_pj_11;
108 Lisp_Object Vcharset_ideograph_daikanwa_2;
109 Lisp_Object Vcharset_ideograph_daikanwa;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps string representing
119 composed chars to their equivalent chars; one goes the
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check that the vector V is an acceptable decoding table.  V is
   compared against CCS_LEN, the largest permitted length, and each of
   its elements is inspected: nil and characters need no further work,
   while other elements are examined more closely, nested tables being
   checked by a recursive call with DIM reduced by one.
   NOTE(review): the concrete return codes are not established by the
   statements documented here; confirm which values mean success and
   failure before relying on them.  */
180 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
182 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
186 if (XVECTOR_LENGTH (v) > ccs_len)
189 for (i = 0; i < XVECTOR_LENGTH (v); i++)
191 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Only elements that are neither nil nor a character need checking.  */
193 if (!NILP (c) && !CHARP (c))
197 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is first normalized to an integer code point: the obsolete
   list-of-bytes form is folded one byte at a time, an integer is taken
   as it is, and unusable input is reported with the error
   "Invalid value for coded-charset".  The decoding table of CCS is then
   updated so that the code point maps to CHARACTER.  */
209 put_char_ccs_code_point (Lisp_Object character,
210 Lisp_Object ccs, Lisp_Object value)
/* This test singles out every case except the `ucs' charset with a
   code point equal to the character itself.  */
212 if (!EQ (XCHARSET_NAME (ccs), Qucs)
214 || (XCHAR (character) != XINT (value)))
216 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
220 { /* obsolete representation: value must be a list of bytes */
221 Lisp_Object ret = Fcar (value);
225 signal_simple_error ("Invalid value for coded-charset", value);
226 code_point = XINT (ret);
227 if (XCHARSET_GRAPHIC (ccs) == 1)
235 signal_simple_error ("Invalid value for coded-charset",
239 signal_simple_error ("Invalid value for coded-charset",
242 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J below the bytes collected so far.  */
244 code_point = (code_point << 8) | j;
247 value = make_int (code_point);
249 else if (INTP (value))
251 code_point = XINT (value);
252 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Charsets with graphic == 1: clear the high bit of every octet.  */
254 code_point &= 0x7F7F7F7F;
255 value = make_int (code_point);
259 signal_simple_error ("Invalid value for coded-charset", value);
/* Fetch the code point CHARACTER currently has in CCS, so that the
   stale decoding-table entry can be removed before the new one is
   stored.  */
263 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
266 decoding_table_remove_char (ccs, XINT (cpos));
269 decoding_table_put_char (ccs, code_point, character);
/* Detach CHARACTER from the coded charset CCS.  When the decoding table
   of CCS is a vector, the code point CHARACTER currently has in CCS is
   looked up and removed from that table; when the encoding table is a
   char-table, the entry for CHARACTER is set to Qunbound.  */
275 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
277 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
278 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
280 if (VECTORP (decoding_table))
282 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
286 decoding_table_remove_char (ccs, XINT (cpos));
289 if (CHAR_TABLEP (encoding_table))
291 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
299 int leading_code_private_11;
302 Lisp_Object Qcharsetp;
304 /* Qdoc_string, Qdimension, Qchars defined in general.c */
305 Lisp_Object Qregistry, Qfinal, Qgraphic;
306 Lisp_Object Qdirection;
307 Lisp_Object Qreverse_direction_charset;
308 Lisp_Object Qleading_byte;
309 Lisp_Object Qshort_name, Qlong_name;
311 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
312 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
329 Qjapanese_jisx0208_1978,
333 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
370 Qideograph_daikanwa_2,
390 Lisp_Object Ql2r, Qr2l;
392 Lisp_Object Vcharset_hash_table;
394 /* Composite characters are characters constructed by overstriking two
395 or more regular characters.
397 1) The old Mule implementation involves storing composite characters
398 in a buffer as a tag followed by all of the actual characters
399 used to make up the composite character. I think this is a bad
400 idea; it greatly complicates code that wants to handle strings
401 one character at a time because it has to deal with the possibility
402 of great big ungainly characters. It's much more reasonable to
403 simply store an index into a table of composite characters.
405 2) The current implementation only allows for 16,384 separate
406 composite characters over the lifetime of the XEmacs process.
407 This could become a potential problem if the user
408 edited lots of different files that use composite characters.
409 Due to FSF bogosity, increasing the number of allowable
410 composite characters under Mule would decrease the number
411 of possible faces that can exist. Mule already has shrunk
412 this to 2048, and further shrinkage would become uncomfortable.
413 No such problems exist in XEmacs.
415 Composite characters could be represented as 0x80 C1 C2 C3,
416 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
417 for slightly under 2^20 (one million) composite characters
418 over the XEmacs process lifetime, and you only need to
419 increase the size of a Mule character from 19 to 21 bits.
420 Or you could use 0x80 C1 C2 C3 C4, allowing for about
421 85 million (slightly over 2^26) composite characters. */
424 /************************************************************************/
425 /* Basic Emchar functions */
426 /************************************************************************/
428 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
429 string in STR. Returns the number of bytes stored.
430 Do not call this directly. Use the macro set_charptr_emchar() instead.
434 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Multi-byte form laid out like UTF-8: the first byte carries a length
   marker (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc) together with the topmost
   bits of C, and every following byte carries six further bits of C
   tagged with 0x80.  The range tests pick the shortest form that can
   hold C.  */
449 else if ( c <= 0x7ff )
451 *p++ = (c >> 6) | 0xc0;
452 *p++ = (c & 0x3f) | 0x80;
454 else if ( c <= 0xffff )
456 *p++ = (c >> 12) | 0xe0;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
460 else if ( c <= 0x1fffff )
462 *p++ = (c >> 18) | 0xf0;
463 *p++ = ((c >> 12) & 0x3f) | 0x80;
464 *p++ = ((c >> 6) & 0x3f) | 0x80;
465 *p++ = (c & 0x3f) | 0x80;
467 else if ( c <= 0x3ffffff )
469 *p++ = (c >> 24) | 0xf8;
470 *p++ = ((c >> 18) & 0x3f) | 0x80;
471 *p++ = ((c >> 12) & 0x3f) | 0x80;
472 *p++ = ((c >> 6) & 0x3f) | 0x80;
473 *p++ = (c & 0x3f) | 0x80;
/* Longest form: six bytes, introduced by the 0xfc marker.  */
477 *p++ = (c >> 30) | 0xfc;
478 *p++ = ((c >> 24) & 0x3f) | 0x80;
479 *p++ = ((c >> 18) & 0x3f) | 0x80;
480 *p++ = ((c >> 12) & 0x3f) | 0x80;
481 *p++ = ((c >> 6) & 0x3f) | 0x80;
482 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte form: BREAKUP_CHAR presumably fills in CHARSET, C1 and
   C2 from C.  A private leading byte is preceded in the output by its
   prefix byte.  */
485 BREAKUP_CHAR (c, charset, c1, c2);
486 lb = CHAR_LEADING_BYTE (c);
487 if (LEADING_BYTE_PRIVATE_P (lb))
488 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* The control-1 charset gets separate treatment from here on.  */
490 if (EQ (charset, Vcharset_control_1))
499 /* Return the first character from a Mule-encoded string in STR,
500 assuming it's non-ASCII. Do not call this directly.
501 Use the macro charptr_emchar() instead. */
504 non_ascii_charptr_emchar (const Bufbyte *str)
/* The first byte B is classified by descending thresholds (0xf8, 0xf0,
   0xe0, 0xc0), that is, by how many of its high bits are set.  */
517 else if ( b >= 0xf8 )
522 else if ( b >= 0xf0 )
527 else if ( b >= 0xe0 )
532 else if ( b >= 0xc0 )
/* Accumulate the character six bits at a time, once per remaining
   count in LEN.  */
542 for( ; len > 0; len-- )
545 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte form: I0 is the byte used to look up the charset, I1 and
   I2 are the position codes handed to MAKE_CHAR.  */
549 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the byte after the leading byte, less 0x20.  */
552 if (i0 == LEADING_BYTE_CONTROL_1)
553 return (Emchar) (*++str - 0x20);
555 if (LEADING_BYTE_PREFIX_P (i0))
560 charset = CHARSET_BY_LEADING_BYTE (i0);
561 if (XCHARSET_DIMENSION (charset) == 2)
564 return MAKE_CHAR (charset, i1, i2);
568 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
569 Do not call this directly. Use the macro valid_char_p() instead. */
573 non_ascii_valid_char_p (Emchar ch)
577 /* Must have only lowest 19 bits set */
/* Split CH into its three fields.  F1 and F2 are range-checked below and
   are also used to derive the leading byte for the charset lookup.  */
581 f1 = CHAR_FIELD1 (ch);
582 f2 = CHAR_FIELD2 (ch);
583 f3 = CHAR_FIELD3 (ch);
/* This test fires when F2 lies outside both the official and the
   private field-2 ranges.  */
589 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
590 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
591 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Positions other than 0x20 and 0x7F in a charset that is not private
   are singled out here.  */
596 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
597 f2 <= MAX_CHAR_FIELD2_PRIVATE))
601 NOTE: This takes advantage of the fact that
602 FIELD2_TO_OFFICIAL_LEADING_BYTE and
603 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
605 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
606 if (EQ (charset, Qnil))
/* The answer is true exactly when the charset has 96 characters per
   dimension.  */
608 return (XCHARSET_CHARS (charset) == 96);
/* Same pattern with F1 as the charset selector: this test fires when F1
   lies outside both the official and the private field-1 ranges.  */
614 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
615 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
616 f1 > MAX_CHAR_FIELD1_PRIVATE)
/* Position fields below 0x20 are tested for here (presumably to reject
   them; confirm against the statement this condition guards).  */
618 if (f2 < 0x20 || f3 < 0x20)
621 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash table,
   keyed by the character value itself.  */
622 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
624 if (UNBOUNDP (Fgethash (make_int (ch),
625 Vcomposite_char_char2string_hash_table,
630 #endif /* ENABLE_COMPOSITE_CHARS */
632 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
633 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private charsets use different offsets from F1 to the
   leading byte.  */
636 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
641 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
643 if (EQ (charset, Qnil))
645 return (XCHARSET_CHARS (charset) == 96);
651 /************************************************************************/
652 /* Basic string functions */
653 /************************************************************************/
655 /* Copy the character pointed to by SRC into DST. Do not call this
656 directly. Use the macro charptr_copy_char() instead.
657 Return the number of bytes copied. */
660 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The first byte of the character determines how many bytes its
   representation occupies; the loop runs once per such byte, advancing
   both pointers together.  */
662 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
664 for (i = bytes; i; i--, dst++, src++)
670 /************************************************************************/
671 /* streams of Emchars */
672 /************************************************************************/
674 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
675 The functions below are not meant to be called directly; use
676 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is the first
   byte, which the caller has already read.  The remaining
   REP_BYTES_BY_FIRST_BYTE (CH) - 1 bytes are fetched into a local
   buffer of MAX_EMCHAR_LEN bytes, and the buffer is decoded with
   charptr_emchar ().  */
679 Lstream_get_emchar_1 (Lstream *stream, int ch)
681 Bufbyte str[MAX_EMCHAR_LEN];
682 Bufbyte *strptr = str;
685 str[0] = (Bufbyte) ch;
687 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
689 int c = Lstream_getc (stream);
/* NOTE(review): a negative result from Lstream_getc (presumably end of
   stream or an error) is only caught by this assertion; the value is
   stored as a byte regardless.  Confirm that truncated input cannot
   reach this point in builds where the assertion is compiled out.  */
690 bufpos_checking_assert (c >= 0);
691 *++strptr = (Bufbyte) c;
693 return charptr_emchar (str);
/* Write the character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar (), then pass the encoded bytes to Lstream_write ()
   and return that call's result.  */
697 Lstream_fput_emchar (Lstream *stream, Emchar ch)
699 Bufbyte str[MAX_EMCHAR_LEN];
700 Bytecount len = set_charptr_emchar (str, ch);
701 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar () and hand the encoded bytes to
   Lstream_unread ().  */
705 Lstream_funget_emchar (Lstream *stream, Emchar ch)
707 Bufbyte str[MAX_EMCHAR_LEN];
708 Bytecount len = set_charptr_emchar (str, ch);
709 Lstream_unread (stream, str, len);
713 /************************************************************************/
715 /************************************************************************/
/* Mark method for charset objects (passed to
   DEFINE_LRECORD_IMPLEMENTATION).  Calls mark_object () on the Lisp
   slots of the charset OBJ: short name, long name, doc string,
   registry, CCL program, decoding table and mother.  */
718 mark_charset (Lisp_Object obj)
720 Lisp_Charset *cs = XCHARSET (obj);
722 mark_object (cs->short_name);
723 mark_object (cs->long_name);
724 mark_object (cs->doc_string);
725 mark_object (cs->registry);
726 mark_object (cs->ccl_program);
728 mark_object (cs->decoding_table);
729 mark_object (cs->mother);
/* Print method for charset objects (passed to
   DEFINE_LRECORD_IMPLEMENTATION).  Writes to PRINTCHARFUN a
   description of the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the middle part is produced by the format string below.
   An "unreadable object" error is raised through error (), presumably
   only when a readable printout was requested; confirm the guarding
   condition.  */
735 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
737 Lisp_Charset *cs = XCHARSET (obj);
741 error ("printing unreadable object #<charset %s 0x%x>",
742 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
745 write_c_string ("#<charset ", printcharfun);
/* The name is printed with escape flag 0, the three strings that follow
   with escape flag 1.  */
746 print_internal (CHARSET_NAME (cs), printcharfun, 0);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): both sprintf calls write into BUF without a length
   limit.  Confirm that BUF is sized for the longest possible expansion
   of these formats, or switch to a bounded formatter.  */
753 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
755 CHARSET_DIMENSION (cs),
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry, giving
   the member offset, for each Lisp_Object slot listed here.  */
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
776 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
781 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
782 mark_charset, print_charset, 0, 0, 0,
786 /* Make a new charset. */
787 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset, fill its slots from the arguments, and
   register the new object in the lookup structures:
     - chlook->charset_by_attributes, indexed by ISO 2022 type and final
       byte (and, in one variant, by direction as well);
     - chlook->charset_by_leading_byte, indexed by ID - MIN_LEADING_BYTE;
     - Vcharset_hash_table, keyed by NAME.
   The table slots are asserted to be empty before they are filled.
   The ISO 2022 type is a number from 0 to 3: 2 is added when DIMENSION
   is not 1, and 1 is added when CHARS is not 94.
   The CCL program and reverse-direction charset start out as nil.
   NOTE(review): the DECODING_TABLE argument is not stored by the
   assignments below; the decoding-table slot is set to Qunbound.
   Confirm that this is intended.  */
789 make_charset (Charset_ID id, Lisp_Object name,
790 unsigned short chars, unsigned char dimension,
791 unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar min_code, Emchar max_code,
797 Emchar code_offset, unsigned char byte_offset,
798 Lisp_Object mother, unsigned char conversion)
801 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
805 XSETCHARSET (obj, cs);
807 CHARSET_ID (cs) = id;
808 CHARSET_NAME (cs) = name;
809 CHARSET_SHORT_NAME (cs) = short_name;
810 CHARSET_LONG_NAME (cs) = long_name;
811 CHARSET_CHARS (cs) = chars;
812 CHARSET_DIMENSION (cs) = dimension;
813 CHARSET_DIRECTION (cs) = direction;
814 CHARSET_COLUMNS (cs) = columns;
815 CHARSET_GRAPHIC (cs) = graphic;
816 CHARSET_FINAL (cs) = final;
817 CHARSET_DOC_STRING (cs) = doc;
818 CHARSET_REGISTRY (cs) = reg;
819 CHARSET_CCL_PROGRAM (cs) = Qnil;
820 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
822 CHARSET_DECODING_TABLE(cs) = Qunbound;
823 CHARSET_MIN_CODE (cs) = min_code;
824 CHARSET_MAX_CODE (cs) = max_code;
825 CHARSET_CODE_OFFSET (cs) = code_offset;
826 CHARSET_BYTE_OFFSET (cs) = byte_offset;
827 CHARSET_MOTHER (cs) = mother;
828 CHARSET_CONVERSION (cs) = conversion;
/* Bytes per character in the string representation: 1 for ASCII,
   otherwise the dimension plus one or plus two.  */
832 if (id == LEADING_BYTE_ASCII)
833 CHARSET_REP_BYTES (cs) = 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
837 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
842 /* some charsets do not have final characters. This includes
843 ASCII, Control-1, Composite, and the two faux private
845 unsigned char iso2022_type
846 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
848 if (code_offset == 0)
850 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
851 chlook->charset_by_attributes[iso2022_type][final] = obj;
855 (chlook->charset_by_attributes[iso2022_type][final][direction]));
856 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
860 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
861 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
863 /* Some charsets are "faux" and don't have names or really exist at
864 all except in the leading-byte table. */
866 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Each counter kept in chlook is compared with its
   MAX_LEADING_BYTE_PRIVATE* limit and otherwise post-incremented to
   produce the result LB.  The message "No more character sets free for
   this dimension", with DIMENSION attached, covers the exhausted case
   (presumably raised as an error; confirm the signalling call).  */
871 get_unallocated_leading_byte (int dimension)
876 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
879 lb = chlook->next_allocated_leading_byte++;
/* Variant with a separate counter for each representation size.  */
883 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
886 lb = chlook->next_allocated_1_byte_leading_byte++;
890 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
893 lb = chlook->next_allocated_2_byte_leading_byte++;
899 ("No more character sets free for this dimension",
900 make_int (dimension));
906 /* Number of Big5 characters which have the same code in 1st byte. */
908 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up CODE_POINT in the decoding table of the coded charset CCS.
   The table is walked with get_ccs_octet_table (), selecting an octet
   of CODE_POINT by a shift of DIM * 8 bits; when the walk ends at a
   character, that character is returned.  Otherwise, if CCS has a
   mother charset, the lookup with identical conversion is passed on to
   the mother: through DECODE_CHAR when the mother is the `ucs' charset,
   and through a recursive call in the remaining case.  */
911 decode_defined_char (Lisp_Object ccs, int code_point)
913 int dim = XCHARSET_DIMENSION (ccs);
914 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
922 = get_ccs_octet_table (decoding_table, ccs,
923 (code_point >> (dim * 8)) & 255);
925 if (CHARP (decoding_table))
926 return XCHAR (decoding_table);
929 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
931 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
933 if ( EQ (mother, Vcharset_ucs) )
934 return DECODE_CHAR (mother, code_point);
936 return decode_defined_char (mother, code_point);
/* Compute the character that CODE_POINT of CHARSET denotes by
   arithmetic alone, without a decoding table.  Three strategies are
   used, tried in this order:
     1. CHARSET has a mother charset and a positive maximum code: the
        code point is converted to a linear code and decoded through the
        mother, shifted by the code offset of CHARSET;
     2. CHARSET has a nonzero maximum code: a character id is computed
        from the byte offset, the number of characters per dimension and
        the code offset, and compared with the minimum and maximum code;
     3. CHARSET has a final byte of at least '0': the character lies in
        one of the MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks, located by
        the final byte.  */
943 decode_builtin_char (Lisp_Object charset, int code_point)
945 Lisp_Object mother = XCHARSET_MOTHER (charset);
948 if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
950 int code = code_point;
/* 94x60 conversion: rows from 16 + 32 up to (but excluding)
   16 + 32 + 30 are counted from row 16 + 32, rows from 18 + 32 + 30 up
   to (but excluding) 18 + 32 + 60 are counted from row 18 + 32, and
   the rows in between take a branch of their own.  Cells are counted
   from 33, with 94 cells per row.  */
952 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
954 int row = code_point >> 8;
955 int cell = code_point & 255;
959 else if (row < 16 + 32 + 30)
960 code = (row - (16 + 32)) * 94 + cell - 33;
961 else if (row < 18 + 32 + 30)
963 else if (row < 18 + 32 + 60)
964 code = (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60 conversion: the same row scheme, preceded by a plane number
   counted from 33, each plane holding 94 * 60 codes.  */
966 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
968 int plane = code_point >> 16;
969 int row = (code_point >> 8) & 255;
970 int cell = code_point & 255;
974 else if (row < 16 + 32 + 30)
976 = (plane - 33) * 94 * 60
977 + (row - (16 + 32)) * 94
979 else if (row < 18 + 32 + 30)
981 else if (row < 18 + 32 + 60)
983 = (plane - 33) * 94 * 60
984 + (row - (18 + 32)) * 94
988 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* Strategy 2: linear character id, range-checked against the minimum
   and maximum code of CHARSET.  */
990 if (XCHARSET_MAX_CODE (charset))
993 = (XCHARSET_DIMENSION (charset) == 1
995 code_point - XCHARSET_BYTE_OFFSET (charset)
997 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
998 * XCHARSET_CHARS (charset)
999 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1000 + XCHARSET_CODE_OFFSET (charset);
1001 if ((cid < XCHARSET_MIN_CODE (charset))
1002 || (XCHARSET_MAX_CODE (charset) < cid))
/* Strategy 3: position inside the block selected by the final byte.
   The high bit of each octet is masked off, and positions are counted
   from 33 in 94-character sets and from 32 in 96-character sets.  */
1006 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1008 if (XCHARSET_DIMENSION (charset) == 1)
1010 switch (XCHARSET_CHARS (charset))
1014 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1017 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1025 switch (XCHARSET_CHARS (charset))
1028 return MIN_CHAR_94x94
1029 + (final - '0') * 94 * 94
1030 + (((code_point >> 8) & 0x7F) - 33) * 94
1031 + ((code_point & 0x7F) - 33);
1033 return MIN_CHAR_96x96
1034 + (final - '0') * 96 * 96
1035 + (((code_point >> 8) & 0x7F) - 32) * 96
1036 + ((code_point & 0x7F) - 32);
/* Compute the code point of the character CH in CHARSET.  The encoding
   table of CHARSET is consulted first, when it is a char-table that
   yields an integer.  Otherwise the code point is derived
   arithmetically: either from a base code (obtained from the mother
   charset when there is one) through the conversion type of CHARSET,
   or, for charsets with a final byte of at least '0', from the
   position of CH inside the MIN_CHAR_94 / 96 / 94x94 / 96x96 blocks.  */
1048 charset_code_point (Lisp_Object charset, Emchar ch)
1050 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1053 if ( CHAR_TABLEP (encoding_table)
1054 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1059 Lisp_Object mother = XCHARSET_MOTHER (charset);
1060 int min = XCHARSET_MIN_CODE (charset);
1061 int max = XCHARSET_MAX_CODE (charset);
1064 if ( CHARSETP (mother) )
1065 code = charset_code_point (mother, ch);
/* The conversion applies when CHARSET has a mother and no maximum
   code, or when CODE falls within MIN and MAX inclusive.  */
1068 if ( ((max == 0) && CHARSETP (mother)) ||
1069 ((min <= code) && (code <= max)) )
/* D is the linear index of the character within CHARSET.  Each branch
   below splits D into octets: 94-character dimensions are biased by
   33, 96-character dimensions by 32.  */
1071 int d = code - XCHARSET_CODE_OFFSET (charset);
1073 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1075 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1082 int cell = d % 94 + 33;
1088 return (row << 8) | cell;
1090 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1091 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1092 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1093 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1096 int plane = d / (94 * 60) + 33;
1097 int row = (d % (94 * 60)) / 94;
1098 int cell = d % 94 + 33;
1104 return (plane << 16) | (row << 8) | cell;
1106 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1108 ( (d / (94 * 94) + 33) << 16)
1109 | ((d / 94 % 94 + 33) << 8)
1111 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1113 ( (d / (96 * 96) + 32) << 16)
1114 | ((d / 96 % 96 + 32) << 8)
1116 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1118 ( (d / (94 * 94 * 94) + 33) << 24)
1119 | ((d / (94 * 94) % 94 + 33) << 16)
1120 | ((d / 94 % 94 + 33) << 8)
1122 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1124 ( (d / (96 * 96 * 96) + 32) << 24)
1125 | ((d / (96 * 96) % 96 + 32) << 16)
1126 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): this diagnostic goes through printf and has no trailing
   newline.  Consider whether an unknown conversion should be reported
   through the regular error or warning machinery instead.  */
1130 printf ("Unknown CCS-conversion %d is specified!",
1131 XCHARSET_CONVERSION (charset));
/* Charsets identified by a final byte: D becomes the offset of CH from
   the start of the sub-block that belongs to that final byte.  */
1135 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1136 ( XCHARSET_MIN_CODE (charset) == 0 )
1138 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1139 (XCHARSET_CODE_OFFSET (charset)
1140 == XCHARSET_MIN_CODE (charset))
1145 if (XCHARSET_DIMENSION (charset) == 1)
1147 if (XCHARSET_CHARS (charset) == 94)
1149 if (((d = ch - (MIN_CHAR_94
1150 + (XCHARSET_FINAL (charset) - '0') * 94))
1155 else if (XCHARSET_CHARS (charset) == 96)
1157 if (((d = ch - (MIN_CHAR_96
1158 + (XCHARSET_FINAL (charset) - '0') * 96))
1166 else if (XCHARSET_DIMENSION (charset) == 2)
1168 if (XCHARSET_CHARS (charset) == 94)
1170 if (((d = ch - (MIN_CHAR_94x94
1172 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1175 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1177 else if (XCHARSET_CHARS (charset) == 96)
1179 if (((d = ch - (MIN_CHAR_96x96
1181 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1184 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map the character C to a built-in charset by range tests on C alone.
   The chosen charset is stored in *CHARSET; the returned values visible
   below are code points relative to that charset (for example C minus
   the start of its range, plus a bias where the charset calls for
   one).  The ranges are tested in ascending order, so each test only
   needs to state an upper bound once the lower ranges have been ruled
   out.  */
1195 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1197 if (c <= MAX_CHAR_BASIC_LATIN)
1199 *charset = Vcharset_ascii;
1204 *charset = Vcharset_control_1;
1209 *charset = Vcharset_latin_iso8859_1;
1213 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1215 *charset = Vcharset_hebrew_iso8859_8;
1216 return c - MIN_CHAR_HEBREW + 0x20;
1219 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1221 *charset = Vcharset_thai_tis620;
1222 return c - MIN_CHAR_THAI + 0x20;
1225 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1226 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
/* NOTE(review): this branch returns the Lisp list built by list2 () and
   never assigns *CHARSET, whereas every other branch stores the charset
   through the pointer and returns a plain code point.  It looks like a
   leftover from an earlier interface.  Confirm whether it is ever
   compiled, and if it is, bring it in line with the other branches.  */
1228 return list2 (Vcharset_katakana_jisx0201,
1229 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1232 else if (c <= MAX_CHAR_BMP)
1234 *charset = Vcharset_ucs_bmp;
1237 else if (c <= MAX_CHAR_SMP)
1239 *charset = Vcharset_ucs_smp;
1240 return c - MIN_CHAR_SMP;
1242 else if (c <= MAX_CHAR_SIP)
1244 *charset = Vcharset_ucs_sip;
1245 return c - MIN_CHAR_SIP;
/* Gaps between the dedicated ranges are attributed to the generic `ucs'
   charset.  */
1247 else if (c < MIN_CHAR_DAIKANWA)
1249 *charset = Vcharset_ucs;
1252 else if (c <= MAX_CHAR_DAIKANWA)
1254 *charset = Vcharset_ideograph_daikanwa;
1255 return c - MIN_CHAR_DAIKANWA;
1257 else if (c < MIN_CHAR_94)
1259 *charset = Vcharset_ucs;
/* In the 94 / 96 / 94x94 / 96x96 blocks the offset of C within the
   block encodes a final byte (counted from '0') and a position.  The
   final byte is used to find a left-to-right charset with matching
   attributes; when none is registered, `ucs' appears to be the
   fallback.  */
1262 else if (c <= MAX_CHAR_94)
1264 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1265 ((c - MIN_CHAR_94) / 94) + '0',
1266 CHARSET_LEFT_TO_RIGHT);
1267 if (!NILP (*charset))
1268 return ((c - MIN_CHAR_94) % 94) + 33;
1271 *charset = Vcharset_ucs;
1275 else if (c <= MAX_CHAR_96)
1277 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1278 ((c - MIN_CHAR_96) / 96) + '0',
1279 CHARSET_LEFT_TO_RIGHT);
1280 if (!NILP (*charset))
1281 return ((c - MIN_CHAR_96) % 96) + 32;
1284 *charset = Vcharset_ucs;
1288 else if (c <= MAX_CHAR_94x94)
1291 = CHARSET_BY_ATTRIBUTES (94, 2,
1292 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1293 CHARSET_LEFT_TO_RIGHT);
1294 if (!NILP (*charset))
1295 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1296 | (((c - MIN_CHAR_94x94) % 94) + 33);
1299 *charset = Vcharset_ucs;
1303 else if (c <= MAX_CHAR_96x96)
1306 = CHARSET_BY_ATTRIBUTES (96, 2,
1307 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1308 CHARSET_LEFT_TO_RIGHT);
1309 if (!NILP (*charset))
1310 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1311 | (((c - MIN_CHAR_96x96) % 96) + 32);
1314 *charset = Vcharset_ucs;
1320 *charset = Vcharset_ucs;
1325 Lisp_Object Vdefault_coded_charset_priority_list;
1329 /************************************************************************/
1330 /* Basic charset Lisp functions */
1331 /************************************************************************/
1333 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1334 Return non-nil if OBJECT is a charset.
1338 return CHARSETP (object) ? Qt : Qnil;
1341 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1342 Retrieve the charset of the given name.
1343 If CHARSET-OR-NAME is a charset object, it is simply returned.
1344 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1345 nil is returned. Otherwise the associated charset object is returned.
1349 if (CHARSETP (charset_or_name))
1350 return charset_or_name;
1352 CHECK_SYMBOL (charset_or_name);
1353 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1356 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1357 Retrieve the charset of the given name.
1358 Same as `find-charset' except an error is signalled if there is no such
1359 charset instead of returning nil.
1363 Lisp_Object charset = Ffind_charset (name);
1366 signal_simple_error ("No such charset", name);
1370 /* We store the charsets in hash tables with the names as the key and the
1371 actual charset object as the value. Occasionally we need to use them
1372 in a list format. These routines provide us with that. */
1373 struct charset_list_closure
1375 Lisp_Object *charset_list;
/* Mapping callback handed to elisp_maphash () over Vcharset_hash_table.
   CHARSET_LIST_CLOSURE points to a struct charset_list_closure; KEY
   (the name under which the charset is stored in the table) is consed
   onto the front of the list that the closure refers to.  VALUE, the
   charset object itself, is not used by the statements below.  */
1379 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1380 void *charset_list_closure)
1382 /* This function can GC */
1383 struct charset_list_closure *chcl =
1384 (struct charset_list_closure*) charset_list_closure;
1385 Lisp_Object *charset_list = chcl->charset_list;
1387 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1391 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1392 Return a list of the names of all defined charsets.
1396 Lisp_Object charset_list = Qnil;
1397 struct gcpro gcpro1;
1398 struct charset_list_closure charset_list_closure;
1400 GCPRO1 (charset_list);
1401 charset_list_closure.charset_list = &charset_list;
1402 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1403 &charset_list_closure);
1406 return charset_list;
1409 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1410 Return the name of charset CHARSET.
1414 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.
   Outline of the visible logic:
   - NAME must be a symbol and DOC-STRING, when non-nil, a string; an
     existing charset of that name is an error.
   - PROPS is walked as a property list; each recognized keyword is
     validated and copied into a local, and a bad value signals an
     "Invalid value for ..." error.
   - A final byte above 0x5F is rejected for dimension 2, and the
     CHARS/DIMENSION/FINAL combination must not already be taken in
     either direction.
   - A leading byte is obtained from get_unallocated_leading_byte;
     DOC-STRING and registry default to "", short-name to the symbol's
     name, and long-name to DOC-STRING.
   - The charset is built by make_charset, and a supplied CCL program is
     stored afterwards.
   NOTE(review): the 'long-name test is a plain `if' whereas every later
   keyword test is `else if'; check that a 'short-name entry cannot fall
   through to the "Unrecognized property" error at the end of the chain. */
1417 /* #### SJT Should generic properties be allowed? */
1418 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1419 Define a new character set.
1420 This function is for use with Mule support.
1421 NAME is a symbol, the name by which the character set is normally referred.
1422 DOC-STRING is a string describing the character set.
1423 PROPS is a property list, describing the specific nature of the
1424 character set. Recognized properties are:
1426 'short-name Short version of the charset name (ex: Latin-1)
1427 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1428 'registry A regular expression matching the font registry field for
1430 'dimension Number of octets used to index a character in this charset.
1431 Either 1 or 2. Defaults to 1.
1432 If UTF-2000 feature is enabled, 3 or 4 are also available.
1433 'columns Number of columns used to display a character in this charset.
1434 Only used in TTY mode. (Under X, the actual width of a
1435 character can be derived from the font used to display the
1436 characters.) If unspecified, defaults to the dimension
1437 (this is almost always the correct value).
1438 'chars Number of characters in each dimension (94 or 96).
1439 Defaults to 94. Note that if the dimension is 2, the
1440 character set thus described is 94x94 or 96x96.
1441 If UTF-2000 feature is enabled, 128 or 256 are also available.
1442 'final Final byte of ISO 2022 escape sequence. Must be
1443 supplied. Each combination of (DIMENSION, CHARS) defines a
1444 separate namespace for final bytes. Note that ISO
1445 2022 restricts the final byte to the range
1446 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1447 dimension == 2. Note also that final bytes in the range
1448 0x30 - 0x3F are reserved for user-defined (not official)
1450 'graphic 0 (use left half of font on output) or 1 (use right half
1451 of font on output). Defaults to 0. For example, for
1452 a font whose registry is ISO8859-1, the left half
1453 (octets 0x20 - 0x7F) is the `ascii' character set, while
1454 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1455 character set. With 'graphic set to 0, the octets
1456 will have their high bit cleared; with it set to 1,
1457 the octets will have their high bit set.
1458 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1460 'ccl-program A compiled CCL program used to convert a character in
1461 this charset into an index into the font. This is in
1462 addition to the 'graphic property. The CCL program
1463 is passed the octets of the character, with the high
1464 bit cleared and set depending upon whether the value
1465 of the 'graphic property is 0 or 1.
1466 'mother [UTF-2000 only] Base coded-charset.
1467 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1468 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1469 'code-offset [UTF-2000 only] Offset for a code-point of a base
1471 'conversion [UTF-2000 only] Conversion for a code-point of a base
1472 coded-charset (94x60 or 94x94x60).
1474 (name, doc_string, props))
1476 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1477 int direction = CHARSET_LEFT_TO_RIGHT;
1478 Lisp_Object registry = Qnil;
1479 Lisp_Object charset;
1480 Lisp_Object ccl_program = Qnil;
1481 Lisp_Object short_name = Qnil, long_name = Qnil;
1482 Lisp_Object mother = Qnil;
1483 int min_code = 0, max_code = 0, code_offset = 0;
1484 int byte_offset = -1;
1487 CHECK_SYMBOL (name);
1488 if (!NILP (doc_string))
1489 CHECK_STRING (doc_string);
1491 charset = Ffind_charset (name);
1492 if (!NILP (charset))
1493 signal_simple_error ("Cannot redefine existing charset", name);
1496 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1498 if (EQ (keyword, Qshort_name))
1500 CHECK_STRING (value);
1504 if (EQ (keyword, Qlong_name))
1506 CHECK_STRING (value);
1510 else if (EQ (keyword, Qdimension))
1513 dimension = XINT (value);
1514 if (dimension < 1 ||
1521 signal_simple_error ("Invalid value for 'dimension", value);
1524 else if (EQ (keyword, Qchars))
1527 chars = XINT (value);
1528 if (chars != 94 && chars != 96
1530 && chars != 128 && chars != 256
1533 signal_simple_error ("Invalid value for 'chars", value);
1536 else if (EQ (keyword, Qcolumns))
1539 columns = XINT (value);
1540 if (columns != 1 && columns != 2)
1541 signal_simple_error ("Invalid value for 'columns", value);
1544 else if (EQ (keyword, Qgraphic))
1547 graphic = XINT (value);
1555 signal_simple_error ("Invalid value for 'graphic", value);
1558 else if (EQ (keyword, Qregistry))
1560 CHECK_STRING (value);
1564 else if (EQ (keyword, Qdirection))
1566 if (EQ (value, Ql2r))
1567 direction = CHARSET_LEFT_TO_RIGHT;
1568 else if (EQ (value, Qr2l))
1569 direction = CHARSET_RIGHT_TO_LEFT;
1571 signal_simple_error ("Invalid value for 'direction", value);
1574 else if (EQ (keyword, Qfinal))
1576 CHECK_CHAR_COERCE_INT (value);
1577 final = XCHAR (value);
1578 if (final < '0' || final > '~')
1579 signal_simple_error ("Invalid value for 'final", value);
1583 else if (EQ (keyword, Qmother))
1585 mother = Fget_charset (value);
1588 else if (EQ (keyword, Qmin_code))
1591 min_code = XUINT (value);
1594 else if (EQ (keyword, Qmax_code))
1597 max_code = XUINT (value);
1600 else if (EQ (keyword, Qcode_offset))
1603 code_offset = XUINT (value);
1606 else if (EQ (keyword, Qconversion))
1608 if (EQ (value, Q94x60))
1609 conversion = CONVERSION_94x60;
1610 else if (EQ (value, Q94x94x60))
1611 conversion = CONVERSION_94x94x60;
1613 signal_simple_error ("Unrecognized conversion", value);
1617 else if (EQ (keyword, Qccl_program))
1619 struct ccl_program test_ccl;
1621 if (setup_ccl_program (&test_ccl, value) < 0)
1622 signal_simple_error ("Invalid value for 'ccl-program", value);
1623 ccl_program = value;
1627 signal_simple_error ("Unrecognized property", keyword);
1633 error ("'final must be specified");
1635 if (dimension == 2 && final > 0x5F)
1637 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1640 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1641 CHARSET_LEFT_TO_RIGHT)) ||
1642 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1643 CHARSET_RIGHT_TO_LEFT)))
1645 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1647 id = get_unallocated_leading_byte (dimension);
1649 if (NILP (doc_string))
1650 doc_string = build_string ("");
1652 if (NILP (registry))
1653 registry = build_string ("");
1655 if (NILP (short_name))
1656 XSETSTRING (short_name, XSYMBOL (name)->name);
1658 if (NILP (long_name))
1659 long_name = doc_string;
1662 columns = dimension;
1664 if (byte_offset < 0)
1668 else if (chars == 96)
1674 charset = make_charset (id, name, chars, dimension, columns, graphic,
1675 final, direction, short_name, long_name,
1676 doc_string, registry,
1677 Qnil, min_code, max_code, code_offset, byte_offset,
1678 mother, conversion);
1679 if (!NILP (ccl_program))
1680 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   CHARSET must not already have a reverse-direction partner, and NEW-NAME
   must be a symbol that does not name an existing charset.  The new
   charset copies CHARSET's chars, dimension, columns, graphic, final,
   names, doc string and registry, takes a freshly allocated leading byte,
   and gets the opposite direction.  The two charsets are then recorded as
   each other's reverse-direction charset.
   NOTE(review): two alternative tails for the make_charset argument list
   appear below (copied decoding-table/code-range values vs. Qnil and
   zeros); presumably a preprocessor conditional selects one -- confirm. */
1684 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1686 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1687 NEW-NAME is the name of the new charset. Return the new charset.
1689 (charset, new_name))
1691 Lisp_Object new_charset = Qnil;
1692 int id, chars, dimension, columns, graphic, final;
1694 Lisp_Object registry, doc_string, short_name, long_name;
1697 charset = Fget_charset (charset);
1698 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1699 signal_simple_error ("Charset already has reverse-direction charset",
1702 CHECK_SYMBOL (new_name);
1703 if (!NILP (Ffind_charset (new_name)))
1704 signal_simple_error ("Cannot redefine existing charset", new_name);
1706 cs = XCHARSET (charset);
1708 chars = CHARSET_CHARS (cs);
1709 dimension = CHARSET_DIMENSION (cs);
1710 columns = CHARSET_COLUMNS (cs);
1711 id = get_unallocated_leading_byte (dimension);
1713 graphic = CHARSET_GRAPHIC (cs);
1714 final = CHARSET_FINAL (cs);
1715 direction = CHARSET_RIGHT_TO_LEFT;
1716 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1717 direction = CHARSET_LEFT_TO_RIGHT;
1718 doc_string = CHARSET_DOC_STRING (cs);
1719 short_name = CHARSET_SHORT_NAME (cs);
1720 long_name = CHARSET_LONG_NAME (cs);
1721 registry = CHARSET_REGISTRY (cs);
1723 new_charset = make_charset (id, new_name, chars, dimension, columns,
1724 graphic, final, direction, short_name, long_name,
1725 doc_string, registry,
1727 CHARSET_DECODING_TABLE(cs),
1728 CHARSET_MIN_CODE(cs),
1729 CHARSET_MAX_CODE(cs),
1730 CHARSET_CODE_OFFSET(cs),
1731 CHARSET_BYTE_OFFSET(cs),
1733 CHARSET_CONVERSION (cs)
1735 Qnil, 0, 0, 0, 0, Qnil, 0
1739 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1740 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.
   ALIAS must be a symbol.  CHARSET is passed through Fget_charset and the
   result is stored in Vcharset_hash_table under the key ALIAS; the value
   returned is whatever Fputhash returns. */
1745 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1746 Define symbol ALIAS as an alias for CHARSET.
1750 CHECK_SYMBOL (alias);
1751 charset = Fget_charset (charset);
1752 return Fputhash (alias, charset, Vcharset_hash_table);
1755 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   Passes CHARSET through Fget_charset and returns its
   reverse-direction-charset slot unchanged.
   NOTE(review): syms_of_mule_charset contains no DEFSUBR for this
   function (only a commented-out DEFSUBR of the similarly named
   Freverse_direction_charset), so it is presumably compiled out or not
   exposed to Lisp -- confirm. */
1757 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1759 Return the reverse-direction charset parallel to CHARSET, if any.
1760 This is the charset with the same properties (in particular, the same
1761 dimension, number of characters per dimension, and final byte) as
1762 CHARSET but whose characters are displayed in the opposite direction.
1766 charset = Fget_charset (charset);
1767 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   The visible checks restrict DIMENSION to 1 or 2, CHARS to 94 or 96,
   FINAL to the range '0' .. '~' (and to at most 0x5F when DIMENSION is
   2), and DIRECTION to `l2r', `r2l' or nil.  The lookup goes through
   CHARSET_BY_ATTRIBUTES; on the return path shown here the value handed
   back is the NAME of the charset found (XCHARSET_NAME), not the charset
   object itself. */
1771 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1772 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1773 If DIRECTION is omitted, both directions will be checked (left-to-right
1774 will be returned if character sets exist for both directions).
1776 (dimension, chars, final, direction))
1778 int dm, ch, fi, di = -1;
1779 Lisp_Object obj = Qnil;
1781 CHECK_INT (dimension);
1782 dm = XINT (dimension);
1783 if (dm < 1 || dm > 2)
1784 signal_simple_error ("Invalid value for DIMENSION", dimension);
1788 if (ch != 94 && ch != 96)
1789 signal_simple_error ("Invalid value for CHARS", chars);
1791 CHECK_CHAR_COERCE_INT (final);
1793 if (fi < '0' || fi > '~')
1794 signal_simple_error ("Invalid value for FINAL", final);
1796 if (EQ (direction, Ql2r))
1797 di = CHARSET_LEFT_TO_RIGHT;
1798 else if (EQ (direction, Qr2l))
1799 di = CHARSET_RIGHT_TO_LEFT;
1800 else if (!NILP (direction))
1801 signal_simple_error ("Invalid value for DIRECTION", direction);
1803 if (dm == 2 && fi > 0x5F)
1805 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1809 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1811 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1814 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1817 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.
   Passes CHARSET through Fget_charset and returns its short-name slot. */
1821 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1822 Return short name of CHARSET.
1826 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.
   Passes CHARSET through Fget_charset and returns its long-name slot. */
1829 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1830 Return long name of CHARSET.
1834 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.
   The "description" is the charset's doc-string slot: CHARSET is passed
   through Fget_charset and XCHARSET_DOC_STRING of the result is returned. */
1837 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1838 Return description of CHARSET.
1842 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.
   Passes CHARSET through Fget_charset and returns its dimension wrapped
   as a Lisp integer with make_int. */
1845 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1846 Return dimension of CHARSET.
1850 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   After CHARSET has gone through Fget_charset and PROP has been checked
   to be a symbol, PROP is compared against each known property symbol in
   turn and the matching field is returned; integer fields are wrapped
   with make_int.  Special cases visible below:
   - 'final yields nil when the stored final byte is 0, else a character;
   - 'direction yields the symbol `l2r' or `r2l';
   - 'reverse-direction-charset yields the partner's name when the slot
     holds a charset, otherwise the slot value itself.
   A PROP that matches nothing signals "Unrecognized charset property
   name". */
1853 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1854 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1855 Recognized properties are those listed in `make-charset', as well as
1856 'name and 'doc-string.
1862 charset = Fget_charset (charset);
1863 cs = XCHARSET (charset);
1865 CHECK_SYMBOL (prop);
1866 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1867 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1868 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1869 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1870 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1871 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1872 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1873 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1874 Qnil : make_char (CHARSET_FINAL (cs));
1875 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1876 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1877 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1878 if (EQ (prop, Qdirection))
1879 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1880 if (EQ (prop, Qreverse_direction_charset))
1882 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1883 /* #### Is this translation OK? If so, error checking sufficient? */
1884 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1887 if (EQ (prop, Qmother))
1888 return CHARSET_MOTHER (cs);
1889 if (EQ (prop, Qmin_code))
1890 return make_int (CHARSET_MIN_CODE (cs));
1891 if (EQ (prop, Qmax_code))
1892 return make_int (CHARSET_MAX_CODE (cs));
1894 signal_simple_error ("Unrecognized charset property name", prop);
1895 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.
   The identification number returned is the charset's leading byte
   (XCHARSET_LEADING_BYTE), wrapped as a Lisp integer. */
1898 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1899 Return charset identification number of CHARSET.
1903 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1906 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   CCL-PROGRAM is first test-loaded into a scratch struct ccl_program with
   setup_ccl_program; a negative result signals "Invalid ccl-program".
   Otherwise the value is stored in the charset's ccl-program slot. */
1909 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1910 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1912 (charset, ccl_program))
1914 struct ccl_program test_ccl;
1916 charset = Fget_charset (charset);
1917 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1918 signal_simple_error ("Invalid ccl-program", ccl_program);
1919 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the cached font lookups for CHARSET.  For every device visited by
   DEVICE_LOOP_NO_BREAK, CHARSET is looked up in that device's
   charset_font_cache table; when an entry exists (the lookup does not
   return Qunbound) the hash table found there is cleared with Fclrhash. */
1924 invalidate_charset_font_caches (Lisp_Object charset)
1926 /* Invalidate font cache entries for charset on all devices. */
1927 Lisp_Object devcons, concons, hash_table;
1928 DEVICE_LOOP_NO_BREAK (devcons, concons)
1930 struct device *d = XDEVICE (XCAR (devcons));
1931 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1932 if (!UNBOUNDP (hash_table))
1933 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.
   REGISTRY must be a string.  After it is stored in the charset's
   registry slot, the per-device font caches for CHARSET are invalidated
   and the default face's font property is flagged as changed (with
   Qglobal as the locale argument) by face_property_was_changed. */
1937 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1938 Set the 'registry property of CHARSET to REGISTRY.
1940 (charset, registry))
1942 charset = Fget_charset (charset);
1943 CHECK_STRING (registry);
1944 XCHARSET_REGISTRY (charset) = registry;
1945 invalidate_charset_font_caches (charset);
1946 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.
   The mapping table is the charset's decoding-table slot: CHARSET is
   passed through Fget_charset and XCHARSET_DECODING_TABLE is returned. */
1951 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1952 Return mapping-table of CHARSET.
1956 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   From the code shown: the decoding-table slot is reset to Qnil; a vector
   TABLE is vetted by decoding_table_check_elements, whose failures are
   reported as "Too big table", "Invalid element is found" or "Something
   wrong"; and a TABLE that reaches the final branch raises a
   wrong-type-argument error ("vector-or-nil-p").  The entries are then
   registered one by one with Fput_char_attribute under the charset's
   name, using the table index plus CHARSET_BYTE_OFFSET as the code point.
   For nested vectors the outer index (plus byte offset) is shifted left
   by 8 -- presumably before being combined with the inner index; confirm. */
1959 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1960 Set mapping-table of CHARSET to TABLE.
1964 struct Lisp_Charset *cs;
1968 charset = Fget_charset (charset);
1969 cs = XCHARSET (charset);
1973 CHARSET_DECODING_TABLE(cs) = Qnil;
1976 else if (VECTORP (table))
1978 int ccs_len = CHARSET_BYTE_SIZE (cs);
1979 int ret = decoding_table_check_elements (table,
1980 CHARSET_DIMENSION (cs),
1985 signal_simple_error ("Too big table", table);
1987 signal_simple_error ("Invalid element is found", table);
1989 signal_simple_error ("Something wrong", table);
1991 CHARSET_DECODING_TABLE(cs) = Qnil;
1994 signal_error (Qwrong_type_argument,
1995 list2 (build_translated_string ("vector-or-nil-p"),
1998 byte_offset = CHARSET_BYTE_OFFSET (cs);
1999 switch (CHARSET_DIMENSION (cs))
2002 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2004 Lisp_Object c = XVECTOR_DATA(table)[i];
2007 Fput_char_attribute (c, XCHARSET_NAME (charset),
2008 make_int (i + byte_offset));
2012 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2014 Lisp_Object v = XVECTOR_DATA(table)[i];
2020 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2022 Lisp_Object c = XVECTOR_DATA(v)[j];
2026 (c, XCHARSET_NAME (charset),
2027 make_int ( ( (i + byte_offset) << 8 )
2033 Fput_char_attribute (v, XCHARSET_NAME (charset),
2034 make_int (i + byte_offset));
2043 /************************************************************************/
2044 /* Lisp primitives for working with characters */
2045 /************************************************************************/
/* Lisp primitive `decode-char'.
   CHARSET is passed through Fget_charset.  A charset whose graphic value
   is 1 gets an extra adjustment of the code first (presumably clearing
   high bits -- confirm).  The code is then decoded with DECODE_CHAR when
   DEFINED_ONLY is nil, or with decode_defined_char otherwise.  A negative
   result is mapped to nil; any other result is returned via make_char. */
2048 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2049 Make a character from CHARSET and code-point CODE.
2050 If DEFINED_ONLY is non-nil, builtin character is not returned.
2051 If corresponding character is not found, nil is returned.
2053 (charset, code, defined_only))
2057 charset = Fget_charset (charset);
2060 if (XCHARSET_GRAPHIC (charset) == 1)
2062 if (NILP (defined_only))
2063 c = DECODE_CHAR (charset, c);
2065 c = decode_defined_char (charset, c);
2066 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   For the latin-viscii charset the code is first decoded with
   Fdecode_char and the resulting character is looked up, via
   Fget_char_attribute, in latin-viscii-lower and latin-viscii-upper;
   CHARSET is switched to whichever of the two applies (presumably when
   the lookup succeeds -- confirm).  The code is then decoded with
   decode_builtin_char; when that yields a negative value the function
   falls back to Fdecode_char (charset, code, Qnil). */
2069 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2070 Make a builtin character from CHARSET and code-point CODE.
2076 charset = Fget_charset (charset);
2078 if (EQ (charset, Vcharset_latin_viscii))
2080 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2086 (ret = Fget_char_attribute (chr,
2087 Vcharset_latin_viscii_lower,
2090 charset = Vcharset_latin_viscii_lower;
2094 (ret = Fget_char_attribute (chr,
2095 Vcharset_latin_viscii_upper,
2098 charset = Vcharset_latin_viscii_upper;
2105 if (XCHARSET_GRAPHIC (charset) == 1)
2108 c = decode_builtin_char (charset, c);
2109 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.
   The permitted octet range depends on the charset: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  An out-of-range ARG1 or ARG2
   is reported with args_out_of_range_3.  For a dimension-1 charset a
   non-nil ARG2 is an error and the character is built from ARG1 alone;
   otherwise ARG2 is masked with 0x7f, range-checked, and both octets are
   handed to MAKE_CHAR. */
2113 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2114 Make a character from CHARSET and octets ARG1 and ARG2.
2115 ARG2 is required only for characters from two-dimensional charsets.
2116 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2117 character s with caron.
2119 (charset, arg1, arg2))
2123 int lowlim, highlim;
2125 charset = Fget_charset (charset);
2126 cs = XCHARSET (charset);
2128 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2129 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2131 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2133 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2134 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2137 /* It is useful (and safe, according to Olivier Galibert) to strip
2138 the 8th bit off ARG1 and ARG2 because it allows programmers to
2139 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2140 Latin 2 code of the character. */
2148 if (a1 < lowlim || a1 > highlim)
2149 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2151 if (CHARSET_DIMENSION (cs) == 1)
2155 ("Charset is of dimension one; second octet must be nil", arg2);
2156 return make_char (MAKE_CHAR (charset, a1, 0));
2165 a2 = XINT (arg2) & 0x7f;
2167 if (a2 < lowlim || a2 > highlim)
2168 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2170 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT; the value returned is
   the NAME of the charset that CHAR_CHARSET reports for it, not the
   charset object. */
2173 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2174 Return the character set of CHARACTER.
2178 CHECK_CHAR_COERCE_INT (character);
2180 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.
   BREAKUP_CHAR splits CHARACTER into its charset and two octets.  N of
   nil or 0 selects the first octet, N of 1 selects the second, and any
   other N signals "Octet number must be 0 or 1". */
2183 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2184 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2185 N defaults to 0 if omitted.
2189 Lisp_Object charset;
2192 CHECK_CHAR_COERCE_INT (character);
2194 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2196 if (NILP (n) || EQ (n, Qzero))
2197 return make_int (octet0);
2198 else if (EQ (n, make_int (1)))
2199 return make_int (octet1);
2201 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.
   The code point is computed by charset_code_point for CHARACTER in
   CHARSET (after Fget_charset); a non-negative code point is returned as
   a Lisp integer. */
2205 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2206 Return code-point of CHARACTER in specified CHARSET.
2208 (character, charset))
2212 CHECK_CHAR_COERCE_INT (character);
2213 charset = Fget_charset (charset);
2214 code_point = charset_code_point (charset, XCHAR (character));
2215 if (code_point >= 0)
2216 return make_int (code_point);
/* Lisp primitive `split-char'.
   Two ways of building the result appear below, presumably selected by a
   preprocessor conditional (confirm):
   - an ENCODE_CHAR-based form that conses the low 8 bits of the code
     point onto the result once per dimension and then conses the charset
     name on the front;
   - a BREAKUP_CHAR-based form that returns (NAME C1 C2) for a
     dimension-2 charset and (NAME C1) otherwise.
   CHARSET and RC are GC-protected while the list is built. */
2222 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2223 Return list of charset and one or two position-codes of CHARACTER.
2227 /* This function can GC */
2228 struct gcpro gcpro1, gcpro2;
2229 Lisp_Object charset = Qnil;
2230 Lisp_Object rc = Qnil;
2238 GCPRO2 (charset, rc);
2239 CHECK_CHAR_COERCE_INT (character);
2242 code_point = ENCODE_CHAR (XCHAR (character), charset);
2243 dimension = XCHARSET_DIMENSION (charset);
2244 while (dimension > 0)
2246 rc = Fcons (make_int (code_point & 255), rc);
2250 rc = Fcons (XCHARSET_NAME (charset), rc);
2252 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2254 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2256 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2260 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2269 #ifdef ENABLE_COMPOSITE_CHARS
2270 /************************************************************************/
2271 /* composite character functions */
2272 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are made
   into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path shown
   below signals "No more composite chars available" once
   composite_char_row_next reaches 128; otherwise it builds a character
   in Vcharset_composite from the next free row/column, records the
   mapping in both hash tables, and advances the column, wrapping it back
   to 32 and moving to the next row when it reaches 128. */
2275 lookup_composite_char (Bufbyte *str, int len)
2277 Lisp_Object lispstr = make_string (str, len);
2278 Lisp_Object ch = Fgethash (lispstr,
2279 Vcomposite_char_string2char_hash_table,
2285 if (composite_char_row_next >= 128)
2286 signal_simple_error ("No more composite chars available", lispstr);
2287 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2288 composite_char_col_next);
2289 Fputhash (make_char (emch), lispstr,
2290 Vcomposite_char_char2string_hash_table);
2291 Fputhash (lispstr, make_char (emch),
2292 Vcomposite_char_string2char_hash_table);
2293 composite_char_col_next++;
2294 if (composite_char_col_next >= 128)
2296 composite_char_col_next = 32;
2297 composite_char_row_next++;
/* Reverse lookup for a composite character: CH is looked up in
   Vcomposite_char_char2string_hash_table, and the function asserts that
   an entry was found (the result must not be unbound). */
2306 composite_char_string (Emchar ch)
2308 Lisp_Object str = Fgethash (make_char (ch),
2309 Vcomposite_char_char2string_hash_table,
2311 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char, declared with xxDEFUN.
   STRING must be a string; its data and length are handed to
   lookup_composite_char and the resulting character is returned via
   make_char. */
2315 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2316 Convert a string into a single composite character.
2317 The character is the result of overstriking all the characters in
2322 CHECK_STRING (string);
2323 return make_char (lookup_composite_char (XSTRING_DATA (string),
2324 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string, declared with xxDEFUN.
   A character whose leading byte is not LEADING_BYTE_COMPOSITE signals
   "Must be composite char"; otherwise the string recorded for the
   character is returned. */
2327 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2328 Return a string of the characters comprising a composite character.
2336 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2337 signal_simple_error ("Must be composite char", ch);
2338 return composite_char_string (emch);
2340 #endif /* ENABLE_COMPOSITE_CHARS */
2343 /************************************************************************/
2344 /* initialization */
2345 /************************************************************************/
/* Symbol/subr registration for this file.  Registers the charset lrecord
   implementation, declares each Lisp primitive defined here with DEFSUBR,
   and uses defsymbol to set up the symbols used as property names
   (registry, final, graphic, ...), as direction values (l2r, r2l), and as
   the names of the predefined charsets. */
2348 syms_of_mule_charset (void)
2350 INIT_LRECORD_IMPLEMENTATION (charset);
2352 DEFSUBR (Fcharsetp);
2353 DEFSUBR (Ffind_charset);
2354 DEFSUBR (Fget_charset);
2355 DEFSUBR (Fcharset_list);
2356 DEFSUBR (Fcharset_name);
2357 DEFSUBR (Fmake_charset);
2358 DEFSUBR (Fmake_reverse_direction_charset);
2359 /* DEFSUBR (Freverse_direction_charset); */
2360 DEFSUBR (Fdefine_charset_alias);
2361 DEFSUBR (Fcharset_from_attributes);
2362 DEFSUBR (Fcharset_short_name);
2363 DEFSUBR (Fcharset_long_name);
2364 DEFSUBR (Fcharset_description);
2365 DEFSUBR (Fcharset_dimension);
2366 DEFSUBR (Fcharset_property);
2367 DEFSUBR (Fcharset_id);
2368 DEFSUBR (Fset_charset_ccl_program);
2369 DEFSUBR (Fset_charset_registry);
2371 DEFSUBR (Fcharset_mapping_table);
2372 DEFSUBR (Fset_charset_mapping_table);
2376 DEFSUBR (Fdecode_char);
2377 DEFSUBR (Fdecode_builtin_char);
2378 DEFSUBR (Fencode_char);
2380 DEFSUBR (Fmake_char);
2381 DEFSUBR (Fchar_charset);
2382 DEFSUBR (Fchar_octet);
2383 DEFSUBR (Fsplit_char);
2385 #ifdef ENABLE_COMPOSITE_CHARS
2386 DEFSUBR (Fmake_composite_char);
2387 DEFSUBR (Fcomposite_char_string);
2390 defsymbol (&Qcharsetp, "charsetp");
2391 defsymbol (&Qregistry, "registry");
2392 defsymbol (&Qfinal, "final");
2393 defsymbol (&Qgraphic, "graphic");
2394 defsymbol (&Qdirection, "direction");
2395 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2396 defsymbol (&Qshort_name, "short-name");
2397 defsymbol (&Qlong_name, "long-name");
2399 defsymbol (&Qmother, "mother");
2400 defsymbol (&Qmin_code, "min-code");
2401 defsymbol (&Qmax_code, "max-code");
2402 defsymbol (&Qcode_offset, "code-offset");
2403 defsymbol (&Qconversion, "conversion");
2404 defsymbol (&Q94x60, "94x60");
2405 defsymbol (&Q94x94x60, "94x94x60");
2408 defsymbol (&Ql2r, "l2r");
2409 defsymbol (&Qr2l, "r2l");
2411 /* Charsets, compatible with FSF 20.3
2412 Naming convention is Script-Charset[-Edition] */
2413 defsymbol (&Qascii, "ascii");
2414 defsymbol (&Qcontrol_1, "control-1");
2415 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2416 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2417 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2418 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2419 defsymbol (&Qthai_tis620, "thai-tis620");
2420 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2421 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2422 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2423 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2424 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2425 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2426 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2427 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2428 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2429 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2430 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2431 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2432 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2433 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2434 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2435 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2437 defsymbol (&Qucs, "ucs");
2438 defsymbol (&Qucs_bmp, "ucs-bmp");
2439 defsymbol (&Qucs_smp, "ucs-smp");
2440 defsymbol (&Qucs_sip, "ucs-sip");
2441 defsymbol (&Qucs_cns, "ucs-cns");
2442 defsymbol (&Qucs_jis, "ucs-jis");
2443 defsymbol (&Qucs_ks, "ucs-ks");
2444 defsymbol (&Qucs_big5, "ucs-big5");
2445 defsymbol (&Qlatin_viscii, "latin-viscii");
2446 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2447 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2448 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2449 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2450 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2451 defsymbol (&Qjis_x0208, "=jis-x0208");
2452 defsymbol (&Qideograph_gt, "ideograph-gt");
2453 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2454 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2455 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2456 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2457 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2458 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2459 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2460 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2461 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2462 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2463 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2464 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2465 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2466 defsymbol (&Qchinese_big5, "chinese-big5");
2467 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2468 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2469 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2470 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2471 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2472 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2473 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2474 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2475 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2476 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2477 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2478 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2479 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2480 defsymbol (&Qchina3_jef, "china3-jef");
2481 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2482 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2484 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2485 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2487 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file.  Allocates a zero-filled
   struct charset_lookup and registers it with
   dump_add_root_struct_ptr, fills the leading-byte and attribute lookup
   tables with Qnil, seeds the next-allocated leading-byte counters, and
   defines the Lisp variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): the attribute table is cleared by both a two-level and a
   three-level loop, and two sets of leading-byte counters are seeded;
   presumably these are alternatives chosen by a preprocessor conditional
   -- confirm. */
2491 vars_of_mule_charset (void)
2498 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2499 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2501 /* Table of charsets indexed by leading byte. */
2502 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2503 chlook->charset_by_leading_byte[i] = Qnil;
2506 /* Table of charsets indexed by type/final-byte. */
2507 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2508 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2509 chlook->charset_by_attributes[i][j] = Qnil;
2511 /* Table of charsets indexed by type/final-byte/direction. */
2512 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2513 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2514 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2515 chlook->charset_by_attributes[i][j][k] = Qnil;
2519 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2521 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2522 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2526 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2527 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2528 Leading-code of private TYPE9N charset of column-width 1.
2530 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2534 Vdefault_coded_charset_priority_list = Qnil;
2535 DEFVAR_LISP ("default-coded-charset-priority-list",
2536 &Vdefault_coded_charset_priority_list /*
2537 Default order of preferred coded-character-sets.
2543 complex_vars_of_mule_charset (void)
2545 staticpro (&Vcharset_hash_table);
2546 Vcharset_hash_table =
2547 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2549 /* Predefined character sets. We store them into variables for
2553 staticpro (&Vcharset_ucs);
2555 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2556 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2557 build_string ("UCS"),
2558 build_string ("UCS"),
2559 build_string ("ISO/IEC 10646"),
2561 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2562 staticpro (&Vcharset_ucs_bmp);
2564 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2565 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2566 build_string ("BMP"),
2567 build_string ("UCS-BMP"),
2568 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2570 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2571 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2572 staticpro (&Vcharset_ucs_smp);
2574 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2575 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2576 build_string ("SMP"),
2577 build_string ("UCS-SMP"),
2578 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2579 build_string ("UCS00-1"),
2580 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2581 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2582 staticpro (&Vcharset_ucs_sip);
2584 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2585 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2586 build_string ("SIP"),
2587 build_string ("UCS-SIP"),
2588 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2589 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2590 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2591 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2592 staticpro (&Vcharset_ucs_cns);
2594 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2595 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2596 build_string ("UCS for CNS"),
2597 build_string ("UCS for CNS 11643"),
2598 build_string ("ISO/IEC 10646 for CNS 11643"),
2600 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2601 staticpro (&Vcharset_ucs_jis);
2603 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2604 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2605 build_string ("UCS for JIS"),
2606 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2608 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2610 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2611 staticpro (&Vcharset_ucs_ks);
2613 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2614 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2615 build_string ("UCS for KS"),
2616 build_string ("UCS for CCS defined by KS"),
2617 build_string ("ISO/IEC 10646 for Korean Standards"),
2619 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2620 staticpro (&Vcharset_ucs_big5);
2622 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2623 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2624 build_string ("UCS for Big5"),
2625 build_string ("UCS for Big5"),
2626 build_string ("ISO/IEC 10646 for Big5"),
2628 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
/* Zero placeholders for the Thai and half-width katakana range macros.
   MIN_CHAR_THAI and MAX_CHAR_THAI are passed to make_charset when
   Vcharset_thai_tis620 is created further down.  The Hebrew pair is
   left commented out; the Vcharset_hebrew_iso8859_8 registration passes
   literal 0s annotated with the macro names instead.
   NOTE(review): no use of the HALFWIDTH_KATAKANA pair is visible in the
   nearby registrations -- confirm it is still needed.  */
2630 # define MIN_CHAR_THAI 0
2631 # define MAX_CHAR_THAI 0
2632 /* # define MIN_CHAR_HEBREW 0 */
2633 /* # define MAX_CHAR_HEBREW 0 */
2634 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2635 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2637 staticpro (&Vcharset_ascii);
2639 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2640 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2641 build_string ("ASCII"),
2642 build_string ("ASCII)"),
2643 build_string ("ASCII (ISO646 IRV)"),
2644 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2645 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2646 staticpro (&Vcharset_control_1);
2647 Vcharset_control_1 =
2648 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2649 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2650 build_string ("C1"),
2651 build_string ("Control characters"),
2652 build_string ("Control characters 128-191"),
2654 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2655 staticpro (&Vcharset_latin_iso8859_1);
2656 Vcharset_latin_iso8859_1 =
2657 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2658 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2659 build_string ("Latin-1"),
2660 build_string ("ISO8859-1 (Latin-1)"),
2661 build_string ("ISO8859-1 (Latin-1)"),
2662 build_string ("iso8859-1"),
2663 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2664 staticpro (&Vcharset_latin_iso8859_2);
2665 Vcharset_latin_iso8859_2 =
2666 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2667 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2668 build_string ("Latin-2"),
2669 build_string ("ISO8859-2 (Latin-2)"),
2670 build_string ("ISO8859-2 (Latin-2)"),
2671 build_string ("iso8859-2"),
2672 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2673 staticpro (&Vcharset_latin_iso8859_3);
2674 Vcharset_latin_iso8859_3 =
2675 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2676 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2677 build_string ("Latin-3"),
2678 build_string ("ISO8859-3 (Latin-3)"),
2679 build_string ("ISO8859-3 (Latin-3)"),
2680 build_string ("iso8859-3"),
2681 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2682 staticpro (&Vcharset_latin_iso8859_4);
2683 Vcharset_latin_iso8859_4 =
2684 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2685 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2686 build_string ("Latin-4"),
2687 build_string ("ISO8859-4 (Latin-4)"),
2688 build_string ("ISO8859-4 (Latin-4)"),
2689 build_string ("iso8859-4"),
2690 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2691 staticpro (&Vcharset_thai_tis620);
2692 Vcharset_thai_tis620 =
2693 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2694 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2695 build_string ("TIS620"),
2696 build_string ("TIS620 (Thai)"),
2697 build_string ("TIS620.2529 (Thai)"),
2698 build_string ("tis620"),
2699 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2700 MIN_CHAR_THAI, 32, Qnil, CONVERSION_96);
2701 staticpro (&Vcharset_greek_iso8859_7);
2702 Vcharset_greek_iso8859_7 =
2703 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2704 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2705 build_string ("ISO8859-7"),
2706 build_string ("ISO8859-7 (Greek)"),
2707 build_string ("ISO8859-7 (Greek)"),
2708 build_string ("iso8859-7"),
2709 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2710 staticpro (&Vcharset_arabic_iso8859_6);
2711 Vcharset_arabic_iso8859_6 =
2712 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2713 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2714 build_string ("ISO8859-6"),
2715 build_string ("ISO8859-6 (Arabic)"),
2716 build_string ("ISO8859-6 (Arabic)"),
2717 build_string ("iso8859-6"),
2718 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2719 staticpro (&Vcharset_hebrew_iso8859_8);
2720 Vcharset_hebrew_iso8859_8 =
2721 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2722 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2723 build_string ("ISO8859-8"),
2724 build_string ("ISO8859-8 (Hebrew)"),
2725 build_string ("ISO8859-8 (Hebrew)"),
2726 build_string ("iso8859-8"),
2728 0 /* MIN_CHAR_HEBREW */,
2729 0 /* MAX_CHAR_HEBREW */, 0, 32,
2730 Qnil, CONVERSION_IDENTICAL);
2731 staticpro (&Vcharset_katakana_jisx0201);
2732 Vcharset_katakana_jisx0201 =
2733 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2734 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2735 build_string ("JISX0201 Kana"),
2736 build_string ("JISX0201.1976 (Japanese Kana)"),
2737 build_string ("JISX0201.1976 Japanese Kana"),
2738 build_string ("jisx0201\\.1976"),
2739 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2740 staticpro (&Vcharset_latin_jisx0201);
2741 Vcharset_latin_jisx0201 =
2742 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2743 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2744 build_string ("JISX0201 Roman"),
2745 build_string ("JISX0201.1976 (Japanese Roman)"),
2746 build_string ("JISX0201.1976 Japanese Roman"),
2747 build_string ("jisx0201\\.1976"),
2748 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2749 staticpro (&Vcharset_cyrillic_iso8859_5);
2750 Vcharset_cyrillic_iso8859_5 =
2751 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2752 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2753 build_string ("ISO8859-5"),
2754 build_string ("ISO8859-5 (Cyrillic)"),
2755 build_string ("ISO8859-5 (Cyrillic)"),
2756 build_string ("iso8859-5"),
2757 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2758 staticpro (&Vcharset_latin_iso8859_9);
2759 Vcharset_latin_iso8859_9 =
2760 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2761 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2762 build_string ("Latin-5"),
2763 build_string ("ISO8859-9 (Latin-5)"),
2764 build_string ("ISO8859-9 (Latin-5)"),
2765 build_string ("iso8859-9"),
2766 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2768 staticpro (&Vcharset_jis_x0208);
2769 Vcharset_jis_x0208 =
2770 make_charset (LEADING_BYTE_JIS_X0208,
2772 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2773 build_string ("JIS X0208"),
2774 build_string ("JIS X0208 Common"),
2775 build_string ("JIS X0208 Common part"),
2776 build_string ("jisx0208\\.1990"),
2778 MIN_CHAR_JIS_X0208_1990,
2779 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2780 Qnil, CONVERSION_94x94);
2782 staticpro (&Vcharset_japanese_jisx0208_1978);
2783 Vcharset_japanese_jisx0208_1978 =
2784 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2785 Qjapanese_jisx0208_1978, 94, 2,
2786 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2787 build_string ("JIS X0208:1978"),
2788 build_string ("JIS X0208:1978 (Japanese)"),
2790 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2791 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2798 CONVERSION_IDENTICAL);
2799 staticpro (&Vcharset_chinese_gb2312);
2800 Vcharset_chinese_gb2312 =
2801 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2802 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2803 build_string ("GB2312"),
2804 build_string ("GB2312)"),
2805 build_string ("GB2312 Chinese simplified"),
2806 build_string ("gb2312"),
2807 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2808 staticpro (&Vcharset_chinese_gb12345);
2809 Vcharset_chinese_gb12345 =
2810 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2811 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("G1"),
2813 build_string ("GB 12345)"),
2814 build_string ("GB 12345-1990"),
2815 build_string ("GB12345\\(\\.1990\\)?-0"),
2816 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_japanese_jisx0208);
2818 Vcharset_japanese_jisx0208 =
2819 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2820 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2821 build_string ("JISX0208"),
2822 build_string ("JIS X0208:1983 (Japanese)"),
2823 build_string ("JIS X0208:1983 Japanese Kanji"),
2824 build_string ("jisx0208\\.1983"),
2831 CONVERSION_IDENTICAL);
2833 staticpro (&Vcharset_japanese_jisx0208_1990);
2834 Vcharset_japanese_jisx0208_1990 =
2835 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2836 Qjapanese_jisx0208_1990, 94, 2,
2837 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2838 build_string ("JISX0208-1990"),
2839 build_string ("JIS X0208:1990 (Japanese)"),
2840 build_string ("JIS X0208:1990 Japanese Kanji"),
2841 build_string ("jisx0208\\.1990"),
2843 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2844 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2845 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2846 Vcharset_jis_x0208 /* Qnil */,
2847 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2849 staticpro (&Vcharset_korean_ksc5601);
2850 Vcharset_korean_ksc5601 =
2851 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2852 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2853 build_string ("KSC5601"),
2854 build_string ("KSC5601 (Korean"),
2855 build_string ("KSC5601 Korean Hangul and Hanja"),
2856 build_string ("ksc5601"),
2857 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2858 staticpro (&Vcharset_japanese_jisx0212);
2859 Vcharset_japanese_jisx0212 =
2860 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2861 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2862 build_string ("JISX0212"),
2863 build_string ("JISX0212 (Japanese)"),
2864 build_string ("JISX0212 Japanese Supplement"),
2865 build_string ("jisx0212"),
2866 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Pattern string for CNS 11643 plane N, assembled by string-literal
   concatenation, so N must itself be a string literal (it is used with
   "1" and "2" below).  Read as an Emacs-style regexp it matches
   "cns11643", then '.' or '-', then an optional group of arbitrary text
   ending in '.' or '-', then N anchored at the end.
   The result is handed to build_string as the last string argument of
   the make_charset calls for planes 1 and 2 -- presumably the font
   registry pattern; confirm against make_charset's prototype.  */
2868 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2869 staticpro (&Vcharset_chinese_cns11643_1);
2870 Vcharset_chinese_cns11643_1 =
2871 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2872 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2873 build_string ("CNS11643-1"),
2874 build_string ("CNS11643-1 (Chinese traditional)"),
2876 ("CNS 11643 Plane 1 Chinese traditional"),
2877 build_string (CHINESE_CNS_PLANE_RE("1")),
2878 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2879 staticpro (&Vcharset_chinese_cns11643_2);
2880 Vcharset_chinese_cns11643_2 =
2881 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2882 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2883 build_string ("CNS11643-2"),
2884 build_string ("CNS11643-2 (Chinese traditional)"),
2886 ("CNS 11643 Plane 2 Chinese traditional"),
2887 build_string (CHINESE_CNS_PLANE_RE("2")),
2888 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2890 staticpro (&Vcharset_latin_tcvn5712);
2891 Vcharset_latin_tcvn5712 =
2892 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2893 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2894 build_string ("TCVN 5712"),
2895 build_string ("TCVN 5712 (VSCII-2)"),
2896 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2897 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2898 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2899 staticpro (&Vcharset_latin_viscii_lower);
2900 Vcharset_latin_viscii_lower =
2901 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2902 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2903 build_string ("VISCII lower"),
2904 build_string ("VISCII lower (Vietnamese)"),
2905 build_string ("VISCII lower (Vietnamese)"),
2906 build_string ("MULEVISCII-LOWER"),
2907 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2908 staticpro (&Vcharset_latin_viscii_upper);
2909 Vcharset_latin_viscii_upper =
2910 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2911 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2912 build_string ("VISCII upper"),
2913 build_string ("VISCII upper (Vietnamese)"),
2914 build_string ("VISCII upper (Vietnamese)"),
2915 build_string ("MULEVISCII-UPPER"),
2916 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2917 staticpro (&Vcharset_latin_viscii);
2918 Vcharset_latin_viscii =
2919 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2920 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2921 build_string ("VISCII"),
2922 build_string ("VISCII 1.1 (Vietnamese)"),
2923 build_string ("VISCII 1.1 (Vietnamese)"),
2924 build_string ("VISCII1\\.1"),
2925 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2926 staticpro (&Vcharset_chinese_big5);
2927 Vcharset_chinese_big5 =
2928 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2929 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2930 build_string ("Big5"),
2931 build_string ("Big5"),
2932 build_string ("Big5 Chinese traditional"),
2933 build_string ("big5-0"),
2935 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2936 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2938 staticpro (&Vcharset_chinese_big5_cdp);
2939 Vcharset_chinese_big5_cdp =
2940 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2941 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Big5-CDP"),
2943 build_string ("Big5 + CDP extension"),
2944 build_string ("Big5 with CDP extension"),
2945 build_string ("big5\\.cdp-0"),
2946 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2947 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): expands to the registration of one HANZIKU charset.
   The expansion calls staticpro on Vcharset_ideograph_hanziku_<n> and
   assigns that variable the result of make_charset, token-pasting N
   into LEADING_BYTE_HANZIKU_<n>, Qideograph_hanziku_<n> and the
   MIN_CHAR_HANZIKU_<n> / MAX_CHAR_HANZIKU_<n> macros.  The name strings
   are formed by stringizing N ("HZK-<n>", "HANZIKU-<n>", ...).  The
   expansion already ends in ';'.
   NOTE(review): the "hanziku-<n>$" pattern is passed as a bare
   parenthesized string literal, whereas the neighbouring make_charset
   calls wrap the corresponding argument in build_string.  This looks
   like a dropped "build_string" line -- confirm against upstream.  */
2949 #define DEF_HANZIKU(n) \
2950 staticpro (&Vcharset_ideograph_hanziku_##n); \
2951 Vcharset_ideograph_hanziku_##n = \
2952 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2953 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2954 build_string ("HZK-"#n), \
2955 build_string ("HANZIKU-"#n), \
2956 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2958 ("hanziku-"#n"$"), \
2959 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2960 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2973 staticpro (&Vcharset_china3_jef);
2974 Vcharset_china3_jef =
2975 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2976 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2977 build_string ("JC3"),
2978 build_string ("JEF + CHINA3"),
2979 build_string ("JEF + CHINA3 private characters"),
2980 build_string ("china3jef-0"),
2981 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2982 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2983 staticpro (&Vcharset_ideograph_cbeta);
2984 Vcharset_ideograph_cbeta =
2985 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2986 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2987 build_string ("CB"),
2988 build_string ("CBETA"),
2989 build_string ("CBETA private characters"),
2990 build_string ("cbeta-0"),
2991 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2992 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2993 staticpro (&Vcharset_ideograph_gt);
2994 Vcharset_ideograph_gt =
2995 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2996 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2997 build_string ("GT"),
2998 build_string ("GT"),
2999 build_string ("GT"),
3001 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3002 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): expands to the registration of one "GT (pseudo JIS
   encoding)" charset part.  The expansion calls staticpro on
   Vcharset_ideograph_gt_pj_<n> and assigns that variable the result of
   make_charset, token-pasting N into LEADING_BYTE_GT_PJ_<n> and
   Qideograph_gt_pj_<n>.  The name strings and the pattern string are
   formed by stringizing N.  Unlike DEF_HANZIKU, the numeric arguments
   here are the literals 94, 2 and 0, 0, 0, 33 rather than
   per-part MIN_CHAR / MAX_CHAR macros.  The expansion already ends
   in ';'.
   NOTE(review): the "\\(GTpj-<n>\\|jisx0208\\.GT-<n>\\)$" pattern is
   passed as a bare parenthesized string literal, whereas the
   neighbouring make_charset calls wrap the corresponding argument in
   build_string.  This looks like a dropped "build_string" line --
   confirm against upstream.  */
3003 #define DEF_GT_PJ(n) \
3004 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3005 Vcharset_ideograph_gt_pj_##n = \
3006 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3007 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3008 build_string ("GT-PJ-"#n), \
3009 build_string ("GT (pseudo JIS encoding) part "#n), \
3010 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3012 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3013 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3026 staticpro (&Vcharset_ideograph_daikanwa_2);
3027 Vcharset_ideograph_daikanwa_2 =
3028 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3029 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3030 build_string ("Daikanwa Rev."),
3031 build_string ("Morohashi's Daikanwa Rev."),
3033 ("Daikanwa dictionary (revised version)"),
3034 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3035 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3036 staticpro (&Vcharset_ideograph_daikanwa);
3037 Vcharset_ideograph_daikanwa =
3038 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3039 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3040 build_string ("Daikanwa"),
3041 build_string ("Morohashi's Daikanwa Rev.2"),
3043 ("Daikanwa dictionary (second revised version)"),
3044 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3045 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3046 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3048 staticpro (&Vcharset_ethiopic_ucs);
3049 Vcharset_ethiopic_ucs =
3050 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3051 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3052 build_string ("Ethiopic (UCS)"),
3053 build_string ("Ethiopic (UCS)"),
3054 build_string ("Ethiopic of UCS"),
3055 build_string ("Ethiopic-Unicode"),
3056 Qnil, 0x1200, 0x137F, 0, 0,
3057 Qnil, CONVERSION_IDENTICAL);
3059 staticpro (&Vcharset_chinese_big5_1);
3060 Vcharset_chinese_big5_1 =
3061 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3062 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Big5"),
3064 build_string ("Big5 (Level-1)"),
3066 ("Big5 Level-1 Chinese traditional"),
3067 build_string ("big5"),
3068 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3069 staticpro (&Vcharset_chinese_big5_2);
3070 Vcharset_chinese_big5_2 =
3071 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3072 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3073 build_string ("Big5"),
3074 build_string ("Big5 (Level-2)"),
3076 ("Big5 Level-2 Chinese traditional"),
3077 build_string ("big5"),
3078 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3080 #ifdef ENABLE_COMPOSITE_CHARS
3081 /* #### For simplicity, we put composite chars into a 96x96 charset.
3082 This is going to lead to problems because you can run out of
3083 room, esp. as we don't yet recycle numbers. */
3084 staticpro (&Vcharset_composite);
3085 Vcharset_composite =
3086 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3087 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3088 build_string ("Composite"),
3089 build_string ("Composite characters"),
3090 build_string ("Composite characters"),
3093 /* #### not dumped properly */
3094 composite_char_row_next = 32;
3095 composite_char_col_next = 32;
3097 Vcomposite_char_string2char_hash_table =
3098 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3099 Vcomposite_char_char2string_hash_table =
3100 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3101 staticpro (&Vcomposite_char_string2char_hash_table);
3102 staticpro (&Vcomposite_char_char2string_hash_table);
3103 #endif /* ENABLE_COMPOSITE_CHARS */