1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 /* Lisp_Object Vcharset_chinese_big5_cdp; */
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_china3_jef;
95 Lisp_Object Vcharset_ideograph_cbeta;
96 Lisp_Object Vcharset_ideograph_gt;
97 Lisp_Object Vcharset_ideograph_gt_pj_1;
98 Lisp_Object Vcharset_ideograph_gt_pj_2;
99 Lisp_Object Vcharset_ideograph_gt_pj_3;
100 Lisp_Object Vcharset_ideograph_gt_pj_4;
101 Lisp_Object Vcharset_ideograph_gt_pj_5;
102 Lisp_Object Vcharset_ideograph_gt_pj_6;
103 Lisp_Object Vcharset_ideograph_gt_pj_7;
104 Lisp_Object Vcharset_ideograph_gt_pj_8;
105 Lisp_Object Vcharset_ideograph_gt_pj_9;
106 Lisp_Object Vcharset_ideograph_gt_pj_10;
107 Lisp_Object Vcharset_ideograph_gt_pj_11;
108 Lisp_Object Vcharset_ideograph_daikanwa_2;
109 Lisp_Object Vcharset_ideograph_daikanwa;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps string representing
119 composed chars to their equivalent chars; one goes the
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* NOTE(review): the declared size 0xA0 is 160 entries, yet eleven rows of
   sixteen values are listed below.  Presumably a preprocessor conditional
   that is not visible here keeps only one of the two rows given for
   0x80 - 0x8f -- confirm. */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of the vector V.  A vector longer than CCS_LEN is
   tested for first; after that every element is visited, and an element
   that is neither nil nor a character leads (under conditions not visible
   here) to a recursive call with DIM reduced by one.
   NOTE(review): the values returned on each path are not visible in this
   excerpt -- confirm them before relying on a particular error code. */
180 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
182 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
186 if (XVECTOR_LENGTH (v) > ccs_len)
189 for (i = 0; i < XVECTOR_LENGTH (v); i++)
191 Lisp_Object c = XVECTOR_DATA(v)[i];
193 if (!NILP (c) && !CHARP (c))
/* Descend into the nested element with one dimension fewer. */
197 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Register VALUE as the code point of CHARACTER in the coded charset CCS.
   VALUE is accepted in two forms: a list of bytes (marked obsolete below)
   or an integer.  Anything else, and malformed lists, signal
   "Invalid value for coded-charset".  Any position CHARACTER already
   holds in CCS is removed from the decoding table before the new one is
   stored with decoding_table_put_char.
   NOTE(review): the return value and several guarding conditions are not
   visible in this excerpt -- confirm. */
209 put_char_ccs_code_point (Lisp_Object character,
210 Lisp_Object ccs, Lisp_Object value)
/* The work below is entered unless CCS is named `ucs' and the character
   already equals VALUE (as far as the visible part of the test shows). */
212 if (!EQ (XCHARSET_NAME (ccs), Qucs)
214 || (XCHAR (character) != XINT (value)))
216 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
220 { /* obsolete representation: value must be a list of bytes */
221 Lisp_Object ret = Fcar (value);
225 signal_simple_error ("Invalid value for coded-charset", value);
226 code_point = XINT (ret);
227 if (XCHARSET_GRAPHIC (ccs) == 1)
235 signal_simple_error ("Invalid value for coded-charset",
239 signal_simple_error ("Invalid value for coded-charset",
242 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J below the bytes accumulated so far. */
244 code_point = (code_point << 8) | j;
/* Replace the list form by the equivalent integer. */
247 value = make_int (code_point);
249 else if (INTP (value))
251 code_point = XINT (value);
252 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Mask each of the four octets down to its low seven bits. */
254 code_point &= 0x7F7F7F7F;
255 value = make_int (code_point);
259 signal_simple_error ("Invalid value for coded-charset", value);
/* Fetch the position CHARACTER currently has in CCS; the removal call
   that follows uses it. */
263 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
266 decoding_table_remove_char (ccs, XINT (cpos));
269 decoding_table_put_char (ccs, code_point, character);
/* Detach CHARACTER from the coded charset CCS in both directions.
   If the decoding table is a vector, the position recorded for CHARACTER
   is looked up and removed from it; if the encoding table is a char
   table, the entry for CHARACTER is overwritten with Qunbound.
   NOTE(review): the return value is not visible in this excerpt. */
275 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
277 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
278 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
280 if (VECTORP (decoding_table))
282 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
286 decoding_table_remove_char (ccs, XINT (cpos));
289 if (CHAR_TABLEP (encoding_table))
291 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
299 int leading_code_private_11;
302 Lisp_Object Qcharsetp;
304 /* Qdoc_string, Qdimension, Qchars defined in general.c */
305 Lisp_Object Qregistry, Qfinal, Qgraphic;
306 Lisp_Object Qdirection;
307 Lisp_Object Qreverse_direction_charset;
308 Lisp_Object Qleading_byte;
309 Lisp_Object Qshort_name, Qlong_name;
311 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
312 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
329 Qjapanese_jisx0208_1978,
333 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
370 Qideograph_daikanwa_2,
390 Lisp_Object Ql2r, Qr2l;
392 Lisp_Object Vcharset_hash_table;
394 /* Composite characters are characters constructed by overstriking two
395 or more regular characters.
397 1) The old Mule implementation involves storing composite characters
398 in a buffer as a tag followed by all of the actual characters
399 used to make up the composite character. I think this is a bad
400 idea; it greatly complicates code that wants to handle strings
401 one character at a time because it has to deal with the possibility
402 of great big ungainly characters. It's much more reasonable to
403 simply store an index into a table of composite characters.
405 2) The current implementation only allows for 16,384 separate
406 composite characters over the lifetime of the XEmacs process.
407 This could become a potential problem if the user
408 edited lots of different files that use composite characters.
409 Due to FSF bogosity, increasing the number of allowable
410 composite characters under Mule would decrease the number
411 of possible faces that can exist. Mule already has shrunk
412 this to 2048, and further shrinkage would become uncomfortable.
413 No such problems exist in XEmacs.
415 Composite characters could be represented as 0x80 C1 C2 C3,
416 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
417 for slightly under 2^20 (one million) composite characters
418 over the XEmacs process lifetime, and you only need to
419 increase the size of a Mule character from 19 to 21 bits.
420 Or you could use 0x80 C1 C2 C3 C4, allowing for about
421 85 million (slightly over 2^26) composite characters. */
424 /************************************************************************/
425 /* Basic Emchar functions */
426 /************************************************************************/
428 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
429 string in STR. Returns the number of bytes stored.
430 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Two encodings are visible in this body.  The first writes C as a
   UTF-8-style sequence: a lead byte whose high bits announce the length,
   followed by continuation bytes that each carry six payload bits under
   the 0x80 marker.  The second (from BREAKUP_CHAR onwards) works with a
   charset and its leading byte instead.
   NOTE(review): which of the two gets compiled is presumably decided by
   a preprocessor conditional that is not visible here -- confirm. */
434 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 11 bits: lead byte 0xc0 plus one continuation byte. */
449 else if ( c <= 0x7ff )
451 *p++ = (c >> 6) | 0xc0;
452 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: lead byte 0xe0 plus two continuation bytes. */
454 else if ( c <= 0xffff )
456 *p++ = (c >> 12) | 0xe0;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: lead byte 0xf0 plus three continuation bytes. */
460 else if ( c <= 0x1fffff )
462 *p++ = (c >> 18) | 0xf0;
463 *p++ = ((c >> 12) & 0x3f) | 0x80;
464 *p++ = ((c >> 6) & 0x3f) | 0x80;
465 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: lead byte 0xf8 plus four continuation bytes. */
467 else if ( c <= 0x3ffffff )
469 *p++ = (c >> 24) | 0xf8;
470 *p++ = ((c >> 18) & 0x3f) | 0x80;
471 *p++ = ((c >> 12) & 0x3f) | 0x80;
472 *p++ = ((c >> 6) & 0x3f) | 0x80;
473 *p++ = (c & 0x3f) | 0x80;
/* Everything larger: lead byte 0xfc plus five continuation bytes. */
477 *p++ = (c >> 30) | 0xfc;
478 *p++ = ((c >> 24) & 0x3f) | 0x80;
479 *p++ = ((c >> 18) & 0x3f) | 0x80;
480 *p++ = ((c >> 12) & 0x3f) | 0x80;
481 *p++ = ((c >> 6) & 0x3f) | 0x80;
482 *p++ = (c & 0x3f) | 0x80;
/* Charset-based form: split C into its charset and position codes, and
   emit the private prefix byte first when the leading byte is private. */
485 BREAKUP_CHAR (c, charset, c1, c2);
486 lb = CHAR_LEADING_BYTE (c);
487 if (LEADING_BYTE_PRIVATE_P (lb))
488 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
490 if (EQ (charset, Vcharset_control_1))
499 /* Return the first character from a Mule-encoded string in STR,
500 assuming it's non-ASCII. Do not call this directly.
501 Use the macro charptr_emchar() instead. */
/* Two decoders are visible in this body: one that classifies the first
   byte by lead-byte thresholds and then folds in six bits per following
   byte, and one that interprets the first byte as a charset leading byte.
   NOTE(review): presumably a preprocessor conditional that is not visible
   here selects between them -- confirm. */
504 non_ascii_charptr_emchar (const Bufbyte *str)
/* The first byte B is compared against descending thresholds.
   NOTE(review): the branch bodies are not visible here; presumably each
   one sets the initial payload bits and the continuation count LEN. */
517 else if ( b >= 0xf8 )
522 else if ( b >= 0xf0 )
527 else if ( b >= 0xe0 )
532 else if ( b >= 0xc0 )
542 for( ; len > 0; len-- )
/* Each further byte contributes its low six bits. */
545 ch = ( ch << 6 ) | ( b & 0x3f );
549 Bufbyte i0 = *str, i1, i2 = 0;
/* For the control-1 leading byte the result is simply the following
   byte minus 0x20. */
552 if (i0 == LEADING_BYTE_CONTROL_1)
553 return (Emchar) (*++str - 0x20);
555 if (LEADING_BYTE_PREFIX_P (i0))
/* Look up the charset for the leading byte; its dimension is tested
   next, before the character is assembled with MAKE_CHAR. */
560 charset = CHARSET_BY_LEADING_BYTE (i0);
561 if (XCHARSET_DIMENSION (charset) == 2)
564 return MAKE_CHAR (charset, i1, i2);
568 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
569 Do not call this directly. Use the macro valid_char_p() instead. */
/* The character is split into three fields with CHAR_FIELD1, CHAR_FIELD2
   and CHAR_FIELD3.  The fields are checked against the official and
   private ranges, and the charset registered for the matching leading
   byte is consulted.  Two similar sequences are visible: one driven by
   field 2 and one driven by field 1.
   NOTE(review): the condition choosing between the two sequences and the
   values returned on the failing tests are not visible here -- confirm. */
573 non_ascii_valid_char_p (Emchar ch)
577 /* Must have only lowest 19 bits set */
581 f1 = CHAR_FIELD1 (ch);
582 f2 = CHAR_FIELD2 (ch);
583 f3 = CHAR_FIELD3 (ch);
/* Field 2 must fall inside the official or the private range. */
589 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
590 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
591 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Positions other than 0x20 and 0x7F in a non-private charset are
   singled out by this test. */
596 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
597 f2 <= MAX_CHAR_FIELD2_PRIVATE))
601 NOTE: This takes advantage of the fact that
602 FIELD2_TO_OFFICIAL_LEADING_BYTE and
603 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
605 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
606 if (EQ (charset, Qnil))
/* The answer on this path is whether the charset has 96 characters. */
608 return (XCHARSET_CHARS (charset) == 96);
/* Field 1 must fall inside the official or the private range. */
614 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
615 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
616 f1 > MAX_CHAR_FIELD1_PRIVATE)
618 if (f2 < 0x20 || f3 < 0x20)
621 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the char-to-string hash
   table. */
622 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
624 if (UNBOUNDP (Fgethash (make_int (ch),
625 Vcomposite_char_char2string_hash_table,
630 #endif /* ENABLE_COMPOSITE_CHARS */
632 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
633 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private field-1 values use different offsets to reach
   their leading byte. */
636 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
641 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
643 if (EQ (charset, Qnil))
645 return (XCHARSET_CHARS (charset) == 96);
651 /************************************************************************/
652 /* Basic string functions */
653 /************************************************************************/
655 /* Copy the character pointed to by SRC into DST. Do not call this
656 directly. Use the macro charptr_copy_char() instead.
657 Return the number of bytes copied. */
/* The length of the character is taken from its first byte through
   REP_BYTES_BY_FIRST_BYTE; the loop then runs once per byte, advancing
   SRC and DST together.
   NOTE(review): the loop body and the return statement are not visible
   in this excerpt. */
660 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
662 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
664 for (i = bytes; i; i--, dst++, src++)
670 /************************************************************************/
671 /* streams of Emchars */
672 /************************************************************************/
674 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
675 The functions below are not meant to be called directly; use
676 the macros in insdel.h. */
/* Finish reading a multi-byte character whose first byte CH the caller
   passes in.  The remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are
   pulled from STREAM into a local buffer, which is then decoded with
   charptr_emchar. */
679 Lstream_get_emchar_1 (Lstream *stream, int ch)
681 Bufbyte str[MAX_EMCHAR_LEN];
682 Bufbyte *strptr = str;
685 str[0] = (Bufbyte) ch;
687 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
689 int c = Lstream_getc (stream);
/* In the visible code a negative result from Lstream_getc is guarded
   only by this assertion before being stored as a byte. */
690 bufpos_checking_assert (c >= 0);
691 *++strptr = (Bufbyte) c;
693 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM; the result of Lstream_write is passed back
   to the caller unchanged. */
697 Lstream_fput_emchar (Lstream *stream, Emchar ch)
699 Bufbyte str[MAX_EMCHAR_LEN];
700 Bytecount len = set_charptr_emchar (str, ch);
701 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand those
   bytes to Lstream_unread on STREAM. */
705 Lstream_funget_emchar (Lstream *stream, Emchar ch)
707 Bufbyte str[MAX_EMCHAR_LEN];
708 Bytecount len = set_charptr_emchar (str, ch);
709 Lstream_unread (stream, str, len);
713 /************************************************************************/
715 /************************************************************************/
/* Mark the Lisp objects held by the charset OBJ: its short and long
   names, doc string, registry, CCL program, decoding table and mother
   charset.
   NOTE(review): the value returned (and any slot marked through it) is
   not visible in this excerpt. */
718 mark_charset (Lisp_Object obj)
720 Lisp_Charset *cs = XCHARSET (obj);
722 mark_object (cs->short_name);
723 mark_object (cs->long_name);
724 mark_object (cs->doc_string);
725 mark_object (cs->registry);
726 mark_object (cs->ccl_program);
728 mark_object (cs->decoding_table);
729 mark_object (cs->mother);
/* Print the charset OBJ to PRINTCHARFUN in the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the middle part is produced by the format string below.
   NOTE(review): the condition under which the "printing unreadable
   object" error is raised is not visible here; ESCAPEFLAG is not used on
   any visible line. */
735 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
737 Lisp_Charset *cs = XCHARSET (obj);
741 error ("printing unreadable object #<charset %s 0x%x>",
742 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
745 write_c_string ("#<charset ", printcharfun);
746 print_internal (CHARSET_NAME (cs), printcharfun, 0);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): both sprintf calls write into BUF without a length
   limit, and the declaration of BUF is not visible here -- confirm that
   it is large enough for the longest possible expansion. */
753 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
755 CHARSET_DIMENSION (cs),
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object member, each located by its offset inside the structure.
   NOTE(review): the end of the table is not visible in this excerpt. */
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
776 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
781 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
782 mark_charset, print_charset, 0, 0, 0,
786 /* Make a new charset. */
787 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, copy the arguments into its slots and
   register the new object in the lookup tables held by chlook:
     - charset_by_attributes, indexed first by an ISO 2022 type computed
       as (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1) and then by
       FINAL (one visible variant adds DIRECTION as a further index);
     - charset_by_leading_byte, indexed by ID - MIN_LEADING_BYTE.
   Each slot is asserted to be nil before it is filled.  Finally the
   object is entered into Vcharset_hash_table under NAME.
   NOTE(review): the return statement and the conditions around the
   registrations are not visible in this excerpt -- confirm. */
789 make_charset (Charset_ID id, Lisp_Object name,
790 unsigned short chars, unsigned char dimension,
791 unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar min_code, Emchar max_code,
797 Emchar code_offset, unsigned char byte_offset,
798 Lisp_Object mother, unsigned char conversion)
801 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
805 XSETCHARSET (obj, cs);
807 CHARSET_ID (cs) = id;
808 CHARSET_NAME (cs) = name;
809 CHARSET_SHORT_NAME (cs) = short_name;
810 CHARSET_LONG_NAME (cs) = long_name;
811 CHARSET_CHARS (cs) = chars;
812 CHARSET_DIMENSION (cs) = dimension;
813 CHARSET_DIRECTION (cs) = direction;
814 CHARSET_COLUMNS (cs) = columns;
815 CHARSET_GRAPHIC (cs) = graphic;
816 CHARSET_FINAL (cs) = final;
817 CHARSET_DOC_STRING (cs) = doc;
818 CHARSET_REGISTRY (cs) = reg;
819 CHARSET_CCL_PROGRAM (cs) = Qnil;
820 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding-table slot is set to Qunbound here and no
   visible line stores the DECODING_TABLE argument -- confirm whether the
   argument is meant to be used. */
822 CHARSET_DECODING_TABLE(cs) = Qunbound;
823 CHARSET_MIN_CODE (cs) = min_code;
824 CHARSET_MAX_CODE (cs) = max_code;
825 CHARSET_CODE_OFFSET (cs) = code_offset;
826 CHARSET_BYTE_OFFSET (cs) = byte_offset;
827 CHARSET_MOTHER (cs) = mother;
828 CHARSET_CONVERSION (cs) = conversion;
/* Representation length: one byte for ASCII, otherwise the dimension
   plus one or plus two.
   NOTE(review): the conditions choosing between the last two
   assignments are not visible here. */
832 if (id == LEADING_BYTE_ASCII)
833 CHARSET_REP_BYTES (cs) = 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
837 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
842 /* some charsets do not have final characters. This includes
843 ASCII, Control-1, Composite, and the two faux private
845 unsigned char iso2022_type
846 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
848 if (code_offset == 0)
850 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
851 chlook->charset_by_attributes[iso2022_type][final] = obj;
855 (chlook->charset_by_attributes[iso2022_type][final][direction]));
856 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
860 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
861 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
863 /* Some charsets are "faux" and don't have names or really exist at
864 all except in the leading-byte table. */
/* Make the charset reachable by name. */
866 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of DIMENSION.
   Three counters in chlook are visible: a general one bounded by
   MAX_LEADING_BYTE_PRIVATE, and separate ones for 1-byte and 2-byte
   leading bytes bounded by MAX_LEADING_BYTE_PRIVATE_1 and
   MAX_LEADING_BYTE_PRIVATE_2.  Each is compared with its limit before
   being post-incremented.  The error "No more character sets free for
   this dimension" carries DIMENSION as its datum.
   NOTE(review): the conditions selecting a counter, the action taken at
   a limit and the return statement are not visible here -- confirm. */
871 get_unallocated_leading_byte (int dimension)
876 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
879 lb = chlook->next_allocated_leading_byte++;
883 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
886 lb = chlook->next_allocated_1_byte_leading_byte++;
890 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
893 lb = chlook->next_allocated_2_byte_leading_byte++;
899 ("No more character sets free for this dimension",
900 make_int (dimension));
906 /* Number of Big5 characters which have the same code in 1st byte. */
/* (0xFF - 0xA1) is 94 and (0x7F - 0x40) is 63, so this evaluates to 157.
   Presumably the two terms count second bytes 0xA1 - 0xFE and
   0x40 - 0x7E respectively -- confirm. */
908 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Decode CODE_POINT of the coded charset CCS using explicitly defined
   mappings.  The decoding table is walked with get_ccs_octet_table, one
   octet of CODE_POINT at a time; if the result is a character, it is
   returned.  Otherwise, when CCS has a mother charset and its conversion
   is CONVERSION_IDENTICAL, the lookup is passed on to the mother: through
   DECODE_CHAR when the mother is Vcharset_ucs, through a recursive call
   otherwise.
   NOTE(review): the loop around the octet lookup and the value returned
   when nothing is found are not visible in this excerpt -- confirm. */
911 decode_defined_char (Lisp_Object ccs, int code_point)
913 int dim = XCHARSET_DIMENSION (ccs);
914 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Select the octet of CODE_POINT addressed by DIM and descend into the
   corresponding sub-table. */
922 = get_ccs_octet_table (decoding_table, ccs,
923 (code_point >> (dim * 8)) & 255);
925 if (CHARP (decoding_table))
926 return XCHAR (decoding_table);
929 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
931 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
933 if ( EQ (mother, Vcharset_ucs) )
934 return DECODE_CHAR (mother, code_point);
936 return decode_defined_char (mother, code_point);
/* Decode CODE_POINT of CHARSET by arithmetic alone.  Three strategies
   are visible:
     - with a positive maximum code and a mother charset, CODE_POINT is
       converted to a linear code, offset by the charset's code offset
       and decoded through the mother;
     - with a positive maximum code and no mother, a character id is
       computed from the byte offset, the characters per dimension and
       the code offset, and then compared with the min/max codes;
     - otherwise, for a final byte of '0' or above, the result is derived
       from the final byte and the octets of CODE_POINT.
   NOTE(review): the values returned for out-of-range input and several
   branch bodies are not visible in this excerpt -- confirm. */
943 decode_builtin_char (Lisp_Object charset, int code_point)
945 Lisp_Object mother = XCHARSET_MOTHER (charset);
948 if ( XCHARSET_MAX_CODE (charset) > 0 )
950 if ( CHARSETP (mother) )
952 int code = code_point;
/* 94x60 conversion: the row is renumbered relative to 16 + 32 or to
   18 + 32 depending on the band it falls in, then combined with the
   cell as row * 94 + cell - 33. */
954 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
956 int row = code_point >> 8;
957 int cell = code_point & 255;
961 else if (row < 16 + 32 + 30)
962 code = (row - (16 + 32)) * 94 + cell - 33;
963 else if (row < 18 + 32 + 30)
965 else if (row < 18 + 32 + 60)
966 code = (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60 conversion: as above, with a plane contributing
   (plane - 33) * 94 * 60. */
968 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
970 int plane = code_point >> 16;
971 int row = (code_point >> 8) & 255;
972 int cell = code_point & 255;
976 else if (row < 16 + 32 + 30)
978 = (plane - 33) * 94 * 60
979 + (row - (16 + 32)) * 94
981 else if (row < 18 + 32 + 30)
983 else if (row < 18 + 32 + 60)
985 = (plane - 33) * 94 * 60
986 + (row - (18 + 32)) * 94
/* Continue in the mother charset with the charset's code offset added. */
990 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* No mother charset: linearise the one or two octets of CODE_POINT
   relative to the byte offset and add the code offset. */
995 = (XCHARSET_DIMENSION (charset) == 1
997 code_point - XCHARSET_BYTE_OFFSET (charset)
999 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1000 * XCHARSET_CHARS (charset)
1001 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1002 + XCHARSET_CODE_OFFSET (charset);
1003 if ((cid < XCHARSET_MIN_CODE (charset))
1004 || (XCHARSET_MAX_CODE (charset) < cid))
/* Charsets identified by a final byte: each final byte from '0' upwards
   selects a slot of 94, 96, 94 * 94 or 96 * 96 positions. */
1009 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1011 if (XCHARSET_DIMENSION (charset) == 1)
1013 switch (XCHARSET_CHARS (charset))
1017 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1020 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1028 switch (XCHARSET_CHARS (charset))
1031 return MIN_CHAR_94x94
1032 + (final - '0') * 94 * 94
1033 + (((code_point >> 8) & 0x7F) - 33) * 94
1034 + ((code_point & 0x7F) - 33);
1036 return MIN_CHAR_96x96
1037 + (final - '0') * 96 * 96
1038 + (((code_point >> 8) & 0x7F) - 32) * 96
1039 + ((code_point & 0x7F) - 32);
/* Compute the code point of the character CH in CHARSET.  The charset's
   encoding table is consulted first.  Further down the code point is
   derived arithmetically: either from a linear code (taken from the
   mother charset or from CH itself) that is split into octets according
   to the charset's conversion type, or from the final byte together with
   the MIN_CHAR_* bases.
   NOTE(review): the value returned on failure and some of the guarding
   conditions are not visible in this excerpt -- confirm. */
1051 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1053 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* An integer entry for CH in the encoding table is looked for first. */
1056 if ( CHAR_TABLEP (encoding_table)
1057 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1062 Lisp_Object mother = XCHARSET_MOTHER (charset);
1063 int min = XCHARSET_MIN_CODE (charset);
1064 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset the code is obtained recursively from it; one
   of the two calls forces DEFINED_ONLY to 1. */
1067 if ( CHARSETP (mother) )
1069 if (XCHARSET_FINAL (charset) >= '0')
1070 code = charset_code_point (mother, ch, 1);
1072 code = charset_code_point (mother, ch, defined_only);
1074 else if (defined_only)
1076 else if ( ((max == 0) && CHARSETP (mother)
1077 && (XCHARSET_FINAL (charset) == 0))
1078 || ((min <= ch) && (ch <= max)) )
1080 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1081 || ((min <= code) && (code <= max)) )
/* D is the code relative to the charset's code offset; the conversion
   type decides how D is split into octets. */
1083 int d = code - XCHARSET_CODE_OFFSET (charset);
1085 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1087 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1089 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1091 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1094 int cell = d % 94 + 33;
1100 return (row << 8) | cell;
/* Plain 94- and 96-sets: D is written in base 94 (digits offset by 33)
   or base 96 (digits offset by 32), one octet per digit. */
1102 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1103 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1104 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1105 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1106 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1108 int plane = d / (94 * 60) + 33;
1109 int row = (d % (94 * 60)) / 94;
1110 int cell = d % 94 + 33;
1116 return (plane << 16) | (row << 8) | cell;
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1120 ( (d / (94 * 94) + 33) << 16)
1121 | ((d / 94 % 94 + 33) << 8)
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1125 ( (d / (96 * 96) + 32) << 16)
1126 | ((d / 96 % 96 + 32) << 8)
1128 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1130 ( (d / (94 * 94 * 94) + 33) << 24)
1131 | ((d / (94 * 94) % 94 + 33) << 16)
1132 | ((d / 94 % 94 + 33) << 8)
1134 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1136 ( (d / (96 * 96 * 96) + 32) << 24)
1137 | ((d / (96 * 96) % 96 + 32) << 16)
1138 | ((d / 96 % 96 + 32) << 8)
/* An unrecognised conversion type is reported on standard output. */
1142 printf ("Unknown CCS-conversion %d is specified!",
1143 XCHARSET_CONVERSION (charset));
/* Charsets with a final byte of '0' or above and a minimum code of 0:
   the position is recovered by subtracting the base
   MIN_CHAR_xx + (final - '0') * size from CH. */
1147 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1148 ( XCHARSET_MIN_CODE (charset) == 0 )
1150 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1151 (XCHARSET_CODE_OFFSET (charset)
1152 == XCHARSET_MIN_CODE (charset))
1157 if (XCHARSET_DIMENSION (charset) == 1)
1159 if (XCHARSET_CHARS (charset) == 94)
1161 if (((d = ch - (MIN_CHAR_94
1162 + (XCHARSET_FINAL (charset) - '0') * 94))
1167 else if (XCHARSET_CHARS (charset) == 96)
1169 if (((d = ch - (MIN_CHAR_96
1170 + (XCHARSET_FINAL (charset) - '0') * 96))
1178 else if (XCHARSET_DIMENSION (charset) == 2)
1180 if (XCHARSET_CHARS (charset) == 94)
1182 if (((d = ch - (MIN_CHAR_94x94
1184 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1187 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1189 else if (XCHARSET_CHARS (charset) == 96)
1191 if (((d = ch - (MIN_CHAR_96x96
1193 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1196 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for the character C purely from the range
   it falls in, store that charset in *CHARSET and (on the visible
   paths) return the position of C inside it.  Ranges are tested in
   order; several of them fall back to Vcharset_ucs, including the final
   branch.
   NOTE(review): a number of return statements and range tests are not
   visible in this excerpt -- confirm. */
1207 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1209 if (c <= MAX_CHAR_BASIC_LATIN)
1211 *charset = Vcharset_ascii;
1216 *charset = Vcharset_control_1;
1221 *charset = Vcharset_latin_iso8859_1;
1225 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1227 *charset = Vcharset_hebrew_iso8859_8;
1228 return c - MIN_CHAR_HEBREW + 0x20;
1231 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1233 *charset = Vcharset_thai_tis620;
1234 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns the result of list2 (), whereas
   every other visible branch stores into *CHARSET and returns an integer
   expression.  Presumably it is disabled by a preprocessor conditional
   that is not visible here -- confirm before enabling it. */
1237 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1238 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1240 return list2 (Vcharset_katakana_jisx0201,
1241 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1244 else if (c <= MAX_CHAR_BMP)
1246 *charset = Vcharset_ucs_bmp;
1249 else if (c <= MAX_CHAR_SMP)
1251 *charset = Vcharset_ucs_smp;
1252 return c - MIN_CHAR_SMP;
1254 else if (c <= MAX_CHAR_SIP)
1256 *charset = Vcharset_ucs_sip;
1257 return c - MIN_CHAR_SIP;
1259 else if (c < MIN_CHAR_DAIKANWA)
1261 *charset = Vcharset_ucs;
1264 else if (c <= MAX_CHAR_DAIKANWA)
1266 *charset = Vcharset_ideograph_daikanwa;
1267 return c - MIN_CHAR_DAIKANWA;
1269 else if (c < MIN_CHAR_94)
1271 *charset = Vcharset_ucs;
/* 94-character sets: the final byte is '0' plus the index of the
   94-wide slot containing C.  If no charset is registered for it, the
   character is attributed to Vcharset_ucs instead.  The three branches
   after this one follow the same pattern for 96, 94x94 and 96x96. */
1274 else if (c <= MAX_CHAR_94)
1276 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1277 ((c - MIN_CHAR_94) / 94) + '0',
1278 CHARSET_LEFT_TO_RIGHT);
1279 if (!NILP (*charset))
1280 return ((c - MIN_CHAR_94) % 94) + 33;
1283 *charset = Vcharset_ucs;
1287 else if (c <= MAX_CHAR_96)
1289 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1290 ((c - MIN_CHAR_96) / 96) + '0',
1291 CHARSET_LEFT_TO_RIGHT);
1292 if (!NILP (*charset))
1293 return ((c - MIN_CHAR_96) % 96) + 32;
1296 *charset = Vcharset_ucs;
1300 else if (c <= MAX_CHAR_94x94)
1303 = CHARSET_BY_ATTRIBUTES (94, 2,
1304 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1305 CHARSET_LEFT_TO_RIGHT);
1306 if (!NILP (*charset))
1307 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1308 | (((c - MIN_CHAR_94x94) % 94) + 33);
1311 *charset = Vcharset_ucs;
1315 else if (c <= MAX_CHAR_96x96)
1318 = CHARSET_BY_ATTRIBUTES (96, 2,
1319 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1320 CHARSET_LEFT_TO_RIGHT);
1321 if (!NILP (*charset))
1322 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1323 | (((c - MIN_CHAR_96x96) % 96) + 32);
1326 *charset = Vcharset_ucs;
1332 *charset = Vcharset_ucs;
1337 Lisp_Object Vdefault_coded_charset_priority_list;
1341 /************************************************************************/
1342 /* Basic charset Lisp functions */
1343 /************************************************************************/
/* Lisp predicate: yields Qt when OBJECT satisfies CHARSETP and Qnil
   otherwise. */
1345 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1346 Return non-nil if OBJECT is a charset.
1350 return CHARSETP (object) ? Qt : Qnil;
/* A charset object is handed back unchanged.  Any other argument is
   first checked to be a symbol and then looked up in
   Vcharset_hash_table, with Qnil as the result for a missing entry. */
1353 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1354 Retrieve the charset of the given name.
1355 If CHARSET-OR-NAME is a charset object, it is simply returned.
1356 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1357 nil is returned. Otherwise the associated charset object is returned.
1361 if (CHARSETP (charset_or_name))
1362 return charset_or_name;
1364 CHECK_SYMBOL (charset_or_name);
1365 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Delegates the lookup to Ffind_charset and signals "No such charset"
   with NAME as its datum.
   NOTE(review): the test guarding the error call is not visible here;
   per the docstring it fires when the lookup finds nothing. */
1368 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1369 Retrieve the charset of the given name.
1370 Same as `find-charset' except an error is signalled if there is no such
1371 charset instead of returning nil.
1375 Lisp_Object charset = Ffind_charset (name);
1378 signal_simple_error ("No such charset", name);
1382 /* We store the charsets in hash tables with the names as the key and the
1383 actual charset object as the value. Occasionally we need to use them
1384 in a list format. These routines provide us with that. */
/* Closure data for the hash-table mapper that builds a charset list:
   the one visible member points at the list being accumulated. */
1385 struct charset_list_closure
1387 Lisp_Object *charset_list;
/* Mapper callback: conses KEY onto the front of the list that the
   closure points at.  VALUE is not used on any visible line.
   NOTE(review): the return value is not visible in this excerpt. */
1391 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1392 void *charset_list_closure)
1394 /* This function can GC */
1395 struct charset_list_closure *chcl =
1396 (struct charset_list_closure*) charset_list_closure;
1397 Lisp_Object *charset_list = chcl->charset_list;
1399 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Builds the result by mapping add_charset_to_list_mapper over
   Vcharset_hash_table.  The accumulator starts as Qnil, is protected
   with GCPRO1, and is reached by the mapper through a
   charset_list_closure holding its address. */
1403 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1404 Return a list of the names of all defined charsets.
1408 Lisp_Object charset_list = Qnil;
1409 struct gcpro gcpro1;
1410 struct charset_list_closure charset_list_closure;
1412 GCPRO1 (charset_list);
1413 charset_list_closure.charset_list = &charset_list;
1414 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1415 &charset_list_closure);
1418 return charset_list;
/* The argument is resolved through Fget_charset before its name is read
   with XCHARSET_NAME, so both charset objects and charset names are
   handled by that call. */
1421 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1422 Return the name of charset CHARSET.
1426 return XCHARSET_NAME (Fget_charset (charset));
1429 /* #### SJT Should generic properties be allowed? */
/* Implementation outline: NAME must be a symbol and DOC-STRING, when
   non-nil, a string; a NAME that `find-charset' already resolves is
   rejected.  PROPS is then walked with EXTERNAL_PROPERTY_LIST_LOOP_3, each
   recognized property being type- or range-checked, and anything else
   signalling "Unrecognized property".  After the loop the code enforces the
   0x5F upper limit on 'final for dimension 2, refuses a
   DIMENSION/CHARS/FINAL combination already registered in either direction,
   obtains an id from get_unallocated_leading_byte, fills in defaults (empty
   doc string and registry, the symbol's name as short name, the doc string
   as long name) and calls make_charset.  A non-nil 'ccl-program is stored
   into the new charset afterwards.

   NOTE(review): the test for Qlong_name begins with a plain `if' while
   every later property test is `else if'.  If nothing joins it to the
   Qshort_name test, a 'short-name property is handled by the first `if'
   and then falls through the whole chain to the final "Unrecognized
   property" error -- confirm, and change the test to `else if'.

   NOTE(review): the docstring below names the properties 'code-min and
   'code-max, but the code compares against Qmin_code / Qmax_code, which
   syms_of_mule_charset interns as "min-code" and "max-code" -- the
   docstring spelling looks wrong; confirm. */
1430 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1431 Define a new character set.
1432 This function is for use with Mule support.
1433 NAME is a symbol, the name by which the character set is normally referred.
1434 DOC-STRING is a string describing the character set.
1435 PROPS is a property list, describing the specific nature of the
1436 character set. Recognized properties are:
1438 'short-name Short version of the charset name (ex: Latin-1)
1439 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1440 'registry A regular expression matching the font registry field for
1442 'dimension Number of octets used to index a character in this charset.
1443 Either 1 or 2. Defaults to 1.
1444 If UTF-2000 feature is enabled, 3 or 4 are also available.
1445 'columns Number of columns used to display a character in this charset.
1446 Only used in TTY mode. (Under X, the actual width of a
1447 character can be derived from the font used to display the
1448 characters.) If unspecified, defaults to the dimension
1449 (this is almost always the correct value).
1450 'chars Number of characters in each dimension (94 or 96).
1451 Defaults to 94. Note that if the dimension is 2, the
1452 character set thus described is 94x94 or 96x96.
1453 If UTF-2000 feature is enabled, 128 or 256 are also available.
1454 'final Final byte of ISO 2022 escape sequence. Must be
1455 supplied. Each combination of (DIMENSION, CHARS) defines a
1456 separate namespace for final bytes. Note that ISO
1457 2022 restricts the final byte to the range
1458 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1459 dimension == 2. Note also that final bytes in the range
1460 0x30 - 0x3F are reserved for user-defined (not official)
1462 'graphic 0 (use left half of font on output) or 1 (use right half
1463 of font on output). Defaults to 0. For example, for
1464 a font whose registry is ISO8859-1, the left half
1465 (octets 0x20 - 0x7F) is the `ascii' character set, while
1466 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1467 character set. With 'graphic set to 0, the octets
1468 will have their high bit cleared; with it set to 1,
1469 the octets will have their high bit set.
1470 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1472 'ccl-program A compiled CCL program used to convert a character in
1473 this charset into an index into the font. This is in
1474 addition to the 'graphic property. The CCL program
1475 is passed the octets of the character, with the high
1476 bit cleared and set depending upon whether the value
1477 of the 'graphic property is 0 or 1.
1478 'mother [UTF-2000 only] Base coded-charset.
1479 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1480 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1481 'code-offset [UTF-2000 only] Offset for a code-point of a base
1483 'conversion [UTF-2000 only] Conversion for a code-point of a base
1484 coded-charset (94x60 or 94x94x60).
1486 (name, doc_string, props))
1488 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1489 int direction = CHARSET_LEFT_TO_RIGHT;
1490 Lisp_Object registry = Qnil;
1491 Lisp_Object charset;
1492 Lisp_Object ccl_program = Qnil;
1493 Lisp_Object short_name = Qnil, long_name = Qnil;
1494 Lisp_Object mother = Qnil;
1495 int min_code = 0, max_code = 0, code_offset = 0;
1496 int byte_offset = -1;
1499 CHECK_SYMBOL (name);
1500 if (!NILP (doc_string))
1501 CHECK_STRING (doc_string);
1503 charset = Ffind_charset (name);
1504 if (!NILP (charset))
1505 signal_simple_error ("Cannot redefine existing charset", name);
1508 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1510 if (EQ (keyword, Qshort_name))
1512 CHECK_STRING (value);
1516 if (EQ (keyword, Qlong_name))
1518 CHECK_STRING (value);
1522 else if (EQ (keyword, Qdimension))
1525 dimension = XINT (value);
1526 if (dimension < 1 ||
1533 signal_simple_error ("Invalid value for 'dimension", value);
1536 else if (EQ (keyword, Qchars))
1539 chars = XINT (value);
1540 if (chars != 94 && chars != 96
1542 && chars != 128 && chars != 256
1545 signal_simple_error ("Invalid value for 'chars", value);
1548 else if (EQ (keyword, Qcolumns))
1551 columns = XINT (value);
1552 if (columns != 1 && columns != 2)
1553 signal_simple_error ("Invalid value for 'columns", value);
1556 else if (EQ (keyword, Qgraphic))
1559 graphic = XINT (value);
1567 signal_simple_error ("Invalid value for 'graphic", value);
1570 else if (EQ (keyword, Qregistry))
1572 CHECK_STRING (value);
1576 else if (EQ (keyword, Qdirection))
1578 if (EQ (value, Ql2r))
1579 direction = CHARSET_LEFT_TO_RIGHT;
1580 else if (EQ (value, Qr2l))
1581 direction = CHARSET_RIGHT_TO_LEFT;
1583 signal_simple_error ("Invalid value for 'direction", value);
1586 else if (EQ (keyword, Qfinal))
1588 CHECK_CHAR_COERCE_INT (value);
1589 final = XCHAR (value);
1590 if (final < '0' || final > '~')
1591 signal_simple_error ("Invalid value for 'final", value);
1595 else if (EQ (keyword, Qmother))
1597 mother = Fget_charset (value);
1600 else if (EQ (keyword, Qmin_code))
1603 min_code = XUINT (value);
1606 else if (EQ (keyword, Qmax_code))
1609 max_code = XUINT (value);
1612 else if (EQ (keyword, Qcode_offset))
1615 code_offset = XUINT (value);
1618 else if (EQ (keyword, Qconversion))
1620 if (EQ (value, Q94x60))
1621 conversion = CONVERSION_94x60;
1622 else if (EQ (value, Q94x94x60))
1623 conversion = CONVERSION_94x94x60;
1625 signal_simple_error ("Unrecognized conversion", value);
1629 else if (EQ (keyword, Qccl_program))
1631 struct ccl_program test_ccl;
1633 if (setup_ccl_program (&test_ccl, value) < 0)
1634 signal_simple_error ("Invalid value for 'ccl-program", value);
1635 ccl_program = value;
1639 signal_simple_error ("Unrecognized property", keyword);
1645 error ("'final must be specified");
1647 if (dimension == 2 && final > 0x5F)
1649 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1652 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1653 CHARSET_LEFT_TO_RIGHT)) ||
1654 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1655 CHARSET_RIGHT_TO_LEFT)))
1657 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1659 id = get_unallocated_leading_byte (dimension);
1661 if (NILP (doc_string))
1662 doc_string = build_string ("");
1664 if (NILP (registry))
1665 registry = build_string ("");
1667 if (NILP (short_name))
1668 XSETSTRING (short_name, XSYMBOL (name)->name);
1670 if (NILP (long_name))
1671 long_name = doc_string;
1674 columns = dimension;
1676 if (byte_offset < 0)
1680 else if (chars == 96)
1686 charset = make_charset (id, name, chars, dimension, columns, graphic,
1687 final, direction, short_name, long_name,
1688 doc_string, registry,
1689 Qnil, min_code, max_code, code_offset, byte_offset,
1690 mother, conversion);
1691 if (!NILP (ccl_program))
1692 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Implementation: signals an error if CHARSET already has a
   reverse-direction charset or if NEW-NAME already resolves through
   `find-charset'.  Otherwise chars, dimension, columns, graphic, final, doc
   string, short/long name and registry are copied from CHARSET, the
   direction is flipped, a fresh id is taken from
   get_unallocated_leading_byte, and make_charset builds the new charset.
   Finally each charset's reverse-direction slot is pointed at the other.

   Two alternative tails for the make_charset argument list are visible (one
   copying CHARSET's decoding table / code range / offsets / conversion, one
   passing Qnil and zeros) -- presumably chosen by a preprocessor
   conditional; confirm. */
1696 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1698 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1699 NEW-NAME is the name of the new charset. Return the new charset.
1701 (charset, new_name))
1703 Lisp_Object new_charset = Qnil;
1704 int id, chars, dimension, columns, graphic, final;
1706 Lisp_Object registry, doc_string, short_name, long_name;
1709 charset = Fget_charset (charset);
1710 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1711 signal_simple_error ("Charset already has reverse-direction charset",
1714 CHECK_SYMBOL (new_name);
1715 if (!NILP (Ffind_charset (new_name)))
1716 signal_simple_error ("Cannot redefine existing charset", new_name);
1718 cs = XCHARSET (charset);
1720 chars = CHARSET_CHARS (cs);
1721 dimension = CHARSET_DIMENSION (cs);
1722 columns = CHARSET_COLUMNS (cs);
1723 id = get_unallocated_leading_byte (dimension);
1725 graphic = CHARSET_GRAPHIC (cs);
1726 final = CHARSET_FINAL (cs);
1727 direction = CHARSET_RIGHT_TO_LEFT;
1728 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1729 direction = CHARSET_LEFT_TO_RIGHT;
1730 doc_string = CHARSET_DOC_STRING (cs);
1731 short_name = CHARSET_SHORT_NAME (cs);
1732 long_name = CHARSET_LONG_NAME (cs);
1733 registry = CHARSET_REGISTRY (cs);
1735 new_charset = make_charset (id, new_name, chars, dimension, columns,
1736 graphic, final, direction, short_name, long_name,
1737 doc_string, registry,
1739 CHARSET_DECODING_TABLE(cs),
1740 CHARSET_MIN_CODE(cs),
1741 CHARSET_MAX_CODE(cs),
1742 CHARSET_CODE_OFFSET(cs),
1743 CHARSET_BYTE_OFFSET(cs),
1745 CHARSET_CONVERSION (cs)
1747 Qnil, 0, 0, 0, 0, Qnil, 0
1751 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1752 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Implementation: ALIAS must be a symbol; CHARSET is resolved with
   Fget_charset and the resulting charset object is stored in
   Vcharset_hash_table under ALIAS.  The value returned is whatever Fputhash
   returns.
   NOTE(review): no check is made that ALIAS is not already a key of the
   table, so an existing entry of that name is presumably replaced --
   confirm that this is intended. */
1757 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1758 Define symbol ALIAS as an alias for CHARSET.
1762 CHECK_SYMBOL (alias);
1763 charset = Fget_charset (charset);
1764 return Fputhash (alias, charset, Vcharset_hash_table);
1767 /* #### Reverse direction charsets not yet implemented. */
/* Implementation: resolves CHARSET with Fget_charset and returns the content
   of its reverse-direction slot unchanged.
   NOTE(review): syms_of_mule_charset contains no DEFSUBR for
   Fcharset_reverse_direction_charset (only a commented-out
   DEFSUBR (Freverse_direction_charset)), so this primitive appears not to be
   registered -- confirm whether it is compiled at all. */
1769 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1771 Return the reverse-direction charset parallel to CHARSET, if any.
1772 This is the charset with the same properties (in particular, the same
1773 dimension, number of characters per dimension, and final byte) as
1774 CHARSET but whose characters are displayed in the opposite direction.
1778 charset = Fget_charset (charset);
1779 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Implementation: DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL a
   character in '0'..'~' (and at most 0x5F when DIMENSION is 2), and
   DIRECTION one of l2r, r2l or nil; any other value signals an error.  The
   lookup goes through CHARSET_BY_ATTRIBUTES, using the requested direction
   or, when none was given, left-to-right and right-to-left.

   NOTE(review): the `make-charset' docstring says dimensions 3 and 4 and
   CHARS of 128 or 256 are available with UTF-2000; such charsets cannot be
   queried here because of the stricter checks -- confirm that is intended.
   NOTE(review): the visible return statement yields XCHARSET_NAME (obj),
   i.e. the charset's name, whereas the docstring says "Return a charset". */
1783 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1784 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1785 If DIRECTION is omitted, both directions will be checked (left-to-right
1786 will be returned if character sets exist for both directions).
1788 (dimension, chars, final, direction))
1790 int dm, ch, fi, di = -1;
1791 Lisp_Object obj = Qnil;
1793 CHECK_INT (dimension);
1794 dm = XINT (dimension);
1795 if (dm < 1 || dm > 2)
1796 signal_simple_error ("Invalid value for DIMENSION", dimension);
1800 if (ch != 94 && ch != 96)
1801 signal_simple_error ("Invalid value for CHARS", chars);
1803 CHECK_CHAR_COERCE_INT (final);
1805 if (fi < '0' || fi > '~')
1806 signal_simple_error ("Invalid value for FINAL", final);
1808 if (EQ (direction, Ql2r))
1809 di = CHARSET_LEFT_TO_RIGHT;
1810 else if (EQ (direction, Qr2l))
1811 di = CHARSET_RIGHT_TO_LEFT;
1812 else if (!NILP (direction))
1813 signal_simple_error ("Invalid value for DIRECTION", direction);
1815 if (dm == 2 && fi > 0x5F)
1817 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1821 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1823 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1826 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1829 return XCHARSET_NAME (obj);
/* CHARSET is resolved with Fget_charset; the short-name slot of the result
   is returned as stored. */
1833 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1834 Return short name of CHARSET.
1838 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* CHARSET is resolved with Fget_charset; the long-name slot of the result
   is returned as stored. */
1841 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1842 Return long name of CHARSET.
1846 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* The "description" is the charset's doc-string slot: CHARSET is resolved
   with Fget_charset and XCHARSET_DOC_STRING of the result is returned. */
1849 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1850 Return description of CHARSET.
1854 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* CHARSET is resolved with Fget_charset; its dimension is boxed with
   make_int before being returned to Lisp. */
1857 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1858 Return dimension of CHARSET.
1862 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Implementation: CHARSET is resolved with Fget_charset, PROP must be a
   symbol, and PROP is compared with EQ against each supported property
   symbol in turn.  Integer-valued slots are boxed with make_int.  'final
   yields nil when the stored final byte is 0 and a character otherwise.
   'direction yields the symbol l2r or r2l.  'reverse-direction-charset
   yields the name of the reverse charset when the slot holds a charset and
   the slot's raw value otherwise.  An unmatched PROP signals "Unrecognized
   charset property name".

   NOTE(review): `make-charset' also accepts 'code-offset and 'conversion,
   but no branch for Qcode_offset or Qconversion is visible here, so those
   names reach the error even though the docstring says every `make-charset'
   property is recognized -- confirm. */
1865 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1866 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1867 Recognized properties are those listed in `make-charset', as well as
1868 'name and 'doc-string.
1874 charset = Fget_charset (charset);
1875 cs = XCHARSET (charset);
1877 CHECK_SYMBOL (prop);
1878 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1879 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1880 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1881 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1882 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1883 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1884 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1885 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1886 Qnil : make_char (CHARSET_FINAL (cs));
1887 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1888 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1889 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1890 if (EQ (prop, Qdirection))
1891 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1892 if (EQ (prop, Qreverse_direction_charset))
1894 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1895 /* #### Is this translation OK? If so, error checking sufficient? */
1896 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1899 if (EQ (prop, Qmother))
1900 return CHARSET_MOTHER (cs);
1901 if (EQ (prop, Qmin_code))
1902 return make_int (CHARSET_MIN_CODE (cs));
1903 if (EQ (prop, Qmax_code))
1904 return make_int (CHARSET_MAX_CODE (cs));
1906 signal_simple_error ("Unrecognized charset property name", prop);
1907 return Qnil; /* not reached */
/* The "identification number" returned is the charset's leading byte:
   CHARSET is resolved with Fget_charset and XCHARSET_LEADING_BYTE of the
   result is boxed with make_int. */
1910 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1911 Return charset identification number of CHARSET.
1915 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1918 /* #### We need to figure out which properties we really want to
/* Implementation: CHARSET is resolved with Fget_charset.  CCL-PROGRAM is
   validated by handing it to setup_ccl_program together with a scratch
   struct ccl_program; a negative result signals "Invalid ccl-program".  Only
   then is the program stored in the charset.  The scratch struct is not
   used for anything else in the visible code. */
1921 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1922 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1924 (charset, ccl_program))
1926 struct ccl_program test_ccl;
1928 charset = Fget_charset (charset);
1929 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1930 signal_simple_error ("Invalid ccl-program", ccl_program);
1931 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Clear the cached font information kept for CHARSET on every device.
   Called from `set-charset-registry' after the registry has been changed. */
1936 invalidate_charset_font_caches (Lisp_Object charset)
1938 /* Invalidate font cache entries for charset on all devices. */
1939 Lisp_Object devcons, concons, hash_table;
1940 DEVICE_LOOP_NO_BREAK (devcons, concons)
1942 struct device *d = XDEVICE (XCAR (devcons));
/* Look up this device's entry for CHARSET, with Qunbound as the "no entry"
   default; only an entry that exists is emptied with Fclrhash. */
1943 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1944 if (!UNBOUNDP (hash_table))
1945 Fclrhash (hash_table);
/* Implementation: CHARSET is resolved with Fget_charset and REGISTRY must be
   a string.  After the registry slot is overwritten, the per-device font
   caches for the charset are cleared with invalidate_charset_font_caches and
   face_property_was_changed is called for the `font' property of
   Vdefault_face with locale Qglobal. */
1949 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1950 Set the 'registry property of CHARSET to REGISTRY.
1952 (charset, registry))
1954 charset = Fget_charset (charset);
1955 CHECK_STRING (registry);
1956 XCHARSET_REGISTRY (charset) = registry;
1957 invalidate_charset_font_caches (charset);
1958 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* The "mapping table" is the charset's decoding-table slot: CHARSET is
   resolved with Fget_charset and XCHARSET_DECODING_TABLE of the result is
   returned as stored. */
1963 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1964 Return mapping-table of CHARSET.
1968 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Implementation outline: CHARSET is resolved with Fget_charset.  A vector
   TABLE is validated with decoding_table_check_elements, whose failure
   codes map to the errors "Too big table", "Invalid element is found" and
   "Something wrong"; a TABLE that is neither acceptable value signals
   wrong-type-argument with "vector-or-nil-p".  The charset's decoding
   table is reset to Qnil on the accepted paths.

   The table is then walked according to the charset's dimension, and each
   character found is given a char attribute keyed by the charset's name via
   Fput_char_attribute.  For a flat table the code point is
   i + CHARSET_BYTE_OFFSET; for a table of row vectors the row index
   (plus byte offset) is shifted left by 8 to form the high byte.  A row
   entry that is not itself a vector is presumably treated as a single
   character at code point i + byte_offset -- confirm against the full
   switch. */
1971 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1972 Set mapping-table of CHARSET to TABLE.
1976 struct Lisp_Charset *cs;
1980 charset = Fget_charset (charset);
1981 cs = XCHARSET (charset);
1985 CHARSET_DECODING_TABLE(cs) = Qnil;
1988 else if (VECTORP (table))
1990 int ccs_len = CHARSET_BYTE_SIZE (cs);
1991 int ret = decoding_table_check_elements (table,
1992 CHARSET_DIMENSION (cs),
1997 signal_simple_error ("Too big table", table);
1999 signal_simple_error ("Invalid element is found", table);
2001 signal_simple_error ("Something wrong", table);
2003 CHARSET_DECODING_TABLE(cs) = Qnil;
2006 signal_error (Qwrong_type_argument,
2007 list2 (build_translated_string ("vector-or-nil-p"),
2010 byte_offset = CHARSET_BYTE_OFFSET (cs);
2011 switch (CHARSET_DIMENSION (cs))
2014 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2016 Lisp_Object c = XVECTOR_DATA(table)[i];
2019 Fput_char_attribute (c, XCHARSET_NAME (charset),
2020 make_int (i + byte_offset));
2024 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2026 Lisp_Object v = XVECTOR_DATA(table)[i];
2032 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2034 Lisp_Object c = XVECTOR_DATA(v)[j];
2038 (c, XCHARSET_NAME (charset),
2039 make_int ( ( (i + byte_offset) << 8 )
2045 Fput_char_attribute (v, XCHARSET_NAME (charset),
2046 make_int (i + byte_offset));
2055 /************************************************************************/
2056 /* Lisp primitives for working with characters */
2057 /************************************************************************/
/* Implementation: CHARSET is resolved with Fget_charset.  The code point is
   given special treatment when the charset's graphic value is 1
   (NOTE(review): the adjustment itself should be confirmed against the full
   source).  With DEFINED_ONLY nil the character is produced by DECODE_CHAR,
   otherwise by decode_defined_char.  A negative result is returned to Lisp
   as nil, anything else as a character via make_char. */
2060 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2061 Make a character from CHARSET and code-point CODE.
2062 If DEFINED_ONLY is non-nil, builtin character is not returned.
2063 If corresponding character is not found, nil is returned.
2065 (charset, code, defined_only))
2069 charset = Fget_charset (charset);
2072 if (XCHARSET_GRAPHIC (charset) == 1)
2074 if (NILP (defined_only))
2075 c = DECODE_CHAR (charset, c);
2077 c = decode_defined_char (charset, c);
2078 return c >= 0 ? make_char (c) : Qnil;
/* Implementation: CHARSET is resolved with Fget_charset.  latin-viscii is
   special-cased: the character is first decoded with Fdecode_char, and its
   char attributes for Vcharset_latin_viscii_lower and
   Vcharset_latin_viscii_upper are consulted so that CHARSET can be switched
   to one of those two charsets (presumably when the attribute is present --
   confirm).  The code point is then passed to decode_builtin_char; if that
   yields a negative value the function falls back to
   Fdecode_char (charset, code, Qnil). */
2081 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2082 Make a builtin character from CHARSET and code-point CODE.
2088 charset = Fget_charset (charset);
2090 if (EQ (charset, Vcharset_latin_viscii))
2092 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2098 (ret = Fget_char_attribute (chr,
2099 Vcharset_latin_viscii_lower,
2102 charset = Vcharset_latin_viscii_lower;
2106 (ret = Fget_char_attribute (chr,
2107 Vcharset_latin_viscii_upper,
2110 charset = Vcharset_latin_viscii_upper;
2117 if (XCHARSET_GRAPHIC (charset) == 1)
2120 c = decode_builtin_char (charset, c);
2121 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Implementation: CHARSET is resolved with Fget_charset and the permitted
   octet range [lowlim, highlim] is chosen from it: 0..127 for ascii, 0..31
   for control-1, 0..255 for a 256-character charset, 33..126 for a
   94-character charset and 32..127 otherwise (96 characters).  Each octet
   is range-checked with args_out_of_range_3 on failure.  ARG2 is masked
   with 0x7f before its check, in line with the comment about stripping the
   8th bit.  For a dimension-1 charset a non-nil ARG2 is rejected and the
   character is built from ARG1 alone; otherwise both octets are passed to
   MAKE_CHAR. */
2125 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2126 Make a character from CHARSET and octets ARG1 and ARG2.
2127 ARG2 is required only for characters from two-dimensional charsets.
2128 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2129 character s with caron.
2131 (charset, arg1, arg2))
2135 int lowlim, highlim;
2137 charset = Fget_charset (charset);
2138 cs = XCHARSET (charset);
2140 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2141 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2143 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2145 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2146 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2149 /* It is useful (and safe, according to Olivier Galibert) to strip
2150 the 8th bit off ARG1 and ARG2 because it allows programmers to
2151 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2152 Latin 2 code of the character. */
2160 if (a1 < lowlim || a1 > highlim)
2161 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2163 if (CHARSET_DIMENSION (cs) == 1)
2167 ("Charset is of dimension one; second octet must be nil", arg2);
2168 return make_char (MAKE_CHAR (charset, a1, 0));
2177 a2 = XINT (arg2) & 0x7f;
2179 if (a2 < lowlim || a2 > highlim)
2180 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2182 return make_char (MAKE_CHAR (charset, a1, a2));
/* CHARACTER is checked (and coerced from an integer if necessary) with
   CHECK_CHAR_COERCE_INT.  The value returned is XCHARSET_NAME of the
   character's charset, i.e. the charset's name rather than the charset
   object. */
2185 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2186 Return the character set of CHARACTER.
2190 CHECK_CHAR_COERCE_INT (character);
2192 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Implementation: BREAKUP_CHAR splits CHARACTER into its charset and two
   octets.  N of nil or 0 selects the first octet, N of 1 the second; any
   other N signals "Octet number must be 0 or 1". */
2195 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2196 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2197 N defaults to 0 if omitted.
2201 Lisp_Object charset;
2204 CHECK_CHAR_COERCE_INT (character);
2206 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2208 if (NILP (n) || EQ (n, Qzero))
2209 return make_int (octet0);
2210 else if (EQ (n, make_int (1)))
2211 return make_int (octet1);
2213 signal_simple_error ("Octet number must be 0 or 1", n);
/* Implementation: CHARACTER is checked with CHECK_CHAR_COERCE_INT and
   CHARSET resolved with Fget_charset.  charset_code_point does the lookup,
   receiving a flag that is true when DEFINED_ONLY is non-nil.  A
   non-negative code point is returned as an integer.
   NOTE(review): the docstring does not describe DEFINED_ONLY or what is
   returned when no code point is found -- worth documenting. */
2217 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2218 Return code-point of CHARACTER in specified CHARSET.
2220 (character, charset, defined_only))
2224 CHECK_CHAR_COERCE_INT (character);
2225 charset = Fget_charset (charset);
2226 code_point = charset_code_point (charset, XCHAR (character),
2227 !NILP (defined_only));
2228 if (code_point >= 0)
2229 return make_int (code_point);
/* Implementation: CHARSET and the result list RC are GC-protected with
   GCPRO2 and CHARACTER is checked with CHECK_CHAR_COERCE_INT.  Two ways of
   building the result are visible, presumably alternatives selected by a
   preprocessor conditional (confirm):
   - ENCODE_CHAR yields a code point whose low 8 bits are consed onto RC
     once per dimension of the charset, after which the charset's name is
     consed onto the front;
   - BREAKUP_CHAR yields two position codes, and the result is a list of the
     charset's name plus both codes for a dimension-2 charset, or the name
     plus the first code otherwise. */
2235 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2236 Return list of charset and one or two position-codes of CHARACTER.
2240 /* This function can GC */
2241 struct gcpro gcpro1, gcpro2;
2242 Lisp_Object charset = Qnil;
2243 Lisp_Object rc = Qnil;
2251 GCPRO2 (charset, rc);
2252 CHECK_CHAR_COERCE_INT (character);
2255 code_point = ENCODE_CHAR (XCHAR (character), charset);
2256 dimension = XCHARSET_DIMENSION (charset);
2257 while (dimension > 0)
2259 rc = Fcons (make_int (code_point & 255), rc);
2263 rc = Fcons (XCHARSET_NAME (charset), rc);
2265 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2267 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2269 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2273 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2282 #ifdef ENABLE_COMPOSITE_CHARS
2283 /************************************************************************/
2284 /* composite character functions */
2285 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are turned
   into a Lisp string that is looked up in
   Vcomposite_char_string2char_hash_table.  A new character is allocated
   from the (row, column) counters and recorded in both hash tables --
   presumably only when the lookup finds nothing; confirm. */
2288 lookup_composite_char (Bufbyte *str, int len)
2290 Lisp_Object lispstr = make_string (str, len);
2291 Lisp_Object ch = Fgethash (lispstr,
2292 Vcomposite_char_string2char_hash_table,
/* Rows are exhausted once the row counter reaches 128. */
2298 if (composite_char_row_next >= 128)
2299 signal_simple_error ("No more composite chars available", lispstr);
2300 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2301 composite_char_col_next);
/* Record the new mapping in both directions. */
2302 Fputhash (make_char (emch), lispstr,
2303 Vcomposite_char_char2string_hash_table);
2304 Fputhash (lispstr, make_char (emch),
2305 Vcomposite_char_string2char_hash_table);
/* Advance to the next cell; on reaching column 128 wrap back to column 32
   and move to the next row. */
2306 composite_char_col_next++;
2307 if (composite_char_col_next >= 128)
2309 composite_char_col_next = 32;
2310 composite_char_row_next++;
/* Look up composite character CH in
   Vcomposite_char_char2string_hash_table.  CH is expected to have an entry:
   a missing one trips the assertion. */
2319 composite_char_string (Emchar ch)
2321 Lisp_Object str = Fgethash (make_char (ch),
2322 Vcomposite_char_char2string_hash_table,
2324 assert (!UNBOUNDP (str));
/* Implementation: STRING must be a Lisp string; its data and length are
   passed to lookup_composite_char and the resulting character is boxed with
   make_char. */
2328 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2329 Convert a string into a single composite character.
2330 The character is the result of overstriking all the characters in
2335 CHECK_STRING (string);
2336 return make_char (lookup_composite_char (XSTRING_DATA (string),
2337 XSTRING_LENGTH (string)));
/* Implementation: a character whose leading byte is not
   LEADING_BYTE_COMPOSITE signals "Must be composite char"; otherwise the
   string is fetched with composite_char_string. */
2340 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2341 Return a string of the characters comprising a composite character.
2349 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2350 signal_simple_error ("Must be composite char", ch);
2351 return composite_char_string (emch);
2353 #endif /* ENABLE_COMPOSITE_CHARS */
2356 /************************************************************************/
2357 /* initialization */
2358 /************************************************************************/
/* Symbol-level initialization for this file: sets up the charset lrecord
   implementation, passes each primitive defined here to DEFSUBR, and
   interns (with defsymbol) the property keywords, their value symbols and
   the names of the predefined charsets. */
2361 syms_of_mule_charset (void)
2363 INIT_LRECORD_IMPLEMENTATION (charset);
/* Primitives defined in this file. */
2365 DEFSUBR (Fcharsetp);
2366 DEFSUBR (Ffind_charset);
2367 DEFSUBR (Fget_charset);
2368 DEFSUBR (Fcharset_list);
2369 DEFSUBR (Fcharset_name);
2370 DEFSUBR (Fmake_charset);
2371 DEFSUBR (Fmake_reverse_direction_charset);
2372 /* DEFSUBR (Freverse_direction_charset); */
2373 DEFSUBR (Fdefine_charset_alias);
2374 DEFSUBR (Fcharset_from_attributes);
2375 DEFSUBR (Fcharset_short_name);
2376 DEFSUBR (Fcharset_long_name);
2377 DEFSUBR (Fcharset_description);
2378 DEFSUBR (Fcharset_dimension);
2379 DEFSUBR (Fcharset_property);
2380 DEFSUBR (Fcharset_id);
2381 DEFSUBR (Fset_charset_ccl_program);
2382 DEFSUBR (Fset_charset_registry);
2384 DEFSUBR (Fcharset_mapping_table);
2385 DEFSUBR (Fset_charset_mapping_table);
2389 DEFSUBR (Fdecode_char);
2390 DEFSUBR (Fdecode_builtin_char);
2391 DEFSUBR (Fencode_char);
2393 DEFSUBR (Fmake_char);
2394 DEFSUBR (Fchar_charset);
2395 DEFSUBR (Fchar_octet);
2396 DEFSUBR (Fsplit_char);
2398 #ifdef ENABLE_COMPOSITE_CHARS
2399 DEFSUBR (Fmake_composite_char);
2400 DEFSUBR (Fcomposite_char_string);
/* Predicate name, charset property keywords, and the symbols accepted as
   values of the 'conversion and 'direction properties. */
2403 defsymbol (&Qcharsetp, "charsetp");
2404 defsymbol (&Qregistry, "registry");
2405 defsymbol (&Qfinal, "final");
2406 defsymbol (&Qgraphic, "graphic");
2407 defsymbol (&Qdirection, "direction");
2408 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2409 defsymbol (&Qshort_name, "short-name");
2410 defsymbol (&Qlong_name, "long-name");
2412 defsymbol (&Qmother, "mother");
2413 defsymbol (&Qmin_code, "min-code");
2414 defsymbol (&Qmax_code, "max-code");
2415 defsymbol (&Qcode_offset, "code-offset");
2416 defsymbol (&Qconversion, "conversion");
2417 defsymbol (&Q94x60, "94x60");
2418 defsymbol (&Q94x94x60, "94x94x60");
2421 defsymbol (&Ql2r, "l2r");
2422 defsymbol (&Qr2l, "r2l");
2424 /* Charsets, compatible with FSF 20.3
2425 Naming convention is Script-Charset[-Edition] */
2426 defsymbol (&Qascii, "ascii");
2427 defsymbol (&Qcontrol_1, "control-1");
2428 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2429 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2430 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2431 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2432 defsymbol (&Qthai_tis620, "thai-tis620");
2433 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2434 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2435 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2436 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2437 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2438 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2439 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2440 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2441 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2442 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2443 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2444 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2445 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2446 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2447 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2448 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2450 defsymbol (&Qucs, "ucs");
2451 defsymbol (&Qucs_bmp, "ucs-bmp");
2452 defsymbol (&Qucs_smp, "ucs-smp");
2453 defsymbol (&Qucs_sip, "ucs-sip");
2454 defsymbol (&Qucs_cns, "ucs-cns");
2455 defsymbol (&Qucs_jis, "ucs-jis");
2456 defsymbol (&Qucs_ks, "ucs-ks");
2457 defsymbol (&Qucs_big5, "ucs-big5");
2458 defsymbol (&Qlatin_viscii, "latin-viscii");
2459 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2460 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2461 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2462 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2463 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2464 defsymbol (&Qjis_x0208, "=jis-x0208");
2465 defsymbol (&Qideograph_gt, "ideograph-gt");
2466 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2467 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2468 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2469 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2470 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2471 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2472 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2473 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2474 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2475 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2476 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2477 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2478 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2479 defsymbol (&Qchinese_big5, "chinese-big5");
2480 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2481 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2482 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2483 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2484 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2485 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2486 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2487 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2488 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2489 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2490 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2491 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2492 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2493 defsymbol (&Qchina3_jef, "china3-jef");
2494 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2495 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2497 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2498 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2500 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the zeroed
   charset_lookup structure and registers it with dump_add_root_struct_ptr,
   sets every slot of the lookup tables to Qnil, seeds the "next leading
   byte to allocate" counters, and defines the Lisp variables
   `leading-code-private-11' and `default-coded-charset-priority-list'.

   Two differently shaped loops over charset_by_attributes (two- and
   three-level) and two sets of leading-byte counters are visible; they look
   like alternatives for different build configurations, presumably chosen
   by preprocessor conditionals -- confirm.

   NOTE(review): leading_code_private_11 is assigned
   PRE_LEADING_BYTE_PRIVATE_1 both before and after its DEFVAR_INT; one of
   the two assignments appears redundant. */
2504 vars_of_mule_charset (void)
2511 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2512 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2514 /* Table of charsets indexed by leading byte. */
2515 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2516 chlook->charset_by_leading_byte[i] = Qnil;
2519 /* Table of charsets indexed by type/final-byte. */
2520 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2521 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2522 chlook->charset_by_attributes[i][j] = Qnil;
2524 /* Table of charsets indexed by type/final-byte/direction. */
2525 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2526 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2527 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2528 chlook->charset_by_attributes[i][j][k] = Qnil;
2532 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2534 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2535 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2539 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2540 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2541 Leading-code of private TYPE9N charset of column-width 1.
2543 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2547 Vdefault_coded_charset_priority_list = Qnil;
2548 DEFVAR_LISP ("default-coded-charset-priority-list",
2549 &Vdefault_coded_charset_priority_list /*
2550 Default order of preferred coded-character-sets.
2556 complex_vars_of_mule_charset (void)
2558 staticpro (&Vcharset_hash_table);
2559 Vcharset_hash_table =
2560 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2562 /* Predefined character sets. We store them into variables for
2566 staticpro (&Vcharset_ucs);
2568 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2569 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2570 build_string ("UCS"),
2571 build_string ("UCS"),
2572 build_string ("ISO/IEC 10646"),
2574 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2575 staticpro (&Vcharset_ucs_bmp);
2577 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2578 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2579 build_string ("BMP"),
2580 build_string ("UCS-BMP"),
2581 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2583 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2584 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2585 staticpro (&Vcharset_ucs_smp);
2587 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2588 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2589 build_string ("SMP"),
2590 build_string ("UCS-SMP"),
2591 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2592 build_string ("UCS00-1"),
2593 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2594 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2595 staticpro (&Vcharset_ucs_sip);
2597 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2598 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2599 build_string ("SIP"),
2600 build_string ("UCS-SIP"),
2601 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2602 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2603 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2604 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2605 staticpro (&Vcharset_ucs_cns);
2607 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2608 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2609 build_string ("UCS for CNS"),
2610 build_string ("UCS for CNS 11643"),
2611 build_string ("ISO/IEC 10646 for CNS 11643"),
2613 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2614 staticpro (&Vcharset_ucs_jis);
2616 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2617 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2618 build_string ("UCS for JIS"),
2619 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2621 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2623 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2624 staticpro (&Vcharset_ucs_ks);
2626 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2627 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2628 build_string ("UCS for KS"),
2629 build_string ("UCS for CCS defined by KS"),
2630 build_string ("ISO/IEC 10646 for Korean Standards"),
2632 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2633 staticpro (&Vcharset_ucs_big5);
2635 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2636 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2637 build_string ("UCS for Big5"),
2638 build_string ("UCS for Big5"),
2639 build_string ("ISO/IEC 10646 for Big5"),
2641 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
/* Character-range bounds for Thai and halfwidth katakana, all defined
   as 0 here.  The matching Hebrew pair is left commented out; the
   ISO8859-8 (Hebrew) make_charset call later in this function passes
   literal 0 with the macro name kept in a comment instead.
   NOTE(review): presumably a 0..0 range means "no dedicated code-point
   range reserved" -- confirm against make_charset. */
2643 # define MIN_CHAR_THAI 0
2644 # define MAX_CHAR_THAI 0
2645 /* # define MIN_CHAR_HEBREW 0 */
2646 /* # define MAX_CHAR_HEBREW 0 */
2647 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2648 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2650 staticpro (&Vcharset_ascii);
2652 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2653 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2654 build_string ("ASCII"),
2655 build_string ("ASCII)"),
2656 build_string ("ASCII (ISO646 IRV)"),
2657 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2658 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2659 staticpro (&Vcharset_control_1);
2660 Vcharset_control_1 =
2661 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2662 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2663 build_string ("C1"),
2664 build_string ("Control characters"),
2665 build_string ("Control characters 128-191"),
2667 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2668 staticpro (&Vcharset_latin_iso8859_1);
2669 Vcharset_latin_iso8859_1 =
2670 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2671 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2672 build_string ("Latin-1"),
2673 build_string ("ISO8859-1 (Latin-1)"),
2674 build_string ("ISO8859-1 (Latin-1)"),
2675 build_string ("iso8859-1"),
2676 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2677 staticpro (&Vcharset_latin_iso8859_2);
2678 Vcharset_latin_iso8859_2 =
2679 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2680 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2681 build_string ("Latin-2"),
2682 build_string ("ISO8859-2 (Latin-2)"),
2683 build_string ("ISO8859-2 (Latin-2)"),
2684 build_string ("iso8859-2"),
2685 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2686 staticpro (&Vcharset_latin_iso8859_3);
2687 Vcharset_latin_iso8859_3 =
2688 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2689 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2690 build_string ("Latin-3"),
2691 build_string ("ISO8859-3 (Latin-3)"),
2692 build_string ("ISO8859-3 (Latin-3)"),
2693 build_string ("iso8859-3"),
2694 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2695 staticpro (&Vcharset_latin_iso8859_4);
2696 Vcharset_latin_iso8859_4 =
2697 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2698 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2699 build_string ("Latin-4"),
2700 build_string ("ISO8859-4 (Latin-4)"),
2701 build_string ("ISO8859-4 (Latin-4)"),
2702 build_string ("iso8859-4"),
2703 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2704 staticpro (&Vcharset_thai_tis620);
2705 Vcharset_thai_tis620 =
2706 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2707 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2708 build_string ("TIS620"),
2709 build_string ("TIS620 (Thai)"),
2710 build_string ("TIS620.2529 (Thai)"),
2711 build_string ("tis620"),
2712 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2713 staticpro (&Vcharset_greek_iso8859_7);
2714 Vcharset_greek_iso8859_7 =
2715 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2716 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2717 build_string ("ISO8859-7"),
2718 build_string ("ISO8859-7 (Greek)"),
2719 build_string ("ISO8859-7 (Greek)"),
2720 build_string ("iso8859-7"),
2721 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2722 staticpro (&Vcharset_arabic_iso8859_6);
2723 Vcharset_arabic_iso8859_6 =
2724 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2725 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2726 build_string ("ISO8859-6"),
2727 build_string ("ISO8859-6 (Arabic)"),
2728 build_string ("ISO8859-6 (Arabic)"),
2729 build_string ("iso8859-6"),
2730 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2731 staticpro (&Vcharset_hebrew_iso8859_8);
2732 Vcharset_hebrew_iso8859_8 =
2733 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2734 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2735 build_string ("ISO8859-8"),
2736 build_string ("ISO8859-8 (Hebrew)"),
2737 build_string ("ISO8859-8 (Hebrew)"),
2738 build_string ("iso8859-8"),
2740 0 /* MIN_CHAR_HEBREW */,
2741 0 /* MAX_CHAR_HEBREW */, 0, 32,
2742 Qnil, CONVERSION_IDENTICAL);
2743 staticpro (&Vcharset_katakana_jisx0201);
2744 Vcharset_katakana_jisx0201 =
2745 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2746 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2747 build_string ("JISX0201 Kana"),
2748 build_string ("JISX0201.1976 (Japanese Kana)"),
2749 build_string ("JISX0201.1976 Japanese Kana"),
2750 build_string ("jisx0201\\.1976"),
2751 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2752 staticpro (&Vcharset_latin_jisx0201);
2753 Vcharset_latin_jisx0201 =
2754 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2755 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JISX0201 Roman"),
2757 build_string ("JISX0201.1976 (Japanese Roman)"),
2758 build_string ("JISX0201.1976 Japanese Roman"),
2759 build_string ("jisx0201\\.1976"),
2760 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2761 staticpro (&Vcharset_cyrillic_iso8859_5);
2762 Vcharset_cyrillic_iso8859_5 =
2763 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2764 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2765 build_string ("ISO8859-5"),
2766 build_string ("ISO8859-5 (Cyrillic)"),
2767 build_string ("ISO8859-5 (Cyrillic)"),
2768 build_string ("iso8859-5"),
2769 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_latin_iso8859_9);
2771 Vcharset_latin_iso8859_9 =
2772 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2773 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2774 build_string ("Latin-5"),
2775 build_string ("ISO8859-9 (Latin-5)"),
2776 build_string ("ISO8859-9 (Latin-5)"),
2777 build_string ("iso8859-9"),
2778 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2780 staticpro (&Vcharset_jis_x0208);
2781 Vcharset_jis_x0208 =
2782 make_charset (LEADING_BYTE_JIS_X0208,
2784 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2785 build_string ("JIS X0208"),
2786 build_string ("JIS X0208 Common"),
2787 build_string ("JIS X0208 Common part"),
2788 build_string ("jisx0208\\.1990"),
2790 MIN_CHAR_JIS_X0208_1990,
2791 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2792 Qnil, CONVERSION_94x94);
2794 staticpro (&Vcharset_japanese_jisx0208_1978);
2795 Vcharset_japanese_jisx0208_1978 =
2796 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2797 Qjapanese_jisx0208_1978, 94, 2,
2798 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2799 build_string ("JIS X0208:1978"),
2800 build_string ("JIS X0208:1978 (Japanese)"),
2802 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2803 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2810 CONVERSION_IDENTICAL);
2811 staticpro (&Vcharset_chinese_gb2312);
2812 Vcharset_chinese_gb2312 =
2813 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2814 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2815 build_string ("GB2312"),
2816 build_string ("GB2312)"),
2817 build_string ("GB2312 Chinese simplified"),
2818 build_string ("gb2312"),
2819 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2820 staticpro (&Vcharset_chinese_gb12345);
2821 Vcharset_chinese_gb12345 =
2822 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2823 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2824 build_string ("G1"),
2825 build_string ("GB 12345)"),
2826 build_string ("GB 12345-1990"),
2827 build_string ("GB12345\\(\\.1990\\)?-0"),
2828 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2829 staticpro (&Vcharset_japanese_jisx0208);
2830 Vcharset_japanese_jisx0208 =
2831 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2832 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2833 build_string ("JISX0208"),
2834 build_string ("JIS X0208:1983 (Japanese)"),
2835 build_string ("JIS X0208:1983 Japanese Kanji"),
2836 build_string ("jisx0208\\.1983"),
2843 CONVERSION_IDENTICAL);
2845 staticpro (&Vcharset_japanese_jisx0208_1990);
2846 Vcharset_japanese_jisx0208_1990 =
2847 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2848 Qjapanese_jisx0208_1990, 94, 2,
2849 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2850 build_string ("JISX0208-1990"),
2851 build_string ("JIS X0208:1990 (Japanese)"),
2852 build_string ("JIS X0208:1990 Japanese Kanji"),
2853 build_string ("jisx0208\\.1990"),
2855 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2856 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2857 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2858 Vcharset_jis_x0208 /* Qnil */,
2859 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2861 staticpro (&Vcharset_korean_ksc5601);
2862 Vcharset_korean_ksc5601 =
2863 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2864 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2865 build_string ("KSC5601"),
2866 build_string ("KSC5601 (Korean"),
2867 build_string ("KSC5601 Korean Hangul and Hanja"),
2868 build_string ("ksc5601"),
2869 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2870 staticpro (&Vcharset_japanese_jisx0212);
2871 Vcharset_japanese_jisx0212 =
2872 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2873 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2874 build_string ("JISX0212"),
2875 build_string ("JISX0212 (Japanese)"),
2876 build_string ("JISX0212 Japanese Supplement"),
2877 build_string ("jisx0212"),
2878 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* CHINESE_CNS_PLANE_RE(n): expand to a single regexp string literal for
   CNS 11643 plane N.  N must itself be a string literal (e.g. "1"),
   because the pieces are joined by adjacent-string-literal
   concatenation.  The pattern reads: "cns11643", then '.' or '-', then
   an optional group ending in '.' or '-', then N, anchored at end of
   string.  It is passed to build_string in the CNS11643-1 and
   CNS11643-2 make_charset calls that follow. */
2880 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2881 staticpro (&Vcharset_chinese_cns11643_1);
2882 Vcharset_chinese_cns11643_1 =
2883 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2884 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2885 build_string ("CNS11643-1"),
2886 build_string ("CNS11643-1 (Chinese traditional)"),
2888 ("CNS 11643 Plane 1 Chinese traditional"),
2889 build_string (CHINESE_CNS_PLANE_RE("1")),
2890 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2891 staticpro (&Vcharset_chinese_cns11643_2);
2892 Vcharset_chinese_cns11643_2 =
2893 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2894 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("CNS11643-2"),
2896 build_string ("CNS11643-2 (Chinese traditional)"),
2898 ("CNS 11643 Plane 2 Chinese traditional"),
2899 build_string (CHINESE_CNS_PLANE_RE("2")),
2900 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2902 staticpro (&Vcharset_latin_tcvn5712);
2903 Vcharset_latin_tcvn5712 =
2904 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2905 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2906 build_string ("TCVN 5712"),
2907 build_string ("TCVN 5712 (VSCII-2)"),
2908 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2909 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2910 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2911 staticpro (&Vcharset_latin_viscii_lower);
2912 Vcharset_latin_viscii_lower =
2913 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2914 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2915 build_string ("VISCII lower"),
2916 build_string ("VISCII lower (Vietnamese)"),
2917 build_string ("VISCII lower (Vietnamese)"),
2918 build_string ("MULEVISCII-LOWER"),
2919 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2920 staticpro (&Vcharset_latin_viscii_upper);
2921 Vcharset_latin_viscii_upper =
2922 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2923 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2924 build_string ("VISCII upper"),
2925 build_string ("VISCII upper (Vietnamese)"),
2926 build_string ("VISCII upper (Vietnamese)"),
2927 build_string ("MULEVISCII-UPPER"),
2928 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2929 staticpro (&Vcharset_latin_viscii);
2930 Vcharset_latin_viscii =
2931 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2932 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2933 build_string ("VISCII"),
2934 build_string ("VISCII 1.1 (Vietnamese)"),
2935 build_string ("VISCII 1.1 (Vietnamese)"),
2936 build_string ("VISCII1\\.1"),
2937 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2938 staticpro (&Vcharset_chinese_big5);
2939 Vcharset_chinese_big5 =
2940 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2941 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Big5"),
2943 build_string ("Big5"),
2944 build_string ("Big5 Chinese traditional"),
2945 build_string ("big5-0"),
2947 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2948 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2950 staticpro (&Vcharset_chinese_big5_cdp);
2951 Vcharset_chinese_big5_cdp =
2952 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2953 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2954 build_string ("Big5-CDP"),
2955 build_string ("Big5 + CDP extension"),
2956 build_string ("Big5 with CDP extension"),
2957 build_string ("big5\\.cdp-0"),
2958 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2959 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): set up the charset variable
   Vcharset_ideograph_hanziku_<n>.  The expansion first registers the
   variable with staticpro, then assigns it the result of make_charset
   called with LEADING_BYTE_HANZIKU_<n>, Qideograph_hanziku_<n>, the
   name strings "HZK-<n>", "HANZIKU-<n>" and
   "HANZIKU (pseudo BIG5 encoding) part <n>", the pattern
   "hanziku-<n>$", and the MIN_CHAR_HANZIKU_<n> / MAX_CHAR_HANZIKU_<n>
   bounds.

   N is both token-pasted (##) into identifiers and stringified (#)
   into the strings, so it must be a bare token such as a number.

   The expansion is two statements and already ends in a semicolon; it
   is not wrapped in do { } while (0), so it must not be used as the
   body of an unbraced if/else.

   NOTE(review): the pattern argument ("hanziku-"#n"$") appears here
   without a build_string wrapper, unlike the three name strings before
   it -- confirm that a continuation line has not been lost. */
2961 #define DEF_HANZIKU(n) \
2962 staticpro (&Vcharset_ideograph_hanziku_##n); \
2963 Vcharset_ideograph_hanziku_##n = \
2964 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2965 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2966 build_string ("HZK-"#n), \
2967 build_string ("HANZIKU-"#n), \
2968 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2970 ("hanziku-"#n"$"), \
2971 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2972 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2985 staticpro (&Vcharset_china3_jef);
2986 Vcharset_china3_jef =
2987 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2988 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2989 build_string ("JC3"),
2990 build_string ("JEF + CHINA3"),
2991 build_string ("JEF + CHINA3 private characters"),
2992 build_string ("china3jef-0"),
2993 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2994 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2995 staticpro (&Vcharset_ideograph_cbeta);
2996 Vcharset_ideograph_cbeta =
2997 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2998 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2999 build_string ("CB"),
3000 build_string ("CBETA"),
3001 build_string ("CBETA private characters"),
3002 build_string ("cbeta-0"),
3003 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3004 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3005 staticpro (&Vcharset_ideograph_gt);
3006 Vcharset_ideograph_gt =
3007 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3008 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3009 build_string ("GT"),
3010 build_string ("GT"),
3011 build_string ("GT"),
3013 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3014 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): set up the charset variable
   Vcharset_ideograph_gt_pj_<n>.  The expansion first registers the
   variable with staticpro, then assigns it the result of make_charset
   called with LEADING_BYTE_GT_PJ_<n>, Qideograph_gt_pj_<n>, the name
   strings "GT-PJ-<n>", "GT (pseudo JIS encoding) part <n>" and
   "GT 2000 (pseudo JIS encoding) part <n>", and a pattern that accepts
   either "GTpj-<n>" or "jisx0208.GT-<n>" at end of string.  Unlike
   DEF_HANZIKU, the three numeric range arguments are all literal 0.

   N is both token-pasted (##) into identifiers and stringified (#)
   into the strings, so it must be a bare token such as a number.

   The expansion is two statements and already ends in a semicolon; it
   is not wrapped in do { } while (0), so it must not be used as the
   body of an unbraced if/else.

   NOTE(review): the pattern argument appears here without a
   build_string wrapper, unlike the three name strings before it --
   confirm that a continuation line has not been lost. */
3015 #define DEF_GT_PJ(n) \
3016 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3017 Vcharset_ideograph_gt_pj_##n = \
3018 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3019 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3020 build_string ("GT-PJ-"#n), \
3021 build_string ("GT (pseudo JIS encoding) part "#n), \
3022 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3024 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3025 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3038 staticpro (&Vcharset_ideograph_daikanwa_2);
3039 Vcharset_ideograph_daikanwa_2 =
3040 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3041 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3042 build_string ("Daikanwa Rev."),
3043 build_string ("Morohashi's Daikanwa Rev."),
3045 ("Daikanwa dictionary (revised version)"),
3046 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3047 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3048 staticpro (&Vcharset_ideograph_daikanwa);
3049 Vcharset_ideograph_daikanwa =
3050 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3051 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3052 build_string ("Daikanwa"),
3053 build_string ("Morohashi's Daikanwa Rev.2"),
3055 ("Daikanwa dictionary (second revised version)"),
3056 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3057 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3058 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3060 staticpro (&Vcharset_ethiopic_ucs);
3061 Vcharset_ethiopic_ucs =
3062 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3063 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3064 build_string ("Ethiopic (UCS)"),
3065 build_string ("Ethiopic (UCS)"),
3066 build_string ("Ethiopic of UCS"),
3067 build_string ("Ethiopic-Unicode"),
3068 Qnil, 0x1200, 0x137F, 0, 0,
3069 Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_chinese_big5_1);
3072 Vcharset_chinese_big5_1 =
3073 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3074 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Big5"),
3076 build_string ("Big5 (Level-1)"),
3078 ("Big5 Level-1 Chinese traditional"),
3079 build_string ("big5"),
3080 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3081 staticpro (&Vcharset_chinese_big5_2);
3082 Vcharset_chinese_big5_2 =
3083 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3084 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3085 build_string ("Big5"),
3086 build_string ("Big5 (Level-2)"),
3088 ("Big5 Level-2 Chinese traditional"),
3089 build_string ("big5"),
3090 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3092 #ifdef ENABLE_COMPOSITE_CHARS
3093 /* #### For simplicity, we put composite chars into a 96x96 charset.
3094 This is going to lead to problems because you can run out of
3095 room, esp. as we don't yet recycle numbers. */
3096 staticpro (&Vcharset_composite);
3097 Vcharset_composite =
3098 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3099 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3100 build_string ("Composite"),
3101 build_string ("Composite characters"),
3102 build_string ("Composite characters"),
3105 /* #### not dumped properly */
3106 composite_char_row_next = 32;
3107 composite_char_col_next = 32;
3109 Vcomposite_char_string2char_hash_table =
3110 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3111 Vcomposite_char_char2string_hash_table =
3112 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3113 staticpro (&Vcomposite_char_string2char_hash_table);
3114 staticpro (&Vcomposite_char_char2string_hash_table);
3115 #endif /* ENABLE_COMPOSITE_CHARS */