1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 /* Lisp_Object Vcharset_chinese_big5_cdp; */
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_china3_jef;
95 Lisp_Object Vcharset_ideograph_cbeta;
96 Lisp_Object Vcharset_ideograph_gt;
97 Lisp_Object Vcharset_ideograph_gt_pj_1;
98 Lisp_Object Vcharset_ideograph_gt_pj_2;
99 Lisp_Object Vcharset_ideograph_gt_pj_3;
100 Lisp_Object Vcharset_ideograph_gt_pj_4;
101 Lisp_Object Vcharset_ideograph_gt_pj_5;
102 Lisp_Object Vcharset_ideograph_gt_pj_6;
103 Lisp_Object Vcharset_ideograph_gt_pj_7;
104 Lisp_Object Vcharset_ideograph_gt_pj_8;
105 Lisp_Object Vcharset_ideograph_gt_pj_9;
106 Lisp_Object Vcharset_ideograph_gt_pj_10;
107 Lisp_Object Vcharset_ideograph_gt_pj_11;
108 Lisp_Object Vcharset_ideograph_daikanwa_2;
109 Lisp_Object Vcharset_ideograph_daikanwa;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps string representing
119 composed chars to their equivalent chars; one goes the
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
180 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
182 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
186 if (XVECTOR_LENGTH (v) > ccs_len)
189 for (i = 0; i < XVECTOR_LENGTH (v); i++)
191 Lisp_Object c = XVECTOR_DATA(v)[i];
193 if (!NILP (c) && !CHARP (c))
197 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
209 put_char_ccs_code_point (Lisp_Object character,
210 Lisp_Object ccs, Lisp_Object value)
212 if (!EQ (XCHARSET_NAME (ccs), Qucs)
214 || (XCHAR (character) != XINT (value)))
216 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
220 { /* obsolete representation: value must be a list of bytes */
221 Lisp_Object ret = Fcar (value);
225 signal_simple_error ("Invalid value for coded-charset", value);
226 code_point = XINT (ret);
227 if (XCHARSET_GRAPHIC (ccs) == 1)
235 signal_simple_error ("Invalid value for coded-charset",
239 signal_simple_error ("Invalid value for coded-charset",
242 if (XCHARSET_GRAPHIC (ccs) == 1)
244 code_point = (code_point << 8) | j;
247 value = make_int (code_point);
249 else if (INTP (value))
251 code_point = XINT (value);
252 if (XCHARSET_GRAPHIC (ccs) == 1)
254 code_point &= 0x7F7F7F7F;
255 value = make_int (code_point);
259 signal_simple_error ("Invalid value for coded-charset", value);
263 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
266 decoding_table_remove_char (ccs, XINT (cpos));
269 decoding_table_put_char (ccs, code_point, character);
275 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
277 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
278 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
280 if (VECTORP (decoding_table))
282 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
286 decoding_table_remove_char (ccs, XINT (cpos));
289 if (CHAR_TABLEP (encoding_table))
291 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
299 int leading_code_private_11;
302 Lisp_Object Qcharsetp;
304 /* Qdoc_string, Qdimension, Qchars defined in general.c */
305 Lisp_Object Qregistry, Qfinal, Qgraphic;
306 Lisp_Object Qdirection;
307 Lisp_Object Qreverse_direction_charset;
308 Lisp_Object Qleading_byte;
309 Lisp_Object Qshort_name, Qlong_name;
311 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
312 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
329 Qjapanese_jisx0208_1978,
333 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
370 Qideograph_daikanwa_2,
390 Lisp_Object Ql2r, Qr2l;
392 Lisp_Object Vcharset_hash_table;
394 /* Composite characters are characters constructed by overstriking two
395 or more regular characters.
397 1) The old Mule implementation involves storing composite characters
398 in a buffer as a tag followed by all of the actual characters
399 used to make up the composite character. I think this is a bad
400 idea; it greatly complicates code that wants to handle strings
401 one character at a time because it has to deal with the possibility
402 of great big ungainly characters. It's much more reasonable to
403 simply store an index into a table of composite characters.
405 2) The current implementation only allows for 16,384 separate
406 composite characters over the lifetime of the XEmacs process.
407 This could become a potential problem if the user
408 edited lots of different files that use composite characters.
409 Due to FSF bogosity, increasing the number of allowable
410 composite characters under Mule would decrease the number
411 of possible faces that can exist. Mule already has shrunk
412 this to 2048, and further shrinkage would become uncomfortable.
413 No such problems exist in XEmacs.
415 Composite characters could be represented as 0x80 C1 C2 C3,
416 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
417 for slightly under 2^20 (one million) composite characters
418 over the XEmacs process lifetime, and you only need to
419 increase the size of a Mule character from 19 to 21 bits.
420 Or you could use 0x80 C1 C2 C3 C4, allowing for about
421 85 million (slightly over 2^26) composite characters. */
424 /************************************************************************/
425 /* Basic Emchar functions */
426 /************************************************************************/
428 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
429 string in STR. Returns the number of bytes stored.
430 Do not call this directly. Use the macro set_charptr_emchar() instead.
434 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
449 else if ( c <= 0x7ff )
451 *p++ = (c >> 6) | 0xc0;
452 *p++ = (c & 0x3f) | 0x80;
454 else if ( c <= 0xffff )
456 *p++ = (c >> 12) | 0xe0;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
460 else if ( c <= 0x1fffff )
462 *p++ = (c >> 18) | 0xf0;
463 *p++ = ((c >> 12) & 0x3f) | 0x80;
464 *p++ = ((c >> 6) & 0x3f) | 0x80;
465 *p++ = (c & 0x3f) | 0x80;
467 else if ( c <= 0x3ffffff )
469 *p++ = (c >> 24) | 0xf8;
470 *p++ = ((c >> 18) & 0x3f) | 0x80;
471 *p++ = ((c >> 12) & 0x3f) | 0x80;
472 *p++ = ((c >> 6) & 0x3f) | 0x80;
473 *p++ = (c & 0x3f) | 0x80;
477 *p++ = (c >> 30) | 0xfc;
478 *p++ = ((c >> 24) & 0x3f) | 0x80;
479 *p++ = ((c >> 18) & 0x3f) | 0x80;
480 *p++ = ((c >> 12) & 0x3f) | 0x80;
481 *p++ = ((c >> 6) & 0x3f) | 0x80;
482 *p++ = (c & 0x3f) | 0x80;
485 BREAKUP_CHAR (c, charset, c1, c2);
486 lb = CHAR_LEADING_BYTE (c);
487 if (LEADING_BYTE_PRIVATE_P (lb))
488 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
490 if (EQ (charset, Vcharset_control_1))
499 /* Return the first character from a Mule-encoded string in STR,
500 assuming it's non-ASCII. Do not call this directly.
501 Use the macro charptr_emchar() instead. */
504 non_ascii_charptr_emchar (const Bufbyte *str)
517 else if ( b >= 0xf8 )
522 else if ( b >= 0xf0 )
527 else if ( b >= 0xe0 )
532 else if ( b >= 0xc0 )
542 for( ; len > 0; len-- )
545 ch = ( ch << 6 ) | ( b & 0x3f );
549 Bufbyte i0 = *str, i1, i2 = 0;
552 if (i0 == LEADING_BYTE_CONTROL_1)
553 return (Emchar) (*++str - 0x20);
555 if (LEADING_BYTE_PREFIX_P (i0))
560 charset = CHARSET_BY_LEADING_BYTE (i0);
561 if (XCHARSET_DIMENSION (charset) == 2)
564 return MAKE_CHAR (charset, i1, i2);
568 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
569 Do not call this directly. Use the macro valid_char_p() instead. */
573 non_ascii_valid_char_p (Emchar ch)
577 /* Must have only lowest 19 bits set */
581 f1 = CHAR_FIELD1 (ch);
582 f2 = CHAR_FIELD2 (ch);
583 f3 = CHAR_FIELD3 (ch);
589 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
590 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
591 f2 > MAX_CHAR_FIELD2_PRIVATE)
596 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
597 f2 <= MAX_CHAR_FIELD2_PRIVATE))
601 NOTE: This takes advantage of the fact that
602 FIELD2_TO_OFFICIAL_LEADING_BYTE and
603 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
605 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
606 if (EQ (charset, Qnil))
608 return (XCHARSET_CHARS (charset) == 96);
614 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
615 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
616 f1 > MAX_CHAR_FIELD1_PRIVATE)
618 if (f2 < 0x20 || f3 < 0x20)
621 #ifdef ENABLE_COMPOSITE_CHARS
622 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
624 if (UNBOUNDP (Fgethash (make_int (ch),
625 Vcomposite_char_char2string_hash_table,
630 #endif /* ENABLE_COMPOSITE_CHARS */
632 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
633 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
636 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
641 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
643 if (EQ (charset, Qnil))
645 return (XCHARSET_CHARS (charset) == 96);
651 /************************************************************************/
652 /* Basic string functions */
653 /************************************************************************/
655 /* Copy the character pointed to by SRC into DST. Do not call this
656 directly. Use the macro charptr_copy_char() instead.
657 Return the number of bytes copied. */
660 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
662 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
664 for (i = bytes; i; i--, dst++, src++)
670 /************************************************************************/
671 /* streams of Emchars */
672 /************************************************************************/
674 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
675 The functions below are not meant to be called directly; use
676 the macros in insdel.h. */
679 Lstream_get_emchar_1 (Lstream *stream, int ch)
681 Bufbyte str[MAX_EMCHAR_LEN];
682 Bufbyte *strptr = str;
685 str[0] = (Bufbyte) ch;
687 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
689 int c = Lstream_getc (stream);
690 bufpos_checking_assert (c >= 0);
691 *++strptr = (Bufbyte) c;
693 return charptr_emchar (str);
697 Lstream_fput_emchar (Lstream *stream, Emchar ch)
699 Bufbyte str[MAX_EMCHAR_LEN];
700 Bytecount len = set_charptr_emchar (str, ch);
701 return Lstream_write (stream, str, len);
705 Lstream_funget_emchar (Lstream *stream, Emchar ch)
707 Bufbyte str[MAX_EMCHAR_LEN];
708 Bytecount len = set_charptr_emchar (str, ch);
709 Lstream_unread (stream, str, len);
713 /************************************************************************/
715 /************************************************************************/
718 mark_charset (Lisp_Object obj)
720 Lisp_Charset *cs = XCHARSET (obj);
722 mark_object (cs->short_name);
723 mark_object (cs->long_name);
724 mark_object (cs->doc_string);
725 mark_object (cs->registry);
726 mark_object (cs->ccl_program);
728 mark_object (cs->decoding_table);
729 mark_object (cs->mother);
735 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
737 Lisp_Charset *cs = XCHARSET (obj);
741 error ("printing unreadable object #<charset %s 0x%x>",
742 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
745 write_c_string ("#<charset ", printcharfun);
746 print_internal (CHARSET_NAME (cs), printcharfun, 0);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
753 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
755 CHARSET_DIMENSION (cs),
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
776 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
781 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
782 mark_charset, print_charset, 0, 0, 0,
786 /* Make a new charset. */
787 /* #### SJT Should generic properties be allowed? */
789 make_charset (Charset_ID id, Lisp_Object name,
790 unsigned short chars, unsigned char dimension,
791 unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar min_code, Emchar max_code,
797 Emchar code_offset, unsigned char byte_offset,
798 Lisp_Object mother, unsigned char conversion)
801 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
805 XSETCHARSET (obj, cs);
807 CHARSET_ID (cs) = id;
808 CHARSET_NAME (cs) = name;
809 CHARSET_SHORT_NAME (cs) = short_name;
810 CHARSET_LONG_NAME (cs) = long_name;
811 CHARSET_CHARS (cs) = chars;
812 CHARSET_DIMENSION (cs) = dimension;
813 CHARSET_DIRECTION (cs) = direction;
814 CHARSET_COLUMNS (cs) = columns;
815 CHARSET_GRAPHIC (cs) = graphic;
816 CHARSET_FINAL (cs) = final;
817 CHARSET_DOC_STRING (cs) = doc;
818 CHARSET_REGISTRY (cs) = reg;
819 CHARSET_CCL_PROGRAM (cs) = Qnil;
820 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
822 CHARSET_DECODING_TABLE(cs) = Qunbound;
823 CHARSET_MIN_CODE (cs) = min_code;
824 CHARSET_MAX_CODE (cs) = max_code;
825 CHARSET_CODE_OFFSET (cs) = code_offset;
826 CHARSET_BYTE_OFFSET (cs) = byte_offset;
827 CHARSET_MOTHER (cs) = mother;
828 CHARSET_CONVERSION (cs) = conversion;
832 if (id == LEADING_BYTE_ASCII)
833 CHARSET_REP_BYTES (cs) = 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
837 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
842 /* some charsets do not have final characters. This includes
843 ASCII, Control-1, Composite, and the two faux private
845 unsigned char iso2022_type
846 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
848 if (code_offset == 0)
850 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
851 chlook->charset_by_attributes[iso2022_type][final] = obj;
855 (chlook->charset_by_attributes[iso2022_type][final][direction]));
856 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
860 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
861 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
863 /* Some charsets are "faux" and don't have names or really exist at
864 all except in the leading-byte table. */
866 Fputhash (name, obj, Vcharset_hash_table);
871 get_unallocated_leading_byte (int dimension)
876 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
879 lb = chlook->next_allocated_leading_byte++;
883 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
886 lb = chlook->next_allocated_1_byte_leading_byte++;
890 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
893 lb = chlook->next_allocated_2_byte_leading_byte++;
899 ("No more character sets free for this dimension",
900 make_int (dimension));
906 /* Number of Big5 characters which have the same code in 1st byte. */
908 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
911 decode_defined_char (Lisp_Object ccs, int code_point)
913 int dim = XCHARSET_DIMENSION (ccs);
914 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
922 = get_ccs_octet_table (decoding_table, ccs,
923 (code_point >> (dim * 8)) & 255);
925 if (CHARP (decoding_table))
926 return XCHAR (decoding_table);
929 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
931 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
933 if ( EQ (mother, Vcharset_ucs) )
934 return DECODE_CHAR (mother, code_point);
936 return decode_defined_char (mother, code_point);
943 decode_builtin_char (Lisp_Object charset, int code_point)
945 Lisp_Object mother = XCHARSET_MOTHER (charset);
948 if ( XCHARSET_MAX_CODE (charset) > 0 )
950 if ( CHARSETP (mother) )
952 int code = code_point;
954 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
956 int row = code_point >> 8;
957 int cell = code_point & 255;
961 else if (row < 16 + 32 + 30)
962 code = (row - (16 + 32)) * 94 + cell - 33;
963 else if (row < 18 + 32 + 30)
965 else if (row < 18 + 32 + 60)
966 code = (row - (18 + 32)) * 94 + cell - 33;
968 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
970 int plane = code_point >> 16;
971 int row = (code_point >> 8) & 255;
972 int cell = code_point & 255;
976 else if (row < 16 + 32 + 30)
978 = (plane - 33) * 94 * 60
979 + (row - (16 + 32)) * 94
981 else if (row < 18 + 32 + 30)
983 else if (row < 18 + 32 + 60)
985 = (plane - 33) * 94 * 60
986 + (row - (18 + 32)) * 94
990 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
995 = (XCHARSET_DIMENSION (charset) == 1
997 code_point - XCHARSET_BYTE_OFFSET (charset)
999 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1000 * XCHARSET_CHARS (charset)
1001 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1002 + XCHARSET_CODE_OFFSET (charset);
1003 if ((cid < XCHARSET_MIN_CODE (charset))
1004 || (XCHARSET_MAX_CODE (charset) < cid))
1009 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1011 if (XCHARSET_DIMENSION (charset) == 1)
1013 switch (XCHARSET_CHARS (charset))
1017 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1020 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1028 switch (XCHARSET_CHARS (charset))
1031 return MIN_CHAR_94x94
1032 + (final - '0') * 94 * 94
1033 + (((code_point >> 8) & 0x7F) - 33) * 94
1034 + ((code_point & 0x7F) - 33);
1036 return MIN_CHAR_96x96
1037 + (final - '0') * 96 * 96
1038 + (((code_point >> 8) & 0x7F) - 32) * 96
1039 + ((code_point & 0x7F) - 32);
1051 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1053 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1056 if ( CHAR_TABLEP (encoding_table)
1057 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1062 Lisp_Object mother = XCHARSET_MOTHER (charset);
1063 int min = XCHARSET_MIN_CODE (charset);
1064 int max = XCHARSET_MAX_CODE (charset);
1067 if ( CHARSETP (mother) )
1068 code = charset_code_point (mother, ch, defined_only);
1069 else if (defined_only)
1071 else if ( ((max == 0) && CHARSETP (mother)
1072 && (XCHARSET_FINAL (charset) == 0))
1073 || ((min <= ch) && (ch <= max)) )
1075 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1076 || ((min <= code) && (code <= max)) )
1078 int d = code - XCHARSET_CODE_OFFSET (charset);
1080 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1082 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1084 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1086 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1089 int cell = d % 94 + 33;
1095 return (row << 8) | cell;
1097 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1098 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1099 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1100 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1101 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1103 int plane = d / (94 * 60) + 33;
1104 int row = (d % (94 * 60)) / 94;
1105 int cell = d % 94 + 33;
1111 return (plane << 16) | (row << 8) | cell;
1113 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1115 ( (d / (94 * 94) + 33) << 16)
1116 | ((d / 94 % 94 + 33) << 8)
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1120 ( (d / (96 * 96) + 32) << 16)
1121 | ((d / 96 % 96 + 32) << 8)
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1125 ( (d / (94 * 94 * 94) + 33) << 24)
1126 | ((d / (94 * 94) % 94 + 33) << 16)
1127 | ((d / 94 % 94 + 33) << 8)
1129 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1131 ( (d / (96 * 96 * 96) + 32) << 24)
1132 | ((d / (96 * 96) % 96 + 32) << 16)
1133 | ((d / 96 % 96 + 32) << 8)
1137 printf ("Unknown CCS-conversion %d is specified!",
1138 XCHARSET_CONVERSION (charset));
1142 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1143 ( XCHARSET_MIN_CODE (charset) == 0 )
1145 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1146 (XCHARSET_CODE_OFFSET (charset)
1147 == XCHARSET_MIN_CODE (charset))
1152 if (XCHARSET_DIMENSION (charset) == 1)
1154 if (XCHARSET_CHARS (charset) == 94)
1156 if (((d = ch - (MIN_CHAR_94
1157 + (XCHARSET_FINAL (charset) - '0') * 94))
1162 else if (XCHARSET_CHARS (charset) == 96)
1164 if (((d = ch - (MIN_CHAR_96
1165 + (XCHARSET_FINAL (charset) - '0') * 96))
1173 else if (XCHARSET_DIMENSION (charset) == 2)
1175 if (XCHARSET_CHARS (charset) == 94)
1177 if (((d = ch - (MIN_CHAR_94x94
1179 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1182 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1184 else if (XCHARSET_CHARS (charset) == 96)
1186 if (((d = ch - (MIN_CHAR_96x96
1188 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1191 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1202 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1204 if (c <= MAX_CHAR_BASIC_LATIN)
1206 *charset = Vcharset_ascii;
1211 *charset = Vcharset_control_1;
1216 *charset = Vcharset_latin_iso8859_1;
1220 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1222 *charset = Vcharset_hebrew_iso8859_8;
1223 return c - MIN_CHAR_HEBREW + 0x20;
1226 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1228 *charset = Vcharset_thai_tis620;
1229 return c - MIN_CHAR_THAI + 0x20;
1232 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1233 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1235 return list2 (Vcharset_katakana_jisx0201,
1236 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1239 else if (c <= MAX_CHAR_BMP)
1241 *charset = Vcharset_ucs_bmp;
1244 else if (c <= MAX_CHAR_SMP)
1246 *charset = Vcharset_ucs_smp;
1247 return c - MIN_CHAR_SMP;
1249 else if (c <= MAX_CHAR_SIP)
1251 *charset = Vcharset_ucs_sip;
1252 return c - MIN_CHAR_SIP;
1254 else if (c < MIN_CHAR_DAIKANWA)
1256 *charset = Vcharset_ucs;
1259 else if (c <= MAX_CHAR_DAIKANWA)
1261 *charset = Vcharset_ideograph_daikanwa;
1262 return c - MIN_CHAR_DAIKANWA;
1264 else if (c < MIN_CHAR_94)
1266 *charset = Vcharset_ucs;
1269 else if (c <= MAX_CHAR_94)
1271 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1272 ((c - MIN_CHAR_94) / 94) + '0',
1273 CHARSET_LEFT_TO_RIGHT);
1274 if (!NILP (*charset))
1275 return ((c - MIN_CHAR_94) % 94) + 33;
1278 *charset = Vcharset_ucs;
1282 else if (c <= MAX_CHAR_96)
1284 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1285 ((c - MIN_CHAR_96) / 96) + '0',
1286 CHARSET_LEFT_TO_RIGHT);
1287 if (!NILP (*charset))
1288 return ((c - MIN_CHAR_96) % 96) + 32;
1291 *charset = Vcharset_ucs;
1295 else if (c <= MAX_CHAR_94x94)
1298 = CHARSET_BY_ATTRIBUTES (94, 2,
1299 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1303 | (((c - MIN_CHAR_94x94) % 94) + 33);
1306 *charset = Vcharset_ucs;
1310 else if (c <= MAX_CHAR_96x96)
1313 = CHARSET_BY_ATTRIBUTES (96, 2,
1314 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1315 CHARSET_LEFT_TO_RIGHT);
1316 if (!NILP (*charset))
1317 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1318 | (((c - MIN_CHAR_96x96) % 96) + 32);
1321 *charset = Vcharset_ucs;
1327 *charset = Vcharset_ucs;
1332 Lisp_Object Vdefault_coded_charset_priority_list;
1336 /************************************************************************/
1337 /* Basic charset Lisp functions */
1338 /************************************************************************/
1340 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1341 Return non-nil if OBJECT is a charset.
1345 return CHARSETP (object) ? Qt : Qnil;
1348 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1349 Retrieve the charset of the given name.
1350 If CHARSET-OR-NAME is a charset object, it is simply returned.
1351 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1352 nil is returned. Otherwise the associated charset object is returned.
1356 if (CHARSETP (charset_or_name))
1357 return charset_or_name;
1359 CHECK_SYMBOL (charset_or_name);
1360 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1363 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 Same as `find-charset' except an error is signalled if there is no such
1366 charset instead of returning nil.
1370 Lisp_Object charset = Ffind_charset (name);
1373 signal_simple_error ("No such charset", name);
1377 /* We store the charsets in hash tables with the names as the key and the
1378 actual charset object as the value. Occasionally we need to use them
1379 in a list format. These routines provide us with that. */
1380 struct charset_list_closure
1382 Lisp_Object *charset_list;
1386 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1387 void *charset_list_closure)
1389 /* This function can GC */
1390 struct charset_list_closure *chcl =
1391 (struct charset_list_closure*) charset_list_closure;
1392 Lisp_Object *charset_list = chcl->charset_list;
1394 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1398 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1399 Return a list of the names of all defined charsets.
1403 Lisp_Object charset_list = Qnil;
1404 struct gcpro gcpro1;
1405 struct charset_list_closure charset_list_closure;
1407 GCPRO1 (charset_list);
1408 charset_list_closure.charset_list = &charset_list;
1409 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1410 &charset_list_closure);
1413 return charset_list;
1416 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1417 Return the name of charset CHARSET.
1421 return XCHARSET_NAME (Fget_charset (charset));
1424 /* #### SJT Should generic properties be allowed? */
1425 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1426 Define a new character set.
1427 This function is for use with Mule support.
1428 NAME is a symbol, the name by which the character set is normally referred.
1429 DOC-STRING is a string describing the character set.
1430 PROPS is a property list, describing the specific nature of the
1431 character set. Recognized properties are:
1433 'short-name Short version of the charset name (ex: Latin-1)
1434 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1435 'registry A regular expression matching the font registry field for
1437 'dimension Number of octets used to index a character in this charset.
1438 Either 1 or 2. Defaults to 1.
1439 If UTF-2000 feature is enabled, 3 or 4 are also available.
1440 'columns Number of columns used to display a character in this charset.
1441 Only used in TTY mode. (Under X, the actual width of a
1442 character can be derived from the font used to display the
1443 characters.) If unspecified, defaults to the dimension
1444 (this is almost always the correct value).
1445 'chars Number of characters in each dimension (94 or 96).
1446 Defaults to 94. Note that if the dimension is 2, the
1447 character set thus described is 94x94 or 96x96.
1448 If UTF-2000 feature is enabled, 128 or 256 are also available.
1449 'final Final byte of ISO 2022 escape sequence. Must be
1450 supplied. Each combination of (DIMENSION, CHARS) defines a
1451 separate namespace for final bytes. Note that ISO
1452 2022 restricts the final byte to the range
1453 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1454 dimension == 2. Note also that final bytes in the range
1455 0x30 - 0x3F are reserved for user-defined (not official)
1457 'graphic 0 (use left half of font on output) or 1 (use right half
1458 of font on output). Defaults to 0. For example, for
1459 a font whose registry is ISO8859-1, the left half
1460 (octets 0x20 - 0x7F) is the `ascii' character set, while
1461 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1462 character set. With 'graphic set to 0, the octets
1463 will have their high bit cleared; with it set to 1,
1464 the octets will have their high bit set.
1465 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1467 'ccl-program A compiled CCL program used to convert a character in
1468 this charset into an index into the font. This is in
1469 addition to the 'graphic property. The CCL program
1470 is passed the octets of the character, with the high
1471 bit cleared and set depending upon whether the value
1472 of the 'graphic property is 0 or 1.
1473 'mother [UTF-2000 only] Base coded-charset.
1474 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1475 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1476 'code-offset [UTF-2000 only] Offset for a code-point of a base
1478 'conversion [UTF-2000 only] Conversion for a code-point of a base
1479 coded-charset (94x60 or 94x94x60).
1481 (name, doc_string, props))
1483 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1484 int direction = CHARSET_LEFT_TO_RIGHT;
1485 Lisp_Object registry = Qnil;
1486 Lisp_Object charset;
1487 Lisp_Object ccl_program = Qnil;
1488 Lisp_Object short_name = Qnil, long_name = Qnil;
1489 Lisp_Object mother = Qnil;
1490 int min_code = 0, max_code = 0, code_offset = 0;
1491 int byte_offset = -1;
1494 CHECK_SYMBOL (name);
1495 if (!NILP (doc_string))
1496 CHECK_STRING (doc_string);
1498 charset = Ffind_charset (name);
1499 if (!NILP (charset))
1500 signal_simple_error ("Cannot redefine existing charset", name);
1503 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1505 if (EQ (keyword, Qshort_name))
1507 CHECK_STRING (value);
1511 if (EQ (keyword, Qlong_name))
1513 CHECK_STRING (value);
1517 else if (EQ (keyword, Qdimension))
1520 dimension = XINT (value);
1521 if (dimension < 1 ||
1528 signal_simple_error ("Invalid value for 'dimension", value);
1531 else if (EQ (keyword, Qchars))
1534 chars = XINT (value);
1535 if (chars != 94 && chars != 96
1537 && chars != 128 && chars != 256
1540 signal_simple_error ("Invalid value for 'chars", value);
1543 else if (EQ (keyword, Qcolumns))
1546 columns = XINT (value);
1547 if (columns != 1 && columns != 2)
1548 signal_simple_error ("Invalid value for 'columns", value);
1551 else if (EQ (keyword, Qgraphic))
1554 graphic = XINT (value);
1562 signal_simple_error ("Invalid value for 'graphic", value);
1565 else if (EQ (keyword, Qregistry))
1567 CHECK_STRING (value);
1571 else if (EQ (keyword, Qdirection))
1573 if (EQ (value, Ql2r))
1574 direction = CHARSET_LEFT_TO_RIGHT;
1575 else if (EQ (value, Qr2l))
1576 direction = CHARSET_RIGHT_TO_LEFT;
1578 signal_simple_error ("Invalid value for 'direction", value);
1581 else if (EQ (keyword, Qfinal))
1583 CHECK_CHAR_COERCE_INT (value);
1584 final = XCHAR (value);
1585 if (final < '0' || final > '~')
1586 signal_simple_error ("Invalid value for 'final", value);
1590 else if (EQ (keyword, Qmother))
1592 mother = Fget_charset (value);
1595 else if (EQ (keyword, Qmin_code))
1598 min_code = XUINT (value);
1601 else if (EQ (keyword, Qmax_code))
1604 max_code = XUINT (value);
1607 else if (EQ (keyword, Qcode_offset))
1610 code_offset = XUINT (value);
1613 else if (EQ (keyword, Qconversion))
1615 if (EQ (value, Q94x60))
1616 conversion = CONVERSION_94x60;
1617 else if (EQ (value, Q94x94x60))
1618 conversion = CONVERSION_94x94x60;
1620 signal_simple_error ("Unrecognized conversion", value);
1624 else if (EQ (keyword, Qccl_program))
1626 struct ccl_program test_ccl;
1628 if (setup_ccl_program (&test_ccl, value) < 0)
1629 signal_simple_error ("Invalid value for 'ccl-program", value);
1630 ccl_program = value;
1634 signal_simple_error ("Unrecognized property", keyword);
1640 error ("'final must be specified");
1642 if (dimension == 2 && final > 0x5F)
1644 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1647 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1648 CHARSET_LEFT_TO_RIGHT)) ||
1649 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1650 CHARSET_RIGHT_TO_LEFT)))
1652 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1654 id = get_unallocated_leading_byte (dimension);
1656 if (NILP (doc_string))
1657 doc_string = build_string ("");
1659 if (NILP (registry))
1660 registry = build_string ("");
1662 if (NILP (short_name))
1663 XSETSTRING (short_name, XSYMBOL (name)->name);
1665 if (NILP (long_name))
1666 long_name = doc_string;
1669 columns = dimension;
1671 if (byte_offset < 0)
1675 else if (chars == 96)
1681 charset = make_charset (id, name, chars, dimension, columns, graphic,
1682 final, direction, short_name, long_name,
1683 doc_string, registry,
1684 Qnil, min_code, max_code, code_offset, byte_offset,
1685 mother, conversion);
1686 if (!NILP (ccl_program))
1687 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1691 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1693 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1694 NEW-NAME is the name of the new charset. Return the new charset.
1696 (charset, new_name))
1698 Lisp_Object new_charset = Qnil;
1699 int id, chars, dimension, columns, graphic, final;
1701 Lisp_Object registry, doc_string, short_name, long_name;
1704 charset = Fget_charset (charset);
1705 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1706 signal_simple_error ("Charset already has reverse-direction charset",
1709 CHECK_SYMBOL (new_name);
1710 if (!NILP (Ffind_charset (new_name)))
1711 signal_simple_error ("Cannot redefine existing charset", new_name);
1713 cs = XCHARSET (charset);
1715 chars = CHARSET_CHARS (cs);
1716 dimension = CHARSET_DIMENSION (cs);
1717 columns = CHARSET_COLUMNS (cs);
1718 id = get_unallocated_leading_byte (dimension);
1720 graphic = CHARSET_GRAPHIC (cs);
1721 final = CHARSET_FINAL (cs);
1722 direction = CHARSET_RIGHT_TO_LEFT;
1723 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1724 direction = CHARSET_LEFT_TO_RIGHT;
1725 doc_string = CHARSET_DOC_STRING (cs);
1726 short_name = CHARSET_SHORT_NAME (cs);
1727 long_name = CHARSET_LONG_NAME (cs);
1728 registry = CHARSET_REGISTRY (cs);
1730 new_charset = make_charset (id, new_name, chars, dimension, columns,
1731 graphic, final, direction, short_name, long_name,
1732 doc_string, registry,
1734 CHARSET_DECODING_TABLE(cs),
1735 CHARSET_MIN_CODE(cs),
1736 CHARSET_MAX_CODE(cs),
1737 CHARSET_CODE_OFFSET(cs),
1738 CHARSET_BYTE_OFFSET(cs),
1740 CHARSET_CONVERSION (cs)
1742 Qnil, 0, 0, 0, 0, Qnil, 0
1746 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1747 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1752 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1753 Define symbol ALIAS as an alias for CHARSET.
1757 CHECK_SYMBOL (alias);
1758 charset = Fget_charset (charset);
1759 return Fputhash (alias, charset, Vcharset_hash_table);
1762 /* #### Reverse direction charsets not yet implemented. */
1764 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1766 Return the reverse-direction charset parallel to CHARSET, if any.
1767 This is the charset with the same properties (in particular, the same
1768 dimension, number of characters per dimension, and final byte) as
1769 CHARSET but whose characters are displayed in the opposite direction.
1773 charset = Fget_charset (charset);
1774 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1778 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1779 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1780 If DIRECTION is omitted, both directions will be checked (left-to-right
1781 will be returned if character sets exist for both directions).
1783 (dimension, chars, final, direction))
1785 int dm, ch, fi, di = -1;
1786 Lisp_Object obj = Qnil;
1788 CHECK_INT (dimension);
1789 dm = XINT (dimension);
1790 if (dm < 1 || dm > 2)
1791 signal_simple_error ("Invalid value for DIMENSION", dimension);
1795 if (ch != 94 && ch != 96)
1796 signal_simple_error ("Invalid value for CHARS", chars);
1798 CHECK_CHAR_COERCE_INT (final);
1800 if (fi < '0' || fi > '~')
1801 signal_simple_error ("Invalid value for FINAL", final);
1803 if (EQ (direction, Ql2r))
1804 di = CHARSET_LEFT_TO_RIGHT;
1805 else if (EQ (direction, Qr2l))
1806 di = CHARSET_RIGHT_TO_LEFT;
1807 else if (!NILP (direction))
1808 signal_simple_error ("Invalid value for DIRECTION", direction);
1810 if (dm == 2 && fi > 0x5F)
1812 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1816 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1818 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1821 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1824 return XCHARSET_NAME (obj);
1828 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1829 Return short name of CHARSET.
1833 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1836 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1837 Return long name of CHARSET.
1841 return XCHARSET_LONG_NAME (Fget_charset (charset));
1844 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1845 Return description of CHARSET.
1849 return XCHARSET_DOC_STRING (Fget_charset (charset));
1852 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1853 Return dimension of CHARSET.
1857 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1860 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1861 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1862 Recognized properties are those listed in `make-charset', as well as
1863 'name and 'doc-string.
1869 charset = Fget_charset (charset);
1870 cs = XCHARSET (charset);
1872 CHECK_SYMBOL (prop);
1873 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1874 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1875 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1876 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1877 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1878 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1879 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1880 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1881 Qnil : make_char (CHARSET_FINAL (cs));
1882 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1883 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1884 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1885 if (EQ (prop, Qdirection))
1886 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1887 if (EQ (prop, Qreverse_direction_charset))
1889 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1890 /* #### Is this translation OK? If so, error checking sufficient? */
1891 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1894 if (EQ (prop, Qmother))
1895 return CHARSET_MOTHER (cs);
1896 if (EQ (prop, Qmin_code))
1897 return make_int (CHARSET_MIN_CODE (cs));
1898 if (EQ (prop, Qmax_code))
1899 return make_int (CHARSET_MAX_CODE (cs));
1901 signal_simple_error ("Unrecognized charset property name", prop);
1902 return Qnil; /* not reached */
1905 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1906 Return charset identification number of CHARSET.
1910 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1913 /* #### We need to figure out which properties we really want to
1916 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1917 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1919 (charset, ccl_program))
1921 struct ccl_program test_ccl;
1923 charset = Fget_charset (charset);
1924 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1925 signal_simple_error ("Invalid ccl-program", ccl_program);
1926 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1931 invalidate_charset_font_caches (Lisp_Object charset)
1933 /* Invalidate font cache entries for charset on all devices. */
1934 Lisp_Object devcons, concons, hash_table;
1935 DEVICE_LOOP_NO_BREAK (devcons, concons)
1937 struct device *d = XDEVICE (XCAR (devcons));
1938 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1939 if (!UNBOUNDP (hash_table))
1940 Fclrhash (hash_table);
1944 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1945 Set the 'registry property of CHARSET to REGISTRY.
1947 (charset, registry))
1949 charset = Fget_charset (charset);
1950 CHECK_STRING (registry);
1951 XCHARSET_REGISTRY (charset) = registry;
1952 invalidate_charset_font_caches (charset);
1953 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1958 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1959 Return mapping-table of CHARSET.
1963 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1966 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1967 Set mapping-table of CHARSET to TABLE.
1971 struct Lisp_Charset *cs;
1975 charset = Fget_charset (charset);
1976 cs = XCHARSET (charset);
1980 CHARSET_DECODING_TABLE(cs) = Qnil;
1983 else if (VECTORP (table))
1985 int ccs_len = CHARSET_BYTE_SIZE (cs);
1986 int ret = decoding_table_check_elements (table,
1987 CHARSET_DIMENSION (cs),
1992 signal_simple_error ("Too big table", table);
1994 signal_simple_error ("Invalid element is found", table);
1996 signal_simple_error ("Something wrong", table);
1998 CHARSET_DECODING_TABLE(cs) = Qnil;
2001 signal_error (Qwrong_type_argument,
2002 list2 (build_translated_string ("vector-or-nil-p"),
2005 byte_offset = CHARSET_BYTE_OFFSET (cs);
2006 switch (CHARSET_DIMENSION (cs))
2009 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2011 Lisp_Object c = XVECTOR_DATA(table)[i];
2014 Fput_char_attribute (c, XCHARSET_NAME (charset),
2015 make_int (i + byte_offset));
2019 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2021 Lisp_Object v = XVECTOR_DATA(table)[i];
2027 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2029 Lisp_Object c = XVECTOR_DATA(v)[j];
2033 (c, XCHARSET_NAME (charset),
2034 make_int ( ( (i + byte_offset) << 8 )
2040 Fput_char_attribute (v, XCHARSET_NAME (charset),
2041 make_int (i + byte_offset));
2050 /************************************************************************/
2051 /* Lisp primitives for working with characters */
2052 /************************************************************************/
2055 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2056 Make a character from CHARSET and code-point CODE.
2057 If DEFINED_ONLY is non-nil, builtin character is not returned.
2058 If corresponding character is not found, nil is returned.
2060 (charset, code, defined_only))
2064 charset = Fget_charset (charset);
2067 if (XCHARSET_GRAPHIC (charset) == 1)
2069 if (NILP (defined_only))
2070 c = DECODE_CHAR (charset, c);
2072 c = decode_defined_char (charset, c);
2073 return c >= 0 ? make_char (c) : Qnil;
2076 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2077 Make a builtin character from CHARSET and code-point CODE.
2083 charset = Fget_charset (charset);
2085 if (EQ (charset, Vcharset_latin_viscii))
2087 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2093 (ret = Fget_char_attribute (chr,
2094 Vcharset_latin_viscii_lower,
2097 charset = Vcharset_latin_viscii_lower;
2101 (ret = Fget_char_attribute (chr,
2102 Vcharset_latin_viscii_upper,
2105 charset = Vcharset_latin_viscii_upper;
2112 if (XCHARSET_GRAPHIC (charset) == 1)
2115 c = decode_builtin_char (charset, c);
2116 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2120 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2121 Make a character from CHARSET and octets ARG1 and ARG2.
2122 ARG2 is required only for characters from two-dimensional charsets.
2123 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2124 character s with caron.
2126 (charset, arg1, arg2))
2130 int lowlim, highlim;
2132 charset = Fget_charset (charset);
2133 cs = XCHARSET (charset);
2135 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2136 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2138 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2140 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2141 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2144 /* It is useful (and safe, according to Olivier Galibert) to strip
2145 the 8th bit off ARG1 and ARG2 because it allows programmers to
2146 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2147 Latin 2 code of the character. */
2155 if (a1 < lowlim || a1 > highlim)
2156 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2158 if (CHARSET_DIMENSION (cs) == 1)
2162 ("Charset is of dimension one; second octet must be nil", arg2);
2163 return make_char (MAKE_CHAR (charset, a1, 0));
2172 a2 = XINT (arg2) & 0x7f;
2174 if (a2 < lowlim || a2 > highlim)
2175 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2177 return make_char (MAKE_CHAR (charset, a1, a2));
2180 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2181 Return the character set of CHARACTER.
2185 CHECK_CHAR_COERCE_INT (character);
2187 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2190 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2191 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2192 N defaults to 0 if omitted.
2196 Lisp_Object charset;
2199 CHECK_CHAR_COERCE_INT (character);
2201 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2203 if (NILP (n) || EQ (n, Qzero))
2204 return make_int (octet0);
2205 else if (EQ (n, make_int (1)))
2206 return make_int (octet1);
2208 signal_simple_error ("Octet number must be 0 or 1", n);
2212 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2213 Return code-point of CHARACTER in specified CHARSET.
2215 (character, charset, defined_only))
2219 CHECK_CHAR_COERCE_INT (character);
2220 charset = Fget_charset (charset);
2221 code_point = charset_code_point (charset, XCHAR (character),
2222 !NILP (defined_only));
2223 if (code_point >= 0)
2224 return make_int (code_point);
2230 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2231 Return list of charset and one or two position-codes of CHARACTER.
2235 /* This function can GC */
2236 struct gcpro gcpro1, gcpro2;
2237 Lisp_Object charset = Qnil;
2238 Lisp_Object rc = Qnil;
2246 GCPRO2 (charset, rc);
2247 CHECK_CHAR_COERCE_INT (character);
2250 code_point = ENCODE_CHAR (XCHAR (character), charset);
2251 dimension = XCHARSET_DIMENSION (charset);
2252 while (dimension > 0)
2254 rc = Fcons (make_int (code_point & 255), rc);
2258 rc = Fcons (XCHARSET_NAME (charset), rc);
2260 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2262 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2264 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2268 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2277 #ifdef ENABLE_COMPOSITE_CHARS
2278 /************************************************************************/
2279 /* composite character functions */
2280 /************************************************************************/
2283 lookup_composite_char (Bufbyte *str, int len)
2285 Lisp_Object lispstr = make_string (str, len);
2286 Lisp_Object ch = Fgethash (lispstr,
2287 Vcomposite_char_string2char_hash_table,
2293 if (composite_char_row_next >= 128)
2294 signal_simple_error ("No more composite chars available", lispstr);
2295 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2296 composite_char_col_next);
2297 Fputhash (make_char (emch), lispstr,
2298 Vcomposite_char_char2string_hash_table);
2299 Fputhash (lispstr, make_char (emch),
2300 Vcomposite_char_string2char_hash_table);
2301 composite_char_col_next++;
2302 if (composite_char_col_next >= 128)
2304 composite_char_col_next = 32;
2305 composite_char_row_next++;
2314 composite_char_string (Emchar ch)
2316 Lisp_Object str = Fgethash (make_char (ch),
2317 Vcomposite_char_char2string_hash_table,
2319 assert (!UNBOUNDP (str));
2323 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2324 Convert a string into a single composite character.
2325 The character is the result of overstriking all the characters in
2330 CHECK_STRING (string);
2331 return make_char (lookup_composite_char (XSTRING_DATA (string),
2332 XSTRING_LENGTH (string)));
2335 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2336 Return a string of the characters comprising a composite character.
2344 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2345 signal_simple_error ("Must be composite char", ch);
2346 return composite_char_string (emch);
2348 #endif /* ENABLE_COMPOSITE_CHARS */
2351 /************************************************************************/
2352 /* initialization */
2353 /************************************************************************/
2356 syms_of_mule_charset (void)
2358 INIT_LRECORD_IMPLEMENTATION (charset);
2360 DEFSUBR (Fcharsetp);
2361 DEFSUBR (Ffind_charset);
2362 DEFSUBR (Fget_charset);
2363 DEFSUBR (Fcharset_list);
2364 DEFSUBR (Fcharset_name);
2365 DEFSUBR (Fmake_charset);
2366 DEFSUBR (Fmake_reverse_direction_charset);
2367 /* DEFSUBR (Freverse_direction_charset); */
2368 DEFSUBR (Fdefine_charset_alias);
2369 DEFSUBR (Fcharset_from_attributes);
2370 DEFSUBR (Fcharset_short_name);
2371 DEFSUBR (Fcharset_long_name);
2372 DEFSUBR (Fcharset_description);
2373 DEFSUBR (Fcharset_dimension);
2374 DEFSUBR (Fcharset_property);
2375 DEFSUBR (Fcharset_id);
2376 DEFSUBR (Fset_charset_ccl_program);
2377 DEFSUBR (Fset_charset_registry);
2379 DEFSUBR (Fcharset_mapping_table);
2380 DEFSUBR (Fset_charset_mapping_table);
2384 DEFSUBR (Fdecode_char);
2385 DEFSUBR (Fdecode_builtin_char);
2386 DEFSUBR (Fencode_char);
2388 DEFSUBR (Fmake_char);
2389 DEFSUBR (Fchar_charset);
2390 DEFSUBR (Fchar_octet);
2391 DEFSUBR (Fsplit_char);
2393 #ifdef ENABLE_COMPOSITE_CHARS
2394 DEFSUBR (Fmake_composite_char);
2395 DEFSUBR (Fcomposite_char_string);
2398 defsymbol (&Qcharsetp, "charsetp");
2399 defsymbol (&Qregistry, "registry");
2400 defsymbol (&Qfinal, "final");
2401 defsymbol (&Qgraphic, "graphic");
2402 defsymbol (&Qdirection, "direction");
2403 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2404 defsymbol (&Qshort_name, "short-name");
2405 defsymbol (&Qlong_name, "long-name");
2407 defsymbol (&Qmother, "mother");
2408 defsymbol (&Qmin_code, "min-code");
2409 defsymbol (&Qmax_code, "max-code");
2410 defsymbol (&Qcode_offset, "code-offset");
2411 defsymbol (&Qconversion, "conversion");
2412 defsymbol (&Q94x60, "94x60");
2413 defsymbol (&Q94x94x60, "94x94x60");
2416 defsymbol (&Ql2r, "l2r");
2417 defsymbol (&Qr2l, "r2l");
2419 /* Charsets, compatible with FSF 20.3
2420 Naming convention is Script-Charset[-Edition] */
2421 defsymbol (&Qascii, "ascii");
2422 defsymbol (&Qcontrol_1, "control-1");
2423 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2424 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2425 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2426 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2427 defsymbol (&Qthai_tis620, "thai-tis620");
2428 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2429 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2430 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2431 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2432 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2433 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2434 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2435 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2436 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2437 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2438 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2439 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2440 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2441 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2442 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2443 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2445 defsymbol (&Qucs, "ucs");
2446 defsymbol (&Qucs_bmp, "ucs-bmp");
2447 defsymbol (&Qucs_smp, "ucs-smp");
2448 defsymbol (&Qucs_sip, "ucs-sip");
2449 defsymbol (&Qucs_cns, "ucs-cns");
2450 defsymbol (&Qucs_jis, "ucs-jis");
2451 defsymbol (&Qucs_ks, "ucs-ks");
2452 defsymbol (&Qucs_big5, "ucs-big5");
2453 defsymbol (&Qlatin_viscii, "latin-viscii");
2454 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2455 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2456 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2457 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2458 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2459 defsymbol (&Qjis_x0208, "=jis-x0208");
2460 defsymbol (&Qideograph_gt, "ideograph-gt");
2461 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2462 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2463 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2464 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2465 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2466 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2467 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2468 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2469 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2470 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2471 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2472 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2473 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2474 defsymbol (&Qchinese_big5, "chinese-big5");
2475 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2476 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2477 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2478 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2479 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2480 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2481 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2482 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2483 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2484 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2485 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2486 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2487 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2488 defsymbol (&Qchina3_jef, "china3-jef");
2489 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2490 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2492 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2493 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2495 defsymbol (&Qcomposite, "composite");
2499 vars_of_mule_charset (void)
2506 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2507 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2509 /* Table of charsets indexed by leading byte. */
2510 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2511 chlook->charset_by_leading_byte[i] = Qnil;
2514 /* Table of charsets indexed by type/final-byte. */
2515 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2516 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2517 chlook->charset_by_attributes[i][j] = Qnil;
2519 /* Table of charsets indexed by type/final-byte/direction. */
2520 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2521 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2522 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2523 chlook->charset_by_attributes[i][j][k] = Qnil;
2527 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2529 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2530 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2534 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2535 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2536 Leading-code of private TYPE9N charset of column-width 1.
2538 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2542 Vdefault_coded_charset_priority_list = Qnil;
2543 DEFVAR_LISP ("default-coded-charset-priority-list",
2544 &Vdefault_coded_charset_priority_list /*
2545 Default order of preferred coded-character-sets.
2551 complex_vars_of_mule_charset (void)
2553 staticpro (&Vcharset_hash_table);
2554 Vcharset_hash_table =
2555 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2557 /* Predefined character sets. We store them into variables for
2561 staticpro (&Vcharset_ucs);
2563 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2564 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2565 build_string ("UCS"),
2566 build_string ("UCS"),
2567 build_string ("ISO/IEC 10646"),
2569 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2570 staticpro (&Vcharset_ucs_bmp);
2572 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2573 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2574 build_string ("BMP"),
2575 build_string ("UCS-BMP"),
2576 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2578 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2579 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2580 staticpro (&Vcharset_ucs_smp);
2582 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2583 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2584 build_string ("SMP"),
2585 build_string ("UCS-SMP"),
2586 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2587 build_string ("UCS00-1"),
2588 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2589 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2590 staticpro (&Vcharset_ucs_sip);
2592 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2593 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2594 build_string ("SIP"),
2595 build_string ("UCS-SIP"),
2596 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2597 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2598 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2599 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2600 staticpro (&Vcharset_ucs_cns);
2602 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2603 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2604 build_string ("UCS for CNS"),
2605 build_string ("UCS for CNS 11643"),
2606 build_string ("ISO/IEC 10646 for CNS 11643"),
2608 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2609 staticpro (&Vcharset_ucs_jis);
2611 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2612 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2613 build_string ("UCS for JIS"),
2614 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2616 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2618 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2619 staticpro (&Vcharset_ucs_ks);
2621 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2622 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2623 build_string ("UCS for KS"),
2624 build_string ("UCS for CCS defined by KS"),
2625 build_string ("ISO/IEC 10646 for Korean Standards"),
2627 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2628 staticpro (&Vcharset_ucs_big5);
2630 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2631 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2632 build_string ("UCS for Big5"),
2633 build_string ("UCS for Big5"),
2634 build_string ("ISO/IEC 10646 for Big5"),
2636 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2638 # define MIN_CHAR_THAI 0
2639 # define MAX_CHAR_THAI 0
2640 /* # define MIN_CHAR_HEBREW 0 */
2641 /* # define MAX_CHAR_HEBREW 0 */
2642 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2643 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2645 staticpro (&Vcharset_ascii);
2647 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2648 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2649 build_string ("ASCII"),
2650 build_string ("ASCII)"),
2651 build_string ("ASCII (ISO646 IRV)"),
2652 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2653 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2654 staticpro (&Vcharset_control_1);
2655 Vcharset_control_1 =
2656 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2657 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2658 build_string ("C1"),
2659 build_string ("Control characters"),
2660 build_string ("Control characters 128-191"),
2662 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2663 staticpro (&Vcharset_latin_iso8859_1);
2664 Vcharset_latin_iso8859_1 =
2665 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2666 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2667 build_string ("Latin-1"),
2668 build_string ("ISO8859-1 (Latin-1)"),
2669 build_string ("ISO8859-1 (Latin-1)"),
2670 build_string ("iso8859-1"),
2671 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2672 staticpro (&Vcharset_latin_iso8859_2);
2673 Vcharset_latin_iso8859_2 =
2674 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2675 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2676 build_string ("Latin-2"),
2677 build_string ("ISO8859-2 (Latin-2)"),
2678 build_string ("ISO8859-2 (Latin-2)"),
2679 build_string ("iso8859-2"),
2680 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2681 staticpro (&Vcharset_latin_iso8859_3);
2682 Vcharset_latin_iso8859_3 =
2683 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2684 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2685 build_string ("Latin-3"),
2686 build_string ("ISO8859-3 (Latin-3)"),
2687 build_string ("ISO8859-3 (Latin-3)"),
2688 build_string ("iso8859-3"),
2689 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2690 staticpro (&Vcharset_latin_iso8859_4);
2691 Vcharset_latin_iso8859_4 =
2692 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2693 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2694 build_string ("Latin-4"),
2695 build_string ("ISO8859-4 (Latin-4)"),
2696 build_string ("ISO8859-4 (Latin-4)"),
2697 build_string ("iso8859-4"),
2698 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2699 staticpro (&Vcharset_thai_tis620);
2700 Vcharset_thai_tis620 =
2701 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2702 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2703 build_string ("TIS620"),
2704 build_string ("TIS620 (Thai)"),
2705 build_string ("TIS620.2529 (Thai)"),
2706 build_string ("tis620"),
2707 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2708 staticpro (&Vcharset_greek_iso8859_7);
2709 Vcharset_greek_iso8859_7 =
2710 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2711 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2712 build_string ("ISO8859-7"),
2713 build_string ("ISO8859-7 (Greek)"),
2714 build_string ("ISO8859-7 (Greek)"),
2715 build_string ("iso8859-7"),
2716 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2717 staticpro (&Vcharset_arabic_iso8859_6);
2718 Vcharset_arabic_iso8859_6 =
2719 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2720 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2721 build_string ("ISO8859-6"),
2722 build_string ("ISO8859-6 (Arabic)"),
2723 build_string ("ISO8859-6 (Arabic)"),
2724 build_string ("iso8859-6"),
2725 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2726 staticpro (&Vcharset_hebrew_iso8859_8);
2727 Vcharset_hebrew_iso8859_8 =
2728 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2729 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2730 build_string ("ISO8859-8"),
2731 build_string ("ISO8859-8 (Hebrew)"),
2732 build_string ("ISO8859-8 (Hebrew)"),
2733 build_string ("iso8859-8"),
2735 0 /* MIN_CHAR_HEBREW */,
2736 0 /* MAX_CHAR_HEBREW */, 0, 32,
2737 Qnil, CONVERSION_IDENTICAL);
2738 staticpro (&Vcharset_katakana_jisx0201);
2739 Vcharset_katakana_jisx0201 =
2740 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2741 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2742 build_string ("JISX0201 Kana"),
2743 build_string ("JISX0201.1976 (Japanese Kana)"),
2744 build_string ("JISX0201.1976 Japanese Kana"),
2745 build_string ("jisx0201\\.1976"),
2746 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2747 staticpro (&Vcharset_latin_jisx0201);
2748 Vcharset_latin_jisx0201 =
2749 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2750 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("JISX0201 Roman"),
2752 build_string ("JISX0201.1976 (Japanese Roman)"),
2753 build_string ("JISX0201.1976 Japanese Roman"),
2754 build_string ("jisx0201\\.1976"),
2755 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2756 staticpro (&Vcharset_cyrillic_iso8859_5);
2757 Vcharset_cyrillic_iso8859_5 =
2758 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2759 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2760 build_string ("ISO8859-5"),
2761 build_string ("ISO8859-5 (Cyrillic)"),
2762 build_string ("ISO8859-5 (Cyrillic)"),
2763 build_string ("iso8859-5"),
2764 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2765 staticpro (&Vcharset_latin_iso8859_9);
2766 Vcharset_latin_iso8859_9 =
2767 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2768 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2769 build_string ("Latin-5"),
2770 build_string ("ISO8859-9 (Latin-5)"),
2771 build_string ("ISO8859-9 (Latin-5)"),
2772 build_string ("iso8859-9"),
2773 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2775 staticpro (&Vcharset_jis_x0208);
2776 Vcharset_jis_x0208 =
2777 make_charset (LEADING_BYTE_JIS_X0208,
2779 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2780 build_string ("JIS X0208"),
2781 build_string ("JIS X0208 Common"),
2782 build_string ("JIS X0208 Common part"),
2783 build_string ("jisx0208\\.1990"),
2785 MIN_CHAR_JIS_X0208_1990,
2786 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2787 Qnil, CONVERSION_94x94);
2789 staticpro (&Vcharset_japanese_jisx0208_1978);
2790 Vcharset_japanese_jisx0208_1978 =
2791 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2792 Qjapanese_jisx0208_1978, 94, 2,
2793 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2794 build_string ("JIS X0208:1978"),
2795 build_string ("JIS X0208:1978 (Japanese)"),
2797 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2798 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2805 CONVERSION_IDENTICAL);
2806 staticpro (&Vcharset_chinese_gb2312);
2807 Vcharset_chinese_gb2312 =
2808 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2809 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2810 build_string ("GB2312"),
2811 build_string ("GB2312)"),
2812 build_string ("GB2312 Chinese simplified"),
2813 build_string ("gb2312"),
2814 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2815 staticpro (&Vcharset_chinese_gb12345);
2816 Vcharset_chinese_gb12345 =
2817 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2818 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2819 build_string ("G1"),
2820 build_string ("GB 12345)"),
2821 build_string ("GB 12345-1990"),
2822 build_string ("GB12345\\(\\.1990\\)?-0"),
2823 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2824 staticpro (&Vcharset_japanese_jisx0208);
2825 Vcharset_japanese_jisx0208 =
2826 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2827 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2828 build_string ("JISX0208"),
2829 build_string ("JIS X0208:1983 (Japanese)"),
2830 build_string ("JIS X0208:1983 Japanese Kanji"),
2831 build_string ("jisx0208\\.1983"),
2838 CONVERSION_IDENTICAL);
2840 staticpro (&Vcharset_japanese_jisx0208_1990);
2841 Vcharset_japanese_jisx0208_1990 =
2842 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2843 Qjapanese_jisx0208_1990, 94, 2,
2844 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2845 build_string ("JISX0208-1990"),
2846 build_string ("JIS X0208:1990 (Japanese)"),
2847 build_string ("JIS X0208:1990 Japanese Kanji"),
2848 build_string ("jisx0208\\.1990"),
2850 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2851 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2852 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2853 Vcharset_jis_x0208 /* Qnil */,
2854 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2856 staticpro (&Vcharset_korean_ksc5601);
2857 Vcharset_korean_ksc5601 =
2858 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2859 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2860 build_string ("KSC5601"),
2861 build_string ("KSC5601 (Korean"),
2862 build_string ("KSC5601 Korean Hangul and Hanja"),
2863 build_string ("ksc5601"),
2864 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2865 staticpro (&Vcharset_japanese_jisx0212);
2866 Vcharset_japanese_jisx0212 =
2867 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2868 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2869 build_string ("JISX0212"),
2870 build_string ("JISX0212 (Japanese)"),
2871 build_string ("JISX0212 Japanese Supplement"),
2872 build_string ("jisx0212"),
2873 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2875 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2876 staticpro (&Vcharset_chinese_cns11643_1);
2877 Vcharset_chinese_cns11643_1 =
2878 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2879 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2880 build_string ("CNS11643-1"),
2881 build_string ("CNS11643-1 (Chinese traditional)"),
2883 ("CNS 11643 Plane 1 Chinese traditional"),
2884 build_string (CHINESE_CNS_PLANE_RE("1")),
2885 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2886 staticpro (&Vcharset_chinese_cns11643_2);
2887 Vcharset_chinese_cns11643_2 =
2888 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2889 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2890 build_string ("CNS11643-2"),
2891 build_string ("CNS11643-2 (Chinese traditional)"),
2893 ("CNS 11643 Plane 2 Chinese traditional"),
2894 build_string (CHINESE_CNS_PLANE_RE("2")),
2895 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2897 staticpro (&Vcharset_latin_tcvn5712);
2898 Vcharset_latin_tcvn5712 =
2899 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2900 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2901 build_string ("TCVN 5712"),
2902 build_string ("TCVN 5712 (VSCII-2)"),
2903 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2904 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2905 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2906 staticpro (&Vcharset_latin_viscii_lower);
2907 Vcharset_latin_viscii_lower =
2908 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2909 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2910 build_string ("VISCII lower"),
2911 build_string ("VISCII lower (Vietnamese)"),
2912 build_string ("VISCII lower (Vietnamese)"),
2913 build_string ("MULEVISCII-LOWER"),
2914 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2915 staticpro (&Vcharset_latin_viscii_upper);
2916 Vcharset_latin_viscii_upper =
2917 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2918 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2919 build_string ("VISCII upper"),
2920 build_string ("VISCII upper (Vietnamese)"),
2921 build_string ("VISCII upper (Vietnamese)"),
2922 build_string ("MULEVISCII-UPPER"),
2923 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2924 staticpro (&Vcharset_latin_viscii);
2925 Vcharset_latin_viscii =
2926 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2927 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2928 build_string ("VISCII"),
2929 build_string ("VISCII 1.1 (Vietnamese)"),
2930 build_string ("VISCII 1.1 (Vietnamese)"),
2931 build_string ("VISCII1\\.1"),
2932 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2933 staticpro (&Vcharset_chinese_big5);
2934 Vcharset_chinese_big5 =
2935 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2936 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2937 build_string ("Big5"),
2938 build_string ("Big5"),
2939 build_string ("Big5 Chinese traditional"),
2940 build_string ("big5-0"),
2942 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2943 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2945 staticpro (&Vcharset_chinese_big5_cdp);
2946 Vcharset_chinese_big5_cdp =
2947 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2948 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2949 build_string ("Big5-CDP"),
2950 build_string ("Big5 + CDP extension"),
2951 build_string ("Big5 with CDP extension"),
2952 build_string ("big5\\.cdp-0"),
2953 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2954 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2956 #define DEF_HANZIKU(n) \
2957 staticpro (&Vcharset_ideograph_hanziku_##n); \
2958 Vcharset_ideograph_hanziku_##n = \
2959 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2960 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2961 build_string ("HZK-"#n), \
2962 build_string ("HANZIKU-"#n), \
2963 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2965 ("hanziku-"#n"$"), \
2966 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2967 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2980 staticpro (&Vcharset_china3_jef);
2981 Vcharset_china3_jef =
2982 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2983 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2984 build_string ("JC3"),
2985 build_string ("JEF + CHINA3"),
2986 build_string ("JEF + CHINA3 private characters"),
2987 build_string ("china3jef-0"),
2988 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2989 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2990 staticpro (&Vcharset_ideograph_cbeta);
2991 Vcharset_ideograph_cbeta =
2992 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2993 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2994 build_string ("CB"),
2995 build_string ("CBETA"),
2996 build_string ("CBETA private characters"),
2997 build_string ("cbeta-0"),
2998 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2999 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3000 staticpro (&Vcharset_ideograph_gt);
3001 Vcharset_ideograph_gt =
3002 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3003 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3004 build_string ("GT"),
3005 build_string ("GT"),
3006 build_string ("GT"),
3008 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3009 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3010 #define DEF_GT_PJ(n) \
3011 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3012 Vcharset_ideograph_gt_pj_##n = \
3013 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3014 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3015 build_string ("GT-PJ-"#n), \
3016 build_string ("GT (pseudo JIS encoding) part "#n), \
3017 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3019 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3020 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3033 staticpro (&Vcharset_ideograph_daikanwa_2);
3034 Vcharset_ideograph_daikanwa_2 =
3035 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3036 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3037 build_string ("Daikanwa Rev."),
3038 build_string ("Morohashi's Daikanwa Rev."),
3040 ("Daikanwa dictionary (revised version)"),
3041 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3042 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3043 staticpro (&Vcharset_ideograph_daikanwa);
3044 Vcharset_ideograph_daikanwa =
3045 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3046 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3047 build_string ("Daikanwa"),
3048 build_string ("Morohashi's Daikanwa Rev.2"),
3050 ("Daikanwa dictionary (second revised version)"),
3051 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3052 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3053 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3055 staticpro (&Vcharset_ethiopic_ucs);
3056 Vcharset_ethiopic_ucs =
3057 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3058 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3059 build_string ("Ethiopic (UCS)"),
3060 build_string ("Ethiopic (UCS)"),
3061 build_string ("Ethiopic of UCS"),
3062 build_string ("Ethiopic-Unicode"),
3063 Qnil, 0x1200, 0x137F, 0, 0,
3064 Qnil, CONVERSION_IDENTICAL);
3066 staticpro (&Vcharset_chinese_big5_1);
3067 Vcharset_chinese_big5_1 =
3068 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3069 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3070 build_string ("Big5"),
3071 build_string ("Big5 (Level-1)"),
3073 ("Big5 Level-1 Chinese traditional"),
3074 build_string ("big5"),
3075 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3076 staticpro (&Vcharset_chinese_big5_2);
3077 Vcharset_chinese_big5_2 =
3078 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3079 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3080 build_string ("Big5"),
3081 build_string ("Big5 (Level-2)"),
3083 ("Big5 Level-2 Chinese traditional"),
3084 build_string ("big5"),
3085 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3087 #ifdef ENABLE_COMPOSITE_CHARS
3088 /* #### For simplicity, we put composite chars into a 96x96 charset.
3089 This is going to lead to problems because you can run out of
3090 room, esp. as we don't yet recycle numbers. */
3091 staticpro (&Vcharset_composite);
3092 Vcharset_composite =
3093 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3094 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3095 build_string ("Composite"),
3096 build_string ("Composite characters"),
3097 build_string ("Composite characters"),
3100 /* #### not dumped properly */
3101 composite_char_row_next = 32;
3102 composite_char_col_next = 32;
3104 Vcomposite_char_string2char_hash_table =
3105 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3106 Vcomposite_char_char2string_hash_table =
3107 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3108 staticpro (&Vcomposite_char_string2char_hash_table);
3109 staticpro (&Vcomposite_char_char2string_hash_table);
3110 #endif /* ENABLE_COMPOSITE_CHARS */