1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
354 Qideograph_hanziku_1,
355 Qideograph_hanziku_2,
356 Qideograph_hanziku_3,
357 Qideograph_hanziku_4,
358 Qideograph_hanziku_5,
359 Qideograph_hanziku_6,
360 Qideograph_hanziku_7,
361 Qideograph_hanziku_8,
362 Qideograph_hanziku_9,
363 Qideograph_hanziku_10,
364 Qideograph_hanziku_11,
365 Qideograph_hanziku_12,
368 Qideograph_daikanwa_2,
388 Lisp_Object Ql2r, Qr2l;
390 Lisp_Object Vcharset_hash_table;
392 /* Composite characters are characters constructed by overstriking two
393 or more regular characters.
395 1) The old Mule implementation involves storing composite characters
396 in a buffer as a tag followed by all of the actual characters
397 used to make up the composite character. I think this is a bad
398 idea; it greatly complicates code that wants to handle strings
399 one character at a time because it has to deal with the possibility
400 of great big ungainly characters. It's much more reasonable to
401 simply store an index into a table of composite characters.
403 2) The current implementation only allows for 16,384 separate
404 composite characters over the lifetime of the XEmacs process.
405 This could become a potential problem if the user
406 edited lots of different files that use composite characters.
407 Due to FSF bogosity, increasing the number of allowable
408 composite characters under Mule would decrease the number
409 of possible faces that can exist. Mule already has shrunk
410 this to 2048, and further shrinkage would become uncomfortable.
411 No such problems exist in XEmacs.
413 Composite characters could be represented as 0x80 C1 C2 C3,
414 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
415 for slightly under 2^20 (one million) composite characters
416 over the XEmacs process lifetime, and you only need to
417 increase the size of a Mule character from 19 to 21 bits.
418 Or you could use 0x80 C1 C2 C3 C4, allowing for about
419 85 million (slightly over 2^26) composite characters. */
422 /************************************************************************/
423 /* Basic Emchar functions */
424 /************************************************************************/
426 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
427 string in STR. Returns the number of bytes stored.
428 Do not call this directly. Use the macro set_charptr_emchar() instead.
432 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
447 else if ( c <= 0x7ff )
449 *p++ = (c >> 6) | 0xc0;
450 *p++ = (c & 0x3f) | 0x80;
452 else if ( c <= 0xffff )
454 *p++ = (c >> 12) | 0xe0;
455 *p++ = ((c >> 6) & 0x3f) | 0x80;
456 *p++ = (c & 0x3f) | 0x80;
458 else if ( c <= 0x1fffff )
460 *p++ = (c >> 18) | 0xf0;
461 *p++ = ((c >> 12) & 0x3f) | 0x80;
462 *p++ = ((c >> 6) & 0x3f) | 0x80;
463 *p++ = (c & 0x3f) | 0x80;
465 else if ( c <= 0x3ffffff )
467 *p++ = (c >> 24) | 0xf8;
468 *p++ = ((c >> 18) & 0x3f) | 0x80;
469 *p++ = ((c >> 12) & 0x3f) | 0x80;
470 *p++ = ((c >> 6) & 0x3f) | 0x80;
471 *p++ = (c & 0x3f) | 0x80;
475 *p++ = (c >> 30) | 0xfc;
476 *p++ = ((c >> 24) & 0x3f) | 0x80;
477 *p++ = ((c >> 18) & 0x3f) | 0x80;
478 *p++ = ((c >> 12) & 0x3f) | 0x80;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
483 BREAKUP_CHAR (c, charset, c1, c2);
484 lb = CHAR_LEADING_BYTE (c);
485 if (LEADING_BYTE_PRIVATE_P (lb))
486 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
488 if (EQ (charset, Vcharset_control_1))
497 /* Return the first character from a Mule-encoded string in STR,
498 assuming it's non-ASCII. Do not call this directly.
499 Use the macro charptr_emchar() instead. */
502 non_ascii_charptr_emchar (const Bufbyte *str)
515 else if ( b >= 0xf8 )
520 else if ( b >= 0xf0 )
525 else if ( b >= 0xe0 )
530 else if ( b >= 0xc0 )
540 for( ; len > 0; len-- )
543 ch = ( ch << 6 ) | ( b & 0x3f );
547 Bufbyte i0 = *str, i1, i2 = 0;
550 if (i0 == LEADING_BYTE_CONTROL_1)
551 return (Emchar) (*++str - 0x20);
553 if (LEADING_BYTE_PREFIX_P (i0))
558 charset = CHARSET_BY_LEADING_BYTE (i0);
559 if (XCHARSET_DIMENSION (charset) == 2)
562 return MAKE_CHAR (charset, i1, i2);
566 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
567 Do not call this directly. Use the macro valid_char_p() instead. */
571 non_ascii_valid_char_p (Emchar ch)
575 /* Must have only lowest 19 bits set */
579 f1 = CHAR_FIELD1 (ch);
580 f2 = CHAR_FIELD2 (ch);
581 f3 = CHAR_FIELD3 (ch);
587 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
588 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
589 f2 > MAX_CHAR_FIELD2_PRIVATE)
594 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
595 f2 <= MAX_CHAR_FIELD2_PRIVATE))
599 NOTE: This takes advantage of the fact that
600 FIELD2_TO_OFFICIAL_LEADING_BYTE and
601 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
603 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
604 if (EQ (charset, Qnil))
606 return (XCHARSET_CHARS (charset) == 96);
612 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
613 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
614 f1 > MAX_CHAR_FIELD1_PRIVATE)
616 if (f2 < 0x20 || f3 < 0x20)
619 #ifdef ENABLE_COMPOSITE_CHARS
620 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
622 if (UNBOUNDP (Fgethash (make_int (ch),
623 Vcomposite_char_char2string_hash_table,
628 #endif /* ENABLE_COMPOSITE_CHARS */
630 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
631 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
634 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
636 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
639 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
641 if (EQ (charset, Qnil))
643 return (XCHARSET_CHARS (charset) == 96);
649 /************************************************************************/
650 /* Basic string functions */
651 /************************************************************************/
653 /* Copy the character pointed to by SRC into DST. Do not call this
654 directly. Use the macro charptr_copy_char() instead.
655 Return the number of bytes copied. */
658 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
660 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
662 for (i = bytes; i; i--, dst++, src++)
668 /************************************************************************/
669 /* streams of Emchars */
670 /************************************************************************/
672 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
673 The functions below are not meant to be called directly; use
674 the macros in insdel.h. */
677 Lstream_get_emchar_1 (Lstream *stream, int ch)
679 Bufbyte str[MAX_EMCHAR_LEN];
680 Bufbyte *strptr = str;
683 str[0] = (Bufbyte) ch;
685 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
687 int c = Lstream_getc (stream);
688 bufpos_checking_assert (c >= 0);
689 *++strptr = (Bufbyte) c;
691 return charptr_emchar (str);
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
716 mark_charset (Lisp_Object obj)
718 Lisp_Charset *cs = XCHARSET (obj);
720 mark_object (cs->short_name);
721 mark_object (cs->long_name);
722 mark_object (cs->doc_string);
723 mark_object (cs->registry);
724 mark_object (cs->ccl_program);
726 mark_object (cs->decoding_table);
727 mark_object (cs->mother);
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 Lisp_Charset *cs = XCHARSET (obj);
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
751 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
753 CHARSET_DIMENSION (cs),
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
779 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
780 mark_charset, print_charset, 0, 0, 0,
784 /* Make a new charset. */
785 /* #### SJT Should generic properties be allowed? */
787 make_charset (Charset_ID id, Lisp_Object name,
788 unsigned short chars, unsigned char dimension,
789 unsigned char columns, unsigned char graphic,
790 Bufbyte final, unsigned char direction, Lisp_Object short_name,
791 Lisp_Object long_name, Lisp_Object doc,
793 Lisp_Object decoding_table,
794 Emchar min_code, Emchar max_code,
795 Emchar code_offset, unsigned char byte_offset,
796 Lisp_Object mother, unsigned char conversion)
799 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
803 XSETCHARSET (obj, cs);
805 CHARSET_ID (cs) = id;
806 CHARSET_NAME (cs) = name;
807 CHARSET_SHORT_NAME (cs) = short_name;
808 CHARSET_LONG_NAME (cs) = long_name;
809 CHARSET_CHARS (cs) = chars;
810 CHARSET_DIMENSION (cs) = dimension;
811 CHARSET_DIRECTION (cs) = direction;
812 CHARSET_COLUMNS (cs) = columns;
813 CHARSET_GRAPHIC (cs) = graphic;
814 CHARSET_FINAL (cs) = final;
815 CHARSET_DOC_STRING (cs) = doc;
816 CHARSET_REGISTRY (cs) = reg;
817 CHARSET_CCL_PROGRAM (cs) = Qnil;
818 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
820 CHARSET_DECODING_TABLE(cs) = Qunbound;
821 CHARSET_MIN_CODE (cs) = min_code;
822 CHARSET_MAX_CODE (cs) = max_code;
823 CHARSET_CODE_OFFSET (cs) = code_offset;
824 CHARSET_BYTE_OFFSET (cs) = byte_offset;
825 CHARSET_MOTHER (cs) = mother;
826 CHARSET_CONVERSION (cs) = conversion;
830 if (id == LEADING_BYTE_ASCII)
831 CHARSET_REP_BYTES (cs) = 1;
833 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
840 /* some charsets do not have final characters. This includes
841 ASCII, Control-1, Composite, and the two faux private
843 unsigned char iso2022_type
844 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
846 if (code_offset == 0)
848 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
849 chlook->charset_by_attributes[iso2022_type][final] = obj;
853 (chlook->charset_by_attributes[iso2022_type][final][direction]));
854 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
858 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
859 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
861 /* Some charsets are "faux" and don't have names or really exist at
862 all except in the leading-byte table. */
864 Fputhash (name, obj, Vcharset_hash_table);
869 get_unallocated_leading_byte (int dimension)
874 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
877 lb = chlook->next_allocated_leading_byte++;
881 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
884 lb = chlook->next_allocated_1_byte_leading_byte++;
888 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
891 lb = chlook->next_allocated_2_byte_leading_byte++;
897 ("No more character sets free for this dimension",
898 make_int (dimension));
904 /* Number of Big5 characters which have the same code in 1st byte. */
906 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
909 decode_builtin_char (Lisp_Object charset, int code_point)
911 Lisp_Object mother = XCHARSET_MOTHER (charset);
914 if ( CHARSETP (mother) )
916 int code = code_point;
918 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
920 int row = code_point >> 8;
921 int cell = code_point & 255;
925 else if (row < 16 + 32 + 30)
926 code = (row - (16 + 32)) * 94 + cell - 33;
927 else if (row < 18 + 32 + 30)
929 else if (row < 18 + 32 + 60)
930 code = (row - (18 + 32)) * 94 + cell - 33;
932 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
934 int plane = code_point >> 16;
935 int row = (code_point >> 8) & 255;
936 int cell = code_point & 255;
940 else if (row < 16 + 32 + 30)
942 = (plane - 33) * 94 * 60
943 + (row - (16 + 32)) * 94
945 else if (row < 18 + 32 + 30)
947 else if (row < 18 + 32 + 60)
949 = (plane - 33) * 94 * 60
950 + (row - (18 + 32)) * 94
953 return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
955 else if (EQ (charset, Vcharset_chinese_big5))
957 int c1 = code_point >> 8;
958 int c2 = code_point & 0xFF;
961 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
963 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
964 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
966 I = (c1 - 0xA1) * BIG5_SAME_ROW
967 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
971 charset = Vcharset_chinese_big5_1;
975 charset = Vcharset_chinese_big5_2;
976 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
978 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
981 if ((final = XCHARSET_FINAL (charset)) >= '0')
983 if (XCHARSET_DIMENSION (charset) == 1)
985 switch (XCHARSET_CHARS (charset))
989 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
992 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1000 switch (XCHARSET_CHARS (charset))
1003 return MIN_CHAR_94x94
1004 + (final - '0') * 94 * 94
1005 + (((code_point >> 8) & 0x7F) - 33) * 94
1006 + ((code_point & 0x7F) - 33);
1008 return MIN_CHAR_96x96
1009 + (final - '0') * 96 * 96
1010 + (((code_point >> 8) & 0x7F) - 32) * 96
1011 + ((code_point & 0x7F) - 32);
1018 else if (XCHARSET_MAX_CODE (charset))
1021 = (XCHARSET_DIMENSION (charset) == 1
1023 code_point - XCHARSET_BYTE_OFFSET (charset)
1025 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1026 * XCHARSET_CHARS (charset)
1027 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1028 + XCHARSET_CODE_OFFSET (charset);
1029 if ((cid < XCHARSET_MIN_CODE (charset))
1030 || (XCHARSET_MAX_CODE (charset) < cid))
1039 charset_code_point (Lisp_Object charset, Emchar ch)
1041 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1044 if ( CHAR_TABLEP (encoding_table)
1045 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1050 Lisp_Object mother = XCHARSET_MOTHER (charset);
1051 int min = XCHARSET_MIN_CODE (charset);
1052 int max = XCHARSET_MAX_CODE (charset);
1055 if ( CHARSETP (mother) )
1056 code = charset_code_point (mother, ch);
1059 if ( (min <= code) && (code <= max) )
1061 int d = code - XCHARSET_CODE_OFFSET (charset);
1063 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1066 int cell = d % 94 + 33;
1072 return (row << 8) | cell;
1074 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1076 int plane = d / (94 * 60) + 33;
1077 int row = (d % (94 * 60)) / 94;
1078 int cell = d % 94 + 33;
1084 return (plane << 16) | (row << 8) | cell;
1086 else if (XCHARSET_CHARS (charset) == 94)
1088 if (XCHARSET_DIMENSION (charset) == 1)
1090 else if (XCHARSET_DIMENSION (charset) == 2)
1091 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1092 else if (XCHARSET_DIMENSION (charset) == 3)
1094 ( (d / (94 * 94) + 33) << 16)
1095 | ((d / 94 % 94 + 33) << 8)
1097 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1099 ( (d / (94 * 94 * 94) + 33) << 24)
1100 | ((d / (94 * 94) % 94 + 33) << 16)
1101 | ((d / 94 % 94 + 33) << 8)
1104 else if (XCHARSET_CHARS (charset) == 96)
1106 if (XCHARSET_DIMENSION (charset) == 1)
1108 else if (XCHARSET_DIMENSION (charset) == 2)
1109 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1110 else if (XCHARSET_DIMENSION (charset) == 3)
1112 ( (d / (96 * 96) + 32) << 16)
1113 | ((d / 96 % 96 + 32) << 8)
1115 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1117 ( (d / (96 * 96 * 96) + 32) << 24)
1118 | ((d / (96 * 96) % 96 + 32) << 16)
1119 | ((d / 96 % 96 + 32) << 8)
1123 return code - XCHARSET_CODE_OFFSET (charset);
1125 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1126 (XCHARSET_CODE_OFFSET (charset)
1127 == XCHARSET_MIN_CODE (charset)) )
1131 if (XCHARSET_DIMENSION (charset) == 1)
1133 if (XCHARSET_CHARS (charset) == 94)
1135 if (((d = ch - (MIN_CHAR_94
1136 + (XCHARSET_FINAL (charset) - '0') * 94))
1141 else if (XCHARSET_CHARS (charset) == 96)
1143 if (((d = ch - (MIN_CHAR_96
1144 + (XCHARSET_FINAL (charset) - '0') * 96))
1152 else if (XCHARSET_DIMENSION (charset) == 2)
1154 if (XCHARSET_CHARS (charset) == 94)
1156 if (((d = ch - (MIN_CHAR_94x94
1158 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1161 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1163 else if (XCHARSET_CHARS (charset) == 96)
1165 if (((d = ch - (MIN_CHAR_96x96
1167 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1170 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1181 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1183 if (c <= MAX_CHAR_BASIC_LATIN)
1185 *charset = Vcharset_ascii;
1190 *charset = Vcharset_control_1;
1195 *charset = Vcharset_latin_iso8859_1;
1199 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1201 *charset = Vcharset_hebrew_iso8859_8;
1202 return c - MIN_CHAR_HEBREW + 0x20;
1205 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1207 *charset = Vcharset_thai_tis620;
1208 return c - MIN_CHAR_THAI + 0x20;
1211 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1212 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1214 return list2 (Vcharset_katakana_jisx0201,
1215 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1218 else if (c <= MAX_CHAR_BMP)
1220 *charset = Vcharset_ucs_bmp;
1223 else if (c <= MAX_CHAR_SMP)
1225 *charset = Vcharset_ucs_smp;
1226 return c - MIN_CHAR_SMP;
1228 else if (c <= MAX_CHAR_SIP)
1230 *charset = Vcharset_ucs_sip;
1231 return c - MIN_CHAR_SIP;
1233 else if (c < MIN_CHAR_DAIKANWA)
1235 *charset = Vcharset_ucs;
1238 else if (c <= MAX_CHAR_DAIKANWA)
1240 *charset = Vcharset_ideograph_daikanwa;
1241 return c - MIN_CHAR_DAIKANWA;
1243 else if (c < MIN_CHAR_94)
1245 *charset = Vcharset_ucs;
1248 else if (c <= MAX_CHAR_94)
1250 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1251 ((c - MIN_CHAR_94) / 94) + '0',
1252 CHARSET_LEFT_TO_RIGHT);
1253 if (!NILP (*charset))
1254 return ((c - MIN_CHAR_94) % 94) + 33;
1257 *charset = Vcharset_ucs;
1261 else if (c <= MAX_CHAR_96)
1263 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1264 ((c - MIN_CHAR_96) / 96) + '0',
1265 CHARSET_LEFT_TO_RIGHT);
1266 if (!NILP (*charset))
1267 return ((c - MIN_CHAR_96) % 96) + 32;
1270 *charset = Vcharset_ucs;
1274 else if (c <= MAX_CHAR_94x94)
1277 = CHARSET_BY_ATTRIBUTES (94, 2,
1278 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1279 CHARSET_LEFT_TO_RIGHT);
1280 if (!NILP (*charset))
1281 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1282 | (((c - MIN_CHAR_94x94) % 94) + 33);
1285 *charset = Vcharset_ucs;
1289 else if (c <= MAX_CHAR_96x96)
1292 = CHARSET_BY_ATTRIBUTES (96, 2,
1293 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1294 CHARSET_LEFT_TO_RIGHT);
1295 if (!NILP (*charset))
1296 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1297 | (((c - MIN_CHAR_96x96) % 96) + 32);
1300 *charset = Vcharset_ucs;
1306 *charset = Vcharset_ucs;
1311 Lisp_Object Vdefault_coded_charset_priority_list;
1315 /************************************************************************/
1316 /* Basic charset Lisp functions */
1317 /************************************************************************/
1319 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1320 Return non-nil if OBJECT is a charset.
1324 return CHARSETP (object) ? Qt : Qnil;
1327 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1328 Retrieve the charset of the given name.
1329 If CHARSET-OR-NAME is a charset object, it is simply returned.
1330 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1331 nil is returned. Otherwise the associated charset object is returned.
1335 if (CHARSETP (charset_or_name))
1336 return charset_or_name;
1338 CHECK_SYMBOL (charset_or_name);
1339 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1342 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1343 Retrieve the charset of the given name.
1344 Same as `find-charset' except an error is signalled if there is no such
1345 charset instead of returning nil.
1349 Lisp_Object charset = Ffind_charset (name);
1352 signal_simple_error ("No such charset", name);
1356 /* We store the charsets in hash tables with the names as the key and the
1357 actual charset object as the value. Occasionally we need to use them
1358 in a list format. These routines provide us with that. */
1359 struct charset_list_closure
1361 Lisp_Object *charset_list;
1365 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1366 void *charset_list_closure)
1368 /* This function can GC */
1369 struct charset_list_closure *chcl =
1370 (struct charset_list_closure*) charset_list_closure;
1371 Lisp_Object *charset_list = chcl->charset_list;
1373 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1377 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1378 Return a list of the names of all defined charsets.
1382 Lisp_Object charset_list = Qnil;
1383 struct gcpro gcpro1;
1384 struct charset_list_closure charset_list_closure;
1386 GCPRO1 (charset_list);
1387 charset_list_closure.charset_list = &charset_list;
1388 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1389 &charset_list_closure);
1392 return charset_list;
1395 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1396 Return the name of charset CHARSET.
1400 return XCHARSET_NAME (Fget_charset (charset));
1403 /* #### SJT Should generic properties be allowed? */
1404 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1405 Define a new character set.
1406 This function is for use with Mule support.
1407 NAME is a symbol, the name by which the character set is normally referred.
1408 DOC-STRING is a string describing the character set.
1409 PROPS is a property list, describing the specific nature of the
1410 character set. Recognized properties are:
1412 'short-name Short version of the charset name (ex: Latin-1)
1413 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1414 'registry A regular expression matching the font registry field for
1416 'dimension Number of octets used to index a character in this charset.
1417 Either 1 or 2. Defaults to 1.
1418 If UTF-2000 feature is enabled, 3 or 4 are also available.
1419 'columns Number of columns used to display a character in this charset.
1420 Only used in TTY mode. (Under X, the actual width of a
1421 character can be derived from the font used to display the
1422 characters.) If unspecified, defaults to the dimension
1423 (this is almost always the correct value).
1424 'chars Number of characters in each dimension (94 or 96).
1425 Defaults to 94. Note that if the dimension is 2, the
1426 character set thus described is 94x94 or 96x96.
1427 If UTF-2000 feature is enabled, 128 or 256 are also available.
1428 'final Final byte of ISO 2022 escape sequence. Must be
1429 supplied. Each combination of (DIMENSION, CHARS) defines a
1430 separate namespace for final bytes. Note that ISO
1431 2022 restricts the final byte to the range
1432 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1433 dimension == 2. Note also that final bytes in the range
1434 0x30 - 0x3F are reserved for user-defined (not official)
1436 'graphic 0 (use left half of font on output) or 1 (use right half
1437 of font on output). Defaults to 0. For example, for
1438 a font whose registry is ISO8859-1, the left half
1439 (octets 0x20 - 0x7F) is the `ascii' character set, while
1440 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1441 character set. With 'graphic set to 0, the octets
1442 will have their high bit cleared; with it set to 1,
1443 the octets will have their high bit set.
1444 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1446 'ccl-program A compiled CCL program used to convert a character in
1447 this charset into an index into the font. This is in
1448 addition to the 'graphic property. The CCL program
1449 is passed the octets of the character, with the high
1450 bit cleared and set depending upon whether the value
1451 of the 'graphic property is 0 or 1.
1452 'mother [UTF-2000 only] Base coded-charset.
1453 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1454 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1455 'code-offset [UTF-2000 only] Offset for a code-point of a base
1457 'conversion [UTF-2000 only] Conversion for a code-point of a base
1458 coded-charset (94x60 or 94x94x60).
1460 (name, doc_string, props))
1462 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1463 int direction = CHARSET_LEFT_TO_RIGHT;
1464 Lisp_Object registry = Qnil;
1465 Lisp_Object charset;
1466 Lisp_Object ccl_program = Qnil;
1467 Lisp_Object short_name = Qnil, long_name = Qnil;
1468 Lisp_Object mother = Qnil;
1469 int min_code = 0, max_code = 0, code_offset = 0;
1470 int byte_offset = -1;
1473 CHECK_SYMBOL (name);
1474 if (!NILP (doc_string))
1475 CHECK_STRING (doc_string);
1477 charset = Ffind_charset (name);
1478 if (!NILP (charset))
1479 signal_simple_error ("Cannot redefine existing charset", name);
1482 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1484 if (EQ (keyword, Qshort_name))
1486 CHECK_STRING (value);
1490 if (EQ (keyword, Qlong_name))
1492 CHECK_STRING (value);
1496 else if (EQ (keyword, Qdimension))
1499 dimension = XINT (value);
1500 if (dimension < 1 ||
1507 signal_simple_error ("Invalid value for 'dimension", value);
1510 else if (EQ (keyword, Qchars))
1513 chars = XINT (value);
1514 if (chars != 94 && chars != 96
1516 && chars != 128 && chars != 256
1519 signal_simple_error ("Invalid value for 'chars", value);
1522 else if (EQ (keyword, Qcolumns))
1525 columns = XINT (value);
1526 if (columns != 1 && columns != 2)
1527 signal_simple_error ("Invalid value for 'columns", value);
1530 else if (EQ (keyword, Qgraphic))
1533 graphic = XINT (value);
1541 signal_simple_error ("Invalid value for 'graphic", value);
1544 else if (EQ (keyword, Qregistry))
1546 CHECK_STRING (value);
1550 else if (EQ (keyword, Qdirection))
1552 if (EQ (value, Ql2r))
1553 direction = CHARSET_LEFT_TO_RIGHT;
1554 else if (EQ (value, Qr2l))
1555 direction = CHARSET_RIGHT_TO_LEFT;
1557 signal_simple_error ("Invalid value for 'direction", value);
1560 else if (EQ (keyword, Qfinal))
1562 CHECK_CHAR_COERCE_INT (value);
1563 final = XCHAR (value);
1564 if (final < '0' || final > '~')
1565 signal_simple_error ("Invalid value for 'final", value);
1569 else if (EQ (keyword, Qmother))
1571 mother = Fget_charset (value);
1574 else if (EQ (keyword, Qmin_code))
1577 min_code = XUINT (value);
1580 else if (EQ (keyword, Qmax_code))
1583 max_code = XUINT (value);
1586 else if (EQ (keyword, Qcode_offset))
1589 code_offset = XUINT (value);
1592 else if (EQ (keyword, Qconversion))
1594 if (EQ (value, Q94x60))
1595 conversion = CONVERSION_94x60;
1596 else if (EQ (value, Q94x94x60))
1597 conversion = CONVERSION_94x94x60;
1599 signal_simple_error ("Unrecognized conversion", value);
1603 else if (EQ (keyword, Qccl_program))
1605 struct ccl_program test_ccl;
1607 if (setup_ccl_program (&test_ccl, value) < 0)
1608 signal_simple_error ("Invalid value for 'ccl-program", value);
1609 ccl_program = value;
1613 signal_simple_error ("Unrecognized property", keyword);
1619 error ("'final must be specified");
1621 if (dimension == 2 && final > 0x5F)
1623 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1626 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1627 CHARSET_LEFT_TO_RIGHT)) ||
1628 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1629 CHARSET_RIGHT_TO_LEFT)))
1631 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1633 id = get_unallocated_leading_byte (dimension);
1635 if (NILP (doc_string))
1636 doc_string = build_string ("");
1638 if (NILP (registry))
1639 registry = build_string ("");
1641 if (NILP (short_name))
1642 XSETSTRING (short_name, XSYMBOL (name)->name);
1644 if (NILP (long_name))
1645 long_name = doc_string;
1648 columns = dimension;
1650 if (byte_offset < 0)
1654 else if (chars == 96)
1660 charset = make_charset (id, name, chars, dimension, columns, graphic,
1661 final, direction, short_name, long_name,
1662 doc_string, registry,
1663 Qnil, min_code, max_code, code_offset, byte_offset,
1664 mother, conversion);
1665 if (!NILP (ccl_program))
1666 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1670 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1672 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1673 NEW-NAME is the name of the new charset. Return the new charset.
1675 (charset, new_name))
1677 Lisp_Object new_charset = Qnil;
1678 int id, chars, dimension, columns, graphic, final;
1680 Lisp_Object registry, doc_string, short_name, long_name;
1683 charset = Fget_charset (charset);
1684 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1685 signal_simple_error ("Charset already has reverse-direction charset",
1688 CHECK_SYMBOL (new_name);
1689 if (!NILP (Ffind_charset (new_name)))
1690 signal_simple_error ("Cannot redefine existing charset", new_name);
1692 cs = XCHARSET (charset);
1694 chars = CHARSET_CHARS (cs);
1695 dimension = CHARSET_DIMENSION (cs);
1696 columns = CHARSET_COLUMNS (cs);
1697 id = get_unallocated_leading_byte (dimension);
1699 graphic = CHARSET_GRAPHIC (cs);
1700 final = CHARSET_FINAL (cs);
1701 direction = CHARSET_RIGHT_TO_LEFT;
1702 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1703 direction = CHARSET_LEFT_TO_RIGHT;
1704 doc_string = CHARSET_DOC_STRING (cs);
1705 short_name = CHARSET_SHORT_NAME (cs);
1706 long_name = CHARSET_LONG_NAME (cs);
1707 registry = CHARSET_REGISTRY (cs);
1709 new_charset = make_charset (id, new_name, chars, dimension, columns,
1710 graphic, final, direction, short_name, long_name,
1711 doc_string, registry,
1713 CHARSET_DECODING_TABLE(cs),
1714 CHARSET_MIN_CODE(cs),
1715 CHARSET_MAX_CODE(cs),
1716 CHARSET_CODE_OFFSET(cs),
1717 CHARSET_BYTE_OFFSET(cs),
1719 CHARSET_CONVERSION (cs)
1721 Qnil, 0, 0, 0, 0, Qnil, 0
1725 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1726 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1731 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1732 Define symbol ALIAS as an alias for CHARSET.
1736 CHECK_SYMBOL (alias);
1737 charset = Fget_charset (charset);
1738 return Fputhash (alias, charset, Vcharset_hash_table);
1741 /* #### Reverse direction charsets not yet implemented. */
1743 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1745 Return the reverse-direction charset parallel to CHARSET, if any.
1746 This is the charset with the same properties (in particular, the same
1747 dimension, number of characters per dimension, and final byte) as
1748 CHARSET but whose characters are displayed in the opposite direction.
1752 charset = Fget_charset (charset);
1753 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1757 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1758 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1759 If DIRECTION is omitted, both directions will be checked (left-to-right
1760 will be returned if character sets exist for both directions).
1762 (dimension, chars, final, direction))
1764 int dm, ch, fi, di = -1;
1765 Lisp_Object obj = Qnil;
1767 CHECK_INT (dimension);
1768 dm = XINT (dimension);
1769 if (dm < 1 || dm > 2)
1770 signal_simple_error ("Invalid value for DIMENSION", dimension);
1774 if (ch != 94 && ch != 96)
1775 signal_simple_error ("Invalid value for CHARS", chars);
1777 CHECK_CHAR_COERCE_INT (final);
1779 if (fi < '0' || fi > '~')
1780 signal_simple_error ("Invalid value for FINAL", final);
1782 if (EQ (direction, Ql2r))
1783 di = CHARSET_LEFT_TO_RIGHT;
1784 else if (EQ (direction, Qr2l))
1785 di = CHARSET_RIGHT_TO_LEFT;
1786 else if (!NILP (direction))
1787 signal_simple_error ("Invalid value for DIRECTION", direction);
1789 if (dm == 2 && fi > 0x5F)
1791 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1795 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1797 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1800 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1803 return XCHARSET_NAME (obj);
1807 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1808 Return short name of CHARSET.
1812 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1815 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1816 Return long name of CHARSET.
1820 return XCHARSET_LONG_NAME (Fget_charset (charset));
1823 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1824 Return description of CHARSET.
1828 return XCHARSET_DOC_STRING (Fget_charset (charset));
1831 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1832 Return dimension of CHARSET.
1836 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1839 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1840 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1841 Recognized properties are those listed in `make-charset', as well as
1842 'name and 'doc-string.
1848 charset = Fget_charset (charset);
1849 cs = XCHARSET (charset);
1851 CHECK_SYMBOL (prop);
1852 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1853 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1854 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1855 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1856 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1857 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1858 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1859 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1860 Qnil : make_char (CHARSET_FINAL (cs));
1861 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1862 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1863 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1864 if (EQ (prop, Qdirection))
1865 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1866 if (EQ (prop, Qreverse_direction_charset))
1868 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1869 /* #### Is this translation OK? If so, error checking sufficient? */
1870 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1873 if (EQ (prop, Qmother))
1874 return CHARSET_MOTHER (cs);
1875 if (EQ (prop, Qmin_code))
1876 return make_int (CHARSET_MIN_CODE (cs));
1877 if (EQ (prop, Qmax_code))
1878 return make_int (CHARSET_MAX_CODE (cs));
1880 signal_simple_error ("Unrecognized charset property name", prop);
1881 return Qnil; /* not reached */
1884 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1885 Return charset identification number of CHARSET.
1889 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1892 /* #### We need to figure out which properties we really want to
1895 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1896 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1898 (charset, ccl_program))
1900 struct ccl_program test_ccl;
1902 charset = Fget_charset (charset);
1903 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1904 signal_simple_error ("Invalid ccl-program", ccl_program);
1905 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1910 invalidate_charset_font_caches (Lisp_Object charset)
1912 /* Invalidate font cache entries for charset on all devices. */
1913 Lisp_Object devcons, concons, hash_table;
1914 DEVICE_LOOP_NO_BREAK (devcons, concons)
1916 struct device *d = XDEVICE (XCAR (devcons));
1917 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1918 if (!UNBOUNDP (hash_table))
1919 Fclrhash (hash_table);
1923 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1924 Set the 'registry property of CHARSET to REGISTRY.
1926 (charset, registry))
1928 charset = Fget_charset (charset);
1929 CHECK_STRING (registry);
1930 XCHARSET_REGISTRY (charset) = registry;
1931 invalidate_charset_font_caches (charset);
1932 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1937 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1938 Return mapping-table of CHARSET.
1942 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1945 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1946 Set mapping-table of CHARSET to TABLE.
1950 struct Lisp_Charset *cs;
1954 charset = Fget_charset (charset);
1955 cs = XCHARSET (charset);
1959 CHARSET_DECODING_TABLE(cs) = Qnil;
1962 else if (VECTORP (table))
1964 int ccs_len = CHARSET_BYTE_SIZE (cs);
1965 int ret = decoding_table_check_elements (table,
1966 CHARSET_DIMENSION (cs),
1971 signal_simple_error ("Too big table", table);
1973 signal_simple_error ("Invalid element is found", table);
1975 signal_simple_error ("Something wrong", table);
1977 CHARSET_DECODING_TABLE(cs) = Qnil;
1980 signal_error (Qwrong_type_argument,
1981 list2 (build_translated_string ("vector-or-nil-p"),
1984 byte_offset = CHARSET_BYTE_OFFSET (cs);
1985 switch (CHARSET_DIMENSION (cs))
1988 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1990 Lisp_Object c = XVECTOR_DATA(table)[i];
1993 Fput_char_attribute (c, XCHARSET_NAME (charset),
1994 make_int (i + byte_offset));
1998 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2000 Lisp_Object v = XVECTOR_DATA(table)[i];
2006 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2008 Lisp_Object c = XVECTOR_DATA(v)[j];
2012 (c, XCHARSET_NAME (charset),
2013 make_int ( ( (i + byte_offset) << 8 )
2019 Fput_char_attribute (v, XCHARSET_NAME (charset),
2020 make_int (i + byte_offset));
2027 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2028 Save mapping-table of CHARSET.
2032 #ifdef HAVE_DATABASE
2033 struct Lisp_Charset *cs;
2034 int byte_min, byte_max;
2036 Lisp_Object db_file;
2038 charset = Fget_charset (charset);
2039 cs = XCHARSET (charset);
2041 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2042 Qsystem_char_id, 1);
2043 db = Fopen_database (db_file, Qnil, Qnil, Qnil, Qnil);
2045 byte_min = CHARSET_BYTE_OFFSET (cs);
2046 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2047 switch (CHARSET_DIMENSION (cs))
2051 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2054 for (cell = byte_min; cell < byte_max; cell++)
2056 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2059 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2060 Fprin1_to_string (c, Qnil),
2067 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2070 for (row = byte_min; row < byte_max; row++)
2072 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2075 for (cell = byte_min; cell < byte_max; cell++)
2077 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2080 Fput_database (Fprin1_to_string (make_int ((row << 8)
2083 Fprin1_to_string (c, Qnil),
2091 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2094 for (plane = byte_min; plane < byte_max; plane++)
2097 = get_ccs_octet_table (table_p, charset, plane);
2100 for (row = byte_min; row < byte_max; row++)
2103 = get_ccs_octet_table (table_r, charset, row);
2106 for (cell = byte_min; cell < byte_max; cell++)
2108 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2112 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2116 Fprin1_to_string (c, Qnil),
2125 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2128 for (group = byte_min; group < byte_max; group++)
2131 = get_ccs_octet_table (table_g, charset, group);
2134 for (plane = byte_min; plane < byte_max; plane++)
2137 = get_ccs_octet_table (table_p, charset, plane);
2140 for (row = byte_min; row < byte_max; row++)
2143 = get_ccs_octet_table (table_r, charset, row);
2146 for (cell = byte_min; cell < byte_max; cell++)
2149 = get_ccs_octet_table (table_c, charset, cell);
2152 Fput_database (Fprin1_to_string
2153 (make_int (( group << 24)
2158 Fprin1_to_string (c, Qnil),
2166 return Fclose_database (db);
2172 #ifdef HAVE_CHISE_CLIENT
2174 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2178 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2181 db = Fopen_database (db_file, Qnil, Qnil, Qnil, Qnil);
2185 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2192 decoding_table_put_char (ccs, code_point, ret);
2193 Fclose_database (db);
2197 Fclose_database (db);
2201 #endif /* HAVE_CHISE_CLIENT */
2202 #endif /* UTF2000 */
2205 /************************************************************************/
2206 /* Lisp primitives for working with characters */
2207 /************************************************************************/
2210 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2211 Make a character from CHARSET and code-point CODE.
2212 If DEFINED_ONLY is non-nil, builtin character is not returned.
2213 If corresponding character is not found, nil is returned.
2215 (charset, code, defined_only))
2219 charset = Fget_charset (charset);
2222 if (XCHARSET_GRAPHIC (charset) == 1)
2224 if (NILP (defined_only))
2225 c = DECODE_CHAR (charset, c);
2227 c = DECODE_DEFINED_CHAR (charset, c);
2228 return c >= 0 ? make_char (c) : Qnil;
2231 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2232 Make a builtin character from CHARSET and code-point CODE.
2238 charset = Fget_charset (charset);
2240 if (EQ (charset, Vcharset_latin_viscii))
2242 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2248 (ret = Fget_char_attribute (chr,
2249 Vcharset_latin_viscii_lower,
2252 charset = Vcharset_latin_viscii_lower;
2256 (ret = Fget_char_attribute (chr,
2257 Vcharset_latin_viscii_upper,
2260 charset = Vcharset_latin_viscii_upper;
2267 if (XCHARSET_GRAPHIC (charset) == 1)
2270 c = decode_builtin_char (charset, c);
2271 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2275 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2276 Make a character from CHARSET and octets ARG1 and ARG2.
2277 ARG2 is required only for characters from two-dimensional charsets.
2278 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2279 character s with caron.
2281 (charset, arg1, arg2))
2285 int lowlim, highlim;
2287 charset = Fget_charset (charset);
2288 cs = XCHARSET (charset);
2290 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2291 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2293 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2295 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2296 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2299 /* It is useful (and safe, according to Olivier Galibert) to strip
2300 the 8th bit off ARG1 and ARG2 because it allows programmers to
2301 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2302 Latin 2 code of the character. */
2310 if (a1 < lowlim || a1 > highlim)
2311 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2313 if (CHARSET_DIMENSION (cs) == 1)
2317 ("Charset is of dimension one; second octet must be nil", arg2);
2318 return make_char (MAKE_CHAR (charset, a1, 0));
2327 a2 = XINT (arg2) & 0x7f;
2329 if (a2 < lowlim || a2 > highlim)
2330 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2332 return make_char (MAKE_CHAR (charset, a1, a2));
2335 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2336 Return the character set of CHARACTER.
2340 CHECK_CHAR_COERCE_INT (character);
2342 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2345 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2346 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2347 N defaults to 0 if omitted.
2351 Lisp_Object charset;
2354 CHECK_CHAR_COERCE_INT (character);
2356 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2358 if (NILP (n) || EQ (n, Qzero))
2359 return make_int (octet0);
2360 else if (EQ (n, make_int (1)))
2361 return make_int (octet1);
2363 signal_simple_error ("Octet number must be 0 or 1", n);
2367 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2368 Return code-point of CHARACTER in specified CHARSET.
2370 (character, charset))
2374 CHECK_CHAR_COERCE_INT (character);
2375 charset = Fget_charset (charset);
2376 code_point = charset_code_point (charset, XCHAR (character));
2377 if (code_point >= 0)
2378 return make_int (code_point);
2384 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2385 Return list of charset and one or two position-codes of CHARACTER.
2389 /* This function can GC */
2390 struct gcpro gcpro1, gcpro2;
2391 Lisp_Object charset = Qnil;
2392 Lisp_Object rc = Qnil;
2400 GCPRO2 (charset, rc);
2401 CHECK_CHAR_COERCE_INT (character);
2404 code_point = ENCODE_CHAR (XCHAR (character), charset);
2405 dimension = XCHARSET_DIMENSION (charset);
2406 while (dimension > 0)
2408 rc = Fcons (make_int (code_point & 255), rc);
2412 rc = Fcons (XCHARSET_NAME (charset), rc);
2414 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2416 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2418 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2422 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2431 #ifdef ENABLE_COMPOSITE_CHARS
2432 /************************************************************************/
2433 /* composite character functions */
2434 /************************************************************************/
2437 lookup_composite_char (Bufbyte *str, int len)
2439 Lisp_Object lispstr = make_string (str, len);
2440 Lisp_Object ch = Fgethash (lispstr,
2441 Vcomposite_char_string2char_hash_table,
2447 if (composite_char_row_next >= 128)
2448 signal_simple_error ("No more composite chars available", lispstr);
2449 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2450 composite_char_col_next);
2451 Fputhash (make_char (emch), lispstr,
2452 Vcomposite_char_char2string_hash_table);
2453 Fputhash (lispstr, make_char (emch),
2454 Vcomposite_char_string2char_hash_table);
2455 composite_char_col_next++;
2456 if (composite_char_col_next >= 128)
2458 composite_char_col_next = 32;
2459 composite_char_row_next++;
2468 composite_char_string (Emchar ch)
2470 Lisp_Object str = Fgethash (make_char (ch),
2471 Vcomposite_char_char2string_hash_table,
2473 assert (!UNBOUNDP (str));
2477 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2478 Convert a string into a single composite character.
2479 The character is the result of overstriking all the characters in
2484 CHECK_STRING (string);
2485 return make_char (lookup_composite_char (XSTRING_DATA (string),
2486 XSTRING_LENGTH (string)));
2489 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2490 Return a string of the characters comprising a composite character.
2498 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2499 signal_simple_error ("Must be composite char", ch);
2500 return composite_char_string (emch);
2502 #endif /* ENABLE_COMPOSITE_CHARS */
2505 /************************************************************************/
2506 /* initialization */
2507 /************************************************************************/
2510 syms_of_mule_charset (void)
2512 INIT_LRECORD_IMPLEMENTATION (charset);
2514 DEFSUBR (Fcharsetp);
2515 DEFSUBR (Ffind_charset);
2516 DEFSUBR (Fget_charset);
2517 DEFSUBR (Fcharset_list);
2518 DEFSUBR (Fcharset_name);
2519 DEFSUBR (Fmake_charset);
2520 DEFSUBR (Fmake_reverse_direction_charset);
2521 /* DEFSUBR (Freverse_direction_charset); */
2522 DEFSUBR (Fdefine_charset_alias);
2523 DEFSUBR (Fcharset_from_attributes);
2524 DEFSUBR (Fcharset_short_name);
2525 DEFSUBR (Fcharset_long_name);
2526 DEFSUBR (Fcharset_description);
2527 DEFSUBR (Fcharset_dimension);
2528 DEFSUBR (Fcharset_property);
2529 DEFSUBR (Fcharset_id);
2530 DEFSUBR (Fset_charset_ccl_program);
2531 DEFSUBR (Fset_charset_registry);
2533 DEFSUBR (Fcharset_mapping_table);
2534 DEFSUBR (Fset_charset_mapping_table);
2538 DEFSUBR (Fdecode_char);
2539 DEFSUBR (Fdecode_builtin_char);
2540 DEFSUBR (Fencode_char);
2541 DEFSUBR (Fsave_charset_mapping_table);
2543 DEFSUBR (Fmake_char);
2544 DEFSUBR (Fchar_charset);
2545 DEFSUBR (Fchar_octet);
2546 DEFSUBR (Fsplit_char);
2548 #ifdef ENABLE_COMPOSITE_CHARS
2549 DEFSUBR (Fmake_composite_char);
2550 DEFSUBR (Fcomposite_char_string);
2553 defsymbol (&Qcharsetp, "charsetp");
2554 defsymbol (&Qregistry, "registry");
2555 defsymbol (&Qfinal, "final");
2556 defsymbol (&Qgraphic, "graphic");
2557 defsymbol (&Qdirection, "direction");
2558 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2559 defsymbol (&Qshort_name, "short-name");
2560 defsymbol (&Qlong_name, "long-name");
2562 defsymbol (&Qmother, "mother");
2563 defsymbol (&Qmin_code, "min-code");
2564 defsymbol (&Qmax_code, "max-code");
2565 defsymbol (&Qcode_offset, "code-offset");
2566 defsymbol (&Qconversion, "conversion");
2567 defsymbol (&Q94x60, "94x60");
2568 defsymbol (&Q94x94x60, "94x94x60");
2571 defsymbol (&Ql2r, "l2r");
2572 defsymbol (&Qr2l, "r2l");
2574 /* Charsets, compatible with FSF 20.3
2575 Naming convention is Script-Charset[-Edition] */
2576 defsymbol (&Qascii, "ascii");
2577 defsymbol (&Qcontrol_1, "control-1");
2578 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2579 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2580 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2581 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2582 defsymbol (&Qthai_tis620, "thai-tis620");
2583 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2584 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2585 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2586 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2587 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2588 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2589 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2590 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2591 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2592 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2593 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2594 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2595 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2596 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2597 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2598 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2600 defsymbol (&Qucs, "ucs");
2601 defsymbol (&Qucs_bmp, "ucs-bmp");
2602 defsymbol (&Qucs_smp, "ucs-smp");
2603 defsymbol (&Qucs_sip, "ucs-sip");
2604 defsymbol (&Qucs_cns, "ucs-cns");
2605 defsymbol (&Qucs_jis, "ucs-jis");
2606 defsymbol (&Qucs_ks, "ucs-ks");
2607 defsymbol (&Qucs_big5, "ucs-big5");
2608 defsymbol (&Qlatin_viscii, "latin-viscii");
2609 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2610 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2611 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2612 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2613 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2614 defsymbol (&Qideograph_gt, "ideograph-gt");
2615 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2616 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2617 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2618 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2619 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2620 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2621 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2622 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2623 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2624 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2625 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2626 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2627 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2628 defsymbol (&Qchinese_big5, "chinese-big5");
2629 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2630 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2631 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2632 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2633 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2634 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2635 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2636 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2637 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2638 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2639 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2640 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2641 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2642 defsymbol (&Qchina3_jef, "china3-jef");
2643 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2644 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2646 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2647 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2649 defsymbol (&Qcomposite, "composite");
2653 vars_of_mule_charset (void)
2660 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2661 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2663 /* Table of charsets indexed by leading byte. */
2664 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2665 chlook->charset_by_leading_byte[i] = Qnil;
2668 /* Table of charsets indexed by type/final-byte. */
2669 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2670 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2671 chlook->charset_by_attributes[i][j] = Qnil;
2673 /* Table of charsets indexed by type/final-byte/direction. */
2674 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2675 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2676 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2677 chlook->charset_by_attributes[i][j][k] = Qnil;
2681 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2683 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2684 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2688 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2689 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2690 Leading-code of private TYPE9N charset of column-width 1.
2692 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2696 Vdefault_coded_charset_priority_list = Qnil;
2697 DEFVAR_LISP ("default-coded-charset-priority-list",
2698 &Vdefault_coded_charset_priority_list /*
2699 Default order of preferred coded-character-sets.
2705 complex_vars_of_mule_charset (void)
2707 staticpro (&Vcharset_hash_table);
2708 Vcharset_hash_table =
2709 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2711 /* Predefined character sets. We store them into variables for
2715 staticpro (&Vcharset_ucs);
2717 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2718 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2719 build_string ("UCS"),
2720 build_string ("UCS"),
2721 build_string ("ISO/IEC 10646"),
2723 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2724 staticpro (&Vcharset_ucs_bmp);
2726 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2727 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2728 build_string ("BMP"),
2729 build_string ("UCS-BMP"),
2730 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2732 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2733 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2734 staticpro (&Vcharset_ucs_smp);
2736 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2737 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2738 build_string ("SMP"),
2739 build_string ("UCS-SMP"),
2740 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2741 build_string ("UCS00-1"),
2742 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2743 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2744 staticpro (&Vcharset_ucs_sip);
2746 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2747 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2748 build_string ("SIP"),
2749 build_string ("UCS-SIP"),
2750 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2751 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2752 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2753 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2754 staticpro (&Vcharset_ucs_cns);
2756 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2757 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2758 build_string ("UCS for CNS"),
2759 build_string ("UCS for CNS 11643"),
2760 build_string ("ISO/IEC 10646 for CNS 11643"),
2763 Qnil, CONVERSION_IDENTICAL);
2764 staticpro (&Vcharset_ucs_jis);
2766 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2767 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2768 build_string ("UCS for JIS"),
2769 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2770 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2772 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2773 staticpro (&Vcharset_ucs_ks);
2775 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2776 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2777 build_string ("UCS for KS"),
2778 build_string ("UCS for CCS defined by KS"),
2779 build_string ("ISO/IEC 10646 for Korean Standards"),
2781 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2782 staticpro (&Vcharset_ucs_big5);
2784 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2785 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2786 build_string ("UCS for Big5"),
2787 build_string ("UCS for Big5"),
2788 build_string ("ISO/IEC 10646 for Big5"),
2790 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2792 # define MIN_CHAR_THAI 0
2793 # define MAX_CHAR_THAI 0
2794 /* # define MIN_CHAR_HEBREW 0 */
2795 /* # define MAX_CHAR_HEBREW 0 */
2796 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2797 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2799 staticpro (&Vcharset_ascii);
2801 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2802 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2803 build_string ("ASCII"),
2804 build_string ("ASCII)"),
2805 build_string ("ASCII (ISO646 IRV)"),
2806 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2807 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2808 staticpro (&Vcharset_control_1);
2809 Vcharset_control_1 =
2810 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2811 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("C1"),
2813 build_string ("Control characters"),
2814 build_string ("Control characters 128-191"),
2816 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_latin_iso8859_1);
2818 Vcharset_latin_iso8859_1 =
2819 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2820 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2821 build_string ("Latin-1"),
2822 build_string ("ISO8859-1 (Latin-1)"),
2823 build_string ("ISO8859-1 (Latin-1)"),
2824 build_string ("iso8859-1"),
2825 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_latin_iso8859_2);
2827 Vcharset_latin_iso8859_2 =
2828 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2829 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("Latin-2"),
2831 build_string ("ISO8859-2 (Latin-2)"),
2832 build_string ("ISO8859-2 (Latin-2)"),
2833 build_string ("iso8859-2"),
2834 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2835 staticpro (&Vcharset_latin_iso8859_3);
2836 Vcharset_latin_iso8859_3 =
2837 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2838 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2839 build_string ("Latin-3"),
2840 build_string ("ISO8859-3 (Latin-3)"),
2841 build_string ("ISO8859-3 (Latin-3)"),
2842 build_string ("iso8859-3"),
2843 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2844 staticpro (&Vcharset_latin_iso8859_4);
2845 Vcharset_latin_iso8859_4 =
2846 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2847 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2848 build_string ("Latin-4"),
2849 build_string ("ISO8859-4 (Latin-4)"),
2850 build_string ("ISO8859-4 (Latin-4)"),
2851 build_string ("iso8859-4"),
2852 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_thai_tis620);
2854 Vcharset_thai_tis620 =
2855 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2856 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2857 build_string ("TIS620"),
2858 build_string ("TIS620 (Thai)"),
2859 build_string ("TIS620.2529 (Thai)"),
2860 build_string ("tis620"),
2861 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2862 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2863 staticpro (&Vcharset_greek_iso8859_7);
2864 Vcharset_greek_iso8859_7 =
2865 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2866 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2867 build_string ("ISO8859-7"),
2868 build_string ("ISO8859-7 (Greek)"),
2869 build_string ("ISO8859-7 (Greek)"),
2870 build_string ("iso8859-7"),
2871 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_arabic_iso8859_6);
2873 Vcharset_arabic_iso8859_6 =
2874 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2875 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2876 build_string ("ISO8859-6"),
2877 build_string ("ISO8859-6 (Arabic)"),
2878 build_string ("ISO8859-6 (Arabic)"),
2879 build_string ("iso8859-6"),
2880 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_hebrew_iso8859_8);
2882 Vcharset_hebrew_iso8859_8 =
2883 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2884 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2885 build_string ("ISO8859-8"),
2886 build_string ("ISO8859-8 (Hebrew)"),
2887 build_string ("ISO8859-8 (Hebrew)"),
2888 build_string ("iso8859-8"),
2890 0 /* MIN_CHAR_HEBREW */,
2891 0 /* MAX_CHAR_HEBREW */, 0, 32,
2892 Qnil, CONVERSION_IDENTICAL);
2893 staticpro (&Vcharset_katakana_jisx0201);
2894 Vcharset_katakana_jisx0201 =
2895 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2896 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2897 build_string ("JISX0201 Kana"),
2898 build_string ("JISX0201.1976 (Japanese Kana)"),
2899 build_string ("JISX0201.1976 Japanese Kana"),
2900 build_string ("jisx0201\\.1976"),
2901 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2902 staticpro (&Vcharset_latin_jisx0201);
2903 Vcharset_latin_jisx0201 =
2904 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2905 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2906 build_string ("JISX0201 Roman"),
2907 build_string ("JISX0201.1976 (Japanese Roman)"),
2908 build_string ("JISX0201.1976 Japanese Roman"),
2909 build_string ("jisx0201\\.1976"),
2910 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2911 staticpro (&Vcharset_cyrillic_iso8859_5);
2912 Vcharset_cyrillic_iso8859_5 =
2913 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2914 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2915 build_string ("ISO8859-5"),
2916 build_string ("ISO8859-5 (Cyrillic)"),
2917 build_string ("ISO8859-5 (Cyrillic)"),
2918 build_string ("iso8859-5"),
2919 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2920 staticpro (&Vcharset_latin_iso8859_9);
2921 Vcharset_latin_iso8859_9 =
2922 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2923 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2924 build_string ("Latin-5"),
2925 build_string ("ISO8859-9 (Latin-5)"),
2926 build_string ("ISO8859-9 (Latin-5)"),
2927 build_string ("iso8859-9"),
2928 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2929 staticpro (&Vcharset_japanese_jisx0208_1978);
2930 Vcharset_japanese_jisx0208_1978 =
2931 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2932 Qjapanese_jisx0208_1978, 94, 2,
2933 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2934 build_string ("JIS X0208:1978"),
2935 build_string ("JIS X0208:1978 (Japanese)"),
2937 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2938 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2939 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2940 staticpro (&Vcharset_chinese_gb2312);
2941 Vcharset_chinese_gb2312 =
2942 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2943 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2944 build_string ("GB2312"),
2945 build_string ("GB2312)"),
2946 build_string ("GB2312 Chinese simplified"),
2947 build_string ("gb2312"),
2948 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2949 staticpro (&Vcharset_chinese_gb12345);
2950 Vcharset_chinese_gb12345 =
2951 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2952 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2953 build_string ("G1"),
2954 build_string ("GB 12345)"),
2955 build_string ("GB 12345-1990"),
2956 build_string ("GB12345\\(\\.1990\\)?-0"),
2957 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2958 staticpro (&Vcharset_japanese_jisx0208);
2959 Vcharset_japanese_jisx0208 =
2960 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2961 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2962 build_string ("JISX0208"),
2963 build_string ("JIS X0208:1983 (Japanese)"),
2964 build_string ("JIS X0208:1983 Japanese Kanji"),
2965 build_string ("jisx0208\\.1983"),
2966 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2968 staticpro (&Vcharset_japanese_jisx0208_1990);
2969 Vcharset_japanese_jisx0208_1990 =
2970 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2971 Qjapanese_jisx0208_1990, 94, 2,
2972 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2973 build_string ("JISX0208-1990"),
2974 build_string ("JIS X0208:1990 (Japanese)"),
2975 build_string ("JIS X0208:1990 Japanese Kanji"),
2976 build_string ("jisx0208\\.1990"),
2978 MIN_CHAR_JIS_X0208_1990,
2979 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2980 Qnil, CONVERSION_IDENTICAL);
2982 staticpro (&Vcharset_korean_ksc5601);
2983 Vcharset_korean_ksc5601 =
2984 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2985 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2986 build_string ("KSC5601"),
2987 build_string ("KSC5601 (Korean"),
2988 build_string ("KSC5601 Korean Hangul and Hanja"),
2989 build_string ("ksc5601"),
2990 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2991 staticpro (&Vcharset_japanese_jisx0212);
2992 Vcharset_japanese_jisx0212 =
2993 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2994 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2995 build_string ("JISX0212"),
2996 build_string ("JISX0212 (Japanese)"),
2997 build_string ("JISX0212 Japanese Supplement"),
2998 build_string ("jisx0212"),
2999 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3001 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3002 staticpro (&Vcharset_chinese_cns11643_1);
3003 Vcharset_chinese_cns11643_1 =
3004 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3005 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3006 build_string ("CNS11643-1"),
3007 build_string ("CNS11643-1 (Chinese traditional)"),
3009 ("CNS 11643 Plane 1 Chinese traditional"),
3010 build_string (CHINESE_CNS_PLANE_RE("1")),
3011 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3012 staticpro (&Vcharset_chinese_cns11643_2);
3013 Vcharset_chinese_cns11643_2 =
3014 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3015 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("CNS11643-2"),
3017 build_string ("CNS11643-2 (Chinese traditional)"),
3019 ("CNS 11643 Plane 2 Chinese traditional"),
3020 build_string (CHINESE_CNS_PLANE_RE("2")),
3021 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3023 staticpro (&Vcharset_latin_tcvn5712);
3024 Vcharset_latin_tcvn5712 =
3025 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3026 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3027 build_string ("TCVN 5712"),
3028 build_string ("TCVN 5712 (VSCII-2)"),
3029 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3030 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3031 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3032 staticpro (&Vcharset_latin_viscii_lower);
3033 Vcharset_latin_viscii_lower =
3034 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3035 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3036 build_string ("VISCII lower"),
3037 build_string ("VISCII lower (Vietnamese)"),
3038 build_string ("VISCII lower (Vietnamese)"),
3039 build_string ("MULEVISCII-LOWER"),
3040 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3041 staticpro (&Vcharset_latin_viscii_upper);
3042 Vcharset_latin_viscii_upper =
3043 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3044 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("VISCII upper"),
3046 build_string ("VISCII upper (Vietnamese)"),
3047 build_string ("VISCII upper (Vietnamese)"),
3048 build_string ("MULEVISCII-UPPER"),
3049 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3050 staticpro (&Vcharset_latin_viscii);
3051 Vcharset_latin_viscii =
3052 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3053 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3054 build_string ("VISCII"),
3055 build_string ("VISCII 1.1 (Vietnamese)"),
3056 build_string ("VISCII 1.1 (Vietnamese)"),
3057 build_string ("VISCII1\\.1"),
3058 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3059 staticpro (&Vcharset_chinese_big5);
3060 Vcharset_chinese_big5 =
3061 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3062 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Big5"),
3064 build_string ("Big5"),
3065 build_string ("Big5 Chinese traditional"),
3066 build_string ("big5"),
3068 0 /* MIN_CHAR_BIG5_CDP */,
3069 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
3070 Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_chinese_big5_cdp);
3072 Vcharset_chinese_big5_cdp =
3073 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3074 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3075 build_string ("Big5-CDP"),
3076 build_string ("Big5 + CDP extension"),
3077 build_string ("Big5 with CDP extension"),
3078 build_string ("big5\\.cdp-0"),
3079 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3080 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3081 #define DEF_HANZIKU(n) \
3082 staticpro (&Vcharset_ideograph_hanziku_##n); \
3083 Vcharset_ideograph_hanziku_##n = \
3084 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3085 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3086 build_string ("HZK-"#n), \
3087 build_string ("HANZIKU-"#n), \
3088 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3090 ("hanziku-"#n"$"), \
3091 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3092 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3105 staticpro (&Vcharset_china3_jef);
3106 Vcharset_china3_jef =
3107 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3108 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3109 build_string ("JC3"),
3110 build_string ("JEF + CHINA3"),
3111 build_string ("JEF + CHINA3 private characters"),
3112 build_string ("china3jef-0"),
3113 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3114 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3115 staticpro (&Vcharset_ideograph_cbeta);
3116 Vcharset_ideograph_cbeta =
3117 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3118 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3119 build_string ("CB"),
3120 build_string ("CBETA"),
3121 build_string ("CBETA private characters"),
3122 build_string ("cbeta-0"),
3123 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3124 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3125 staticpro (&Vcharset_ideograph_gt);
3126 Vcharset_ideograph_gt =
3127 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3128 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3129 build_string ("GT"),
3130 build_string ("GT"),
3131 build_string ("GT"),
3133 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3134 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3135 #define DEF_GT_PJ(n) \
3136 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3137 Vcharset_ideograph_gt_pj_##n = \
3138 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3139 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3140 build_string ("GT-PJ-"#n), \
3141 build_string ("GT (pseudo JIS encoding) part "#n), \
3142 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3144 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3145 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3158 staticpro (&Vcharset_ideograph_daikanwa_2);
3159 Vcharset_ideograph_daikanwa_2 =
3160 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3161 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3162 build_string ("Daikanwa Rev."),
3163 build_string ("Morohashi's Daikanwa Rev."),
3165 ("Daikanwa dictionary (revised version)"),
3166 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3167 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3168 staticpro (&Vcharset_ideograph_daikanwa);
3169 Vcharset_ideograph_daikanwa =
3170 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3171 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3172 build_string ("Daikanwa"),
3173 build_string ("Morohashi's Daikanwa Rev.2"),
3175 ("Daikanwa dictionary (second revised version)"),
3176 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3177 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3178 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3180 staticpro (&Vcharset_ethiopic_ucs);
3181 Vcharset_ethiopic_ucs =
3182 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3183 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3184 build_string ("Ethiopic (UCS)"),
3185 build_string ("Ethiopic (UCS)"),
3186 build_string ("Ethiopic of UCS"),
3187 build_string ("Ethiopic-Unicode"),
3188 Qnil, 0x1200, 0x137F, 0, 0,
3189 Qnil, CONVERSION_IDENTICAL);
3191 staticpro (&Vcharset_chinese_big5_1);
3192 Vcharset_chinese_big5_1 =
3193 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3194 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3195 build_string ("Big5"),
3196 build_string ("Big5 (Level-1)"),
3198 ("Big5 Level-1 Chinese traditional"),
3199 build_string ("big5"),
3200 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3201 staticpro (&Vcharset_chinese_big5_2);
3202 Vcharset_chinese_big5_2 =
3203 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3204 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3205 build_string ("Big5"),
3206 build_string ("Big5 (Level-2)"),
3208 ("Big5 Level-2 Chinese traditional"),
3209 build_string ("big5"),
3210 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3212 #ifdef ENABLE_COMPOSITE_CHARS
3213 /* #### For simplicity, we put composite chars into a 96x96 charset.
3214 This is going to lead to problems because you can run out of
3215 room, esp. as we don't yet recycle numbers. */
3216 staticpro (&Vcharset_composite);
3217 Vcharset_composite =
3218 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3219 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3220 build_string ("Composite"),
3221 build_string ("Composite characters"),
3222 build_string ("Composite characters"),
3225 /* #### not dumped properly */
3226 composite_char_row_next = 32;
3227 composite_char_col_next = 32;
3229 Vcomposite_char_string2char_hash_table =
3230 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3231 Vcomposite_char_char2string_hash_table =
3232 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3233 staticpro (&Vcomposite_char_string2char_hash_table);
3234 staticpro (&Vcomposite_char_char2string_hash_table);
3235 #endif /* ENABLE_COMPOSITE_CHARS */