1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
353 /* Qchinese_big5_cdp, */
354 Qideograph_hanziku_1,
355 Qideograph_hanziku_2,
356 Qideograph_hanziku_3,
357 Qideograph_hanziku_4,
358 Qideograph_hanziku_5,
359 Qideograph_hanziku_6,
360 Qideograph_hanziku_7,
361 Qideograph_hanziku_8,
362 Qideograph_hanziku_9,
363 Qideograph_hanziku_10,
364 Qideograph_hanziku_11,
365 Qideograph_hanziku_12,
368 Qideograph_daikanwa_2,
388 Lisp_Object Ql2r, Qr2l;
390 Lisp_Object Vcharset_hash_table;
392 /* Composite characters are characters constructed by overstriking two
393 or more regular characters.
395 1) The old Mule implementation involves storing composite characters
396 in a buffer as a tag followed by all of the actual characters
397 used to make up the composite character. I think this is a bad
398 idea; it greatly complicates code that wants to handle strings
399 one character at a time because it has to deal with the possibility
400 of great big ungainly characters. It's much more reasonable to
401 simply store an index into a table of composite characters.
403 2) The current implementation only allows for 16,384 separate
404 composite characters over the lifetime of the XEmacs process.
405 This could become a potential problem if the user
406 edited lots of different files that use composite characters.
407 Due to FSF bogosity, increasing the number of allowable
408 composite characters under Mule would decrease the number
409 of possible faces that can exist. Mule already has shrunk
410 this to 2048, and further shrinkage would become uncomfortable.
411 No such problems exist in XEmacs.
413 Composite characters could be represented as 0x80 C1 C2 C3,
414 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
415 for slightly under 2^20 (one million) composite characters
416 over the XEmacs process lifetime, and you only need to
417 increase the size of a Mule character from 19 to 21 bits.
418 Or you could use 0x80 C1 C2 C3 C4, allowing for about
419 85 million (slightly over 2^26) composite characters. */
422 /************************************************************************/
423 /* Basic Emchar functions */
424 /************************************************************************/
426 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
427 string in STR. Returns the number of bytes stored.
428 Do not call this directly. Use the macro set_charptr_emchar() instead.
432 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
447 else if ( c <= 0x7ff )
449 *p++ = (c >> 6) | 0xc0;
450 *p++ = (c & 0x3f) | 0x80;
452 else if ( c <= 0xffff )
454 *p++ = (c >> 12) | 0xe0;
455 *p++ = ((c >> 6) & 0x3f) | 0x80;
456 *p++ = (c & 0x3f) | 0x80;
458 else if ( c <= 0x1fffff )
460 *p++ = (c >> 18) | 0xf0;
461 *p++ = ((c >> 12) & 0x3f) | 0x80;
462 *p++ = ((c >> 6) & 0x3f) | 0x80;
463 *p++ = (c & 0x3f) | 0x80;
465 else if ( c <= 0x3ffffff )
467 *p++ = (c >> 24) | 0xf8;
468 *p++ = ((c >> 18) & 0x3f) | 0x80;
469 *p++ = ((c >> 12) & 0x3f) | 0x80;
470 *p++ = ((c >> 6) & 0x3f) | 0x80;
471 *p++ = (c & 0x3f) | 0x80;
475 *p++ = (c >> 30) | 0xfc;
476 *p++ = ((c >> 24) & 0x3f) | 0x80;
477 *p++ = ((c >> 18) & 0x3f) | 0x80;
478 *p++ = ((c >> 12) & 0x3f) | 0x80;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
483 BREAKUP_CHAR (c, charset, c1, c2);
484 lb = CHAR_LEADING_BYTE (c);
485 if (LEADING_BYTE_PRIVATE_P (lb))
486 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
488 if (EQ (charset, Vcharset_control_1))
497 /* Return the first character from a Mule-encoded string in STR,
498 assuming it's non-ASCII. Do not call this directly.
499 Use the macro charptr_emchar() instead. */
502 non_ascii_charptr_emchar (const Bufbyte *str)
515 else if ( b >= 0xf8 )
520 else if ( b >= 0xf0 )
525 else if ( b >= 0xe0 )
530 else if ( b >= 0xc0 )
540 for( ; len > 0; len-- )
543 ch = ( ch << 6 ) | ( b & 0x3f );
547 Bufbyte i0 = *str, i1, i2 = 0;
550 if (i0 == LEADING_BYTE_CONTROL_1)
551 return (Emchar) (*++str - 0x20);
553 if (LEADING_BYTE_PREFIX_P (i0))
558 charset = CHARSET_BY_LEADING_BYTE (i0);
559 if (XCHARSET_DIMENSION (charset) == 2)
562 return MAKE_CHAR (charset, i1, i2);
566 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
567 Do not call this directly. Use the macro valid_char_p() instead. */
571 non_ascii_valid_char_p (Emchar ch)
575 /* Must have only lowest 19 bits set */
579 f1 = CHAR_FIELD1 (ch);
580 f2 = CHAR_FIELD2 (ch);
581 f3 = CHAR_FIELD3 (ch);
587 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
588 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
589 f2 > MAX_CHAR_FIELD2_PRIVATE)
594 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
595 f2 <= MAX_CHAR_FIELD2_PRIVATE))
599 NOTE: This takes advantage of the fact that
600 FIELD2_TO_OFFICIAL_LEADING_BYTE and
601 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
603 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
604 if (EQ (charset, Qnil))
606 return (XCHARSET_CHARS (charset) == 96);
612 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
613 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
614 f1 > MAX_CHAR_FIELD1_PRIVATE)
616 if (f2 < 0x20 || f3 < 0x20)
619 #ifdef ENABLE_COMPOSITE_CHARS
620 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
622 if (UNBOUNDP (Fgethash (make_int (ch),
623 Vcomposite_char_char2string_hash_table,
628 #endif /* ENABLE_COMPOSITE_CHARS */
630 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
631 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
634 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
636 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
639 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
641 if (EQ (charset, Qnil))
643 return (XCHARSET_CHARS (charset) == 96);
649 /************************************************************************/
650 /* Basic string functions */
651 /************************************************************************/
653 /* Copy the character pointed to by SRC into DST. Do not call this
654 directly. Use the macro charptr_copy_char() instead.
655 Return the number of bytes copied. */
658 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
660 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
662 for (i = bytes; i; i--, dst++, src++)
668 /************************************************************************/
669 /* streams of Emchars */
670 /************************************************************************/
672 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
673 The functions below are not meant to be called directly; use
674 the macros in insdel.h. */
677 Lstream_get_emchar_1 (Lstream *stream, int ch)
679 Bufbyte str[MAX_EMCHAR_LEN];
680 Bufbyte *strptr = str;
683 str[0] = (Bufbyte) ch;
685 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
687 int c = Lstream_getc (stream);
688 bufpos_checking_assert (c >= 0);
689 *++strptr = (Bufbyte) c;
691 return charptr_emchar (str);
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
716 mark_charset (Lisp_Object obj)
718 Lisp_Charset *cs = XCHARSET (obj);
720 mark_object (cs->short_name);
721 mark_object (cs->long_name);
722 mark_object (cs->doc_string);
723 mark_object (cs->registry);
724 mark_object (cs->ccl_program);
726 mark_object (cs->decoding_table);
727 mark_object (cs->mother);
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 Lisp_Charset *cs = XCHARSET (obj);
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
751 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
753 CHARSET_DIMENSION (cs),
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
779 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
780 mark_charset, print_charset, 0, 0, 0,
784 /* Make a new charset. */
785 /* #### SJT Should generic properties be allowed? */
787 make_charset (Charset_ID id, Lisp_Object name,
788 unsigned short chars, unsigned char dimension,
789 unsigned char columns, unsigned char graphic,
790 Bufbyte final, unsigned char direction, Lisp_Object short_name,
791 Lisp_Object long_name, Lisp_Object doc,
793 Lisp_Object decoding_table,
794 Emchar min_code, Emchar max_code,
795 Emchar code_offset, unsigned char byte_offset,
796 Lisp_Object mother, unsigned char conversion)
799 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
803 XSETCHARSET (obj, cs);
805 CHARSET_ID (cs) = id;
806 CHARSET_NAME (cs) = name;
807 CHARSET_SHORT_NAME (cs) = short_name;
808 CHARSET_LONG_NAME (cs) = long_name;
809 CHARSET_CHARS (cs) = chars;
810 CHARSET_DIMENSION (cs) = dimension;
811 CHARSET_DIRECTION (cs) = direction;
812 CHARSET_COLUMNS (cs) = columns;
813 CHARSET_GRAPHIC (cs) = graphic;
814 CHARSET_FINAL (cs) = final;
815 CHARSET_DOC_STRING (cs) = doc;
816 CHARSET_REGISTRY (cs) = reg;
817 CHARSET_CCL_PROGRAM (cs) = Qnil;
818 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
820 CHARSET_DECODING_TABLE(cs) = Qunbound;
821 CHARSET_MIN_CODE (cs) = min_code;
822 CHARSET_MAX_CODE (cs) = max_code;
823 CHARSET_CODE_OFFSET (cs) = code_offset;
824 CHARSET_BYTE_OFFSET (cs) = byte_offset;
825 CHARSET_MOTHER (cs) = mother;
826 CHARSET_CONVERSION (cs) = conversion;
830 if (id == LEADING_BYTE_ASCII)
831 CHARSET_REP_BYTES (cs) = 1;
833 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
840 /* some charsets do not have final characters. This includes
841 ASCII, Control-1, Composite, and the two faux private
843 unsigned char iso2022_type
844 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
846 if (code_offset == 0)
848 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
849 chlook->charset_by_attributes[iso2022_type][final] = obj;
853 (chlook->charset_by_attributes[iso2022_type][final][direction]));
854 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
858 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
859 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
861 /* Some charsets are "faux" and don't have names or really exist at
862 all except in the leading-byte table. */
864 Fputhash (name, obj, Vcharset_hash_table);
869 get_unallocated_leading_byte (int dimension)
874 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
877 lb = chlook->next_allocated_leading_byte++;
881 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
884 lb = chlook->next_allocated_1_byte_leading_byte++;
888 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
891 lb = chlook->next_allocated_2_byte_leading_byte++;
897 ("No more character sets free for this dimension",
898 make_int (dimension));
904 /* Number of Big5 characters which have the same code in 1st byte. */
906 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
909 decode_defined_char (Lisp_Object ccs, int code_point)
911 int dim = XCHARSET_DIMENSION (ccs);
912 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
920 = get_ccs_octet_table (decoding_table, ccs,
921 (code_point >> (dim * 8)) & 255);
923 if (CHARP (decoding_table))
924 return XCHAR (decoding_table);
926 if (EQ (decoding_table, Qunloaded) ||
927 EQ (decoding_table, Qunbound) ||
928 NILP (decoding_table) )
930 char_id = load_char_decoding_entry_maybe (ccs, code_point);
935 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
937 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
939 if ( EQ (mother, Vcharset_ucs) )
940 return DECODE_CHAR (mother, code_point);
942 return decode_defined_char (mother, code_point);
949 decode_builtin_char (Lisp_Object charset, int code_point)
951 Lisp_Object mother = XCHARSET_MOTHER (charset);
954 if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
956 int code = code_point;
958 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
960 int row = code_point >> 8;
961 int cell = code_point & 255;
965 else if (row < 16 + 32 + 30)
966 code = (row - (16 + 32)) * 94 + cell - 33;
967 else if (row < 18 + 32 + 30)
969 else if (row < 18 + 32 + 60)
970 code = (row - (18 + 32)) * 94 + cell - 33;
972 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
974 int plane = code_point >> 16;
975 int row = (code_point >> 8) & 255;
976 int cell = code_point & 255;
980 else if (row < 16 + 32 + 30)
982 = (plane - 33) * 94 * 60
983 + (row - (16 + 32)) * 94
985 else if (row < 18 + 32 + 30)
987 else if (row < 18 + 32 + 60)
989 = (plane - 33) * 94 * 60
990 + (row - (18 + 32)) * 94
994 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
997 else if (EQ (charset, Vcharset_chinese_big5))
999 int c1 = code_point >> 8;
1000 int c2 = code_point & 0xFF;
1003 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1005 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1006 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1008 I = (c1 - 0xA1) * BIG5_SAME_ROW
1009 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1013 charset = Vcharset_chinese_big5_1;
1017 charset = Vcharset_chinese_big5_2;
1018 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1020 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1024 if ((final = XCHARSET_FINAL (charset)) >= '0')
1026 if (XCHARSET_DIMENSION (charset) == 1)
1028 switch (XCHARSET_CHARS (charset))
1032 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1035 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1043 switch (XCHARSET_CHARS (charset))
1046 return MIN_CHAR_94x94
1047 + (final - '0') * 94 * 94
1048 + (((code_point >> 8) & 0x7F) - 33) * 94
1049 + ((code_point & 0x7F) - 33);
1051 return MIN_CHAR_96x96
1052 + (final - '0') * 96 * 96
1053 + (((code_point >> 8) & 0x7F) - 32) * 96
1054 + ((code_point & 0x7F) - 32);
1061 else if (XCHARSET_MAX_CODE (charset))
1064 = (XCHARSET_DIMENSION (charset) == 1
1066 code_point - XCHARSET_BYTE_OFFSET (charset)
1068 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1069 * XCHARSET_CHARS (charset)
1070 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1071 + XCHARSET_CODE_OFFSET (charset);
1072 if ((cid < XCHARSET_MIN_CODE (charset))
1073 || (XCHARSET_MAX_CODE (charset) < cid))
1082 charset_code_point (Lisp_Object charset, Emchar ch)
1084 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1087 if ( CHAR_TABLEP (encoding_table)
1088 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1093 Lisp_Object mother = XCHARSET_MOTHER (charset);
1094 int min = XCHARSET_MIN_CODE (charset);
1095 int max = XCHARSET_MAX_CODE (charset);
1098 if ( CHARSETP (mother) )
1099 code = charset_code_point (mother, ch);
1102 if ( ((max == 0) && CHARSETP (mother)) ||
1103 ((min <= code) && (code <= max)) )
1105 int d = code - XCHARSET_CODE_OFFSET (charset);
1107 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1110 int cell = d % 94 + 33;
1116 return (row << 8) | cell;
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1120 int plane = d / (94 * 60) + 33;
1121 int row = (d % (94 * 60)) / 94;
1122 int cell = d % 94 + 33;
1128 return (plane << 16) | (row << 8) | cell;
1130 else if (XCHARSET_CHARS (charset) == 94)
1132 if (XCHARSET_DIMENSION (charset) == 1)
1134 else if (XCHARSET_DIMENSION (charset) == 2)
1135 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1136 else if (XCHARSET_DIMENSION (charset) == 3)
1138 ( (d / (94 * 94) + 33) << 16)
1139 | ((d / 94 % 94 + 33) << 8)
1141 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1143 ( (d / (94 * 94 * 94) + 33) << 24)
1144 | ((d / (94 * 94) % 94 + 33) << 16)
1145 | ((d / 94 % 94 + 33) << 8)
1148 else if (XCHARSET_CHARS (charset) == 96)
1150 if (XCHARSET_DIMENSION (charset) == 1)
1152 else if (XCHARSET_DIMENSION (charset) == 2)
1153 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1154 else if (XCHARSET_DIMENSION (charset) == 3)
1156 ( (d / (96 * 96) + 32) << 16)
1157 | ((d / 96 % 96 + 32) << 8)
1159 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1161 ( (d / (96 * 96 * 96) + 32) << 24)
1162 | ((d / (96 * 96) % 96 + 32) << 16)
1163 | ((d / 96 % 96 + 32) << 8)
1167 return code - XCHARSET_CODE_OFFSET (charset);
1169 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1170 (XCHARSET_CODE_OFFSET (charset)
1171 == XCHARSET_MIN_CODE (charset)) )
1175 if (XCHARSET_DIMENSION (charset) == 1)
1177 if (XCHARSET_CHARS (charset) == 94)
1179 if (((d = ch - (MIN_CHAR_94
1180 + (XCHARSET_FINAL (charset) - '0') * 94))
1185 else if (XCHARSET_CHARS (charset) == 96)
1187 if (((d = ch - (MIN_CHAR_96
1188 + (XCHARSET_FINAL (charset) - '0') * 96))
1196 else if (XCHARSET_DIMENSION (charset) == 2)
1198 if (XCHARSET_CHARS (charset) == 94)
1200 if (((d = ch - (MIN_CHAR_94x94
1202 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1205 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1207 else if (XCHARSET_CHARS (charset) == 96)
1209 if (((d = ch - (MIN_CHAR_96x96
1211 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1214 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1225 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1227 if (c <= MAX_CHAR_BASIC_LATIN)
1229 *charset = Vcharset_ascii;
1234 *charset = Vcharset_control_1;
1239 *charset = Vcharset_latin_iso8859_1;
1243 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1245 *charset = Vcharset_hebrew_iso8859_8;
1246 return c - MIN_CHAR_HEBREW + 0x20;
1249 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1251 *charset = Vcharset_thai_tis620;
1252 return c - MIN_CHAR_THAI + 0x20;
1255 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1256 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1258 return list2 (Vcharset_katakana_jisx0201,
1259 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1262 else if (c <= MAX_CHAR_BMP)
1264 *charset = Vcharset_ucs_bmp;
1267 else if (c <= MAX_CHAR_SMP)
1269 *charset = Vcharset_ucs_smp;
1270 return c - MIN_CHAR_SMP;
1272 else if (c <= MAX_CHAR_SIP)
1274 *charset = Vcharset_ucs_sip;
1275 return c - MIN_CHAR_SIP;
1277 else if (c < MIN_CHAR_DAIKANWA)
1279 *charset = Vcharset_ucs;
1282 else if (c <= MAX_CHAR_DAIKANWA)
1284 *charset = Vcharset_ideograph_daikanwa;
1285 return c - MIN_CHAR_DAIKANWA;
1287 else if (c < MIN_CHAR_94)
1289 *charset = Vcharset_ucs;
1292 else if (c <= MAX_CHAR_94)
1294 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1295 ((c - MIN_CHAR_94) / 94) + '0',
1296 CHARSET_LEFT_TO_RIGHT);
1297 if (!NILP (*charset))
1298 return ((c - MIN_CHAR_94) % 94) + 33;
1301 *charset = Vcharset_ucs;
1305 else if (c <= MAX_CHAR_96)
1307 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1308 ((c - MIN_CHAR_96) / 96) + '0',
1309 CHARSET_LEFT_TO_RIGHT);
1310 if (!NILP (*charset))
1311 return ((c - MIN_CHAR_96) % 96) + 32;
1314 *charset = Vcharset_ucs;
1318 else if (c <= MAX_CHAR_94x94)
1321 = CHARSET_BY_ATTRIBUTES (94, 2,
1322 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1323 CHARSET_LEFT_TO_RIGHT);
1324 if (!NILP (*charset))
1325 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1326 | (((c - MIN_CHAR_94x94) % 94) + 33);
1329 *charset = Vcharset_ucs;
1333 else if (c <= MAX_CHAR_96x96)
1336 = CHARSET_BY_ATTRIBUTES (96, 2,
1337 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1338 CHARSET_LEFT_TO_RIGHT);
1339 if (!NILP (*charset))
1340 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1341 | (((c - MIN_CHAR_96x96) % 96) + 32);
1344 *charset = Vcharset_ucs;
1350 *charset = Vcharset_ucs;
1355 Lisp_Object Vdefault_coded_charset_priority_list;
1359 /************************************************************************/
1360 /* Basic charset Lisp functions */
1361 /************************************************************************/
1363 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1364 Return non-nil if OBJECT is a charset.
1368 return CHARSETP (object) ? Qt : Qnil;
1371 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1372 Retrieve the charset of the given name.
1373 If CHARSET-OR-NAME is a charset object, it is simply returned.
1374 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1375 nil is returned. Otherwise the associated charset object is returned.
1379 if (CHARSETP (charset_or_name))
1380 return charset_or_name;
1382 CHECK_SYMBOL (charset_or_name);
1383 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1386 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1387 Retrieve the charset of the given name.
1388 Same as `find-charset' except an error is signalled if there is no such
1389 charset instead of returning nil.
1393 Lisp_Object charset = Ffind_charset (name);
1396 signal_simple_error ("No such charset", name);
1400 /* We store the charsets in hash tables with the names as the key and the
1401 actual charset object as the value. Occasionally we need to use them
1402 in a list format. These routines provide us with that. */
1403 struct charset_list_closure
1405 Lisp_Object *charset_list;
1409 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1410 void *charset_list_closure)
1412 /* This function can GC */
1413 struct charset_list_closure *chcl =
1414 (struct charset_list_closure*) charset_list_closure;
1415 Lisp_Object *charset_list = chcl->charset_list;
1417 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1421 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1422 Return a list of the names of all defined charsets.
1426 Lisp_Object charset_list = Qnil;
1427 struct gcpro gcpro1;
1428 struct charset_list_closure charset_list_closure;
1430 GCPRO1 (charset_list);
1431 charset_list_closure.charset_list = &charset_list;
1432 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1433 &charset_list_closure);
1436 return charset_list;
1439 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1440 Return the name of charset CHARSET.
1444 return XCHARSET_NAME (Fget_charset (charset));
1447 /* #### SJT Should generic properties be allowed? */
1448 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1449 Define a new character set.
1450 This function is for use with Mule support.
1451 NAME is a symbol, the name by which the character set is normally referred.
1452 DOC-STRING is a string describing the character set.
1453 PROPS is a property list, describing the specific nature of the
1454 character set. Recognized properties are:
1456 'short-name Short version of the charset name (ex: Latin-1)
1457 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1458 'registry A regular expression matching the font registry field for
1460 'dimension Number of octets used to index a character in this charset.
1461 Either 1 or 2. Defaults to 1.
1462 If UTF-2000 feature is enabled, 3 or 4 are also available.
1463 'columns Number of columns used to display a character in this charset.
1464 Only used in TTY mode. (Under X, the actual width of a
1465 character can be derived from the font used to display the
1466 characters.) If unspecified, defaults to the dimension
1467 (this is almost always the correct value).
1468 'chars Number of characters in each dimension (94 or 96).
1469 Defaults to 94. Note that if the dimension is 2, the
1470 character set thus described is 94x94 or 96x96.
1471 If UTF-2000 feature is enabled, 128 or 256 are also available.
1472 'final Final byte of ISO 2022 escape sequence. Must be
1473 supplied. Each combination of (DIMENSION, CHARS) defines a
1474 separate namespace for final bytes. Note that ISO
1475 2022 restricts the final byte to the range
1476 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1477 dimension == 2. Note also that final bytes in the range
1478 0x30 - 0x3F are reserved for user-defined (not official)
1480 'graphic 0 (use left half of font on output) or 1 (use right half
1481 of font on output). Defaults to 0. For example, for
1482 a font whose registry is ISO8859-1, the left half
1483 (octets 0x20 - 0x7F) is the `ascii' character set, while
1484 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1485 character set. With 'graphic set to 0, the octets
1486 will have their high bit cleared; with it set to 1,
1487 the octets will have their high bit set.
1488 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1490 'ccl-program A compiled CCL program used to convert a character in
1491 this charset into an index into the font. This is in
1492 addition to the 'graphic property. The CCL program
1493 is passed the octets of the character, with the high
1494 bit cleared and set depending upon whether the value
1495 of the 'graphic property is 0 or 1.
1496 'mother [UTF-2000 only] Base coded-charset.
1497 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1498 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1499 'code-offset [UTF-2000 only] Offset for a code-point of a base
1501 'conversion [UTF-2000 only] Conversion for a code-point of a base
1502 coded-charset (94x60 or 94x94x60).
1504 (name, doc_string, props))
1506 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1507 int direction = CHARSET_LEFT_TO_RIGHT;
1508 Lisp_Object registry = Qnil;
1509 Lisp_Object charset;
1510 Lisp_Object ccl_program = Qnil;
1511 Lisp_Object short_name = Qnil, long_name = Qnil;
1512 Lisp_Object mother = Qnil;
1513 int min_code = 0, max_code = 0, code_offset = 0;
1514 int byte_offset = -1;
1517 CHECK_SYMBOL (name);
1518 if (!NILP (doc_string))
1519 CHECK_STRING (doc_string);
1521 charset = Ffind_charset (name);
1522 if (!NILP (charset))
1523 signal_simple_error ("Cannot redefine existing charset", name);
1526 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1528 if (EQ (keyword, Qshort_name))
1530 CHECK_STRING (value);
1534 if (EQ (keyword, Qlong_name))
1536 CHECK_STRING (value);
1540 else if (EQ (keyword, Qdimension))
1543 dimension = XINT (value);
1544 if (dimension < 1 ||
1551 signal_simple_error ("Invalid value for 'dimension", value);
1554 else if (EQ (keyword, Qchars))
1557 chars = XINT (value);
1558 if (chars != 94 && chars != 96
1560 && chars != 128 && chars != 256
1563 signal_simple_error ("Invalid value for 'chars", value);
1566 else if (EQ (keyword, Qcolumns))
1569 columns = XINT (value);
1570 if (columns != 1 && columns != 2)
1571 signal_simple_error ("Invalid value for 'columns", value);
1574 else if (EQ (keyword, Qgraphic))
1577 graphic = XINT (value);
1585 signal_simple_error ("Invalid value for 'graphic", value);
1588 else if (EQ (keyword, Qregistry))
1590 CHECK_STRING (value);
1594 else if (EQ (keyword, Qdirection))
1596 if (EQ (value, Ql2r))
1597 direction = CHARSET_LEFT_TO_RIGHT;
1598 else if (EQ (value, Qr2l))
1599 direction = CHARSET_RIGHT_TO_LEFT;
1601 signal_simple_error ("Invalid value for 'direction", value);
1604 else if (EQ (keyword, Qfinal))
1606 CHECK_CHAR_COERCE_INT (value);
1607 final = XCHAR (value);
1608 if (final < '0' || final > '~')
1609 signal_simple_error ("Invalid value for 'final", value);
1613 else if (EQ (keyword, Qmother))
1615 mother = Fget_charset (value);
1618 else if (EQ (keyword, Qmin_code))
1621 min_code = XUINT (value);
1624 else if (EQ (keyword, Qmax_code))
1627 max_code = XUINT (value);
1630 else if (EQ (keyword, Qcode_offset))
1633 code_offset = XUINT (value);
1636 else if (EQ (keyword, Qconversion))
1638 if (EQ (value, Q94x60))
1639 conversion = CONVERSION_94x60;
1640 else if (EQ (value, Q94x94x60))
1641 conversion = CONVERSION_94x94x60;
1643 signal_simple_error ("Unrecognized conversion", value);
1647 else if (EQ (keyword, Qccl_program))
1649 struct ccl_program test_ccl;
1651 if (setup_ccl_program (&test_ccl, value) < 0)
1652 signal_simple_error ("Invalid value for 'ccl-program", value);
1653 ccl_program = value;
1657 signal_simple_error ("Unrecognized property", keyword);
1663 error ("'final must be specified");
1665 if (dimension == 2 && final > 0x5F)
1667 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1670 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1671 CHARSET_LEFT_TO_RIGHT)) ||
1672 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1673 CHARSET_RIGHT_TO_LEFT)))
1675 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1677 id = get_unallocated_leading_byte (dimension);
1679 if (NILP (doc_string))
1680 doc_string = build_string ("");
1682 if (NILP (registry))
1683 registry = build_string ("");
1685 if (NILP (short_name))
1686 XSETSTRING (short_name, XSYMBOL (name)->name);
1688 if (NILP (long_name))
1689 long_name = doc_string;
1692 columns = dimension;
1694 if (byte_offset < 0)
1698 else if (chars == 96)
1704 charset = make_charset (id, name, chars, dimension, columns, graphic,
1705 final, direction, short_name, long_name,
1706 doc_string, registry,
1707 Qnil, min_code, max_code, code_offset, byte_offset,
1708 mother, conversion);
1709 if (!NILP (ccl_program))
1710 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1714 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1716 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1717 NEW-NAME is the name of the new charset. Return the new charset.
1719 (charset, new_name))
1721 Lisp_Object new_charset = Qnil;
1722 int id, chars, dimension, columns, graphic, final;
1724 Lisp_Object registry, doc_string, short_name, long_name;
1727 charset = Fget_charset (charset);
1728 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1729 signal_simple_error ("Charset already has reverse-direction charset",
1732 CHECK_SYMBOL (new_name);
1733 if (!NILP (Ffind_charset (new_name)))
1734 signal_simple_error ("Cannot redefine existing charset", new_name);
1736 cs = XCHARSET (charset);
1738 chars = CHARSET_CHARS (cs);
1739 dimension = CHARSET_DIMENSION (cs);
1740 columns = CHARSET_COLUMNS (cs);
1741 id = get_unallocated_leading_byte (dimension);
1743 graphic = CHARSET_GRAPHIC (cs);
1744 final = CHARSET_FINAL (cs);
1745 direction = CHARSET_RIGHT_TO_LEFT;
1746 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1747 direction = CHARSET_LEFT_TO_RIGHT;
1748 doc_string = CHARSET_DOC_STRING (cs);
1749 short_name = CHARSET_SHORT_NAME (cs);
1750 long_name = CHARSET_LONG_NAME (cs);
1751 registry = CHARSET_REGISTRY (cs);
1753 new_charset = make_charset (id, new_name, chars, dimension, columns,
1754 graphic, final, direction, short_name, long_name,
1755 doc_string, registry,
1757 CHARSET_DECODING_TABLE(cs),
1758 CHARSET_MIN_CODE(cs),
1759 CHARSET_MAX_CODE(cs),
1760 CHARSET_CODE_OFFSET(cs),
1761 CHARSET_BYTE_OFFSET(cs),
1763 CHARSET_CONVERSION (cs)
1765 Qnil, 0, 0, 0, 0, Qnil, 0
1769 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1770 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1775 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1776 Define symbol ALIAS as an alias for CHARSET.
1780 CHECK_SYMBOL (alias);
1781 charset = Fget_charset (charset);
1782 return Fputhash (alias, charset, Vcharset_hash_table);
1785 /* #### Reverse direction charsets not yet implemented. */
1787 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1789 Return the reverse-direction charset parallel to CHARSET, if any.
1790 This is the charset with the same properties (in particular, the same
1791 dimension, number of characters per dimension, and final byte) as
1792 CHARSET but whose characters are displayed in the opposite direction.
1796 charset = Fget_charset (charset);
1797 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1801 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1802 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1803 If DIRECTION is omitted, both directions will be checked (left-to-right
1804 will be returned if character sets exist for both directions).
1806 (dimension, chars, final, direction))
1808 int dm, ch, fi, di = -1;
1809 Lisp_Object obj = Qnil;
1811 CHECK_INT (dimension);
1812 dm = XINT (dimension);
1813 if (dm < 1 || dm > 2)
1814 signal_simple_error ("Invalid value for DIMENSION", dimension);
1818 if (ch != 94 && ch != 96)
1819 signal_simple_error ("Invalid value for CHARS", chars);
1821 CHECK_CHAR_COERCE_INT (final);
1823 if (fi < '0' || fi > '~')
1824 signal_simple_error ("Invalid value for FINAL", final);
1826 if (EQ (direction, Ql2r))
1827 di = CHARSET_LEFT_TO_RIGHT;
1828 else if (EQ (direction, Qr2l))
1829 di = CHARSET_RIGHT_TO_LEFT;
1830 else if (!NILP (direction))
1831 signal_simple_error ("Invalid value for DIRECTION", direction);
1833 if (dm == 2 && fi > 0x5F)
1835 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1839 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1841 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1844 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1847 return XCHARSET_NAME (obj);
1851 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1852 Return short name of CHARSET.
1856 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1859 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1860 Return long name of CHARSET.
1864 return XCHARSET_LONG_NAME (Fget_charset (charset));
1867 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1868 Return description of CHARSET.
1872 return XCHARSET_DOC_STRING (Fget_charset (charset));
1875 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1876 Return dimension of CHARSET.
1880 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1883 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1884 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1885 Recognized properties are those listed in `make-charset', as well as
1886 'name and 'doc-string.
1892 charset = Fget_charset (charset);
1893 cs = XCHARSET (charset);
1895 CHECK_SYMBOL (prop);
1896 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1897 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1898 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1899 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1900 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1901 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1902 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1903 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1904 Qnil : make_char (CHARSET_FINAL (cs));
1905 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1906 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1907 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1908 if (EQ (prop, Qdirection))
1909 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1910 if (EQ (prop, Qreverse_direction_charset))
1912 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1913 /* #### Is this translation OK? If so, error checking sufficient? */
1914 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1917 if (EQ (prop, Qmother))
1918 return CHARSET_MOTHER (cs);
1919 if (EQ (prop, Qmin_code))
1920 return make_int (CHARSET_MIN_CODE (cs));
1921 if (EQ (prop, Qmax_code))
1922 return make_int (CHARSET_MAX_CODE (cs));
1924 signal_simple_error ("Unrecognized charset property name", prop);
1925 return Qnil; /* not reached */
1928 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1929 Return charset identification number of CHARSET.
1933 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1936 /* #### We need to figure out which properties we really want to
1939 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1940 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1942 (charset, ccl_program))
1944 struct ccl_program test_ccl;
1946 charset = Fget_charset (charset);
1947 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1948 signal_simple_error ("Invalid ccl-program", ccl_program);
1949 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1954 invalidate_charset_font_caches (Lisp_Object charset)
1956 /* Invalidate font cache entries for charset on all devices. */
1957 Lisp_Object devcons, concons, hash_table;
1958 DEVICE_LOOP_NO_BREAK (devcons, concons)
1960 struct device *d = XDEVICE (XCAR (devcons));
1961 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1962 if (!UNBOUNDP (hash_table))
1963 Fclrhash (hash_table);
1967 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1968 Set the 'registry property of CHARSET to REGISTRY.
1970 (charset, registry))
1972 charset = Fget_charset (charset);
1973 CHECK_STRING (registry);
1974 XCHARSET_REGISTRY (charset) = registry;
1975 invalidate_charset_font_caches (charset);
1976 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1981 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1982 Return mapping-table of CHARSET.
1986 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1989 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1990 Set mapping-table of CHARSET to TABLE.
1994 struct Lisp_Charset *cs;
1998 charset = Fget_charset (charset);
1999 cs = XCHARSET (charset);
2003 CHARSET_DECODING_TABLE(cs) = Qnil;
2006 else if (VECTORP (table))
2008 int ccs_len = CHARSET_BYTE_SIZE (cs);
2009 int ret = decoding_table_check_elements (table,
2010 CHARSET_DIMENSION (cs),
2015 signal_simple_error ("Too big table", table);
2017 signal_simple_error ("Invalid element is found", table);
2019 signal_simple_error ("Something wrong", table);
2021 CHARSET_DECODING_TABLE(cs) = Qnil;
2024 signal_error (Qwrong_type_argument,
2025 list2 (build_translated_string ("vector-or-nil-p"),
2028 byte_offset = CHARSET_BYTE_OFFSET (cs);
2029 switch (CHARSET_DIMENSION (cs))
2032 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2034 Lisp_Object c = XVECTOR_DATA(table)[i];
2037 Fput_char_attribute (c, XCHARSET_NAME (charset),
2038 make_int (i + byte_offset));
2042 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2044 Lisp_Object v = XVECTOR_DATA(table)[i];
2050 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2052 Lisp_Object c = XVECTOR_DATA(v)[j];
2056 (c, XCHARSET_NAME (charset),
2057 make_int ( ( (i + byte_offset) << 8 )
2063 Fput_char_attribute (v, XCHARSET_NAME (charset),
2064 make_int (i + byte_offset));
2071 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2072 Save mapping-table of CHARSET.
2076 #ifdef HAVE_DATABASE
2077 struct Lisp_Charset *cs;
2078 int byte_min, byte_max;
2080 Lisp_Object db_file;
2082 charset = Fget_charset (charset);
2083 cs = XCHARSET (charset);
2085 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2086 Qsystem_char_id, 1);
2087 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2089 byte_min = CHARSET_BYTE_OFFSET (cs);
2090 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2091 switch (CHARSET_DIMENSION (cs))
2095 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2098 for (cell = byte_min; cell < byte_max; cell++)
2100 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2103 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2104 Fprin1_to_string (c, Qnil),
2111 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2114 for (row = byte_min; row < byte_max; row++)
2116 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2119 for (cell = byte_min; cell < byte_max; cell++)
2121 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2124 Fput_database (Fprin1_to_string (make_int ((row << 8)
2127 Fprin1_to_string (c, Qnil),
2135 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2138 for (plane = byte_min; plane < byte_max; plane++)
2141 = get_ccs_octet_table (table_p, charset, plane);
2144 for (row = byte_min; row < byte_max; row++)
2147 = get_ccs_octet_table (table_r, charset, row);
2150 for (cell = byte_min; cell < byte_max; cell++)
2152 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2156 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2160 Fprin1_to_string (c, Qnil),
2169 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2172 for (group = byte_min; group < byte_max; group++)
2175 = get_ccs_octet_table (table_g, charset, group);
2178 for (plane = byte_min; plane < byte_max; plane++)
2181 = get_ccs_octet_table (table_p, charset, plane);
2184 for (row = byte_min; row < byte_max; row++)
2187 = get_ccs_octet_table (table_r, charset, row);
2190 for (cell = byte_min; cell < byte_max; cell++)
2193 = get_ccs_octet_table (table_c, charset, cell);
2196 Fput_database (Fprin1_to_string
2197 (make_int (( group << 24)
2202 Fprin1_to_string (c, Qnil),
2210 return Fclose_database (db);
2216 #ifdef HAVE_CHISE_CLIENT
2218 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2222 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2225 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2229 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2236 decoding_table_put_char (ccs, code_point, ret);
2237 Fclose_database (db);
2241 Fclose_database (db);
2245 #endif /* HAVE_CHISE_CLIENT */
2246 #endif /* UTF2000 */
2249 /************************************************************************/
2250 /* Lisp primitives for working with characters */
2251 /************************************************************************/
2254 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2255 Make a character from CHARSET and code-point CODE.
2256 If DEFINED_ONLY is non-nil, builtin character is not returned.
2257 If corresponding character is not found, nil is returned.
2259 (charset, code, defined_only))
2263 charset = Fget_charset (charset);
2266 if (XCHARSET_GRAPHIC (charset) == 1)
2268 if (NILP (defined_only))
2269 c = DECODE_CHAR (charset, c);
2271 c = decode_defined_char (charset, c);
2272 return c >= 0 ? make_char (c) : Qnil;
2275 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2276 Make a builtin character from CHARSET and code-point CODE.
2282 charset = Fget_charset (charset);
2284 if (EQ (charset, Vcharset_latin_viscii))
2286 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2292 (ret = Fget_char_attribute (chr,
2293 Vcharset_latin_viscii_lower,
2296 charset = Vcharset_latin_viscii_lower;
2300 (ret = Fget_char_attribute (chr,
2301 Vcharset_latin_viscii_upper,
2304 charset = Vcharset_latin_viscii_upper;
2311 if (XCHARSET_GRAPHIC (charset) == 1)
2314 c = decode_builtin_char (charset, c);
2315 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2319 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2320 Make a character from CHARSET and octets ARG1 and ARG2.
2321 ARG2 is required only for characters from two-dimensional charsets.
2322 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2323 character s with caron.
2325 (charset, arg1, arg2))
2329 int lowlim, highlim;
2331 charset = Fget_charset (charset);
2332 cs = XCHARSET (charset);
2334 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2335 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2337 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2339 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2340 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2343 /* It is useful (and safe, according to Olivier Galibert) to strip
2344 the 8th bit off ARG1 and ARG2 because it allows programmers to
2345 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2346 Latin 2 code of the character. */
2354 if (a1 < lowlim || a1 > highlim)
2355 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2357 if (CHARSET_DIMENSION (cs) == 1)
2361 ("Charset is of dimension one; second octet must be nil", arg2);
2362 return make_char (MAKE_CHAR (charset, a1, 0));
2371 a2 = XINT (arg2) & 0x7f;
2373 if (a2 < lowlim || a2 > highlim)
2374 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2376 return make_char (MAKE_CHAR (charset, a1, a2));
2379 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2380 Return the character set of CHARACTER.
2384 CHECK_CHAR_COERCE_INT (character);
2386 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2389 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2390 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2391 N defaults to 0 if omitted.
2395 Lisp_Object charset;
2398 CHECK_CHAR_COERCE_INT (character);
2400 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2402 if (NILP (n) || EQ (n, Qzero))
2403 return make_int (octet0);
2404 else if (EQ (n, make_int (1)))
2405 return make_int (octet1);
2407 signal_simple_error ("Octet number must be 0 or 1", n);
2411 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2412 Return code-point of CHARACTER in specified CHARSET.
2414 (character, charset))
2418 CHECK_CHAR_COERCE_INT (character);
2419 charset = Fget_charset (charset);
2420 code_point = charset_code_point (charset, XCHAR (character));
2421 if (code_point >= 0)
2422 return make_int (code_point);
2428 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2429 Return list of charset and one or two position-codes of CHARACTER.
2433 /* This function can GC */
2434 struct gcpro gcpro1, gcpro2;
2435 Lisp_Object charset = Qnil;
2436 Lisp_Object rc = Qnil;
2444 GCPRO2 (charset, rc);
2445 CHECK_CHAR_COERCE_INT (character);
2448 code_point = ENCODE_CHAR (XCHAR (character), charset);
2449 dimension = XCHARSET_DIMENSION (charset);
2450 while (dimension > 0)
2452 rc = Fcons (make_int (code_point & 255), rc);
2456 rc = Fcons (XCHARSET_NAME (charset), rc);
2458 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2460 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2462 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2466 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2475 #ifdef ENABLE_COMPOSITE_CHARS
2476 /************************************************************************/
2477 /* composite character functions */
2478 /************************************************************************/
2481 lookup_composite_char (Bufbyte *str, int len)
2483 Lisp_Object lispstr = make_string (str, len);
2484 Lisp_Object ch = Fgethash (lispstr,
2485 Vcomposite_char_string2char_hash_table,
2491 if (composite_char_row_next >= 128)
2492 signal_simple_error ("No more composite chars available", lispstr);
2493 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2494 composite_char_col_next);
2495 Fputhash (make_char (emch), lispstr,
2496 Vcomposite_char_char2string_hash_table);
2497 Fputhash (lispstr, make_char (emch),
2498 Vcomposite_char_string2char_hash_table);
2499 composite_char_col_next++;
2500 if (composite_char_col_next >= 128)
2502 composite_char_col_next = 32;
2503 composite_char_row_next++;
2512 composite_char_string (Emchar ch)
2514 Lisp_Object str = Fgethash (make_char (ch),
2515 Vcomposite_char_char2string_hash_table,
2517 assert (!UNBOUNDP (str));
2521 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2522 Convert a string into a single composite character.
2523 The character is the result of overstriking all the characters in
2528 CHECK_STRING (string);
2529 return make_char (lookup_composite_char (XSTRING_DATA (string),
2530 XSTRING_LENGTH (string)));
2533 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2534 Return a string of the characters comprising a composite character.
2542 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2543 signal_simple_error ("Must be composite char", ch);
2544 return composite_char_string (emch);
2546 #endif /* ENABLE_COMPOSITE_CHARS */
2549 /************************************************************************/
2550 /* initialization */
2551 /************************************************************************/
2554 syms_of_mule_charset (void)
2556 INIT_LRECORD_IMPLEMENTATION (charset);
2558 DEFSUBR (Fcharsetp);
2559 DEFSUBR (Ffind_charset);
2560 DEFSUBR (Fget_charset);
2561 DEFSUBR (Fcharset_list);
2562 DEFSUBR (Fcharset_name);
2563 DEFSUBR (Fmake_charset);
2564 DEFSUBR (Fmake_reverse_direction_charset);
2565 /* DEFSUBR (Freverse_direction_charset); */
2566 DEFSUBR (Fdefine_charset_alias);
2567 DEFSUBR (Fcharset_from_attributes);
2568 DEFSUBR (Fcharset_short_name);
2569 DEFSUBR (Fcharset_long_name);
2570 DEFSUBR (Fcharset_description);
2571 DEFSUBR (Fcharset_dimension);
2572 DEFSUBR (Fcharset_property);
2573 DEFSUBR (Fcharset_id);
2574 DEFSUBR (Fset_charset_ccl_program);
2575 DEFSUBR (Fset_charset_registry);
2577 DEFSUBR (Fcharset_mapping_table);
2578 DEFSUBR (Fset_charset_mapping_table);
2582 DEFSUBR (Fdecode_char);
2583 DEFSUBR (Fdecode_builtin_char);
2584 DEFSUBR (Fencode_char);
2585 DEFSUBR (Fsave_charset_mapping_table);
2587 DEFSUBR (Fmake_char);
2588 DEFSUBR (Fchar_charset);
2589 DEFSUBR (Fchar_octet);
2590 DEFSUBR (Fsplit_char);
2592 #ifdef ENABLE_COMPOSITE_CHARS
2593 DEFSUBR (Fmake_composite_char);
2594 DEFSUBR (Fcomposite_char_string);
2597 defsymbol (&Qcharsetp, "charsetp");
2598 defsymbol (&Qregistry, "registry");
2599 defsymbol (&Qfinal, "final");
2600 defsymbol (&Qgraphic, "graphic");
2601 defsymbol (&Qdirection, "direction");
2602 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2603 defsymbol (&Qshort_name, "short-name");
2604 defsymbol (&Qlong_name, "long-name");
2606 defsymbol (&Qmother, "mother");
2607 defsymbol (&Qmin_code, "min-code");
2608 defsymbol (&Qmax_code, "max-code");
2609 defsymbol (&Qcode_offset, "code-offset");
2610 defsymbol (&Qconversion, "conversion");
2611 defsymbol (&Q94x60, "94x60");
2612 defsymbol (&Q94x94x60, "94x94x60");
2615 defsymbol (&Ql2r, "l2r");
2616 defsymbol (&Qr2l, "r2l");
2618 /* Charsets, compatible with FSF 20.3
2619 Naming convention is Script-Charset[-Edition] */
2620 defsymbol (&Qascii, "ascii");
2621 defsymbol (&Qcontrol_1, "control-1");
2622 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2623 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2624 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2625 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2626 defsymbol (&Qthai_tis620, "thai-tis620");
2627 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2628 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2629 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2630 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2631 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2632 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2633 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2634 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2635 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2636 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2637 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2638 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2639 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2640 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2641 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2642 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2644 defsymbol (&Qucs, "ucs");
2645 defsymbol (&Qucs_bmp, "ucs-bmp");
2646 defsymbol (&Qucs_smp, "ucs-smp");
2647 defsymbol (&Qucs_sip, "ucs-sip");
2648 defsymbol (&Qucs_cns, "ucs-cns");
2649 defsymbol (&Qucs_jis, "ucs-jis");
2650 defsymbol (&Qucs_ks, "ucs-ks");
2651 defsymbol (&Qucs_big5, "ucs-big5");
2652 defsymbol (&Qlatin_viscii, "latin-viscii");
2653 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2654 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2655 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2656 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2657 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2658 defsymbol (&Qideograph_gt, "ideograph-gt");
2659 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2660 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2661 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2662 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2663 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2664 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2665 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2666 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2667 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2668 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2669 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2670 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2671 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2672 defsymbol (&Qchinese_big5, "chinese-big5");
2673 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2674 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2675 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2676 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2677 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2678 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2679 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2680 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2681 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2682 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2683 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2684 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2685 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2686 defsymbol (&Qchina3_jef, "china3-jef");
2687 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2688 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2690 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2691 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2693 defsymbol (&Qcomposite, "composite");
2697 vars_of_mule_charset (void)
2704 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2705 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2707 /* Table of charsets indexed by leading byte. */
2708 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2709 chlook->charset_by_leading_byte[i] = Qnil;
2712 /* Table of charsets indexed by type/final-byte. */
2713 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2714 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2715 chlook->charset_by_attributes[i][j] = Qnil;
2717 /* Table of charsets indexed by type/final-byte/direction. */
2718 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2719 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2720 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2721 chlook->charset_by_attributes[i][j][k] = Qnil;
2725 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2727 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2728 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2732 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2733 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2734 Leading-code of private TYPE9N charset of column-width 1.
2736 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2740 Vdefault_coded_charset_priority_list = Qnil;
2741 DEFVAR_LISP ("default-coded-charset-priority-list",
2742 &Vdefault_coded_charset_priority_list /*
2743 Default order of preferred coded-character-sets.
2749 complex_vars_of_mule_charset (void)
2751 staticpro (&Vcharset_hash_table);
2752 Vcharset_hash_table =
2753 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2755 /* Predefined character sets. We store them into variables for
2759 staticpro (&Vcharset_ucs);
2761 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2762 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2763 build_string ("UCS"),
2764 build_string ("UCS"),
2765 build_string ("ISO/IEC 10646"),
2767 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2768 staticpro (&Vcharset_ucs_bmp);
2770 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2771 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2772 build_string ("BMP"),
2773 build_string ("UCS-BMP"),
2774 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2776 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2777 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2778 staticpro (&Vcharset_ucs_smp);
2780 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2781 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2782 build_string ("SMP"),
2783 build_string ("UCS-SMP"),
2784 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2785 build_string ("UCS00-1"),
2786 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2787 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2788 staticpro (&Vcharset_ucs_sip);
2790 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2791 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2792 build_string ("SIP"),
2793 build_string ("UCS-SIP"),
2794 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2795 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2796 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2797 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2798 staticpro (&Vcharset_ucs_cns);
2800 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2801 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2802 build_string ("UCS for CNS"),
2803 build_string ("UCS for CNS 11643"),
2804 build_string ("ISO/IEC 10646 for CNS 11643"),
2806 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2807 staticpro (&Vcharset_ucs_jis);
2809 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2810 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2811 build_string ("UCS for JIS"),
2812 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2814 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2816 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_ucs_ks);
2819 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2820 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2821 build_string ("UCS for KS"),
2822 build_string ("UCS for CCS defined by KS"),
2823 build_string ("ISO/IEC 10646 for Korean Standards"),
2825 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_ucs_big5);
2828 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2829 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2830 build_string ("UCS for Big5"),
2831 build_string ("UCS for Big5"),
2832 build_string ("ISO/IEC 10646 for Big5"),
2834 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2836 # define MIN_CHAR_THAI 0
2837 # define MAX_CHAR_THAI 0
2838 /* # define MIN_CHAR_HEBREW 0 */
2839 /* # define MAX_CHAR_HEBREW 0 */
2840 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2841 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2843 staticpro (&Vcharset_ascii);
2845 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2846 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2847 build_string ("ASCII"),
2848 build_string ("ASCII)"),
2849 build_string ("ASCII (ISO646 IRV)"),
2850 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2851 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2852 staticpro (&Vcharset_control_1);
2853 Vcharset_control_1 =
2854 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2855 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2856 build_string ("C1"),
2857 build_string ("Control characters"),
2858 build_string ("Control characters 128-191"),
2860 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2861 staticpro (&Vcharset_latin_iso8859_1);
2862 Vcharset_latin_iso8859_1 =
2863 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2864 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2865 build_string ("Latin-1"),
2866 build_string ("ISO8859-1 (Latin-1)"),
2867 build_string ("ISO8859-1 (Latin-1)"),
2868 build_string ("iso8859-1"),
2869 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2870 staticpro (&Vcharset_latin_iso8859_2);
2871 Vcharset_latin_iso8859_2 =
2872 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2873 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2874 build_string ("Latin-2"),
2875 build_string ("ISO8859-2 (Latin-2)"),
2876 build_string ("ISO8859-2 (Latin-2)"),
2877 build_string ("iso8859-2"),
2878 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2879 staticpro (&Vcharset_latin_iso8859_3);
2880 Vcharset_latin_iso8859_3 =
2881 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2882 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2883 build_string ("Latin-3"),
2884 build_string ("ISO8859-3 (Latin-3)"),
2885 build_string ("ISO8859-3 (Latin-3)"),
2886 build_string ("iso8859-3"),
2887 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2888 staticpro (&Vcharset_latin_iso8859_4);
2889 Vcharset_latin_iso8859_4 =
2890 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2891 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2892 build_string ("Latin-4"),
2893 build_string ("ISO8859-4 (Latin-4)"),
2894 build_string ("ISO8859-4 (Latin-4)"),
2895 build_string ("iso8859-4"),
2896 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2897 staticpro (&Vcharset_thai_tis620);
2898 Vcharset_thai_tis620 =
2899 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2900 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2901 build_string ("TIS620"),
2902 build_string ("TIS620 (Thai)"),
2903 build_string ("TIS620.2529 (Thai)"),
2904 build_string ("tis620"),
2905 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2906 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2907 staticpro (&Vcharset_greek_iso8859_7);
2908 Vcharset_greek_iso8859_7 =
2909 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2910 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2911 build_string ("ISO8859-7"),
2912 build_string ("ISO8859-7 (Greek)"),
2913 build_string ("ISO8859-7 (Greek)"),
2914 build_string ("iso8859-7"),
2915 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2916 staticpro (&Vcharset_arabic_iso8859_6);
2917 Vcharset_arabic_iso8859_6 =
2918 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2919 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2920 build_string ("ISO8859-6"),
2921 build_string ("ISO8859-6 (Arabic)"),
2922 build_string ("ISO8859-6 (Arabic)"),
2923 build_string ("iso8859-6"),
2924 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2925 staticpro (&Vcharset_hebrew_iso8859_8);
2926 Vcharset_hebrew_iso8859_8 =
2927 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2928 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2929 build_string ("ISO8859-8"),
2930 build_string ("ISO8859-8 (Hebrew)"),
2931 build_string ("ISO8859-8 (Hebrew)"),
2932 build_string ("iso8859-8"),
2934 0 /* MIN_CHAR_HEBREW */,
2935 0 /* MAX_CHAR_HEBREW */, 0, 32,
2936 Qnil, CONVERSION_IDENTICAL);
2937 staticpro (&Vcharset_katakana_jisx0201);
2938 Vcharset_katakana_jisx0201 =
2939 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2940 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2941 build_string ("JISX0201 Kana"),
2942 build_string ("JISX0201.1976 (Japanese Kana)"),
2943 build_string ("JISX0201.1976 Japanese Kana"),
2944 build_string ("jisx0201\\.1976"),
2945 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2946 staticpro (&Vcharset_latin_jisx0201);
2947 Vcharset_latin_jisx0201 =
2948 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2949 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2950 build_string ("JISX0201 Roman"),
2951 build_string ("JISX0201.1976 (Japanese Roman)"),
2952 build_string ("JISX0201.1976 Japanese Roman"),
2953 build_string ("jisx0201\\.1976"),
2954 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2955 staticpro (&Vcharset_cyrillic_iso8859_5);
2956 Vcharset_cyrillic_iso8859_5 =
2957 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2958 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2959 build_string ("ISO8859-5"),
2960 build_string ("ISO8859-5 (Cyrillic)"),
2961 build_string ("ISO8859-5 (Cyrillic)"),
2962 build_string ("iso8859-5"),
2963 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2964 staticpro (&Vcharset_latin_iso8859_9);
2965 Vcharset_latin_iso8859_9 =
2966 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2967 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2968 build_string ("Latin-5"),
2969 build_string ("ISO8859-9 (Latin-5)"),
2970 build_string ("ISO8859-9 (Latin-5)"),
2971 build_string ("iso8859-9"),
2972 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2973 staticpro (&Vcharset_japanese_jisx0208_1978);
2974 Vcharset_japanese_jisx0208_1978 =
2975 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2976 Qjapanese_jisx0208_1978, 94, 2,
2977 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2978 build_string ("JIS X0208:1978"),
2979 build_string ("JIS X0208:1978 (Japanese)"),
2981 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2982 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2983 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2984 staticpro (&Vcharset_chinese_gb2312);
2985 Vcharset_chinese_gb2312 =
2986 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2987 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2988 build_string ("GB2312"),
2989 build_string ("GB2312)"),
2990 build_string ("GB2312 Chinese simplified"),
2991 build_string ("gb2312"),
2992 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2993 staticpro (&Vcharset_chinese_gb12345);
2994 Vcharset_chinese_gb12345 =
2995 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2996 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2997 build_string ("G1"),
2998 build_string ("GB 12345)"),
2999 build_string ("GB 12345-1990"),
3000 build_string ("GB12345\\(\\.1990\\)?-0"),
3001 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3002 staticpro (&Vcharset_japanese_jisx0208);
3003 Vcharset_japanese_jisx0208 =
3004 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3005 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3006 build_string ("JISX0208"),
3007 build_string ("JIS X0208:1983 (Japanese)"),
3008 build_string ("JIS X0208:1983 Japanese Kanji"),
3009 build_string ("jisx0208\\.1983"),
3010 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3012 staticpro (&Vcharset_japanese_jisx0208_1990);
3013 Vcharset_japanese_jisx0208_1990 =
3014 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3015 Qjapanese_jisx0208_1990, 94, 2,
3016 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3017 build_string ("JISX0208-1990"),
3018 build_string ("JIS X0208:1990 (Japanese)"),
3019 build_string ("JIS X0208:1990 Japanese Kanji"),
3020 build_string ("jisx0208\\.1990"),
3022 MIN_CHAR_JIS_X0208_1990,
3023 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3024 Qnil, CONVERSION_IDENTICAL);
3026 staticpro (&Vcharset_korean_ksc5601);
3027 Vcharset_korean_ksc5601 =
3028 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3029 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3030 build_string ("KSC5601"),
3031 build_string ("KSC5601 (Korean"),
3032 build_string ("KSC5601 Korean Hangul and Hanja"),
3033 build_string ("ksc5601"),
3034 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3035 staticpro (&Vcharset_japanese_jisx0212);
3036 Vcharset_japanese_jisx0212 =
3037 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3038 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3039 build_string ("JISX0212"),
3040 build_string ("JISX0212 (Japanese)"),
3041 build_string ("JISX0212 Japanese Supplement"),
3042 build_string ("jisx0212"),
3043 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3045 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3046 staticpro (&Vcharset_chinese_cns11643_1);
3047 Vcharset_chinese_cns11643_1 =
3048 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3049 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3050 build_string ("CNS11643-1"),
3051 build_string ("CNS11643-1 (Chinese traditional)"),
3053 ("CNS 11643 Plane 1 Chinese traditional"),
3054 build_string (CHINESE_CNS_PLANE_RE("1")),
3055 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3056 staticpro (&Vcharset_chinese_cns11643_2);
3057 Vcharset_chinese_cns11643_2 =
3058 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3059 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3060 build_string ("CNS11643-2"),
3061 build_string ("CNS11643-2 (Chinese traditional)"),
3063 ("CNS 11643 Plane 2 Chinese traditional"),
3064 build_string (CHINESE_CNS_PLANE_RE("2")),
3065 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3067 staticpro (&Vcharset_latin_tcvn5712);
3068 Vcharset_latin_tcvn5712 =
3069 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3070 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3071 build_string ("TCVN 5712"),
3072 build_string ("TCVN 5712 (VSCII-2)"),
3073 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3074 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3075 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3076 staticpro (&Vcharset_latin_viscii_lower);
3077 Vcharset_latin_viscii_lower =
3078 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3079 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3080 build_string ("VISCII lower"),
3081 build_string ("VISCII lower (Vietnamese)"),
3082 build_string ("VISCII lower (Vietnamese)"),
3083 build_string ("MULEVISCII-LOWER"),
3084 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3085 staticpro (&Vcharset_latin_viscii_upper);
3086 Vcharset_latin_viscii_upper =
3087 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3088 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3089 build_string ("VISCII upper"),
3090 build_string ("VISCII upper (Vietnamese)"),
3091 build_string ("VISCII upper (Vietnamese)"),
3092 build_string ("MULEVISCII-UPPER"),
3093 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3094 staticpro (&Vcharset_latin_viscii);
3095 Vcharset_latin_viscii =
3096 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3097 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3098 build_string ("VISCII"),
3099 build_string ("VISCII 1.1 (Vietnamese)"),
3100 build_string ("VISCII 1.1 (Vietnamese)"),
3101 build_string ("VISCII1\\.1"),
3102 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3103 staticpro (&Vcharset_chinese_big5);
3104 Vcharset_chinese_big5 =
3105 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3106 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3107 build_string ("Big5"),
3108 build_string ("Big5"),
3109 build_string ("Big5 Chinese traditional"),
3110 build_string ("big5-0"),
3112 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3113 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3115 staticpro (&Vcharset_chinese_big5_cdp);
3116 Vcharset_chinese_big5_cdp =
3117 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3118 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Big5-CDP"),
3120 build_string ("Big5 + CDP extension"),
3121 build_string ("Big5 with CDP extension"),
3122 build_string ("big5\\.cdp-0"),
3123 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3124 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3126 #define DEF_HANZIKU(n) \
3127 staticpro (&Vcharset_ideograph_hanziku_##n); \
3128 Vcharset_ideograph_hanziku_##n = \
3129 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3130 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3131 build_string ("HZK-"#n), \
3132 build_string ("HANZIKU-"#n), \
3133 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3135 ("hanziku-"#n"$"), \
3136 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3137 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3150 staticpro (&Vcharset_china3_jef);
3151 Vcharset_china3_jef =
3152 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3153 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3154 build_string ("JC3"),
3155 build_string ("JEF + CHINA3"),
3156 build_string ("JEF + CHINA3 private characters"),
3157 build_string ("china3jef-0"),
3158 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3159 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3160 staticpro (&Vcharset_ideograph_cbeta);
3161 Vcharset_ideograph_cbeta =
3162 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3163 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3164 build_string ("CB"),
3165 build_string ("CBETA"),
3166 build_string ("CBETA private characters"),
3167 build_string ("cbeta-0"),
3168 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3169 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3170 staticpro (&Vcharset_ideograph_gt);
3171 Vcharset_ideograph_gt =
3172 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3173 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3174 build_string ("GT"),
3175 build_string ("GT"),
3176 build_string ("GT"),
3178 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3179 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3180 #define DEF_GT_PJ(n) \
3181 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3182 Vcharset_ideograph_gt_pj_##n = \
3183 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3184 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3185 build_string ("GT-PJ-"#n), \
3186 build_string ("GT (pseudo JIS encoding) part "#n), \
3187 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3189 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3190 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3203 staticpro (&Vcharset_ideograph_daikanwa_2);
3204 Vcharset_ideograph_daikanwa_2 =
3205 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3206 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3207 build_string ("Daikanwa Rev."),
3208 build_string ("Morohashi's Daikanwa Rev."),
3210 ("Daikanwa dictionary (revised version)"),
3211 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3212 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3213 staticpro (&Vcharset_ideograph_daikanwa);
3214 Vcharset_ideograph_daikanwa =
3215 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3216 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3217 build_string ("Daikanwa"),
3218 build_string ("Morohashi's Daikanwa Rev.2"),
3220 ("Daikanwa dictionary (second revised version)"),
3221 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3222 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3223 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3225 staticpro (&Vcharset_ethiopic_ucs);
3226 Vcharset_ethiopic_ucs =
3227 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3228 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3229 build_string ("Ethiopic (UCS)"),
3230 build_string ("Ethiopic (UCS)"),
3231 build_string ("Ethiopic of UCS"),
3232 build_string ("Ethiopic-Unicode"),
3233 Qnil, 0x1200, 0x137F, 0, 0,
3234 Qnil, CONVERSION_IDENTICAL);
3236 staticpro (&Vcharset_chinese_big5_1);
3237 Vcharset_chinese_big5_1 =
3238 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3239 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3240 build_string ("Big5"),
3241 build_string ("Big5 (Level-1)"),
3243 ("Big5 Level-1 Chinese traditional"),
3244 build_string ("big5"),
3245 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3246 staticpro (&Vcharset_chinese_big5_2);
3247 Vcharset_chinese_big5_2 =
3248 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3249 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3250 build_string ("Big5"),
3251 build_string ("Big5 (Level-2)"),
3253 ("Big5 Level-2 Chinese traditional"),
3254 build_string ("big5"),
3255 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3257 #ifdef ENABLE_COMPOSITE_CHARS
3258 /* #### For simplicity, we put composite chars into a 96x96 charset.
3259 This is going to lead to problems because you can run out of
3260 room, esp. as we don't yet recycle numbers. */
3261 staticpro (&Vcharset_composite);
3262 Vcharset_composite =
3263 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3264 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3265 build_string ("Composite"),
3266 build_string ("Composite characters"),
3267 build_string ("Composite characters"),
3270 /* #### not dumped properly */
3271 composite_char_row_next = 32;
3272 composite_char_col_next = 32;
3274 Vcomposite_char_string2char_hash_table =
3275 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3276 Vcomposite_char_char2string_hash_table =
3277 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3278 staticpro (&Vcomposite_char_string2char_hash_table);
3279 staticpro (&Vcomposite_char_char2string_hash_table);
3280 #endif /* ENABLE_COMPOSITE_CHARS */