1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_ucs_big5;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_ideograph_gt;
95 Lisp_Object Vcharset_ideograph_gt_pj_1;
96 Lisp_Object Vcharset_ideograph_gt_pj_2;
97 Lisp_Object Vcharset_ideograph_gt_pj_3;
98 Lisp_Object Vcharset_ideograph_gt_pj_4;
99 Lisp_Object Vcharset_ideograph_gt_pj_5;
100 Lisp_Object Vcharset_ideograph_gt_pj_6;
101 Lisp_Object Vcharset_ideograph_gt_pj_7;
102 Lisp_Object Vcharset_ideograph_gt_pj_8;
103 Lisp_Object Vcharset_ideograph_gt_pj_9;
104 Lisp_Object Vcharset_ideograph_gt_pj_10;
105 Lisp_Object Vcharset_ideograph_gt_pj_11;
106 Lisp_Object Vcharset_ideograph_daikanwa_2;
107 Lisp_Object Vcharset_ideograph_daikanwa;
108 Lisp_Object Vcharset_ethiopic_ucs;
110 Lisp_Object Vcharset_chinese_big5_1;
111 Lisp_Object Vcharset_chinese_big5_2;
113 #ifdef ENABLE_COMPOSITE_CHARS
114 Lisp_Object Vcharset_composite;
116 /* Hash tables for composite chars. One maps string representing
117 composed chars to their equivalent chars; one goes the
119 Lisp_Object Vcomposite_char_char2string_hash_table;
120 Lisp_Object Vcomposite_char_string2char_hash_table;
122 static int composite_char_row_next;
123 static int composite_char_col_next;
125 #endif /* ENABLE_COMPOSITE_CHARS */
127 struct charset_lookup *chlook;
129 static const struct lrecord_description charset_lookup_description_1[] = {
130 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
132 NUM_LEADING_BYTES+4*128
139 static const struct struct_description charset_lookup_description = {
140 sizeof (struct charset_lookup),
141 charset_lookup_description_1
145 /* Table of number of bytes in the string representation of a character
146 indexed by the first byte of that representation.
148 rep_bytes_by_first_byte(c) is more efficient than the equivalent
149 canonical computation:
151 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
153 const Bytecount rep_bytes_by_first_byte[0xA0] =
154 { /* 0x00 - 0x7f are for straight ASCII */
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 /* 0x80 - 0x8f are for Dimension-1 official charsets */
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 /* 0x90 - 0x9d are for Dimension-2 official charsets */
170 /* 0x9e is for Dimension-1 private charsets */
171 /* 0x9f is for Dimension-2 private charsets */
172 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
178 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
180 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
184 if (XVECTOR_LENGTH (v) > ccs_len)
187 for (i = 0; i < XVECTOR_LENGTH (v); i++)
189 Lisp_Object c = XVECTOR_DATA(v)[i];
191 if (!NILP (c) && !CHARP (c))
195 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
207 put_char_ccs_code_point (Lisp_Object character,
208 Lisp_Object ccs, Lisp_Object value)
210 if (!EQ (XCHARSET_NAME (ccs), Qucs)
212 || (XCHAR (character) != XINT (value)))
214 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
218 { /* obsolete representation: value must be a list of bytes */
219 Lisp_Object ret = Fcar (value);
223 signal_simple_error ("Invalid value for coded-charset", value);
224 code_point = XINT (ret);
225 if (XCHARSET_GRAPHIC (ccs) == 1)
233 signal_simple_error ("Invalid value for coded-charset",
237 signal_simple_error ("Invalid value for coded-charset",
240 if (XCHARSET_GRAPHIC (ccs) == 1)
242 code_point = (code_point << 8) | j;
245 value = make_int (code_point);
247 else if (INTP (value))
249 code_point = XINT (value);
250 if (XCHARSET_GRAPHIC (ccs) == 1)
252 code_point &= 0x7F7F7F7F;
253 value = make_int (code_point);
257 signal_simple_error ("Invalid value for coded-charset", value);
261 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
264 decoding_table_remove_char (ccs, XINT (cpos));
267 decoding_table_put_char (ccs, code_point, character);
273 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
275 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
276 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
278 if (VECTORP (decoding_table))
280 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
284 decoding_table_remove_char (ccs, XINT (cpos));
287 if (CHAR_TABLEP (encoding_table))
289 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
297 int leading_code_private_11;
300 Lisp_Object Qcharsetp;
302 /* Qdoc_string, Qdimension, Qchars defined in general.c */
303 Lisp_Object Qregistry, Qfinal, Qgraphic;
304 Lisp_Object Qdirection;
305 Lisp_Object Qreverse_direction_charset;
306 Lisp_Object Qleading_byte;
307 Lisp_Object Qshort_name, Qlong_name;
309 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
310 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
327 Qjapanese_jisx0208_1978,
331 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
354 /* Qchinese_big5_cdp, */
355 Qideograph_hanziku_1,
356 Qideograph_hanziku_2,
357 Qideograph_hanziku_3,
358 Qideograph_hanziku_4,
359 Qideograph_hanziku_5,
360 Qideograph_hanziku_6,
361 Qideograph_hanziku_7,
362 Qideograph_hanziku_8,
363 Qideograph_hanziku_9,
364 Qideograph_hanziku_10,
365 Qideograph_hanziku_11,
366 Qideograph_hanziku_12,
367 Qideograph_daikanwa_2,
387 Lisp_Object Ql2r, Qr2l;
389 Lisp_Object Vcharset_hash_table;
391 /* Composite characters are characters constructed by overstriking two
392 or more regular characters.
394 1) The old Mule implementation involves storing composite characters
395 in a buffer as a tag followed by all of the actual characters
396 used to make up the composite character. I think this is a bad
397 idea; it greatly complicates code that wants to handle strings
398 one character at a time because it has to deal with the possibility
399 of great big ungainly characters. It's much more reasonable to
400 simply store an index into a table of composite characters.
402 2) The current implementation only allows for 16,384 separate
403 composite characters over the lifetime of the XEmacs process.
404 This could become a potential problem if the user
405 edited lots of different files that use composite characters.
406 Due to FSF bogosity, increasing the number of allowable
407 composite characters under Mule would decrease the number
408 of possible faces that can exist. Mule already has shrunk
409 this to 2048, and further shrinkage would become uncomfortable.
410 No such problems exist in XEmacs.
412 Composite characters could be represented as 0x80 C1 C2 C3,
413 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
414 for slightly under 2^20 (one million) composite characters
415 over the XEmacs process lifetime, and you only need to
416 increase the size of a Mule character from 19 to 21 bits.
417 Or you could use 0x80 C1 C2 C3 C4, allowing for about
418 85 million (slightly over 2^26) composite characters. */
421 /************************************************************************/
422 /* Basic Emchar functions */
423 /************************************************************************/
425 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
426 string in STR. Returns the number of bytes stored.
427 Do not call this directly. Use the macro set_charptr_emchar() instead.
431 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
446 else if ( c <= 0x7ff )
448 *p++ = (c >> 6) | 0xc0;
449 *p++ = (c & 0x3f) | 0x80;
451 else if ( c <= 0xffff )
453 *p++ = (c >> 12) | 0xe0;
454 *p++ = ((c >> 6) & 0x3f) | 0x80;
455 *p++ = (c & 0x3f) | 0x80;
457 else if ( c <= 0x1fffff )
459 *p++ = (c >> 18) | 0xf0;
460 *p++ = ((c >> 12) & 0x3f) | 0x80;
461 *p++ = ((c >> 6) & 0x3f) | 0x80;
462 *p++ = (c & 0x3f) | 0x80;
464 else if ( c <= 0x3ffffff )
466 *p++ = (c >> 24) | 0xf8;
467 *p++ = ((c >> 18) & 0x3f) | 0x80;
468 *p++ = ((c >> 12) & 0x3f) | 0x80;
469 *p++ = ((c >> 6) & 0x3f) | 0x80;
470 *p++ = (c & 0x3f) | 0x80;
474 *p++ = (c >> 30) | 0xfc;
475 *p++ = ((c >> 24) & 0x3f) | 0x80;
476 *p++ = ((c >> 18) & 0x3f) | 0x80;
477 *p++ = ((c >> 12) & 0x3f) | 0x80;
478 *p++ = ((c >> 6) & 0x3f) | 0x80;
479 *p++ = (c & 0x3f) | 0x80;
482 BREAKUP_CHAR (c, charset, c1, c2);
483 lb = CHAR_LEADING_BYTE (c);
484 if (LEADING_BYTE_PRIVATE_P (lb))
485 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
487 if (EQ (charset, Vcharset_control_1))
496 /* Return the first character from a Mule-encoded string in STR,
497 assuming it's non-ASCII. Do not call this directly.
498 Use the macro charptr_emchar() instead. */
501 non_ascii_charptr_emchar (const Bufbyte *str)
514 else if ( b >= 0xf8 )
519 else if ( b >= 0xf0 )
524 else if ( b >= 0xe0 )
529 else if ( b >= 0xc0 )
539 for( ; len > 0; len-- )
542 ch = ( ch << 6 ) | ( b & 0x3f );
546 Bufbyte i0 = *str, i1, i2 = 0;
549 if (i0 == LEADING_BYTE_CONTROL_1)
550 return (Emchar) (*++str - 0x20);
552 if (LEADING_BYTE_PREFIX_P (i0))
557 charset = CHARSET_BY_LEADING_BYTE (i0);
558 if (XCHARSET_DIMENSION (charset) == 2)
561 return MAKE_CHAR (charset, i1, i2);
565 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
566 Do not call this directly. Use the macro valid_char_p() instead. */
570 non_ascii_valid_char_p (Emchar ch)
574 /* Must have only lowest 19 bits set */
578 f1 = CHAR_FIELD1 (ch);
579 f2 = CHAR_FIELD2 (ch);
580 f3 = CHAR_FIELD3 (ch);
586 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
587 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
588 f2 > MAX_CHAR_FIELD2_PRIVATE)
593 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
594 f2 <= MAX_CHAR_FIELD2_PRIVATE))
598 NOTE: This takes advantage of the fact that
599 FIELD2_TO_OFFICIAL_LEADING_BYTE and
600 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
602 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
603 if (EQ (charset, Qnil))
605 return (XCHARSET_CHARS (charset) == 96);
611 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
612 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
613 f1 > MAX_CHAR_FIELD1_PRIVATE)
615 if (f2 < 0x20 || f3 < 0x20)
618 #ifdef ENABLE_COMPOSITE_CHARS
619 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
621 if (UNBOUNDP (Fgethash (make_int (ch),
622 Vcomposite_char_char2string_hash_table,
627 #endif /* ENABLE_COMPOSITE_CHARS */
629 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
630 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
633 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
635 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
638 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
640 if (EQ (charset, Qnil))
642 return (XCHARSET_CHARS (charset) == 96);
648 /************************************************************************/
649 /* Basic string functions */
650 /************************************************************************/
652 /* Copy the character pointed to by SRC into DST. Do not call this
653 directly. Use the macro charptr_copy_char() instead.
654 Return the number of bytes copied. */
657 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
659 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
661 for (i = bytes; i; i--, dst++, src++)
667 /************************************************************************/
668 /* streams of Emchars */
669 /************************************************************************/
671 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
672 The functions below are not meant to be called directly; use
673 the macros in insdel.h. */
676 Lstream_get_emchar_1 (Lstream *stream, int ch)
678 Bufbyte str[MAX_EMCHAR_LEN];
679 Bufbyte *strptr = str;
682 str[0] = (Bufbyte) ch;
684 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
686 int c = Lstream_getc (stream);
687 bufpos_checking_assert (c >= 0);
688 *++strptr = (Bufbyte) c;
690 return charptr_emchar (str);
694 Lstream_fput_emchar (Lstream *stream, Emchar ch)
696 Bufbyte str[MAX_EMCHAR_LEN];
697 Bytecount len = set_charptr_emchar (str, ch);
698 return Lstream_write (stream, str, len);
702 Lstream_funget_emchar (Lstream *stream, Emchar ch)
704 Bufbyte str[MAX_EMCHAR_LEN];
705 Bytecount len = set_charptr_emchar (str, ch);
706 Lstream_unread (stream, str, len);
710 /************************************************************************/
712 /************************************************************************/
715 mark_charset (Lisp_Object obj)
717 Lisp_Charset *cs = XCHARSET (obj);
719 mark_object (cs->short_name);
720 mark_object (cs->long_name);
721 mark_object (cs->doc_string);
722 mark_object (cs->registry);
723 mark_object (cs->ccl_program);
725 mark_object (cs->decoding_table);
726 mark_object (cs->mother);
732 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
734 Lisp_Charset *cs = XCHARSET (obj);
738 error ("printing unreadable object #<charset %s 0x%x>",
739 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
742 write_c_string ("#<charset ", printcharfun);
743 print_internal (CHARSET_NAME (cs), printcharfun, 0);
744 write_c_string (" ", printcharfun);
745 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
746 write_c_string (" ", printcharfun);
747 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
748 write_c_string (" ", printcharfun);
749 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
750 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
752 CHARSET_DIMENSION (cs),
753 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
754 CHARSET_COLUMNS (cs),
755 CHARSET_GRAPHIC (cs),
757 write_c_string (buf, printcharfun);
758 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
759 sprintf (buf, " 0x%x>", cs->header.uid);
760 write_c_string (buf, printcharfun);
763 static const struct lrecord_description charset_description[] = {
764 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
778 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
779 mark_charset, print_charset, 0, 0, 0,
783 /* Make a new charset. */
784 /* #### SJT Should generic properties be allowed? */
786 make_charset (Charset_ID id, Lisp_Object name,
787 unsigned short chars, unsigned char dimension,
788 unsigned char columns, unsigned char graphic,
789 Bufbyte final, unsigned char direction, Lisp_Object short_name,
790 Lisp_Object long_name, Lisp_Object doc,
792 Lisp_Object decoding_table,
793 Emchar min_code, Emchar max_code,
794 Emchar code_offset, unsigned char byte_offset,
795 Lisp_Object mother, unsigned char conversion)
798 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
802 XSETCHARSET (obj, cs);
804 CHARSET_ID (cs) = id;
805 CHARSET_NAME (cs) = name;
806 CHARSET_SHORT_NAME (cs) = short_name;
807 CHARSET_LONG_NAME (cs) = long_name;
808 CHARSET_CHARS (cs) = chars;
809 CHARSET_DIMENSION (cs) = dimension;
810 CHARSET_DIRECTION (cs) = direction;
811 CHARSET_COLUMNS (cs) = columns;
812 CHARSET_GRAPHIC (cs) = graphic;
813 CHARSET_FINAL (cs) = final;
814 CHARSET_DOC_STRING (cs) = doc;
815 CHARSET_REGISTRY (cs) = reg;
816 CHARSET_CCL_PROGRAM (cs) = Qnil;
817 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
819 CHARSET_DECODING_TABLE(cs) = Qunbound;
820 CHARSET_MIN_CODE (cs) = min_code;
821 CHARSET_MAX_CODE (cs) = max_code;
822 CHARSET_CODE_OFFSET (cs) = code_offset;
823 CHARSET_BYTE_OFFSET (cs) = byte_offset;
824 CHARSET_MOTHER (cs) = mother;
825 CHARSET_CONVERSION (cs) = conversion;
829 if (id == LEADING_BYTE_ASCII)
830 CHARSET_REP_BYTES (cs) = 1;
832 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
834 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
839 /* some charsets do not have final characters. This includes
840 ASCII, Control-1, Composite, and the two faux private
842 unsigned char iso2022_type
843 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
845 if (code_offset == 0)
847 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
848 chlook->charset_by_attributes[iso2022_type][final] = obj;
852 (chlook->charset_by_attributes[iso2022_type][final][direction]));
853 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
857 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
858 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
860 /* Some charsets are "faux" and don't have names or really exist at
861 all except in the leading-byte table. */
863 Fputhash (name, obj, Vcharset_hash_table);
868 get_unallocated_leading_byte (int dimension)
873 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
876 lb = chlook->next_allocated_leading_byte++;
880 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
883 lb = chlook->next_allocated_1_byte_leading_byte++;
887 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
890 lb = chlook->next_allocated_2_byte_leading_byte++;
896 ("No more character sets free for this dimension",
897 make_int (dimension));
903 /* Number of Big5 characters which have the same code in 1st byte. */
905 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
908 decode_defined_char (Lisp_Object ccs, int code_point)
910 int dim = XCHARSET_DIMENSION (ccs);
911 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
919 = get_ccs_octet_table (decoding_table, ccs,
920 (code_point >> (dim * 8)) & 255);
922 if (CHARP (decoding_table))
923 return XCHAR (decoding_table);
924 #ifdef HAVE_CHISE_CLIENT
925 if (EQ (decoding_table, Qunloaded))
927 char_id = load_char_decoding_entry_maybe (ccs, code_point);
932 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
934 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
936 if ( EQ (mother, Vcharset_ucs) )
937 return DECODE_CHAR (mother, code_point);
939 return decode_defined_char (mother, code_point);
941 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
944 = (((code_point >> 8) & 0x7F) - 33) * 94
945 + (( code_point & 0x7F) - 33);
946 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
947 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
949 b2 += b2 < 0x3F ? 0x40 : 0x62;
950 return decode_defined_char (mother, (b1 << 8) | b2);
952 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
955 = (((code_point >> 8) & 0x7F) - 33) * 94
956 + (( code_point & 0x7F) - 33)
957 + BIG5_SAME_ROW * (0xC9 - 0xA1);
958 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
959 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
961 b2 += b2 < 0x3F ? 0x40 : 0x62;
962 return decode_defined_char (mother, (b1 << 8) | b2);
969 decode_builtin_char (Lisp_Object charset, int code_point)
971 Lisp_Object mother = XCHARSET_MOTHER (charset);
974 if ( XCHARSET_MAX_CODE (charset) > 0 )
976 if ( CHARSETP (mother) )
978 int code = code_point;
980 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
982 int row = code_point >> 8;
983 int cell = code_point & 255;
987 else if (row < 16 + 32 + 30)
988 code = (row - (16 + 32)) * 94 + cell - 33;
989 else if (row < 18 + 32 + 30)
991 else if (row < 18 + 32 + 60)
992 code = (row - (18 + 32)) * 94 + cell - 33;
994 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
996 int plane = code_point >> 16;
997 int row = (code_point >> 8) & 255;
998 int cell = code_point & 255;
1002 else if (row < 16 + 32 + 30)
1004 = (plane - 33) * 94 * 60
1005 + (row - (16 + 32)) * 94
1007 else if (row < 18 + 32 + 30)
1009 else if (row < 18 + 32 + 60)
1011 = (plane - 33) * 94 * 60
1012 + (row - (18 + 32)) * 94
1015 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1018 = (((code_point >> 8) & 0x7F) - 33) * 94
1019 + (( code_point & 0x7F) - 33);
1020 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1021 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1023 b2 += b2 < 0x3F ? 0x40 : 0x62;
1024 code = (b1 << 8) | b2;
1026 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1029 = (((code_point >> 8) & 0x7F) - 33) * 94
1030 + (( code_point & 0x7F) - 33)
1031 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1032 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1033 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1035 b2 += b2 < 0x3F ? 0x40 : 0x62;
1036 code = (b1 << 8) | b2;
1039 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
1044 = (XCHARSET_DIMENSION (charset) == 1
1046 code_point - XCHARSET_BYTE_OFFSET (charset)
1048 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1049 * XCHARSET_CHARS (charset)
1050 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1051 + XCHARSET_CODE_OFFSET (charset);
1052 if ((cid < XCHARSET_MIN_CODE (charset))
1053 || (XCHARSET_MAX_CODE (charset) < cid))
1058 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1060 if (XCHARSET_DIMENSION (charset) == 1)
1062 switch (XCHARSET_CHARS (charset))
1066 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1069 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1077 switch (XCHARSET_CHARS (charset))
1080 return MIN_CHAR_94x94
1081 + (final - '0') * 94 * 94
1082 + (((code_point >> 8) & 0x7F) - 33) * 94
1083 + ((code_point & 0x7F) - 33);
1085 return MIN_CHAR_96x96
1086 + (final - '0') * 96 * 96
1087 + (((code_point >> 8) & 0x7F) - 32) * 96
1088 + ((code_point & 0x7F) - 32);
1100 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1102 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1105 if ( CHAR_TABLEP (encoding_table)
1106 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1111 Lisp_Object mother = XCHARSET_MOTHER (charset);
1112 int min = XCHARSET_MIN_CODE (charset);
1113 int max = XCHARSET_MAX_CODE (charset);
1116 if ( CHARSETP (mother) )
1118 if (XCHARSET_FINAL (charset) >= '0')
1119 code = charset_code_point (mother, ch, 1);
1121 code = charset_code_point (mother, ch, defined_only);
1123 else if (defined_only)
1125 else if ( ((max == 0) && CHARSETP (mother)
1126 && (XCHARSET_FINAL (charset) == 0))
1127 || ((min <= ch) && (ch <= max)) )
1129 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1130 || ((min <= code) && (code <= max)) )
1132 int d = code - XCHARSET_CODE_OFFSET (charset);
1134 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1136 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1138 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1140 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1143 int cell = d % 94 + 33;
1149 return (row << 8) | cell;
1151 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1153 int B1 = d >> 8, B2 = d & 0xFF;
1155 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1156 - (B2 < 0x7F ? 0x40 : 0x62);
1160 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1163 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1165 int B1 = d >> 8, B2 = d & 0xFF;
1167 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1168 - (B2 < 0x7F ? 0x40 : 0x62);
1172 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1173 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1176 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1177 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1178 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1179 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1180 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1182 int plane = d / (94 * 60) + 33;
1183 int row = (d % (94 * 60)) / 94;
1184 int cell = d % 94 + 33;
1190 return (plane << 16) | (row << 8) | cell;
1192 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1194 ( (d / (94 * 94) + 33) << 16)
1195 | ((d / 94 % 94 + 33) << 8)
1197 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1199 ( (d / (96 * 96) + 32) << 16)
1200 | ((d / 96 % 96 + 32) << 8)
1202 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1204 ( (d / (94 * 94 * 94) + 33) << 24)
1205 | ((d / (94 * 94) % 94 + 33) << 16)
1206 | ((d / 94 % 94 + 33) << 8)
1208 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1210 ( (d / (96 * 96 * 96) + 32) << 24)
1211 | ((d / (96 * 96) % 96 + 32) << 16)
1212 | ((d / 96 % 96 + 32) << 8)
1216 printf ("Unknown CCS-conversion %d is specified!",
1217 XCHARSET_CONVERSION (charset));
1221 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1222 ( XCHARSET_MIN_CODE (charset) == 0 )
1224 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1225 (XCHARSET_CODE_OFFSET (charset)
1226 == XCHARSET_MIN_CODE (charset))
1231 if (XCHARSET_DIMENSION (charset) == 1)
1233 if (XCHARSET_CHARS (charset) == 94)
1235 if (((d = ch - (MIN_CHAR_94
1236 + (XCHARSET_FINAL (charset) - '0') * 94))
1241 else if (XCHARSET_CHARS (charset) == 96)
1243 if (((d = ch - (MIN_CHAR_96
1244 + (XCHARSET_FINAL (charset) - '0') * 96))
1252 else if (XCHARSET_DIMENSION (charset) == 2)
1254 if (XCHARSET_CHARS (charset) == 94)
1256 if (((d = ch - (MIN_CHAR_94x94
1258 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1261 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1263 else if (XCHARSET_CHARS (charset) == 96)
1265 if (((d = ch - (MIN_CHAR_96x96
1267 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1270 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1281 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1283 if (c <= MAX_CHAR_BASIC_LATIN)
1285 *charset = Vcharset_ascii;
1290 *charset = Vcharset_control_1;
1295 *charset = Vcharset_latin_iso8859_1;
1299 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1301 *charset = Vcharset_hebrew_iso8859_8;
1302 return c - MIN_CHAR_HEBREW + 0x20;
1305 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1307 *charset = Vcharset_thai_tis620;
1308 return c - MIN_CHAR_THAI + 0x20;
1311 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1312 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1314 return list2 (Vcharset_katakana_jisx0201,
1315 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1318 else if (c <= MAX_CHAR_BMP)
1320 *charset = Vcharset_ucs_bmp;
1323 else if (c <= MAX_CHAR_SMP)
1325 *charset = Vcharset_ucs_smp;
1326 return c - MIN_CHAR_SMP;
1328 else if (c <= MAX_CHAR_SIP)
1330 *charset = Vcharset_ucs_sip;
1331 return c - MIN_CHAR_SIP;
1333 else if (c < MIN_CHAR_DAIKANWA)
1335 *charset = Vcharset_ucs;
1338 else if (c <= MAX_CHAR_DAIKANWA)
1340 *charset = Vcharset_ideograph_daikanwa;
1341 return c - MIN_CHAR_DAIKANWA;
1343 else if (c < MIN_CHAR_94)
1345 *charset = Vcharset_ucs;
1348 else if (c <= MAX_CHAR_94)
1350 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1351 ((c - MIN_CHAR_94) / 94) + '0',
1352 CHARSET_LEFT_TO_RIGHT);
1353 if (!NILP (*charset))
1354 return ((c - MIN_CHAR_94) % 94) + 33;
1357 *charset = Vcharset_ucs;
1361 else if (c <= MAX_CHAR_96)
1363 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1364 ((c - MIN_CHAR_96) / 96) + '0',
1365 CHARSET_LEFT_TO_RIGHT);
1366 if (!NILP (*charset))
1367 return ((c - MIN_CHAR_96) % 96) + 32;
1370 *charset = Vcharset_ucs;
1374 else if (c <= MAX_CHAR_94x94)
1377 = CHARSET_BY_ATTRIBUTES (94, 2,
1378 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1379 CHARSET_LEFT_TO_RIGHT);
1380 if (!NILP (*charset))
1381 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1382 | (((c - MIN_CHAR_94x94) % 94) + 33);
1385 *charset = Vcharset_ucs;
1389 else if (c <= MAX_CHAR_96x96)
1392 = CHARSET_BY_ATTRIBUTES (96, 2,
1393 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1394 CHARSET_LEFT_TO_RIGHT);
1395 if (!NILP (*charset))
1396 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1397 | (((c - MIN_CHAR_96x96) % 96) + 32);
1400 *charset = Vcharset_ucs;
1406 *charset = Vcharset_ucs;
1411 Lisp_Object Vdefault_coded_charset_priority_list;
1415 /************************************************************************/
1416 /* Basic charset Lisp functions */
1417 /************************************************************************/
1419 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1420 Return non-nil if OBJECT is a charset.
1424 return CHARSETP (object) ? Qt : Qnil;
1427 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1428 Retrieve the charset of the given name.
1429 If CHARSET-OR-NAME is a charset object, it is simply returned.
1430 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1431 nil is returned. Otherwise the associated charset object is returned.
1435 if (CHARSETP (charset_or_name))
1436 return charset_or_name;
1438 CHECK_SYMBOL (charset_or_name);
1439 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1442 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1443 Retrieve the charset of the given name.
1444 Same as `find-charset' except an error is signalled if there is no such
1445 charset instead of returning nil.
1449 Lisp_Object charset = Ffind_charset (name);
1452 signal_simple_error ("No such charset", name);
1456 /* We store the charsets in hash tables with the names as the key and the
1457 actual charset object as the value. Occasionally we need to use them
1458 in a list format. These routines provide us with that. */
1459 struct charset_list_closure
1461 Lisp_Object *charset_list;
1465 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1466 void *charset_list_closure)
1468 /* This function can GC */
1469 struct charset_list_closure *chcl =
1470 (struct charset_list_closure*) charset_list_closure;
1471 Lisp_Object *charset_list = chcl->charset_list;
1473 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1477 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1478 Return a list of the names of all defined charsets.
1482 Lisp_Object charset_list = Qnil;
1483 struct gcpro gcpro1;
1484 struct charset_list_closure charset_list_closure;
1486 GCPRO1 (charset_list);
1487 charset_list_closure.charset_list = &charset_list;
1488 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1489 &charset_list_closure);
1492 return charset_list;
1495 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1496 Return the name of charset CHARSET.
1500 return XCHARSET_NAME (Fget_charset (charset));
1503 /* #### SJT Should generic properties be allowed? */
1504 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1505 Define a new character set.
1506 This function is for use with Mule support.
1507 NAME is a symbol, the name by which the character set is normally referred.
1508 DOC-STRING is a string describing the character set.
1509 PROPS is a property list, describing the specific nature of the
1510 character set. Recognized properties are:
1512 'short-name Short version of the charset name (ex: Latin-1)
1513 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1514 'registry A regular expression matching the font registry field for
1516 'dimension Number of octets used to index a character in this charset.
1517 Either 1 or 2. Defaults to 1.
1518 If UTF-2000 feature is enabled, 3 or 4 are also available.
1519 'columns Number of columns used to display a character in this charset.
1520 Only used in TTY mode. (Under X, the actual width of a
1521 character can be derived from the font used to display the
1522 characters.) If unspecified, defaults to the dimension
1523 (this is almost always the correct value).
1524 'chars Number of characters in each dimension (94 or 96).
1525 Defaults to 94. Note that if the dimension is 2, the
1526 character set thus described is 94x94 or 96x96.
1527 If UTF-2000 feature is enabled, 128 or 256 are also available.
1528 'final Final byte of ISO 2022 escape sequence. Must be
1529 supplied. Each combination of (DIMENSION, CHARS) defines a
1530 separate namespace for final bytes. Note that ISO
1531 2022 restricts the final byte to the range
1532 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1533 dimension == 2. Note also that final bytes in the range
1534 0x30 - 0x3F are reserved for user-defined (not official)
1536 'graphic 0 (use left half of font on output) or 1 (use right half
1537 of font on output). Defaults to 0. For example, for
1538 a font whose registry is ISO8859-1, the left half
1539 (octets 0x20 - 0x7F) is the `ascii' character set, while
1540 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1541 character set. With 'graphic set to 0, the octets
1542 will have their high bit cleared; with it set to 1,
1543 the octets will have their high bit set.
1544 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1546 'ccl-program A compiled CCL program used to convert a character in
1547 this charset into an index into the font. This is in
1548 addition to the 'graphic property. The CCL program
1549 is passed the octets of the character, with the high
1550 bit cleared and set depending upon whether the value
1551 of the 'graphic property is 0 or 1.
1552 'mother [UTF-2000 only] Base coded-charset.
1553 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1554 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1555 'code-offset [UTF-2000 only] Offset for a code-point of a base
1557 'conversion [UTF-2000 only] Conversion for a code-point of a base
1558 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1560 (name, doc_string, props))
1562 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1563 int direction = CHARSET_LEFT_TO_RIGHT;
1564 Lisp_Object registry = Qnil;
1565 Lisp_Object charset;
1566 Lisp_Object ccl_program = Qnil;
1567 Lisp_Object short_name = Qnil, long_name = Qnil;
1568 Lisp_Object mother = Qnil;
1569 int min_code = 0, max_code = 0, code_offset = 0;
1570 int byte_offset = -1;
1573 CHECK_SYMBOL (name);
1574 if (!NILP (doc_string))
1575 CHECK_STRING (doc_string);
1577 charset = Ffind_charset (name);
1578 if (!NILP (charset))
1579 signal_simple_error ("Cannot redefine existing charset", name);
1582 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1584 if (EQ (keyword, Qshort_name))
1586 CHECK_STRING (value);
1590 else if (EQ (keyword, Qlong_name))
1592 CHECK_STRING (value);
1596 else if (EQ (keyword, Qdimension))
1599 dimension = XINT (value);
1600 if (dimension < 1 ||
1607 signal_simple_error ("Invalid value for 'dimension", value);
1610 else if (EQ (keyword, Qchars))
1613 chars = XINT (value);
1614 if (chars != 94 && chars != 96
1616 && chars != 128 && chars != 256
1619 signal_simple_error ("Invalid value for 'chars", value);
1622 else if (EQ (keyword, Qcolumns))
1625 columns = XINT (value);
1626 if (columns != 1 && columns != 2)
1627 signal_simple_error ("Invalid value for 'columns", value);
1630 else if (EQ (keyword, Qgraphic))
1633 graphic = XINT (value);
1641 signal_simple_error ("Invalid value for 'graphic", value);
1644 else if (EQ (keyword, Qregistry))
1646 CHECK_STRING (value);
1650 else if (EQ (keyword, Qdirection))
1652 if (EQ (value, Ql2r))
1653 direction = CHARSET_LEFT_TO_RIGHT;
1654 else if (EQ (value, Qr2l))
1655 direction = CHARSET_RIGHT_TO_LEFT;
1657 signal_simple_error ("Invalid value for 'direction", value);
1660 else if (EQ (keyword, Qfinal))
1662 CHECK_CHAR_COERCE_INT (value);
1663 final = XCHAR (value);
1664 if (final < '0' || final > '~')
1665 signal_simple_error ("Invalid value for 'final", value);
1669 else if (EQ (keyword, Qmother))
1671 mother = Fget_charset (value);
1674 else if (EQ (keyword, Qmin_code))
1677 min_code = XUINT (value);
1680 else if (EQ (keyword, Qmax_code))
1683 max_code = XUINT (value);
1686 else if (EQ (keyword, Qcode_offset))
1689 code_offset = XUINT (value);
1692 else if (EQ (keyword, Qconversion))
1694 if (EQ (value, Q94x60))
1695 conversion = CONVERSION_94x60;
1696 else if (EQ (value, Q94x94x60))
1697 conversion = CONVERSION_94x94x60;
1698 else if (EQ (value, Qbig5_1))
1699 conversion = CONVERSION_BIG5_1;
1700 else if (EQ (value, Qbig5_2))
1701 conversion = CONVERSION_BIG5_2;
1703 signal_simple_error ("Unrecognized conversion", value);
1707 else if (EQ (keyword, Qccl_program))
1709 struct ccl_program test_ccl;
1711 if (setup_ccl_program (&test_ccl, value) < 0)
1712 signal_simple_error ("Invalid value for 'ccl-program", value);
1713 ccl_program = value;
1717 signal_simple_error ("Unrecognized property", keyword);
1723 error ("'final must be specified");
1725 if (dimension == 2 && final > 0x5F)
1727 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1730 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1731 CHARSET_LEFT_TO_RIGHT)) ||
1732 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1733 CHARSET_RIGHT_TO_LEFT)))
1735 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1737 id = get_unallocated_leading_byte (dimension);
1739 if (NILP (doc_string))
1740 doc_string = build_string ("");
1742 if (NILP (registry))
1743 registry = build_string ("");
1745 if (NILP (short_name))
1746 XSETSTRING (short_name, XSYMBOL (name)->name);
1748 if (NILP (long_name))
1749 long_name = doc_string;
1752 columns = dimension;
1754 if (byte_offset < 0)
1758 else if (chars == 96)
1764 charset = make_charset (id, name, chars, dimension, columns, graphic,
1765 final, direction, short_name, long_name,
1766 doc_string, registry,
1767 Qnil, min_code, max_code, code_offset, byte_offset,
1768 mother, conversion);
1769 if (!NILP (ccl_program))
1770 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1774 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1776 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1777 NEW-NAME is the name of the new charset. Return the new charset.
1779 (charset, new_name))
1781 Lisp_Object new_charset = Qnil;
1782 int id, chars, dimension, columns, graphic, final;
1784 Lisp_Object registry, doc_string, short_name, long_name;
1787 charset = Fget_charset (charset);
1788 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1789 signal_simple_error ("Charset already has reverse-direction charset",
1792 CHECK_SYMBOL (new_name);
1793 if (!NILP (Ffind_charset (new_name)))
1794 signal_simple_error ("Cannot redefine existing charset", new_name);
1796 cs = XCHARSET (charset);
1798 chars = CHARSET_CHARS (cs);
1799 dimension = CHARSET_DIMENSION (cs);
1800 columns = CHARSET_COLUMNS (cs);
1801 id = get_unallocated_leading_byte (dimension);
1803 graphic = CHARSET_GRAPHIC (cs);
1804 final = CHARSET_FINAL (cs);
1805 direction = CHARSET_RIGHT_TO_LEFT;
1806 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1807 direction = CHARSET_LEFT_TO_RIGHT;
1808 doc_string = CHARSET_DOC_STRING (cs);
1809 short_name = CHARSET_SHORT_NAME (cs);
1810 long_name = CHARSET_LONG_NAME (cs);
1811 registry = CHARSET_REGISTRY (cs);
1813 new_charset = make_charset (id, new_name, chars, dimension, columns,
1814 graphic, final, direction, short_name, long_name,
1815 doc_string, registry,
1817 CHARSET_DECODING_TABLE(cs),
1818 CHARSET_MIN_CODE(cs),
1819 CHARSET_MAX_CODE(cs),
1820 CHARSET_CODE_OFFSET(cs),
1821 CHARSET_BYTE_OFFSET(cs),
1823 CHARSET_CONVERSION (cs)
1825 Qnil, 0, 0, 0, 0, Qnil, 0
1829 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1830 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1835 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1836 Define symbol ALIAS as an alias for CHARSET.
1840 CHECK_SYMBOL (alias);
1841 charset = Fget_charset (charset);
1842 return Fputhash (alias, charset, Vcharset_hash_table);
1845 /* #### Reverse direction charsets not yet implemented. */
1847 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1849 Return the reverse-direction charset parallel to CHARSET, if any.
1850 This is the charset with the same properties (in particular, the same
1851 dimension, number of characters per dimension, and final byte) as
1852 CHARSET but whose characters are displayed in the opposite direction.
1856 charset = Fget_charset (charset);
1857 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1861 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1862 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1863 If DIRECTION is omitted, both directions will be checked (left-to-right
1864 will be returned if character sets exist for both directions).
1866 (dimension, chars, final, direction))
1868 int dm, ch, fi, di = -1;
1869 Lisp_Object obj = Qnil;
1871 CHECK_INT (dimension);
1872 dm = XINT (dimension);
1873 if (dm < 1 || dm > 2)
1874 signal_simple_error ("Invalid value for DIMENSION", dimension);
1878 if (ch != 94 && ch != 96)
1879 signal_simple_error ("Invalid value for CHARS", chars);
1881 CHECK_CHAR_COERCE_INT (final);
1883 if (fi < '0' || fi > '~')
1884 signal_simple_error ("Invalid value for FINAL", final);
1886 if (EQ (direction, Ql2r))
1887 di = CHARSET_LEFT_TO_RIGHT;
1888 else if (EQ (direction, Qr2l))
1889 di = CHARSET_RIGHT_TO_LEFT;
1890 else if (!NILP (direction))
1891 signal_simple_error ("Invalid value for DIRECTION", direction);
1893 if (dm == 2 && fi > 0x5F)
1895 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1899 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1901 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1904 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1907 return XCHARSET_NAME (obj);
1911 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1912 Return short name of CHARSET.
1916 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1919 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1920 Return long name of CHARSET.
1924 return XCHARSET_LONG_NAME (Fget_charset (charset));
1927 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1928 Return description of CHARSET.
1932 return XCHARSET_DOC_STRING (Fget_charset (charset));
1935 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1936 Return dimension of CHARSET.
1940 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1943 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1944 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1945 Recognized properties are those listed in `make-charset', as well as
1946 'name and 'doc-string.
1952 charset = Fget_charset (charset);
1953 cs = XCHARSET (charset);
1955 CHECK_SYMBOL (prop);
1956 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1957 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1958 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1959 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1960 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1961 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1962 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1963 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1964 Qnil : make_char (CHARSET_FINAL (cs));
1965 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1966 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1967 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1968 if (EQ (prop, Qdirection))
1969 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1970 if (EQ (prop, Qreverse_direction_charset))
1972 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1973 /* #### Is this translation OK? If so, error checking sufficient? */
1974 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1977 if (EQ (prop, Qmother))
1978 return CHARSET_MOTHER (cs);
1979 if (EQ (prop, Qmin_code))
1980 return make_int (CHARSET_MIN_CODE (cs));
1981 if (EQ (prop, Qmax_code))
1982 return make_int (CHARSET_MAX_CODE (cs));
1984 signal_simple_error ("Unrecognized charset property name", prop);
1985 return Qnil; /* not reached */
1988 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1989 Return charset identification number of CHARSET.
1993 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1996 /* #### We need to figure out which properties we really want to
1999 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2000 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2002 (charset, ccl_program))
2004 struct ccl_program test_ccl;
2006 charset = Fget_charset (charset);
2007 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2008 signal_simple_error ("Invalid ccl-program", ccl_program);
2009 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2014 invalidate_charset_font_caches (Lisp_Object charset)
2016 /* Invalidate font cache entries for charset on all devices. */
2017 Lisp_Object devcons, concons, hash_table;
2018 DEVICE_LOOP_NO_BREAK (devcons, concons)
2020 struct device *d = XDEVICE (XCAR (devcons));
2021 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2022 if (!UNBOUNDP (hash_table))
2023 Fclrhash (hash_table);
2027 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2028 Set the 'registry property of CHARSET to REGISTRY.
2030 (charset, registry))
2032 charset = Fget_charset (charset);
2033 CHECK_STRING (registry);
2034 XCHARSET_REGISTRY (charset) = registry;
2035 invalidate_charset_font_caches (charset);
2036 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2041 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2042 Return mapping-table of CHARSET.
2046 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2049 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2050 Set mapping-table of CHARSET to TABLE.
2054 struct Lisp_Charset *cs;
2058 charset = Fget_charset (charset);
2059 cs = XCHARSET (charset);
2063 CHARSET_DECODING_TABLE(cs) = Qnil;
2066 else if (VECTORP (table))
2068 int ccs_len = CHARSET_BYTE_SIZE (cs);
2069 int ret = decoding_table_check_elements (table,
2070 CHARSET_DIMENSION (cs),
2075 signal_simple_error ("Too big table", table);
2077 signal_simple_error ("Invalid element is found", table);
2079 signal_simple_error ("Something wrong", table);
2081 CHARSET_DECODING_TABLE(cs) = Qnil;
2084 signal_error (Qwrong_type_argument,
2085 list2 (build_translated_string ("vector-or-nil-p"),
2088 byte_offset = CHARSET_BYTE_OFFSET (cs);
2089 switch (CHARSET_DIMENSION (cs))
2092 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2094 Lisp_Object c = XVECTOR_DATA(table)[i];
2097 Fput_char_attribute (c, XCHARSET_NAME (charset),
2098 make_int (i + byte_offset));
2102 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2104 Lisp_Object v = XVECTOR_DATA(table)[i];
2110 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2112 Lisp_Object c = XVECTOR_DATA(v)[j];
2116 (c, XCHARSET_NAME (charset),
2117 make_int ( ( (i + byte_offset) << 8 )
2123 Fput_char_attribute (v, XCHARSET_NAME (charset),
2124 make_int (i + byte_offset));
2131 #ifdef HAVE_CHISE_CLIENT
2132 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2133 Save mapping-table of CHARSET.
2137 struct Lisp_Charset *cs;
2138 int byte_min, byte_max;
2140 Lisp_Object db_file;
2142 charset = Fget_charset (charset);
2143 cs = XCHARSET (charset);
2145 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2146 Qsystem_char_id, 1);
2147 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2149 byte_min = CHARSET_BYTE_OFFSET (cs);
2150 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2151 switch (CHARSET_DIMENSION (cs))
2155 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2158 for (cell = byte_min; cell < byte_max; cell++)
2160 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2163 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2164 Fprin1_to_string (c, Qnil),
2171 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2174 for (row = byte_min; row < byte_max; row++)
2176 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2179 for (cell = byte_min; cell < byte_max; cell++)
2181 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2184 Fput_database (Fprin1_to_string (make_int ((row << 8)
2187 Fprin1_to_string (c, Qnil),
2195 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2198 for (plane = byte_min; plane < byte_max; plane++)
2201 = get_ccs_octet_table (table_p, charset, plane);
2204 for (row = byte_min; row < byte_max; row++)
2207 = get_ccs_octet_table (table_r, charset, row);
2210 for (cell = byte_min; cell < byte_max; cell++)
2212 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2216 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2220 Fprin1_to_string (c, Qnil),
2229 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2232 for (group = byte_min; group < byte_max; group++)
2235 = get_ccs_octet_table (table_g, charset, group);
2238 for (plane = byte_min; plane < byte_max; plane++)
2241 = get_ccs_octet_table (table_p, charset, plane);
2244 for (row = byte_min; row < byte_max; row++)
2247 = get_ccs_octet_table (table_r, charset, row);
2250 for (cell = byte_min; cell < byte_max; cell++)
2253 = get_ccs_octet_table (table_c, charset, cell);
2256 Fput_database (Fprin1_to_string
2257 (make_int (( group << 24)
2262 Fprin1_to_string (c, Qnil),
2270 return Fclose_database (db);
2273 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2274 Reset mapping-table of CCS with database file.
2278 Lisp_Object db_file;
2280 ccs = Fget_charset (ccs);
2281 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2282 Qsystem_char_id, 0);
2284 if (!NILP (Ffile_exists_p (db_file)))
2286 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2293 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2297 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2300 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2304 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2311 decoding_table_put_char (ccs, code_point, ret);
2312 Fclose_database (db);
2316 decoding_table_put_char (ccs, code_point, Qnil);
2317 Fclose_database (db);
2321 #endif /* HAVE_CHISE_CLIENT */
2322 #endif /* UTF2000 */
2325 /************************************************************************/
2326 /* Lisp primitives for working with characters */
2327 /************************************************************************/
2330 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2331 Make a character from CHARSET and code-point CODE.
2332 If DEFINED_ONLY is non-nil, builtin character is not returned.
2333 If corresponding character is not found, nil is returned.
2335 (charset, code, defined_only))
2339 charset = Fget_charset (charset);
2342 if (XCHARSET_GRAPHIC (charset) == 1)
2344 if (NILP (defined_only))
2345 c = DECODE_CHAR (charset, c);
2347 c = decode_defined_char (charset, c);
2348 return c >= 0 ? make_char (c) : Qnil;
2351 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2352 Make a builtin character from CHARSET and code-point CODE.
2358 charset = Fget_charset (charset);
2360 if (EQ (charset, Vcharset_latin_viscii))
2362 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2368 (ret = Fget_char_attribute (chr,
2369 Vcharset_latin_viscii_lower,
2372 charset = Vcharset_latin_viscii_lower;
2376 (ret = Fget_char_attribute (chr,
2377 Vcharset_latin_viscii_upper,
2380 charset = Vcharset_latin_viscii_upper;
2387 if (XCHARSET_GRAPHIC (charset) == 1)
2390 c = decode_builtin_char (charset, c);
2391 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2395 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2396 Make a character from CHARSET and octets ARG1 and ARG2.
2397 ARG2 is required only for characters from two-dimensional charsets.
2398 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2399 character s with caron.
2401 (charset, arg1, arg2))
2405 int lowlim, highlim;
2407 charset = Fget_charset (charset);
2408 cs = XCHARSET (charset);
2410 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2411 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2413 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2415 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2416 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2419 /* It is useful (and safe, according to Olivier Galibert) to strip
2420 the 8th bit off ARG1 and ARG2 because it allows programmers to
2421 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2422 Latin 2 code of the character. */
2430 if (a1 < lowlim || a1 > highlim)
2431 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2433 if (CHARSET_DIMENSION (cs) == 1)
2437 ("Charset is of dimension one; second octet must be nil", arg2);
2438 return make_char (MAKE_CHAR (charset, a1, 0));
2447 a2 = XINT (arg2) & 0x7f;
2449 if (a2 < lowlim || a2 > highlim)
2450 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2452 return make_char (MAKE_CHAR (charset, a1, a2));
2455 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2456 Return the character set of CHARACTER.
2460 CHECK_CHAR_COERCE_INT (character);
2462 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2465 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2466 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2467 N defaults to 0 if omitted.
2471 Lisp_Object charset;
2474 CHECK_CHAR_COERCE_INT (character);
2476 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2478 if (NILP (n) || EQ (n, Qzero))
2479 return make_int (octet0);
2480 else if (EQ (n, make_int (1)))
2481 return make_int (octet1);
2483 signal_simple_error ("Octet number must be 0 or 1", n);
2487 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2488 Return code-point of CHARACTER in specified CHARSET.
2490 (character, charset, defined_only))
2494 CHECK_CHAR_COERCE_INT (character);
2495 charset = Fget_charset (charset);
2496 code_point = charset_code_point (charset, XCHAR (character),
2497 !NILP (defined_only));
2498 if (code_point >= 0)
2499 return make_int (code_point);
2505 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2506 Return list of charset and one or two position-codes of CHARACTER.
2510 /* This function can GC */
2511 struct gcpro gcpro1, gcpro2;
2512 Lisp_Object charset = Qnil;
2513 Lisp_Object rc = Qnil;
2521 GCPRO2 (charset, rc);
2522 CHECK_CHAR_COERCE_INT (character);
2525 code_point = ENCODE_CHAR (XCHAR (character), charset);
2526 dimension = XCHARSET_DIMENSION (charset);
2527 while (dimension > 0)
2529 rc = Fcons (make_int (code_point & 255), rc);
2533 rc = Fcons (XCHARSET_NAME (charset), rc);
2535 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2537 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2539 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2543 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2552 #ifdef ENABLE_COMPOSITE_CHARS
2553 /************************************************************************/
2554 /* composite character functions */
2555 /************************************************************************/
2558 lookup_composite_char (Bufbyte *str, int len)
2560 Lisp_Object lispstr = make_string (str, len);
2561 Lisp_Object ch = Fgethash (lispstr,
2562 Vcomposite_char_string2char_hash_table,
2568 if (composite_char_row_next >= 128)
2569 signal_simple_error ("No more composite chars available", lispstr);
2570 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2571 composite_char_col_next);
2572 Fputhash (make_char (emch), lispstr,
2573 Vcomposite_char_char2string_hash_table);
2574 Fputhash (lispstr, make_char (emch),
2575 Vcomposite_char_string2char_hash_table);
2576 composite_char_col_next++;
2577 if (composite_char_col_next >= 128)
2579 composite_char_col_next = 32;
2580 composite_char_row_next++;
2589 composite_char_string (Emchar ch)
2591 Lisp_Object str = Fgethash (make_char (ch),
2592 Vcomposite_char_char2string_hash_table,
2594 assert (!UNBOUNDP (str));
2598 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2599 Convert a string into a single composite character.
2600 The character is the result of overstriking all the characters in
2605 CHECK_STRING (string);
2606 return make_char (lookup_composite_char (XSTRING_DATA (string),
2607 XSTRING_LENGTH (string)));
2610 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2611 Return a string of the characters comprising a composite character.
2619 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2620 signal_simple_error ("Must be composite char", ch);
2621 return composite_char_string (emch);
2623 #endif /* ENABLE_COMPOSITE_CHARS */
2626 /************************************************************************/
2627 /* initialization */
2628 /************************************************************************/
2631 syms_of_mule_charset (void)
2633 INIT_LRECORD_IMPLEMENTATION (charset);
2635 DEFSUBR (Fcharsetp);
2636 DEFSUBR (Ffind_charset);
2637 DEFSUBR (Fget_charset);
2638 DEFSUBR (Fcharset_list);
2639 DEFSUBR (Fcharset_name);
2640 DEFSUBR (Fmake_charset);
2641 DEFSUBR (Fmake_reverse_direction_charset);
2642 /* DEFSUBR (Freverse_direction_charset); */
2643 DEFSUBR (Fdefine_charset_alias);
2644 DEFSUBR (Fcharset_from_attributes);
2645 DEFSUBR (Fcharset_short_name);
2646 DEFSUBR (Fcharset_long_name);
2647 DEFSUBR (Fcharset_description);
2648 DEFSUBR (Fcharset_dimension);
2649 DEFSUBR (Fcharset_property);
2650 DEFSUBR (Fcharset_id);
2651 DEFSUBR (Fset_charset_ccl_program);
2652 DEFSUBR (Fset_charset_registry);
2654 DEFSUBR (Fcharset_mapping_table);
2655 DEFSUBR (Fset_charset_mapping_table);
2656 #ifdef HAVE_CHISE_CLIENT
2657 DEFSUBR (Fsave_charset_mapping_table);
2658 DEFSUBR (Freset_charset_mapping_table);
2661 DEFSUBR (Fdecode_char);
2662 DEFSUBR (Fdecode_builtin_char);
2663 DEFSUBR (Fencode_char);
2665 DEFSUBR (Fmake_char);
2666 DEFSUBR (Fchar_charset);
2667 DEFSUBR (Fchar_octet);
2668 DEFSUBR (Fsplit_char);
2670 #ifdef ENABLE_COMPOSITE_CHARS
2671 DEFSUBR (Fmake_composite_char);
2672 DEFSUBR (Fcomposite_char_string);
2675 defsymbol (&Qcharsetp, "charsetp");
2676 defsymbol (&Qregistry, "registry");
2677 defsymbol (&Qfinal, "final");
2678 defsymbol (&Qgraphic, "graphic");
2679 defsymbol (&Qdirection, "direction");
2680 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2681 defsymbol (&Qshort_name, "short-name");
2682 defsymbol (&Qlong_name, "long-name");
2684 defsymbol (&Qmother, "mother");
2685 defsymbol (&Qmin_code, "min-code");
2686 defsymbol (&Qmax_code, "max-code");
2687 defsymbol (&Qcode_offset, "code-offset");
2688 defsymbol (&Qconversion, "conversion");
2689 defsymbol (&Q94x60, "94x60");
2690 defsymbol (&Q94x94x60, "94x94x60");
2691 defsymbol (&Qbig5_1, "big5-1");
2692 defsymbol (&Qbig5_2, "big5-2");
2695 defsymbol (&Ql2r, "l2r");
2696 defsymbol (&Qr2l, "r2l");
2698 /* Charsets, compatible with FSF 20.3
2699 Naming convention is Script-Charset[-Edition] */
2700 defsymbol (&Qascii, "ascii");
2701 defsymbol (&Qcontrol_1, "control-1");
2702 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2703 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2704 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2705 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2706 defsymbol (&Qthai_tis620, "thai-tis620");
2707 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2708 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2709 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2710 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2711 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2712 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2713 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2714 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2715 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2716 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2717 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2718 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2719 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2720 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2721 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2722 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2724 defsymbol (&Qucs, "ucs");
2725 defsymbol (&Qucs_bmp, "ucs-bmp");
2726 defsymbol (&Qucs_smp, "ucs-smp");
2727 defsymbol (&Qucs_sip, "ucs-sip");
2728 defsymbol (&Qucs_gb, "ucs-gb");
2729 defsymbol (&Qucs_cns, "ucs-cns");
2730 defsymbol (&Qucs_jis, "ucs-jis");
2731 defsymbol (&Qucs_ks, "ucs-ks");
2732 defsymbol (&Qucs_big5, "ucs-big5");
2733 defsymbol (&Qlatin_viscii, "latin-viscii");
2734 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2735 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2736 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2737 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2738 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2739 defsymbol (&Qjis_x0208, "=jis-x0208");
2740 defsymbol (&Qideograph_gt, "ideograph-gt");
2741 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2742 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2743 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2744 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2745 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2746 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2747 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2748 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2749 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2750 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2751 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2752 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2753 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2754 defsymbol (&Qchinese_big5, "chinese-big5");
2755 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2756 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2757 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2758 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2759 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2760 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2761 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2762 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2763 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2764 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2765 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2766 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2767 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2768 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2770 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2771 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2773 defsymbol (&Qcomposite, "composite");
2777 vars_of_mule_charset (void)
2784 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2785 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2787 /* Table of charsets indexed by leading byte. */
2788 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2789 chlook->charset_by_leading_byte[i] = Qnil;
2792 /* Table of charsets indexed by type/final-byte. */
2793 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2794 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2795 chlook->charset_by_attributes[i][j] = Qnil;
2797 /* Table of charsets indexed by type/final-byte/direction. */
2798 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2799 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2800 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2801 chlook->charset_by_attributes[i][j][k] = Qnil;
2805 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2807 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2808 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2812 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2813 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2814 Leading-code of private TYPE9N charset of column-width 1.
2816 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2820 Vdefault_coded_charset_priority_list = Qnil;
2821 DEFVAR_LISP ("default-coded-charset-priority-list",
2822 &Vdefault_coded_charset_priority_list /*
2823 Default order of preferred coded-character-sets.
2829 complex_vars_of_mule_charset (void)
2831 staticpro (&Vcharset_hash_table);
2832 Vcharset_hash_table =
2833 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2835 /* Predefined character sets. We store them into variables for
2839 staticpro (&Vcharset_ucs);
2841 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2842 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2843 build_string ("UCS"),
2844 build_string ("UCS"),
2845 build_string ("ISO/IEC 10646"),
2847 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2848 staticpro (&Vcharset_ucs_bmp);
2850 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2851 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2852 build_string ("BMP"),
2853 build_string ("UCS-BMP"),
2854 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2856 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2857 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2858 staticpro (&Vcharset_ucs_smp);
2860 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2861 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2862 build_string ("SMP"),
2863 build_string ("UCS-SMP"),
2864 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2865 build_string ("UCS00-1"),
2866 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2867 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2868 staticpro (&Vcharset_ucs_sip);
2870 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2871 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2872 build_string ("SIP"),
2873 build_string ("UCS-SIP"),
2874 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2875 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2876 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2877 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2878 staticpro (&Vcharset_ucs_gb);
2880 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2881 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2882 build_string ("UCS for GB"),
2883 build_string ("UCS for GB"),
2884 build_string ("ISO/IEC 10646 for GB"),
2886 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2887 staticpro (&Vcharset_ucs_cns);
2889 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2890 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2891 build_string ("UCS for CNS"),
2892 build_string ("UCS for CNS 11643"),
2893 build_string ("ISO/IEC 10646 for CNS 11643"),
2895 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2896 staticpro (&Vcharset_ucs_jis);
2898 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2899 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2900 build_string ("UCS for JIS"),
2901 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2903 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2905 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2906 staticpro (&Vcharset_ucs_ks);
2908 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2909 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2910 build_string ("UCS for KS"),
2911 build_string ("UCS for CCS defined by KS"),
2912 build_string ("ISO/IEC 10646 for Korean Standards"),
2914 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2915 staticpro (&Vcharset_ucs_big5);
2917 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2918 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2919 build_string ("UCS for Big5"),
2920 build_string ("UCS for Big5"),
2921 build_string ("ISO/IEC 10646 for Big5"),
2923 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2925 # define MIN_CHAR_THAI 0
2926 # define MAX_CHAR_THAI 0
2927 /* # define MIN_CHAR_HEBREW 0 */
2928 /* # define MAX_CHAR_HEBREW 0 */
2929 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2930 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2932 staticpro (&Vcharset_ascii);
2934 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2935 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2936 build_string ("ASCII"),
2937 build_string ("ASCII)"),
2938 build_string ("ASCII (ISO646 IRV)"),
2939 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2940 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2941 staticpro (&Vcharset_control_1);
2942 Vcharset_control_1 =
2943 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2944 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2945 build_string ("C1"),
2946 build_string ("Control characters"),
2947 build_string ("Control characters 128-191"),
2949 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2950 staticpro (&Vcharset_latin_iso8859_1);
2951 Vcharset_latin_iso8859_1 =
2952 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2953 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2954 build_string ("Latin-1"),
2955 build_string ("ISO8859-1 (Latin-1)"),
2956 build_string ("ISO8859-1 (Latin-1)"),
2957 build_string ("iso8859-1"),
2958 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2959 staticpro (&Vcharset_latin_iso8859_2);
2960 Vcharset_latin_iso8859_2 =
2961 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2962 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2963 build_string ("Latin-2"),
2964 build_string ("ISO8859-2 (Latin-2)"),
2965 build_string ("ISO8859-2 (Latin-2)"),
2966 build_string ("iso8859-2"),
2967 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2968 staticpro (&Vcharset_latin_iso8859_3);
2969 Vcharset_latin_iso8859_3 =
2970 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2971 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2972 build_string ("Latin-3"),
2973 build_string ("ISO8859-3 (Latin-3)"),
2974 build_string ("ISO8859-3 (Latin-3)"),
2975 build_string ("iso8859-3"),
2976 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2977 staticpro (&Vcharset_latin_iso8859_4);
2978 Vcharset_latin_iso8859_4 =
2979 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2980 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2981 build_string ("Latin-4"),
2982 build_string ("ISO8859-4 (Latin-4)"),
2983 build_string ("ISO8859-4 (Latin-4)"),
2984 build_string ("iso8859-4"),
2985 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2986 staticpro (&Vcharset_thai_tis620);
2987 Vcharset_thai_tis620 =
2988 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2989 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2990 build_string ("TIS620"),
2991 build_string ("TIS620 (Thai)"),
2992 build_string ("TIS620.2529 (Thai)"),
2993 build_string ("tis620"),
2994 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2995 staticpro (&Vcharset_greek_iso8859_7);
2996 Vcharset_greek_iso8859_7 =
2997 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2998 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2999 build_string ("ISO8859-7"),
3000 build_string ("ISO8859-7 (Greek)"),
3001 build_string ("ISO8859-7 (Greek)"),
3002 build_string ("iso8859-7"),
3003 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3004 staticpro (&Vcharset_arabic_iso8859_6);
3005 Vcharset_arabic_iso8859_6 =
3006 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3007 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3008 build_string ("ISO8859-6"),
3009 build_string ("ISO8859-6 (Arabic)"),
3010 build_string ("ISO8859-6 (Arabic)"),
3011 build_string ("iso8859-6"),
3012 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3013 staticpro (&Vcharset_hebrew_iso8859_8);
3014 Vcharset_hebrew_iso8859_8 =
3015 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3016 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3017 build_string ("ISO8859-8"),
3018 build_string ("ISO8859-8 (Hebrew)"),
3019 build_string ("ISO8859-8 (Hebrew)"),
3020 build_string ("iso8859-8"),
3022 0 /* MIN_CHAR_HEBREW */,
3023 0 /* MAX_CHAR_HEBREW */, 0, 32,
3024 Qnil, CONVERSION_IDENTICAL);
3025 staticpro (&Vcharset_katakana_jisx0201);
3026 Vcharset_katakana_jisx0201 =
3027 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3028 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3029 build_string ("JISX0201 Kana"),
3030 build_string ("JISX0201.1976 (Japanese Kana)"),
3031 build_string ("JISX0201.1976 Japanese Kana"),
3032 build_string ("jisx0201\\.1976"),
3033 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_latin_jisx0201);
3035 Vcharset_latin_jisx0201 =
3036 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3037 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("JISX0201 Roman"),
3039 build_string ("JISX0201.1976 (Japanese Roman)"),
3040 build_string ("JISX0201.1976 Japanese Roman"),
3041 build_string ("jisx0201\\.1976"),
3042 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3043 staticpro (&Vcharset_cyrillic_iso8859_5);
3044 Vcharset_cyrillic_iso8859_5 =
3045 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3046 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3047 build_string ("ISO8859-5"),
3048 build_string ("ISO8859-5 (Cyrillic)"),
3049 build_string ("ISO8859-5 (Cyrillic)"),
3050 build_string ("iso8859-5"),
3051 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3052 staticpro (&Vcharset_latin_iso8859_9);
3053 Vcharset_latin_iso8859_9 =
3054 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3055 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3056 build_string ("Latin-5"),
3057 build_string ("ISO8859-9 (Latin-5)"),
3058 build_string ("ISO8859-9 (Latin-5)"),
3059 build_string ("iso8859-9"),
3060 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3062 staticpro (&Vcharset_jis_x0208);
3063 Vcharset_jis_x0208 =
3064 make_charset (LEADING_BYTE_JIS_X0208,
3066 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3067 build_string ("JIS X0208"),
3068 build_string ("JIS X0208 Common"),
3069 build_string ("JIS X0208 Common part"),
3070 build_string ("jisx0208\\.1990"),
3072 MIN_CHAR_JIS_X0208_1990,
3073 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3074 Qnil, CONVERSION_94x94);
3076 staticpro (&Vcharset_japanese_jisx0208_1978);
3077 Vcharset_japanese_jisx0208_1978 =
3078 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3079 Qjapanese_jisx0208_1978, 94, 2,
3080 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3081 build_string ("JIS X0208:1978"),
3082 build_string ("JIS X0208:1978 (Japanese)"),
3084 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3085 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3092 CONVERSION_IDENTICAL);
3093 staticpro (&Vcharset_chinese_gb2312);
3094 Vcharset_chinese_gb2312 =
3095 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3096 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3097 build_string ("GB2312"),
3098 build_string ("GB2312)"),
3099 build_string ("GB2312 Chinese simplified"),
3100 build_string ("gb2312"),
3101 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3102 staticpro (&Vcharset_chinese_gb12345);
3103 Vcharset_chinese_gb12345 =
3104 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3105 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3106 build_string ("G1"),
3107 build_string ("GB 12345)"),
3108 build_string ("GB 12345-1990"),
3109 build_string ("GB12345\\(\\.1990\\)?-0"),
3110 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3111 staticpro (&Vcharset_japanese_jisx0208);
3112 Vcharset_japanese_jisx0208 =
3113 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3114 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3115 build_string ("JISX0208"),
3116 build_string ("JIS X0208:1983 (Japanese)"),
3117 build_string ("JIS X0208:1983 Japanese Kanji"),
3118 build_string ("jisx0208\\.1983"),
3125 CONVERSION_IDENTICAL);
3127 staticpro (&Vcharset_japanese_jisx0208_1990);
3128 Vcharset_japanese_jisx0208_1990 =
3129 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3130 Qjapanese_jisx0208_1990, 94, 2,
3131 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3132 build_string ("JISX0208-1990"),
3133 build_string ("JIS X0208:1990 (Japanese)"),
3134 build_string ("JIS X0208:1990 Japanese Kanji"),
3135 build_string ("jisx0208\\.1990"),
3137 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3138 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3139 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3140 Vcharset_jis_x0208 /* Qnil */,
3141 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3143 staticpro (&Vcharset_korean_ksc5601);
3144 Vcharset_korean_ksc5601 =
3145 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3146 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3147 build_string ("KSC5601"),
3148 build_string ("KSC5601 (Korean"),
3149 build_string ("KSC5601 Korean Hangul and Hanja"),
3150 build_string ("ksc5601"),
3151 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3152 staticpro (&Vcharset_japanese_jisx0212);
3153 Vcharset_japanese_jisx0212 =
3154 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3155 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3156 build_string ("JISX0212"),
3157 build_string ("JISX0212 (Japanese)"),
3158 build_string ("JISX0212 Japanese Supplement"),
3159 build_string ("jisx0212"),
3160 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3162 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3163 staticpro (&Vcharset_chinese_cns11643_1);
3164 Vcharset_chinese_cns11643_1 =
3165 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3166 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3167 build_string ("CNS11643-1"),
3168 build_string ("CNS11643-1 (Chinese traditional)"),
3170 ("CNS 11643 Plane 1 Chinese traditional"),
3171 build_string (CHINESE_CNS_PLANE_RE("1")),
3172 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3173 staticpro (&Vcharset_chinese_cns11643_2);
3174 Vcharset_chinese_cns11643_2 =
3175 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3176 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3177 build_string ("CNS11643-2"),
3178 build_string ("CNS11643-2 (Chinese traditional)"),
3180 ("CNS 11643 Plane 2 Chinese traditional"),
3181 build_string (CHINESE_CNS_PLANE_RE("2")),
3182 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3184 staticpro (&Vcharset_latin_tcvn5712);
3185 Vcharset_latin_tcvn5712 =
3186 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3187 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3188 build_string ("TCVN 5712"),
3189 build_string ("TCVN 5712 (VSCII-2)"),
3190 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3191 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3192 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3193 staticpro (&Vcharset_latin_viscii_lower);
3194 Vcharset_latin_viscii_lower =
3195 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3196 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3197 build_string ("VISCII lower"),
3198 build_string ("VISCII lower (Vietnamese)"),
3199 build_string ("VISCII lower (Vietnamese)"),
3200 build_string ("MULEVISCII-LOWER"),
3201 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3202 staticpro (&Vcharset_latin_viscii_upper);
3203 Vcharset_latin_viscii_upper =
3204 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3205 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3206 build_string ("VISCII upper"),
3207 build_string ("VISCII upper (Vietnamese)"),
3208 build_string ("VISCII upper (Vietnamese)"),
3209 build_string ("MULEVISCII-UPPER"),
3210 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3211 staticpro (&Vcharset_latin_viscii);
3212 Vcharset_latin_viscii =
3213 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3214 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3215 build_string ("VISCII"),
3216 build_string ("VISCII 1.1 (Vietnamese)"),
3217 build_string ("VISCII 1.1 (Vietnamese)"),
3218 build_string ("VISCII1\\.1"),
3219 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3220 staticpro (&Vcharset_chinese_big5);
3221 Vcharset_chinese_big5 =
3222 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3223 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3224 build_string ("Big5"),
3225 build_string ("Big5"),
3226 build_string ("Big5 Chinese traditional"),
3227 build_string ("big5-0"),
3229 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3230 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3232 staticpro (&Vcharset_chinese_big5_cdp);
3233 Vcharset_chinese_big5_cdp =
3234 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3235 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3236 build_string ("Big5-CDP"),
3237 build_string ("Big5 + CDP extension"),
3238 build_string ("Big5 with CDP extension"),
3239 build_string ("big5\\.cdp-0"),
3240 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3241 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3243 #define DEF_HANZIKU(n) \
3244 staticpro (&Vcharset_ideograph_hanziku_##n); \
3245 Vcharset_ideograph_hanziku_##n = \
3246 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3247 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3248 build_string ("HZK-"#n), \
3249 build_string ("HANZIKU-"#n), \
3250 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3252 ("hanziku-"#n"$"), \
3253 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3254 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3267 staticpro (&Vcharset_ideograph_gt);
3268 Vcharset_ideograph_gt =
3269 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3270 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3271 build_string ("GT"),
3272 build_string ("GT"),
3273 build_string ("GT"),
3275 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3276 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3277 #define DEF_GT_PJ(n) \
3278 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3279 Vcharset_ideograph_gt_pj_##n = \
3280 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3281 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3282 build_string ("GT-PJ-"#n), \
3283 build_string ("GT (pseudo JIS encoding) part "#n), \
3284 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3286 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3287 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3300 staticpro (&Vcharset_ideograph_daikanwa_2);
3301 Vcharset_ideograph_daikanwa_2 =
3302 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3303 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3304 build_string ("Daikanwa Rev."),
3305 build_string ("Morohashi's Daikanwa Rev."),
3307 ("Daikanwa dictionary (revised version)"),
3308 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3309 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3310 staticpro (&Vcharset_ideograph_daikanwa);
3311 Vcharset_ideograph_daikanwa =
3312 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3313 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3314 build_string ("Daikanwa"),
3315 build_string ("Morohashi's Daikanwa Rev.2"),
3317 ("Daikanwa dictionary (second revised version)"),
3318 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3319 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3320 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3322 staticpro (&Vcharset_ethiopic_ucs);
3323 Vcharset_ethiopic_ucs =
3324 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3325 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3326 build_string ("Ethiopic (UCS)"),
3327 build_string ("Ethiopic (UCS)"),
3328 build_string ("Ethiopic of UCS"),
3329 build_string ("Ethiopic-Unicode"),
3330 Qnil, 0x1200, 0x137F, 0, 0,
3331 Qnil, CONVERSION_IDENTICAL);
3333 staticpro (&Vcharset_chinese_big5_1);
3334 Vcharset_chinese_big5_1 =
3335 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3336 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3337 build_string ("Big5"),
3338 build_string ("Big5 (Level-1)"),
3340 ("Big5 Level-1 Chinese traditional"),
3341 build_string ("big5"),
3342 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3343 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3344 staticpro (&Vcharset_chinese_big5_2);
3345 Vcharset_chinese_big5_2 =
3346 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3347 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3348 build_string ("Big5"),
3349 build_string ("Big5 (Level-2)"),
3351 ("Big5 Level-2 Chinese traditional"),
3352 build_string ("big5"),
3353 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3354 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3356 #ifdef ENABLE_COMPOSITE_CHARS
3357 /* #### For simplicity, we put composite chars into a 96x96 charset.
3358 This is going to lead to problems because you can run out of
3359 room, esp. as we don't yet recycle numbers. */
3360 staticpro (&Vcharset_composite);
3361 Vcharset_composite =
3362 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3363 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3364 build_string ("Composite"),
3365 build_string ("Composite characters"),
3366 build_string ("Composite characters"),
3369 /* #### not dumped properly */
3370 composite_char_row_next = 32;
3371 composite_char_col_next = 32;
3373 Vcomposite_char_string2char_hash_table =
3374 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3375 Vcomposite_char_char2string_hash_table =
3376 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3377 staticpro (&Vcomposite_char_string2char_hash_table);
3378 staticpro (&Vcomposite_char_char2string_hash_table);
3379 #endif /* ENABLE_COMPOSITE_CHARS */