1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
180 put_char_ccs_code_point (Lisp_Object character,
181 Lisp_Object ccs, Lisp_Object value)
183 if (!EQ (XCHARSET_NAME (ccs), Qucs)
185 || (XCHAR (character) != XINT (value)))
187 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
191 { /* obsolete representation: value must be a list of bytes */
192 Lisp_Object ret = Fcar (value);
196 signal_simple_error ("Invalid value for coded-charset", value);
197 code_point = XINT (ret);
198 if (XCHARSET_GRAPHIC (ccs) == 1)
206 signal_simple_error ("Invalid value for coded-charset",
210 signal_simple_error ("Invalid value for coded-charset",
213 if (XCHARSET_GRAPHIC (ccs) == 1)
215 code_point = (code_point << 8) | j;
218 value = make_int (code_point);
220 else if (INTP (value))
222 code_point = XINT (value);
223 if (XCHARSET_GRAPHIC (ccs) == 1)
225 code_point &= 0x7F7F7F7F;
226 value = make_int (code_point);
230 signal_simple_error ("Invalid value for coded-charset", value);
234 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
237 decoding_table_remove_char (ccs, XINT (cpos));
240 decoding_table_put_char (ccs, code_point, character);
246 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
248 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
249 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
251 if (VECTORP (decoding_table))
253 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
257 decoding_table_remove_char (ccs, XINT (cpos));
260 if (CHAR_TABLEP (encoding_table))
262 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
270 int leading_code_private_11;
273 Lisp_Object Qcharsetp;
275 /* Qdoc_string, Qdimension, Qchars defined in general.c */
276 Lisp_Object Qregistry, Qfinal, Qgraphic;
277 Lisp_Object Qdirection;
278 Lisp_Object Qreverse_direction_charset;
279 Lisp_Object Qleading_byte;
280 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
300 Qjapanese_jisx0208_1978,
304 Qjapanese_jisx0208_1990,
322 Qvietnamese_viscii_lower,
323 Qvietnamese_viscii_upper,
332 Lisp_Object Ql2r, Qr2l;
334 Lisp_Object Vcharset_hash_table;
336 /* Composite characters are characters constructed by overstriking two
337 or more regular characters.
339 1) The old Mule implementation involves storing composite characters
340 in a buffer as a tag followed by all of the actual characters
341 used to make up the composite character. I think this is a bad
342 idea; it greatly complicates code that wants to handle strings
343 one character at a time because it has to deal with the possibility
344 of great big ungainly characters. It's much more reasonable to
345 simply store an index into a table of composite characters.
347 2) The current implementation only allows for 16,384 separate
348 composite characters over the lifetime of the XEmacs process.
349 This could become a potential problem if the user
350 edited lots of different files that use composite characters.
351 Due to FSF bogosity, increasing the number of allowable
352 composite characters under Mule would decrease the number
353 of possible faces that can exist. Mule already has shrunk
354 this to 2048, and further shrinkage would become uncomfortable.
355 No such problems exist in XEmacs.
357 Composite characters could be represented as 0x80 C1 C2 C3,
358 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
359 for slightly under 2^20 (one million) composite characters
360 over the XEmacs process lifetime, and you only need to
361 increase the size of a Mule character from 19 to 21 bits.
362 Or you could use 0x80 C1 C2 C3 C4, allowing for about
363 85 million (slightly over 2^26) composite characters. */
366 /************************************************************************/
367 /* Basic Emchar functions */
368 /************************************************************************/
370 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
371 string in STR. Returns the number of bytes stored.
372 Do not call this directly. Use the macro set_charptr_emchar() instead.
376 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
391 else if ( c <= 0x7ff )
393 *p++ = (c >> 6) | 0xc0;
394 *p++ = (c & 0x3f) | 0x80;
396 else if ( c <= 0xffff )
398 *p++ = (c >> 12) | 0xe0;
399 *p++ = ((c >> 6) & 0x3f) | 0x80;
400 *p++ = (c & 0x3f) | 0x80;
402 else if ( c <= 0x1fffff )
404 *p++ = (c >> 18) | 0xf0;
405 *p++ = ((c >> 12) & 0x3f) | 0x80;
406 *p++ = ((c >> 6) & 0x3f) | 0x80;
407 *p++ = (c & 0x3f) | 0x80;
409 else if ( c <= 0x3ffffff )
411 *p++ = (c >> 24) | 0xf8;
412 *p++ = ((c >> 18) & 0x3f) | 0x80;
413 *p++ = ((c >> 12) & 0x3f) | 0x80;
414 *p++ = ((c >> 6) & 0x3f) | 0x80;
415 *p++ = (c & 0x3f) | 0x80;
419 *p++ = (c >> 30) | 0xfc;
420 *p++ = ((c >> 24) & 0x3f) | 0x80;
421 *p++ = ((c >> 18) & 0x3f) | 0x80;
422 *p++ = ((c >> 12) & 0x3f) | 0x80;
423 *p++ = ((c >> 6) & 0x3f) | 0x80;
424 *p++ = (c & 0x3f) | 0x80;
427 BREAKUP_CHAR (c, charset, c1, c2);
428 lb = CHAR_LEADING_BYTE (c);
429 if (LEADING_BYTE_PRIVATE_P (lb))
430 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
432 if (EQ (charset, Vcharset_control_1))
441 /* Return the first character from a Mule-encoded string in STR,
442 assuming it's non-ASCII. Do not call this directly.
443 Use the macro charptr_emchar() instead. */
446 non_ascii_charptr_emchar (const Bufbyte *str)
459 else if ( b >= 0xf8 )
464 else if ( b >= 0xf0 )
469 else if ( b >= 0xe0 )
474 else if ( b >= 0xc0 )
484 for( ; len > 0; len-- )
487 ch = ( ch << 6 ) | ( b & 0x3f );
491 Bufbyte i0 = *str, i1, i2 = 0;
494 if (i0 == LEADING_BYTE_CONTROL_1)
495 return (Emchar) (*++str - 0x20);
497 if (LEADING_BYTE_PREFIX_P (i0))
502 charset = CHARSET_BY_LEADING_BYTE (i0);
503 if (XCHARSET_DIMENSION (charset) == 2)
506 return MAKE_CHAR (charset, i1, i2);
510 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
511 Do not call this directly. Use the macro valid_char_p() instead. */
515 non_ascii_valid_char_p (Emchar ch)
519 /* Must have only lowest 19 bits set */
523 f1 = CHAR_FIELD1 (ch);
524 f2 = CHAR_FIELD2 (ch);
525 f3 = CHAR_FIELD3 (ch);
531 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
532 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
533 f2 > MAX_CHAR_FIELD2_PRIVATE)
538 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
539 f2 <= MAX_CHAR_FIELD2_PRIVATE))
543 NOTE: This takes advantage of the fact that
544 FIELD2_TO_OFFICIAL_LEADING_BYTE and
545 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
547 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
548 if (EQ (charset, Qnil))
550 return (XCHARSET_CHARS (charset) == 96);
556 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
557 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
558 f1 > MAX_CHAR_FIELD1_PRIVATE)
560 if (f2 < 0x20 || f3 < 0x20)
563 #ifdef ENABLE_COMPOSITE_CHARS
564 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
566 if (UNBOUNDP (Fgethash (make_int (ch),
567 Vcomposite_char_char2string_hash_table,
572 #endif /* ENABLE_COMPOSITE_CHARS */
574 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
575 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
578 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
580 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
583 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
585 if (EQ (charset, Qnil))
587 return (XCHARSET_CHARS (charset) == 96);
593 /************************************************************************/
594 /* Basic string functions */
595 /************************************************************************/
597 /* Copy the character pointed to by SRC into DST. Do not call this
598 directly. Use the macro charptr_copy_char() instead.
599 Return the number of bytes copied. */
602 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
604 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
606 for (i = bytes; i; i--, dst++, src++)
612 /************************************************************************/
613 /* streams of Emchars */
614 /************************************************************************/
616 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
617 The functions below are not meant to be called directly; use
618 the macros in insdel.h. */
621 Lstream_get_emchar_1 (Lstream *stream, int ch)
623 Bufbyte str[MAX_EMCHAR_LEN];
624 Bufbyte *strptr = str;
627 str[0] = (Bufbyte) ch;
629 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
631 int c = Lstream_getc (stream);
632 bufpos_checking_assert (c >= 0);
633 *++strptr = (Bufbyte) c;
635 return charptr_emchar (str);
639 Lstream_fput_emchar (Lstream *stream, Emchar ch)
641 Bufbyte str[MAX_EMCHAR_LEN];
642 Bytecount len = set_charptr_emchar (str, ch);
643 return Lstream_write (stream, str, len);
647 Lstream_funget_emchar (Lstream *stream, Emchar ch)
649 Bufbyte str[MAX_EMCHAR_LEN];
650 Bytecount len = set_charptr_emchar (str, ch);
651 Lstream_unread (stream, str, len);
655 /************************************************************************/
657 /************************************************************************/
660 mark_charset (Lisp_Object obj)
662 Lisp_Charset *cs = XCHARSET (obj);
664 mark_object (cs->short_name);
665 mark_object (cs->long_name);
666 mark_object (cs->doc_string);
667 mark_object (cs->registry);
668 mark_object (cs->ccl_program);
670 mark_object (cs->decoding_table);
671 mark_object (cs->mother);
677 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
679 Lisp_Charset *cs = XCHARSET (obj);
683 error ("printing unreadable object #<charset %s 0x%x>",
684 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
687 write_c_string ("#<charset ", printcharfun);
688 print_internal (CHARSET_NAME (cs), printcharfun, 0);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
693 write_c_string (" ", printcharfun);
694 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
695 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
697 CHARSET_DIMENSION (cs),
698 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
699 CHARSET_COLUMNS (cs),
700 CHARSET_GRAPHIC (cs),
702 write_c_string (buf, printcharfun);
703 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
704 sprintf (buf, " 0x%x>", cs->header.uid);
705 write_c_string (buf, printcharfun);
708 static const struct lrecord_description charset_description[] = {
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
717 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
718 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
723 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
724 mark_charset, print_charset, 0, 0, 0,
728 /* Make a new charset. */
729 /* #### SJT Should generic properties be allowed? */
731 make_charset (Charset_ID id, Lisp_Object name,
732 unsigned short chars, unsigned char dimension,
733 unsigned char columns, unsigned char graphic,
734 Bufbyte final, unsigned char direction, Lisp_Object short_name,
735 Lisp_Object long_name, Lisp_Object doc,
737 Lisp_Object decoding_table,
738 Emchar min_code, Emchar max_code,
739 Emchar code_offset, unsigned char byte_offset,
740 Lisp_Object mother, unsigned char conversion)
743 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
747 XSETCHARSET (obj, cs);
749 CHARSET_ID (cs) = id;
750 CHARSET_NAME (cs) = name;
751 CHARSET_SHORT_NAME (cs) = short_name;
752 CHARSET_LONG_NAME (cs) = long_name;
753 CHARSET_CHARS (cs) = chars;
754 CHARSET_DIMENSION (cs) = dimension;
755 CHARSET_DIRECTION (cs) = direction;
756 CHARSET_COLUMNS (cs) = columns;
757 CHARSET_GRAPHIC (cs) = graphic;
758 CHARSET_FINAL (cs) = final;
759 CHARSET_DOC_STRING (cs) = doc;
760 CHARSET_REGISTRY (cs) = reg;
761 CHARSET_CCL_PROGRAM (cs) = Qnil;
762 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
764 CHARSET_DECODING_TABLE(cs) = Qunbound;
765 CHARSET_MIN_CODE (cs) = min_code;
766 CHARSET_MAX_CODE (cs) = max_code;
767 CHARSET_CODE_OFFSET (cs) = code_offset;
768 CHARSET_BYTE_OFFSET (cs) = byte_offset;
769 CHARSET_MOTHER (cs) = mother;
770 CHARSET_CONVERSION (cs) = conversion;
774 if (id == LEADING_BYTE_ASCII)
775 CHARSET_REP_BYTES (cs) = 1;
777 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
779 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
784 /* some charsets do not have final characters. This includes
785 ASCII, Control-1, Composite, and the two faux private
787 unsigned char iso2022_type
788 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
790 if (code_offset == 0)
792 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
793 chlook->charset_by_attributes[iso2022_type][final] = obj;
797 (chlook->charset_by_attributes[iso2022_type][final][direction]));
798 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
802 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
803 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
805 /* Some charsets are "faux" and don't have names or really exist at
806 all except in the leading-byte table. */
808 Fputhash (name, obj, Vcharset_hash_table);
813 get_unallocated_leading_byte (int dimension)
818 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
821 lb = chlook->next_allocated_leading_byte++;
825 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
828 lb = chlook->next_allocated_1_byte_leading_byte++;
832 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
835 lb = chlook->next_allocated_2_byte_leading_byte++;
841 ("No more character sets free for this dimension",
842 make_int (dimension));
848 /* Number of Big5 characters which have the same code in 1st byte. */
850 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
853 decode_defined_char (Lisp_Object ccs, int code_point)
855 int dim = XCHARSET_DIMENSION (ccs);
856 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
864 = get_ccs_octet_table (decoding_table, ccs,
865 (code_point >> (dim * 8)) & 255);
867 if (CHARP (decoding_table))
868 return XCHAR (decoding_table);
869 #ifdef HAVE_CHISE_CLIENT
870 if (EQ (decoding_table, Qunloaded))
872 char_id = load_char_decoding_entry_maybe (ccs, code_point);
877 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
879 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
881 if ( EQ (mother, Vcharset_ucs) )
882 return DECODE_CHAR (mother, code_point);
884 return decode_defined_char (mother, code_point);
886 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
889 = (((code_point >> 8) & 0x7F) - 33) * 94
890 + (( code_point & 0x7F) - 33);
891 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
892 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
894 b2 += b2 < 0x3F ? 0x40 : 0x62;
895 return decode_defined_char (mother, (b1 << 8) | b2);
897 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
900 = (((code_point >> 8) & 0x7F) - 33) * 94
901 + (( code_point & 0x7F) - 33)
902 + BIG5_SAME_ROW * (0xC9 - 0xA1);
903 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
904 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
906 b2 += b2 < 0x3F ? 0x40 : 0x62;
907 return decode_defined_char (mother, (b1 << 8) | b2);
914 decode_builtin_char (Lisp_Object charset, int code_point)
916 Lisp_Object mother = XCHARSET_MOTHER (charset);
919 if ( XCHARSET_MAX_CODE (charset) > 0 )
921 if ( CHARSETP (mother) )
923 int code = code_point;
925 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
927 int row = code_point >> 8;
928 int cell = code_point & 255;
932 else if (row < 16 + 32 + 30)
933 code = (row - (16 + 32)) * 94 + cell - 33;
934 else if (row < 18 + 32 + 30)
936 else if (row < 18 + 32 + 60)
937 code = (row - (18 + 32)) * 94 + cell - 33;
939 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
941 int plane = code_point >> 16;
942 int row = (code_point >> 8) & 255;
943 int cell = code_point & 255;
947 else if (row < 16 + 32 + 30)
949 = (plane - 33) * 94 * 60
950 + (row - (16 + 32)) * 94
952 else if (row < 18 + 32 + 30)
954 else if (row < 18 + 32 + 60)
956 = (plane - 33) * 94 * 60
957 + (row - (18 + 32)) * 94
960 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
963 = (((code_point >> 8) & 0x7F) - 33) * 94
964 + (( code_point & 0x7F) - 33);
965 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
966 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
968 b2 += b2 < 0x3F ? 0x40 : 0x62;
969 code = (b1 << 8) | b2;
971 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
974 = (((code_point >> 8) & 0x7F) - 33) * 94
975 + (( code_point & 0x7F) - 33)
976 + BIG5_SAME_ROW * (0xC9 - 0xA1);
977 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
978 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
980 b2 += b2 < 0x3F ? 0x40 : 0x62;
981 code = (b1 << 8) | b2;
984 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
989 = (XCHARSET_DIMENSION (charset) == 1
991 code_point - XCHARSET_BYTE_OFFSET (charset)
993 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
994 * XCHARSET_CHARS (charset)
995 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
996 + XCHARSET_CODE_OFFSET (charset);
997 if ((cid < XCHARSET_MIN_CODE (charset))
998 || (XCHARSET_MAX_CODE (charset) < cid))
1003 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1005 if (XCHARSET_DIMENSION (charset) == 1)
1007 switch (XCHARSET_CHARS (charset))
1011 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1014 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1022 switch (XCHARSET_CHARS (charset))
1025 return MIN_CHAR_94x94
1026 + (final - '0') * 94 * 94
1027 + (((code_point >> 8) & 0x7F) - 33) * 94
1028 + ((code_point & 0x7F) - 33);
1030 return MIN_CHAR_96x96
1031 + (final - '0') * 96 * 96
1032 + (((code_point >> 8) & 0x7F) - 32) * 96
1033 + ((code_point & 0x7F) - 32);
1045 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1047 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1050 if ( CHAR_TABLEP (encoding_table)
1051 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1056 Lisp_Object mother = XCHARSET_MOTHER (charset);
1057 int min = XCHARSET_MIN_CODE (charset);
1058 int max = XCHARSET_MAX_CODE (charset);
1061 if ( CHARSETP (mother) )
1063 if (XCHARSET_FINAL (charset) >= '0')
1064 code = charset_code_point (mother, ch, 1);
1066 code = charset_code_point (mother, ch, defined_only);
1068 else if (defined_only)
1070 else if ( ((max == 0) && CHARSETP (mother)
1071 && (XCHARSET_FINAL (charset) == 0))
1072 || ((min <= ch) && (ch <= max)) )
1074 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1075 || ((min <= code) && (code <= max)) )
1077 int d = code - XCHARSET_CODE_OFFSET (charset);
1079 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1083 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1085 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1088 int cell = d % 94 + 33;
1094 return (row << 8) | cell;
1096 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1098 int B1 = d >> 8, B2 = d & 0xFF;
1100 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1101 - (B2 < 0x7F ? 0x40 : 0x62);
1105 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1108 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1110 int B1 = d >> 8, B2 = d & 0xFF;
1112 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1113 - (B2 < 0x7F ? 0x40 : 0x62);
1117 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1118 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1122 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1124 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1125 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1127 int plane = d / (94 * 60) + 33;
1128 int row = (d % (94 * 60)) / 94;
1129 int cell = d % 94 + 33;
1135 return (plane << 16) | (row << 8) | cell;
1137 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1139 ( (d / (94 * 94) + 33) << 16)
1140 | ((d / 94 % 94 + 33) << 8)
1142 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1144 ( (d / (96 * 96) + 32) << 16)
1145 | ((d / 96 % 96 + 32) << 8)
1147 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1149 ( (d / (94 * 94 * 94) + 33) << 24)
1150 | ((d / (94 * 94) % 94 + 33) << 16)
1151 | ((d / 94 % 94 + 33) << 8)
1153 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1155 ( (d / (96 * 96 * 96) + 32) << 24)
1156 | ((d / (96 * 96) % 96 + 32) << 16)
1157 | ((d / 96 % 96 + 32) << 8)
1161 printf ("Unknown CCS-conversion %d is specified!",
1162 XCHARSET_CONVERSION (charset));
1166 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1167 ( XCHARSET_MIN_CODE (charset) == 0 )
1169 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1170 (XCHARSET_CODE_OFFSET (charset)
1171 == XCHARSET_MIN_CODE (charset))
1176 if (XCHARSET_DIMENSION (charset) == 1)
1178 if (XCHARSET_CHARS (charset) == 94)
1180 if (((d = ch - (MIN_CHAR_94
1181 + (XCHARSET_FINAL (charset) - '0') * 94))
1186 else if (XCHARSET_CHARS (charset) == 96)
1188 if (((d = ch - (MIN_CHAR_96
1189 + (XCHARSET_FINAL (charset) - '0') * 96))
1197 else if (XCHARSET_DIMENSION (charset) == 2)
1199 if (XCHARSET_CHARS (charset) == 94)
1201 if (((d = ch - (MIN_CHAR_94x94
1203 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1206 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1208 else if (XCHARSET_CHARS (charset) == 96)
1210 if (((d = ch - (MIN_CHAR_96x96
1212 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1215 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1226 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1228 if (c <= MAX_CHAR_BASIC_LATIN)
1230 *charset = Vcharset_ascii;
1235 *charset = Vcharset_control_1;
1240 *charset = Vcharset_latin_iso8859_1;
1244 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1246 *charset = Vcharset_hebrew_iso8859_8;
1247 return c - MIN_CHAR_HEBREW + 0x20;
1250 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1252 *charset = Vcharset_thai_tis620;
1253 return c - MIN_CHAR_THAI + 0x20;
1256 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1257 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1259 return list2 (Vcharset_katakana_jisx0201,
1260 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1263 else if (c <= MAX_CHAR_BMP)
1265 *charset = Vcharset_ucs_bmp;
1268 else if (c <= MAX_CHAR_SMP)
1270 *charset = Vcharset_ucs_smp;
1271 return c - MIN_CHAR_SMP;
1273 else if (c <= MAX_CHAR_SIP)
1275 *charset = Vcharset_ucs_sip;
1276 return c - MIN_CHAR_SIP;
1278 else if (c < MIN_CHAR_94)
1280 *charset = Vcharset_ucs;
1283 else if (c <= MAX_CHAR_94)
1285 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1286 ((c - MIN_CHAR_94) / 94) + '0',
1287 CHARSET_LEFT_TO_RIGHT);
1288 if (!NILP (*charset))
1289 return ((c - MIN_CHAR_94) % 94) + 33;
1292 *charset = Vcharset_ucs;
1296 else if (c <= MAX_CHAR_96)
1298 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1299 ((c - MIN_CHAR_96) / 96) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return ((c - MIN_CHAR_96) % 96) + 32;
1305 *charset = Vcharset_ucs;
1309 else if (c <= MAX_CHAR_94x94)
1312 = CHARSET_BY_ATTRIBUTES (94, 2,
1313 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1314 CHARSET_LEFT_TO_RIGHT);
1315 if (!NILP (*charset))
1316 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1317 | (((c - MIN_CHAR_94x94) % 94) + 33);
1320 *charset = Vcharset_ucs;
1324 else if (c <= MAX_CHAR_96x96)
1327 = CHARSET_BY_ATTRIBUTES (96, 2,
1328 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1329 CHARSET_LEFT_TO_RIGHT);
1330 if (!NILP (*charset))
1331 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1332 | (((c - MIN_CHAR_96x96) % 96) + 32);
1335 *charset = Vcharset_ucs;
1341 *charset = Vcharset_ucs;
1346 Lisp_Object Vdefault_coded_charset_priority_list;
1350 /************************************************************************/
1351 /* Basic charset Lisp functions */
1352 /************************************************************************/
1354 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1355 Return non-nil if OBJECT is a charset.
1359 return CHARSETP (object) ? Qt : Qnil;
1362 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1363 Retrieve the charset of the given name.
1364 If CHARSET-OR-NAME is a charset object, it is simply returned.
1365 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1366 nil is returned. Otherwise the associated charset object is returned.
1370 if (CHARSETP (charset_or_name))
1371 return charset_or_name;
1373 CHECK_SYMBOL (charset_or_name);
1374 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1377 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1378 Retrieve the charset of the given name.
1379 Same as `find-charset' except an error is signalled if there is no such
1380 charset instead of returning nil.
1384 Lisp_Object charset = Ffind_charset (name);
1387 signal_simple_error ("No such charset", name);
1391 /* We store the charsets in hash tables with the names as the key and the
1392 actual charset object as the value. Occasionally we need to use them
1393 in a list format. These routines provide us with that. */
1394 struct charset_list_closure
1396 Lisp_Object *charset_list;
1400 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1401 void *charset_list_closure)
1403 /* This function can GC */
1404 struct charset_list_closure *chcl =
1405 (struct charset_list_closure*) charset_list_closure;
1406 Lisp_Object *charset_list = chcl->charset_list;
1408 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1412 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1413 Return a list of the names of all defined charsets.
1417 Lisp_Object charset_list = Qnil;
1418 struct gcpro gcpro1;
1419 struct charset_list_closure charset_list_closure;
1421 GCPRO1 (charset_list);
1422 charset_list_closure.charset_list = &charset_list;
1423 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1424 &charset_list_closure);
1427 return charset_list;
1430 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1431 Return the name of charset CHARSET.
1435 return XCHARSET_NAME (Fget_charset (charset));
1438 /* #### SJT Should generic properties be allowed? */
1439 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1440 Define a new character set.
1441 This function is for use with Mule support.
1442 NAME is a symbol, the name by which the character set is normally referred.
1443 DOC-STRING is a string describing the character set.
1444 PROPS is a property list, describing the specific nature of the
1445 character set. Recognized properties are:
1447 'short-name Short version of the charset name (ex: Latin-1)
1448 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1449 'registry A regular expression matching the font registry field for
1451 'dimension Number of octets used to index a character in this charset.
1452 Either 1 or 2. Defaults to 1.
1453 If UTF-2000 feature is enabled, 3 or 4 are also available.
1454 'columns Number of columns used to display a character in this charset.
1455 Only used in TTY mode. (Under X, the actual width of a
1456 character can be derived from the font used to display the
1457 characters.) If unspecified, defaults to the dimension
1458 (this is almost always the correct value).
1459 'chars Number of characters in each dimension (94 or 96).
1460 Defaults to 94. Note that if the dimension is 2, the
1461 character set thus described is 94x94 or 96x96.
1462 If UTF-2000 feature is enabled, 128 or 256 are also available.
1463 'final Final byte of ISO 2022 escape sequence. Must be
1464 supplied. Each combination of (DIMENSION, CHARS) defines a
1465 separate namespace for final bytes. Note that ISO
1466 2022 restricts the final byte to the range
1467 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1468 dimension == 2. Note also that final bytes in the range
1469 0x30 - 0x3F are reserved for user-defined (not official)
1471 'graphic 0 (use left half of font on output) or 1 (use right half
1472 of font on output). Defaults to 0. For example, for
1473 a font whose registry is ISO8859-1, the left half
1474 (octets 0x20 - 0x7F) is the `ascii' character set, while
1475 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1476 character set. With 'graphic set to 0, the octets
1477 will have their high bit cleared; with it set to 1,
1478 the octets will have their high bit set.
1479 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1481 'ccl-program A compiled CCL program used to convert a character in
1482 this charset into an index into the font. This is in
1483 addition to the 'graphic property. The CCL program
1484 is passed the octets of the character, with the high
1485 bit cleared and set depending upon whether the value
1486 of the 'graphic property is 0 or 1.
1487 'mother [UTF-2000 only] Base coded-charset.
1488 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1489 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1490 'code-offset [UTF-2000 only] Offset for a code-point of a base
1492 'conversion [UTF-2000 only] Conversion for a code-point of a base
1493 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1495 (name, doc_string, props))
1497 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1498 int direction = CHARSET_LEFT_TO_RIGHT;
1499 Lisp_Object registry = Qnil;
1500 Lisp_Object charset;
1501 Lisp_Object ccl_program = Qnil;
1502 Lisp_Object short_name = Qnil, long_name = Qnil;
1503 Lisp_Object mother = Qnil;
1504 int min_code = 0, max_code = 0, code_offset = 0;
1505 int byte_offset = -1;
1508 CHECK_SYMBOL (name);
1509 if (!NILP (doc_string))
1510 CHECK_STRING (doc_string);
1512 charset = Ffind_charset (name);
1513 if (!NILP (charset))
1514 signal_simple_error ("Cannot redefine existing charset", name);
1517 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1519 if (EQ (keyword, Qshort_name))
1521 CHECK_STRING (value);
1525 else if (EQ (keyword, Qlong_name))
1527 CHECK_STRING (value);
1531 else if (EQ (keyword, Qdimension))
1534 dimension = XINT (value);
1535 if (dimension < 1 ||
1542 signal_simple_error ("Invalid value for 'dimension", value);
1545 else if (EQ (keyword, Qchars))
1548 chars = XINT (value);
1549 if (chars != 94 && chars != 96
1551 && chars != 128 && chars != 256
1554 signal_simple_error ("Invalid value for 'chars", value);
1557 else if (EQ (keyword, Qcolumns))
1560 columns = XINT (value);
1561 if (columns != 1 && columns != 2)
1562 signal_simple_error ("Invalid value for 'columns", value);
1565 else if (EQ (keyword, Qgraphic))
1568 graphic = XINT (value);
1576 signal_simple_error ("Invalid value for 'graphic", value);
1579 else if (EQ (keyword, Qregistry))
1581 CHECK_STRING (value);
1585 else if (EQ (keyword, Qdirection))
1587 if (EQ (value, Ql2r))
1588 direction = CHARSET_LEFT_TO_RIGHT;
1589 else if (EQ (value, Qr2l))
1590 direction = CHARSET_RIGHT_TO_LEFT;
1592 signal_simple_error ("Invalid value for 'direction", value);
1595 else if (EQ (keyword, Qfinal))
1597 CHECK_CHAR_COERCE_INT (value);
1598 final = XCHAR (value);
1599 if (final < '0' || final > '~')
1600 signal_simple_error ("Invalid value for 'final", value);
1604 else if (EQ (keyword, Qmother))
1606 mother = Fget_charset (value);
1609 else if (EQ (keyword, Qmin_code))
1612 min_code = XUINT (value);
1615 else if (EQ (keyword, Qmax_code))
1618 max_code = XUINT (value);
1621 else if (EQ (keyword, Qcode_offset))
1624 code_offset = XUINT (value);
1627 else if (EQ (keyword, Qconversion))
1629 if (EQ (value, Q94x60))
1630 conversion = CONVERSION_94x60;
1631 else if (EQ (value, Q94x94x60))
1632 conversion = CONVERSION_94x94x60;
1633 else if (EQ (value, Qbig5_1))
1634 conversion = CONVERSION_BIG5_1;
1635 else if (EQ (value, Qbig5_2))
1636 conversion = CONVERSION_BIG5_2;
1638 signal_simple_error ("Unrecognized conversion", value);
1642 else if (EQ (keyword, Qccl_program))
1644 struct ccl_program test_ccl;
1646 if (setup_ccl_program (&test_ccl, value) < 0)
1647 signal_simple_error ("Invalid value for 'ccl-program", value);
1648 ccl_program = value;
1652 signal_simple_error ("Unrecognized property", keyword);
1658 error ("'final must be specified");
1660 if (dimension == 2 && final > 0x5F)
1662 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1665 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1666 CHARSET_LEFT_TO_RIGHT)) ||
1667 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1668 CHARSET_RIGHT_TO_LEFT)))
1670 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1672 id = get_unallocated_leading_byte (dimension);
1674 if (NILP (doc_string))
1675 doc_string = build_string ("");
1677 if (NILP (registry))
1678 registry = build_string ("");
1680 if (NILP (short_name))
1681 XSETSTRING (short_name, XSYMBOL (name)->name);
1683 if (NILP (long_name))
1684 long_name = doc_string;
1687 columns = dimension;
1689 if (byte_offset < 0)
1693 else if (chars == 96)
1699 charset = make_charset (id, name, chars, dimension, columns, graphic,
1700 final, direction, short_name, long_name,
1701 doc_string, registry,
1702 Qnil, min_code, max_code, code_offset, byte_offset,
1703 mother, conversion);
1704 if (!NILP (ccl_program))
1705 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1709 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1711 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1712 NEW-NAME is the name of the new charset. Return the new charset.
1714 (charset, new_name))
1716 Lisp_Object new_charset = Qnil;
1717 int id, chars, dimension, columns, graphic, final;
1719 Lisp_Object registry, doc_string, short_name, long_name;
1722 charset = Fget_charset (charset);
1723 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1724 signal_simple_error ("Charset already has reverse-direction charset",
1727 CHECK_SYMBOL (new_name);
1728 if (!NILP (Ffind_charset (new_name)))
1729 signal_simple_error ("Cannot redefine existing charset", new_name);
1731 cs = XCHARSET (charset);
1733 chars = CHARSET_CHARS (cs);
1734 dimension = CHARSET_DIMENSION (cs);
1735 columns = CHARSET_COLUMNS (cs);
1736 id = get_unallocated_leading_byte (dimension);
1738 graphic = CHARSET_GRAPHIC (cs);
1739 final = CHARSET_FINAL (cs);
1740 direction = CHARSET_RIGHT_TO_LEFT;
1741 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1742 direction = CHARSET_LEFT_TO_RIGHT;
1743 doc_string = CHARSET_DOC_STRING (cs);
1744 short_name = CHARSET_SHORT_NAME (cs);
1745 long_name = CHARSET_LONG_NAME (cs);
1746 registry = CHARSET_REGISTRY (cs);
1748 new_charset = make_charset (id, new_name, chars, dimension, columns,
1749 graphic, final, direction, short_name, long_name,
1750 doc_string, registry,
1752 CHARSET_DECODING_TABLE(cs),
1753 CHARSET_MIN_CODE(cs),
1754 CHARSET_MAX_CODE(cs),
1755 CHARSET_CODE_OFFSET(cs),
1756 CHARSET_BYTE_OFFSET(cs),
1758 CHARSET_CONVERSION (cs)
1760 Qnil, 0, 0, 0, 0, Qnil, 0
1764 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1765 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1770 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1771 Define symbol ALIAS as an alias for CHARSET.
1775 CHECK_SYMBOL (alias);
1776 charset = Fget_charset (charset);
1777 return Fputhash (alias, charset, Vcharset_hash_table);
1780 /* #### Reverse direction charsets not yet implemented. */
1782 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1784 Return the reverse-direction charset parallel to CHARSET, if any.
1785 This is the charset with the same properties (in particular, the same
1786 dimension, number of characters per dimension, and final byte) as
1787 CHARSET but whose characters are displayed in the opposite direction.
1791 charset = Fget_charset (charset);
1792 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1796 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1797 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1798 If DIRECTION is omitted, both directions will be checked (left-to-right
1799 will be returned if character sets exist for both directions).
1801 (dimension, chars, final, direction))
1803 int dm, ch, fi, di = -1;
1804 Lisp_Object obj = Qnil;
1806 CHECK_INT (dimension);
1807 dm = XINT (dimension);
1808 if (dm < 1 || dm > 2)
1809 signal_simple_error ("Invalid value for DIMENSION", dimension);
1813 if (ch != 94 && ch != 96)
1814 signal_simple_error ("Invalid value for CHARS", chars);
1816 CHECK_CHAR_COERCE_INT (final);
1818 if (fi < '0' || fi > '~')
1819 signal_simple_error ("Invalid value for FINAL", final);
1821 if (EQ (direction, Ql2r))
1822 di = CHARSET_LEFT_TO_RIGHT;
1823 else if (EQ (direction, Qr2l))
1824 di = CHARSET_RIGHT_TO_LEFT;
1825 else if (!NILP (direction))
1826 signal_simple_error ("Invalid value for DIRECTION", direction);
1828 if (dm == 2 && fi > 0x5F)
1830 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1834 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1836 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1839 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1842 return XCHARSET_NAME (obj);
1846 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1847 Return short name of CHARSET.
1851 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1854 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1855 Return long name of CHARSET.
1859 return XCHARSET_LONG_NAME (Fget_charset (charset));
1862 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1863 Return description of CHARSET.
1867 return XCHARSET_DOC_STRING (Fget_charset (charset));
1870 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1871 Return dimension of CHARSET.
1875 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1878 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1879 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1880 Recognized properties are those listed in `make-charset', as well as
1881 'name and 'doc-string.
1887 charset = Fget_charset (charset);
1888 cs = XCHARSET (charset);
1890 CHECK_SYMBOL (prop);
1891 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1892 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1893 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1894 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1895 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1896 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1897 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1898 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1899 Qnil : make_char (CHARSET_FINAL (cs));
1900 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1901 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1902 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1903 if (EQ (prop, Qdirection))
1904 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1905 if (EQ (prop, Qreverse_direction_charset))
1907 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1908 /* #### Is this translation OK? If so, error checking sufficient? */
1909 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1912 if (EQ (prop, Qmother))
1913 return CHARSET_MOTHER (cs);
1914 if (EQ (prop, Qmin_code))
1915 return make_int (CHARSET_MIN_CODE (cs));
1916 if (EQ (prop, Qmax_code))
1917 return make_int (CHARSET_MAX_CODE (cs));
1919 signal_simple_error ("Unrecognized charset property name", prop);
1920 return Qnil; /* not reached */
1923 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1924 Return charset identification number of CHARSET.
1928 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1931 /* #### We need to figure out which properties we really want to
1934 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1935 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1937 (charset, ccl_program))
1939 struct ccl_program test_ccl;
1941 charset = Fget_charset (charset);
1942 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1943 signal_simple_error ("Invalid ccl-program", ccl_program);
1944 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1949 invalidate_charset_font_caches (Lisp_Object charset)
1951 /* Invalidate font cache entries for charset on all devices. */
1952 Lisp_Object devcons, concons, hash_table;
1953 DEVICE_LOOP_NO_BREAK (devcons, concons)
1955 struct device *d = XDEVICE (XCAR (devcons));
1956 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1957 if (!UNBOUNDP (hash_table))
1958 Fclrhash (hash_table);
1962 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1963 Set the 'registry property of CHARSET to REGISTRY.
1965 (charset, registry))
1967 charset = Fget_charset (charset);
1968 CHECK_STRING (registry);
1969 XCHARSET_REGISTRY (charset) = registry;
1970 invalidate_charset_font_caches (charset);
1971 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1976 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1977 Return mapping-table of CHARSET.
1981 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1984 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1985 Set mapping-table of CHARSET to TABLE.
1989 struct Lisp_Charset *cs;
1993 charset = Fget_charset (charset);
1994 cs = XCHARSET (charset);
1998 CHARSET_DECODING_TABLE(cs) = Qnil;
2001 else if (VECTORP (table))
2003 int ccs_len = CHARSET_BYTE_SIZE (cs);
2004 int ret = decoding_table_check_elements (table,
2005 CHARSET_DIMENSION (cs),
2010 signal_simple_error ("Too big table", table);
2012 signal_simple_error ("Invalid element is found", table);
2014 signal_simple_error ("Something wrong", table);
2016 CHARSET_DECODING_TABLE(cs) = Qnil;
2019 signal_error (Qwrong_type_argument,
2020 list2 (build_translated_string ("vector-or-nil-p"),
2023 byte_offset = CHARSET_BYTE_OFFSET (cs);
2024 switch (CHARSET_DIMENSION (cs))
2027 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2029 Lisp_Object c = XVECTOR_DATA(table)[i];
2032 Fput_char_attribute (c, XCHARSET_NAME (charset),
2033 make_int (i + byte_offset));
2037 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2039 Lisp_Object v = XVECTOR_DATA(table)[i];
2045 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2047 Lisp_Object c = XVECTOR_DATA(v)[j];
2051 (c, XCHARSET_NAME (charset),
2052 make_int ( ( (i + byte_offset) << 8 )
2058 Fput_char_attribute (v, XCHARSET_NAME (charset),
2059 make_int (i + byte_offset));
2066 #ifdef HAVE_CHISE_CLIENT
2067 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2068 Save mapping-table of CHARSET.
2072 struct Lisp_Charset *cs;
2073 int byte_min, byte_max;
2075 Lisp_Object db_file;
2077 charset = Fget_charset (charset);
2078 cs = XCHARSET (charset);
2080 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2081 Qsystem_char_id, 1);
2082 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2084 byte_min = CHARSET_BYTE_OFFSET (cs);
2085 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2086 switch (CHARSET_DIMENSION (cs))
2090 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2093 for (cell = byte_min; cell < byte_max; cell++)
2095 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2098 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2099 Fprin1_to_string (c, Qnil),
2106 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2109 for (row = byte_min; row < byte_max; row++)
2111 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2114 for (cell = byte_min; cell < byte_max; cell++)
2116 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2119 Fput_database (Fprin1_to_string (make_int ((row << 8)
2122 Fprin1_to_string (c, Qnil),
2130 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2133 for (plane = byte_min; plane < byte_max; plane++)
2136 = get_ccs_octet_table (table_p, charset, plane);
2139 for (row = byte_min; row < byte_max; row++)
2142 = get_ccs_octet_table (table_r, charset, row);
2145 for (cell = byte_min; cell < byte_max; cell++)
2147 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2151 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2155 Fprin1_to_string (c, Qnil),
2164 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2167 for (group = byte_min; group < byte_max; group++)
2170 = get_ccs_octet_table (table_g, charset, group);
2173 for (plane = byte_min; plane < byte_max; plane++)
2176 = get_ccs_octet_table (table_p, charset, plane);
2179 for (row = byte_min; row < byte_max; row++)
2182 = get_ccs_octet_table (table_r, charset, row);
2185 for (cell = byte_min; cell < byte_max; cell++)
2188 = get_ccs_octet_table (table_c, charset, cell);
2191 Fput_database (Fprin1_to_string
2192 (make_int (( group << 24)
2197 Fprin1_to_string (c, Qnil),
2205 return Fclose_database (db);
2208 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2209 Reset mapping-table of CCS with database file.
2213 Lisp_Object db_file;
2215 ccs = Fget_charset (ccs);
2216 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2217 Qsystem_char_id, 0);
2219 if (!NILP (Ffile_exists_p (db_file)))
2221 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2228 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2232 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2235 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2239 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2246 decoding_table_put_char (ccs, code_point, ret);
2247 Fclose_database (db);
2251 decoding_table_put_char (ccs, code_point, Qnil);
2252 Fclose_database (db);
2256 #endif /* HAVE_CHISE_CLIENT */
2257 #endif /* UTF2000 */
2260 /************************************************************************/
2261 /* Lisp primitives for working with characters */
2262 /************************************************************************/
2265 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2266 Make a character from CHARSET and code-point CODE.
2267 If DEFINED_ONLY is non-nil, builtin character is not returned.
2268 If corresponding character is not found, nil is returned.
2270 (charset, code, defined_only))
2274 charset = Fget_charset (charset);
2277 if (XCHARSET_GRAPHIC (charset) == 1)
2279 if (NILP (defined_only))
2280 c = DECODE_CHAR (charset, c);
2282 c = decode_defined_char (charset, c);
2283 return c >= 0 ? make_char (c) : Qnil;
2286 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2287 Make a builtin character from CHARSET and code-point CODE.
2293 charset = Fget_charset (charset);
2295 if (EQ (charset, Vcharset_latin_viscii))
2297 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2303 (ret = Fget_char_attribute (chr,
2304 Vcharset_latin_viscii_lower,
2307 charset = Vcharset_latin_viscii_lower;
2311 (ret = Fget_char_attribute (chr,
2312 Vcharset_latin_viscii_upper,
2315 charset = Vcharset_latin_viscii_upper;
2322 if (XCHARSET_GRAPHIC (charset) == 1)
2325 c = decode_builtin_char (charset, c);
2326 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2330 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2331 Make a character from CHARSET and octets ARG1 and ARG2.
2332 ARG2 is required only for characters from two-dimensional charsets.
2333 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2334 character s with caron.
2336 (charset, arg1, arg2))
2340 int lowlim, highlim;
2342 charset = Fget_charset (charset);
2343 cs = XCHARSET (charset);
2345 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2346 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2348 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2350 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2351 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2354 /* It is useful (and safe, according to Olivier Galibert) to strip
2355 the 8th bit off ARG1 and ARG2 because it allows programmers to
2356 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2357 Latin 2 code of the character. */
2365 if (a1 < lowlim || a1 > highlim)
2366 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2368 if (CHARSET_DIMENSION (cs) == 1)
2372 ("Charset is of dimension one; second octet must be nil", arg2);
2373 return make_char (MAKE_CHAR (charset, a1, 0));
2382 a2 = XINT (arg2) & 0x7f;
2384 if (a2 < lowlim || a2 > highlim)
2385 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2387 return make_char (MAKE_CHAR (charset, a1, a2));
2390 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2391 Return the character set of CHARACTER.
2395 CHECK_CHAR_COERCE_INT (character);
2397 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2400 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2401 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2402 N defaults to 0 if omitted.
2406 Lisp_Object charset;
2409 CHECK_CHAR_COERCE_INT (character);
2411 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2413 if (NILP (n) || EQ (n, Qzero))
2414 return make_int (octet0);
2415 else if (EQ (n, make_int (1)))
2416 return make_int (octet1);
2418 signal_simple_error ("Octet number must be 0 or 1", n);
2422 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2423 Return code-point of CHARACTER in specified CHARSET.
2425 (character, charset, defined_only))
2429 CHECK_CHAR_COERCE_INT (character);
2430 charset = Fget_charset (charset);
2431 code_point = charset_code_point (charset, XCHAR (character),
2432 !NILP (defined_only));
2433 if (code_point >= 0)
2434 return make_int (code_point);
2440 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2441 Return list of charset and one or two position-codes of CHARACTER.
2445 /* This function can GC */
2446 struct gcpro gcpro1, gcpro2;
2447 Lisp_Object charset = Qnil;
2448 Lisp_Object rc = Qnil;
2456 GCPRO2 (charset, rc);
2457 CHECK_CHAR_COERCE_INT (character);
2460 code_point = ENCODE_CHAR (XCHAR (character), charset);
2461 dimension = XCHARSET_DIMENSION (charset);
2462 while (dimension > 0)
2464 rc = Fcons (make_int (code_point & 255), rc);
2468 rc = Fcons (XCHARSET_NAME (charset), rc);
2470 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2472 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2474 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2478 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2487 #ifdef ENABLE_COMPOSITE_CHARS
2488 /************************************************************************/
2489 /* composite character functions */
2490 /************************************************************************/
2493 lookup_composite_char (Bufbyte *str, int len)
2495 Lisp_Object lispstr = make_string (str, len);
2496 Lisp_Object ch = Fgethash (lispstr,
2497 Vcomposite_char_string2char_hash_table,
2503 if (composite_char_row_next >= 128)
2504 signal_simple_error ("No more composite chars available", lispstr);
2505 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2506 composite_char_col_next);
2507 Fputhash (make_char (emch), lispstr,
2508 Vcomposite_char_char2string_hash_table);
2509 Fputhash (lispstr, make_char (emch),
2510 Vcomposite_char_string2char_hash_table);
2511 composite_char_col_next++;
2512 if (composite_char_col_next >= 128)
2514 composite_char_col_next = 32;
2515 composite_char_row_next++;
2524 composite_char_string (Emchar ch)
2526 Lisp_Object str = Fgethash (make_char (ch),
2527 Vcomposite_char_char2string_hash_table,
2529 assert (!UNBOUNDP (str));
2533 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2534 Convert a string into a single composite character.
2535 The character is the result of overstriking all the characters in
2540 CHECK_STRING (string);
2541 return make_char (lookup_composite_char (XSTRING_DATA (string),
2542 XSTRING_LENGTH (string)));
2545 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2546 Return a string of the characters comprising a composite character.
2554 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2555 signal_simple_error ("Must be composite char", ch);
2556 return composite_char_string (emch);
2558 #endif /* ENABLE_COMPOSITE_CHARS */
2561 /************************************************************************/
2562 /* initialization */
2563 /************************************************************************/
2566 syms_of_mule_charset (void)
2568 INIT_LRECORD_IMPLEMENTATION (charset);
2570 DEFSUBR (Fcharsetp);
2571 DEFSUBR (Ffind_charset);
2572 DEFSUBR (Fget_charset);
2573 DEFSUBR (Fcharset_list);
2574 DEFSUBR (Fcharset_name);
2575 DEFSUBR (Fmake_charset);
2576 DEFSUBR (Fmake_reverse_direction_charset);
2577 /* DEFSUBR (Freverse_direction_charset); */
2578 DEFSUBR (Fdefine_charset_alias);
2579 DEFSUBR (Fcharset_from_attributes);
2580 DEFSUBR (Fcharset_short_name);
2581 DEFSUBR (Fcharset_long_name);
2582 DEFSUBR (Fcharset_description);
2583 DEFSUBR (Fcharset_dimension);
2584 DEFSUBR (Fcharset_property);
2585 DEFSUBR (Fcharset_id);
2586 DEFSUBR (Fset_charset_ccl_program);
2587 DEFSUBR (Fset_charset_registry);
2589 DEFSUBR (Fcharset_mapping_table);
2590 DEFSUBR (Fset_charset_mapping_table);
2591 #ifdef HAVE_CHISE_CLIENT
2592 DEFSUBR (Fsave_charset_mapping_table);
2593 DEFSUBR (Freset_charset_mapping_table);
2596 DEFSUBR (Fdecode_char);
2597 DEFSUBR (Fdecode_builtin_char);
2598 DEFSUBR (Fencode_char);
2600 DEFSUBR (Fmake_char);
2601 DEFSUBR (Fchar_charset);
2602 DEFSUBR (Fchar_octet);
2603 DEFSUBR (Fsplit_char);
2605 #ifdef ENABLE_COMPOSITE_CHARS
2606 DEFSUBR (Fmake_composite_char);
2607 DEFSUBR (Fcomposite_char_string);
2610 defsymbol (&Qcharsetp, "charsetp");
2611 defsymbol (&Qregistry, "registry");
2612 defsymbol (&Qfinal, "final");
2613 defsymbol (&Qgraphic, "graphic");
2614 defsymbol (&Qdirection, "direction");
2615 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2616 defsymbol (&Qshort_name, "short-name");
2617 defsymbol (&Qlong_name, "long-name");
2619 defsymbol (&Qmother, "mother");
2620 defsymbol (&Qmin_code, "min-code");
2621 defsymbol (&Qmax_code, "max-code");
2622 defsymbol (&Qcode_offset, "code-offset");
2623 defsymbol (&Qconversion, "conversion");
2624 defsymbol (&Q94x60, "94x60");
2625 defsymbol (&Q94x94x60, "94x94x60");
2626 defsymbol (&Qbig5_1, "big5-1");
2627 defsymbol (&Qbig5_2, "big5-2");
2630 defsymbol (&Ql2r, "l2r");
2631 defsymbol (&Qr2l, "r2l");
2633 /* Charsets, compatible with FSF 20.3
2634 Naming convention is Script-Charset[-Edition] */
2635 defsymbol (&Qascii, "ascii");
2636 defsymbol (&Qcontrol_1, "control-1");
2637 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2638 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2639 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2640 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2641 defsymbol (&Qthai_tis620, "thai-tis620");
2642 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2643 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2644 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2645 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2646 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2647 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2648 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2649 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2650 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2651 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2652 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2653 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2654 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2655 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2656 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2657 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2659 defsymbol (&Qucs, "ucs");
2660 defsymbol (&Qucs_bmp, "ucs-bmp");
2661 defsymbol (&Qucs_smp, "ucs-smp");
2662 defsymbol (&Qucs_sip, "ucs-sip");
2663 defsymbol (&Qucs_gb, "ucs-gb");
2664 defsymbol (&Qucs_cns, "ucs-cns");
2665 defsymbol (&Qucs_jis, "ucs-jis");
2666 defsymbol (&Qucs_ks, "ucs-ks");
2667 defsymbol (&Qlatin_viscii, "latin-viscii");
2668 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2669 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2670 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2671 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2672 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2673 defsymbol (&Qjis_x0208, "=jis-x0208");
2674 defsymbol (&Qchinese_big5, "chinese-big5");
2675 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2677 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2678 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2680 defsymbol (&Qcomposite, "composite");
2684 vars_of_mule_charset (void)
2691 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2692 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2694 /* Table of charsets indexed by leading byte. */
2695 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2696 chlook->charset_by_leading_byte[i] = Qnil;
2699 /* Table of charsets indexed by type/final-byte. */
2700 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2701 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2702 chlook->charset_by_attributes[i][j] = Qnil;
2704 /* Table of charsets indexed by type/final-byte/direction. */
2705 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2706 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2707 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2708 chlook->charset_by_attributes[i][j][k] = Qnil;
2712 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2714 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2715 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2719 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2720 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2721 Leading-code of private TYPE9N charset of column-width 1.
2723 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2727 Vdefault_coded_charset_priority_list = Qnil;
2728 DEFVAR_LISP ("default-coded-charset-priority-list",
2729 &Vdefault_coded_charset_priority_list /*
2730 Default order of preferred coded-character-sets.
2736 complex_vars_of_mule_charset (void)
2738 staticpro (&Vcharset_hash_table);
2739 Vcharset_hash_table =
2740 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2742 /* Predefined character sets. We store them into variables for
2746 staticpro (&Vcharset_ucs);
2748 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2749 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2750 build_string ("UCS"),
2751 build_string ("UCS"),
2752 build_string ("ISO/IEC 10646"),
2754 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2755 staticpro (&Vcharset_ucs_bmp);
2757 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2758 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2759 build_string ("BMP"),
2760 build_string ("UCS-BMP"),
2761 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2763 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2764 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2765 staticpro (&Vcharset_ucs_smp);
2767 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2768 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2769 build_string ("SMP"),
2770 build_string ("UCS-SMP"),
2771 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2772 build_string ("UCS00-1"),
2773 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2774 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2775 staticpro (&Vcharset_ucs_sip);
2777 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2778 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2779 build_string ("SIP"),
2780 build_string ("UCS-SIP"),
2781 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2782 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2783 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2784 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2785 staticpro (&Vcharset_ucs_gb);
2787 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2788 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2789 build_string ("UCS for GB"),
2790 build_string ("UCS for GB"),
2791 build_string ("ISO/IEC 10646 for GB"),
2793 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2794 staticpro (&Vcharset_ucs_cns);
2796 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2797 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2798 build_string ("UCS for CNS"),
2799 build_string ("UCS for CNS 11643"),
2800 build_string ("ISO/IEC 10646 for CNS 11643"),
2802 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2803 staticpro (&Vcharset_ucs_jis);
2805 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2806 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2807 build_string ("UCS for JIS"),
2808 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2810 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2812 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2813 staticpro (&Vcharset_ucs_ks);
2815 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2816 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2817 build_string ("UCS for KS"),
2818 build_string ("UCS for CCS defined by KS"),
2819 build_string ("ISO/IEC 10646 for Korean Standards"),
2821 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2823 # define MIN_CHAR_THAI 0
2824 # define MAX_CHAR_THAI 0
2825 /* # define MIN_CHAR_HEBREW 0 */
2826 /* # define MAX_CHAR_HEBREW 0 */
2827 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2828 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2830 staticpro (&Vcharset_ascii);
2832 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2833 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2834 build_string ("ASCII"),
2835 build_string ("ASCII)"),
2836 build_string ("ASCII (ISO646 IRV)"),
2837 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2838 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2839 staticpro (&Vcharset_control_1);
2840 Vcharset_control_1 =
2841 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2842 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2843 build_string ("C1"),
2844 build_string ("Control characters"),
2845 build_string ("Control characters 128-191"),
2847 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2848 staticpro (&Vcharset_latin_iso8859_1);
2849 Vcharset_latin_iso8859_1 =
2850 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2851 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Latin-1"),
2853 build_string ("ISO8859-1 (Latin-1)"),
2854 build_string ("ISO8859-1 (Latin-1)"),
2855 build_string ("iso8859-1"),
2856 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2857 staticpro (&Vcharset_latin_iso8859_2);
2858 Vcharset_latin_iso8859_2 =
2859 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2860 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2861 build_string ("Latin-2"),
2862 build_string ("ISO8859-2 (Latin-2)"),
2863 build_string ("ISO8859-2 (Latin-2)"),
2864 build_string ("iso8859-2"),
2865 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2866 staticpro (&Vcharset_latin_iso8859_3);
2867 Vcharset_latin_iso8859_3 =
2868 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2869 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2870 build_string ("Latin-3"),
2871 build_string ("ISO8859-3 (Latin-3)"),
2872 build_string ("ISO8859-3 (Latin-3)"),
2873 build_string ("iso8859-3"),
2874 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2875 staticpro (&Vcharset_latin_iso8859_4);
2876 Vcharset_latin_iso8859_4 =
2877 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2878 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2879 build_string ("Latin-4"),
2880 build_string ("ISO8859-4 (Latin-4)"),
2881 build_string ("ISO8859-4 (Latin-4)"),
2882 build_string ("iso8859-4"),
2883 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2884 staticpro (&Vcharset_thai_tis620);
2885 Vcharset_thai_tis620 =
2886 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2887 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2888 build_string ("TIS620"),
2889 build_string ("TIS620 (Thai)"),
2890 build_string ("TIS620.2529 (Thai)"),
2891 build_string ("tis620"),
2892 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2893 staticpro (&Vcharset_greek_iso8859_7);
2894 Vcharset_greek_iso8859_7 =
2895 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2896 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2897 build_string ("ISO8859-7"),
2898 build_string ("ISO8859-7 (Greek)"),
2899 build_string ("ISO8859-7 (Greek)"),
2900 build_string ("iso8859-7"),
2901 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2902 staticpro (&Vcharset_arabic_iso8859_6);
2903 Vcharset_arabic_iso8859_6 =
2904 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2905 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2906 build_string ("ISO8859-6"),
2907 build_string ("ISO8859-6 (Arabic)"),
2908 build_string ("ISO8859-6 (Arabic)"),
2909 build_string ("iso8859-6"),
2910 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2911 staticpro (&Vcharset_hebrew_iso8859_8);
2912 Vcharset_hebrew_iso8859_8 =
2913 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2914 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2915 build_string ("ISO8859-8"),
2916 build_string ("ISO8859-8 (Hebrew)"),
2917 build_string ("ISO8859-8 (Hebrew)"),
2918 build_string ("iso8859-8"),
2920 0 /* MIN_CHAR_HEBREW */,
2921 0 /* MAX_CHAR_HEBREW */, 0, 32,
2922 Qnil, CONVERSION_IDENTICAL);
2923 staticpro (&Vcharset_katakana_jisx0201);
2924 Vcharset_katakana_jisx0201 =
2925 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2926 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2927 build_string ("JISX0201 Kana"),
2928 build_string ("JISX0201.1976 (Japanese Kana)"),
2929 build_string ("JISX0201.1976 Japanese Kana"),
2930 build_string ("jisx0201\\.1976"),
2931 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2932 staticpro (&Vcharset_latin_jisx0201);
2933 Vcharset_latin_jisx0201 =
2934 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2935 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2936 build_string ("JISX0201 Roman"),
2937 build_string ("JISX0201.1976 (Japanese Roman)"),
2938 build_string ("JISX0201.1976 Japanese Roman"),
2939 build_string ("jisx0201\\.1976"),
2940 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2941 staticpro (&Vcharset_cyrillic_iso8859_5);
2942 Vcharset_cyrillic_iso8859_5 =
2943 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2944 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2945 build_string ("ISO8859-5"),
2946 build_string ("ISO8859-5 (Cyrillic)"),
2947 build_string ("ISO8859-5 (Cyrillic)"),
2948 build_string ("iso8859-5"),
2949 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2950 staticpro (&Vcharset_latin_iso8859_9);
2951 Vcharset_latin_iso8859_9 =
2952 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2953 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2954 build_string ("Latin-5"),
2955 build_string ("ISO8859-9 (Latin-5)"),
2956 build_string ("ISO8859-9 (Latin-5)"),
2957 build_string ("iso8859-9"),
2958 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2960 staticpro (&Vcharset_jis_x0208);
2961 Vcharset_jis_x0208 =
2962 make_charset (LEADING_BYTE_JIS_X0208,
2964 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2965 build_string ("JIS X0208"),
2966 build_string ("JIS X0208 Common"),
2967 build_string ("JIS X0208 Common part"),
2968 build_string ("jisx0208\\.1990"),
2970 MIN_CHAR_JIS_X0208_1990,
2971 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2972 Qnil, CONVERSION_94x94);
2974 staticpro (&Vcharset_japanese_jisx0208_1978);
2975 Vcharset_japanese_jisx0208_1978 =
2976 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2977 Qjapanese_jisx0208_1978, 94, 2,
2978 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2979 build_string ("JIS X0208:1978"),
2980 build_string ("JIS X0208:1978 (Japanese)"),
2982 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2983 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2990 CONVERSION_IDENTICAL);
2991 staticpro (&Vcharset_chinese_gb2312);
2992 Vcharset_chinese_gb2312 =
2993 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2994 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2995 build_string ("GB2312"),
2996 build_string ("GB2312)"),
2997 build_string ("GB2312 Chinese simplified"),
2998 build_string ("gb2312"),
2999 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3000 staticpro (&Vcharset_chinese_gb12345);
3001 Vcharset_chinese_gb12345 =
3002 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3003 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3004 build_string ("G1"),
3005 build_string ("GB 12345)"),
3006 build_string ("GB 12345-1990"),
3007 build_string ("GB12345\\(\\.1990\\)?-0"),
3008 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3009 staticpro (&Vcharset_japanese_jisx0208);
3010 Vcharset_japanese_jisx0208 =
3011 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3012 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3013 build_string ("JISX0208"),
3014 build_string ("JIS X0208:1983 (Japanese)"),
3015 build_string ("JIS X0208:1983 Japanese Kanji"),
3016 build_string ("jisx0208\\.1983"),
3023 CONVERSION_IDENTICAL);
3025 staticpro (&Vcharset_japanese_jisx0208_1990);
3026 Vcharset_japanese_jisx0208_1990 =
3027 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3028 Qjapanese_jisx0208_1990, 94, 2,
3029 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3030 build_string ("JISX0208-1990"),
3031 build_string ("JIS X0208:1990 (Japanese)"),
3032 build_string ("JIS X0208:1990 Japanese Kanji"),
3033 build_string ("jisx0208\\.1990"),
3035 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3036 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3037 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3038 Vcharset_jis_x0208 /* Qnil */,
3039 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3041 staticpro (&Vcharset_korean_ksc5601);
3042 Vcharset_korean_ksc5601 =
3043 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3044 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("KSC5601"),
3046 build_string ("KSC5601 (Korean"),
3047 build_string ("KSC5601 Korean Hangul and Hanja"),
3048 build_string ("ksc5601"),
3049 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3050 staticpro (&Vcharset_japanese_jisx0212);
3051 Vcharset_japanese_jisx0212 =
3052 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3053 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3054 build_string ("JISX0212"),
3055 build_string ("JISX0212 (Japanese)"),
3056 build_string ("JISX0212 Japanese Supplement"),
3057 build_string ("jisx0212"),
3058 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3060 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3061 staticpro (&Vcharset_chinese_cns11643_1);
3062 Vcharset_chinese_cns11643_1 =
3063 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3064 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("CNS11643-1"),
3066 build_string ("CNS11643-1 (Chinese traditional)"),
3068 ("CNS 11643 Plane 1 Chinese traditional"),
3069 build_string (CHINESE_CNS_PLANE_RE("1")),
3070 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3071 staticpro (&Vcharset_chinese_cns11643_2);
3072 Vcharset_chinese_cns11643_2 =
3073 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3074 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3075 build_string ("CNS11643-2"),
3076 build_string ("CNS11643-2 (Chinese traditional)"),
3078 ("CNS 11643 Plane 2 Chinese traditional"),
3079 build_string (CHINESE_CNS_PLANE_RE("2")),
3080 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3082 staticpro (&Vcharset_latin_tcvn5712);
3083 Vcharset_latin_tcvn5712 =
3084 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3085 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3086 build_string ("TCVN 5712"),
3087 build_string ("TCVN 5712 (VSCII-2)"),
3088 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3089 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3090 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3091 staticpro (&Vcharset_latin_viscii_lower);
3092 Vcharset_latin_viscii_lower =
3093 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3094 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3095 build_string ("VISCII lower"),
3096 build_string ("VISCII lower (Vietnamese)"),
3097 build_string ("VISCII lower (Vietnamese)"),
3098 build_string ("MULEVISCII-LOWER"),
3099 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3100 staticpro (&Vcharset_latin_viscii_upper);
3101 Vcharset_latin_viscii_upper =
3102 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3103 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3104 build_string ("VISCII upper"),
3105 build_string ("VISCII upper (Vietnamese)"),
3106 build_string ("VISCII upper (Vietnamese)"),
3107 build_string ("MULEVISCII-UPPER"),
3108 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3109 staticpro (&Vcharset_latin_viscii);
3110 Vcharset_latin_viscii =
3111 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3112 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3113 build_string ("VISCII"),
3114 build_string ("VISCII 1.1 (Vietnamese)"),
3115 build_string ("VISCII 1.1 (Vietnamese)"),
3116 build_string ("VISCII1\\.1"),
3117 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3118 staticpro (&Vcharset_chinese_big5);
3119 Vcharset_chinese_big5 =
3120 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3121 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3122 build_string ("Big5"),
3123 build_string ("Big5"),
3124 build_string ("Big5 Chinese traditional"),
3125 build_string ("big5-0"),
3127 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3128 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3130 staticpro (&Vcharset_ethiopic_ucs);
3131 Vcharset_ethiopic_ucs =
3132 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3133 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3134 build_string ("Ethiopic (UCS)"),
3135 build_string ("Ethiopic (UCS)"),
3136 build_string ("Ethiopic of UCS"),
3137 build_string ("Ethiopic-Unicode"),
3138 Qnil, 0x1200, 0x137F, 0, 0,
3139 Qnil, CONVERSION_IDENTICAL);
3141 staticpro (&Vcharset_chinese_big5_1);
3142 Vcharset_chinese_big5_1 =
3143 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3144 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3145 build_string ("Big5"),
3146 build_string ("Big5 (Level-1)"),
3148 ("Big5 Level-1 Chinese traditional"),
3149 build_string ("big5"),
3150 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3151 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3152 staticpro (&Vcharset_chinese_big5_2);
3153 Vcharset_chinese_big5_2 =
3154 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3155 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3156 build_string ("Big5"),
3157 build_string ("Big5 (Level-2)"),
3159 ("Big5 Level-2 Chinese traditional"),
3160 build_string ("big5"),
3161 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3162 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3164 #ifdef ENABLE_COMPOSITE_CHARS
3165 /* #### For simplicity, we put composite chars into a 96x96 charset.
3166 This is going to lead to problems because you can run out of
3167 room, esp. as we don't yet recycle numbers. */
3168 staticpro (&Vcharset_composite);
3169 Vcharset_composite =
3170 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3171 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3172 build_string ("Composite"),
3173 build_string ("Composite characters"),
3174 build_string ("Composite characters"),
3177 /* #### not dumped properly */
3178 composite_char_row_next = 32;
3179 composite_char_col_next = 32;
3181 Vcomposite_char_string2char_hash_table =
3182 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3183 Vcomposite_char_char2string_hash_table =
3184 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3185 staticpro (&Vcomposite_char_string2char_hash_table);
3186 staticpro (&Vcomposite_char_char2string_hash_table);
3187 #endif /* ENABLE_COMPOSITE_CHARS */