1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_jis_x0208;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_ethiopic_ucs;
79 Lisp_Object Vcharset_chinese_big5_1;
80 Lisp_Object Vcharset_chinese_big5_2;
82 #ifdef ENABLE_COMPOSITE_CHARS
83 Lisp_Object Vcharset_composite;
85 /* Hash tables for composite chars. One maps strings representing
86 composed chars to their equivalent chars; one goes the other way. */
88 Lisp_Object Vcomposite_char_char2string_hash_table;
89 Lisp_Object Vcomposite_char_string2char_hash_table;
91 static int composite_char_row_next;
92 static int composite_char_col_next;
94 #endif /* ENABLE_COMPOSITE_CHARS */
/* Global handle on the charset lookup tables; other code in this file
   reaches them through it (e.g. chlook->charset_by_leading_byte). */
96 struct charset_lookup *chlook;
/* Field descriptor for struct charset_lookup: tags the member
   charset_by_leading_byte as an array of Lisp objects, with
   NUM_LEADING_BYTES+4*128 given as the accompanying value (presumably the
   element count -- confirm).
   NOTE(review): the remaining entries and closing braces of this
   initializer are not visible in this excerpt. */
98 static const struct lrecord_description charset_lookup_description_1[] = {
99 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
101 NUM_LEADING_BYTES+4*128
/* Whole-structure descriptor: pairs sizeof (struct charset_lookup) with
   the field table charset_lookup_description_1. */
108 static const struct struct_description charset_lookup_description = {
109 sizeof (struct charset_lookup),
110 charset_lookup_description_1
114 /* Table of number of bytes in the string representation of a character
115 indexed by the first byte of that representation.
117 rep_bytes_by_first_byte(c) is more efficient than the equivalent
118 canonical computation:
120 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table declared with 0xA0 entries: entry I is the byte length of
   the string representation whose first byte is I.  Each row below
   supplies 16 entries. */
122 const Bytecount rep_bytes_by_first_byte[0xA0] =
123 { /* 0x00 - 0x7f are for straight ASCII */
124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two candidate rows for 0x80 - 0x8f follow (one whose last
   entry is 3, one that is all 2s).  With both present the initializer
   would hold 176 values, more than the 0xA0 (160) declared, so they are
   presumably alternatives selected by a preprocessor conditional that is
   not visible in this excerpt -- confirm. */
134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 /* 0x90 - 0x9d are for Dimension-2 official charsets */
139 /* 0x9e is for Dimension-1 private charsets */
140 /* 0x9f is for Dimension-2 private charsets */
/* Hence 3 bytes for first bytes 0x90 - 0x9e and 4 bytes for 0x9f. */
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Sanity-check the vector V used as a decoding table.
   V   - vector to inspect (read with XVECTOR_LENGTH / XVECTOR_DATA).
   DIM - nesting depth; the recursive call passes DIM - 1.
   CCS_LEN - upper bound on the vector length; a longer V takes the
             branch on the first test.
   Each element is examined in turn; an element that is neither nil nor
   a character takes the branch on the second test, and the function calls
   itself on an element with DIM - 1.
   NOTE(review): the return values and the condition guarding the
   recursive call are on lines not visible in this excerpt. */
147 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
149 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
153 if (XVECTOR_LENGTH (v) > ccs_len)
156 for (i = 0; i < XVECTOR_LENGTH (v); i++)
158 Lisp_Object c = XVECTOR_DATA(v)[i];
160 if (!NILP (c) && !CHARP (c))
164 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record that CHARACTER has code point VALUE in the coded charset CCS.
   VALUE is accepted in two shapes:
     - an integer, used directly as the code point;
     - a list of bytes (the obsolete representation), folded into one
       integer eight bits at a time.
   Any other shape signals "Invalid value for coded-charset".
   For charsets whose XCHARSET_GRAPHIC is 1 the integer form is masked
   with 0x7F7F7F7F before being stored back into VALUE.
   A code point previously recorded for CHARACTER (looked up with
   Fget_char_attribute) is removed from the decoding table before the new
   mapping is installed with decoding_table_put_char.
   NOTE(review): the return value and several guarding conditions are on
   lines not visible in this excerpt. */
176 put_char_ccs_code_point (Lisp_Object character,
177 Lisp_Object ccs, Lisp_Object value)
/* The test below involves whether CCS is the charset named `ucs' and
   whether CHARACTER's own code differs from VALUE. */
179 if (!EQ (XCHARSET_NAME (ccs), Qucs)
181 || (XCHAR (character) != XINT (value)))
183 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
187 { /* obsolete representation: value must be a list of bytes */
188 Lisp_Object ret = Fcar (value);
192 signal_simple_error ("Invalid value for coded-charset", value);
193 code_point = XINT (ret);
194 if (XCHARSET_GRAPHIC (ccs) == 1)
202 signal_simple_error ("Invalid value for coded-charset",
206 signal_simple_error ("Invalid value for coded-charset",
209 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J as the low-order eight bits. */
211 code_point = (code_point << 8) | j;
214 value = make_int (code_point);
216 else if (INTP (value))
218 code_point = XINT (value);
219 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the high bit of each of the four bytes. */
221 code_point &= 0x7F7F7F7F;
222 value = make_int (code_point);
226 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the stale decoding-table entry, then install the new one. */
230 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
233 decoding_table_remove_char (ccs, XINT (cpos));
236 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS.
   When the charset's decoding table is a vector, CHARACTER's code point
   (obtained with Fget_char_attribute) is removed from it via
   decoding_table_remove_char.  When the encoding table is a char table,
   CHARACTER's entry is set to Qunbound.
   NOTE(review): the return value and the test applied to CPOS before the
   removal are on lines not visible in this excerpt. */
242 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
244 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
245 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
247 if (VECTORP (decoding_table))
249 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
253 decoding_table_remove_char (ccs, XINT (cpos));
256 if (CHAR_TABLEP (encoding_table))
258 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
266 int leading_code_private_11;
269 Lisp_Object Qcharsetp;
271 /* Qdoc_string, Qdimension, Qchars defined in general.c */
272 Lisp_Object Qregistry, Qfinal, Qgraphic;
273 Lisp_Object Qdirection;
274 Lisp_Object Qreverse_direction_charset;
275 Lisp_Object Qleading_byte;
276 Lisp_Object Qshort_name, Qlong_name;
278 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
279 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
296 Qjapanese_jisx0208_1978,
300 Qjapanese_jisx0208_1990,
314 Qvietnamese_viscii_lower,
315 Qvietnamese_viscii_upper,
324 Lisp_Object Ql2r, Qr2l;
326 Lisp_Object Vcharset_hash_table;
328 /* Composite characters are characters constructed by overstriking two
329 or more regular characters.
331 1) The old Mule implementation involves storing composite characters
332 in a buffer as a tag followed by all of the actual characters
333 used to make up the composite character. I think this is a bad
334 idea; it greatly complicates code that wants to handle strings
335 one character at a time because it has to deal with the possibility
336 of great big ungainly characters. It's much more reasonable to
337 simply store an index into a table of composite characters.
339 2) The current implementation only allows for 16,384 separate
340 composite characters over the lifetime of the XEmacs process.
341 This could become a potential problem if the user
342 edited lots of different files that use composite characters.
343 Due to FSF bogosity, increasing the number of allowable
344 composite characters under Mule would decrease the number
345 of possible faces that can exist. Mule already has shrunk
346 this to 2048, and further shrinkage would become uncomfortable.
347 No such problems exist in XEmacs.
349 Composite characters could be represented as 0x80 C1 C2 C3,
350 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
351 for slightly under 2^20 (one million) composite characters
352 over the XEmacs process lifetime, and you only need to
353 increase the size of a Mule character from 19 to 21 bits.
354 Or you could use 0x80 C1 C2 C3 C4, allowing for about
355 85 million (slightly over 2^26) composite characters. */
358 /************************************************************************/
359 /* Basic Emchar functions */
360 /************************************************************************/
362 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
363 string in STR. Returns the number of bytes stored.
364 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multi-byte form of the non-ASCII character C through the
   cursor P.
   The visible range tests pick the sequence length from the magnitude of
   C and emit a lead byte followed by continuation bytes, each carrying six
   bits of C ORed with 0x80.  This is the UTF-8 bit layout, including the
   historical five- and six-byte forms:
     C <= 0x7ff       lead 0xc0, 2 bytes
     C <= 0xffff      lead 0xe0, 3 bytes
     C <= 0x1fffff    lead 0xf0, 4 bytes
     C <= 0x3ffffff   lead 0xf8, 5 bytes
     remaining case   lead 0xfc, 6 bytes
   The trailing statements (BREAKUP_CHAR, CHAR_LEADING_BYTE, ...) instead
   build a representation from a charset leading byte, emitting
   PRIVATE_LEADING_BYTE_PREFIX first for private leading bytes.
   NOTE(review): presumably the two schemes are alternatives chosen by a
   preprocessor conditional that is not visible in this excerpt; the
   declaration of P and the return statement are not visible either. */
368 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
383 else if ( c <= 0x7ff )
385 *p++ = (c >> 6) | 0xc0;
386 *p++ = (c & 0x3f) | 0x80;
388 else if ( c <= 0xffff )
390 *p++ = (c >> 12) | 0xe0;
391 *p++ = ((c >> 6) & 0x3f) | 0x80;
392 *p++ = (c & 0x3f) | 0x80;
394 else if ( c <= 0x1fffff )
396 *p++ = (c >> 18) | 0xf0;
397 *p++ = ((c >> 12) & 0x3f) | 0x80;
398 *p++ = ((c >> 6) & 0x3f) | 0x80;
399 *p++ = (c & 0x3f) | 0x80;
401 else if ( c <= 0x3ffffff )
403 *p++ = (c >> 24) | 0xf8;
404 *p++ = ((c >> 18) & 0x3f) | 0x80;
405 *p++ = ((c >> 12) & 0x3f) | 0x80;
406 *p++ = ((c >> 6) & 0x3f) | 0x80;
407 *p++ = (c & 0x3f) | 0x80;
411 *p++ = (c >> 30) | 0xfc;
412 *p++ = ((c >> 24) & 0x3f) | 0x80;
413 *p++ = ((c >> 18) & 0x3f) | 0x80;
414 *p++ = ((c >> 12) & 0x3f) | 0x80;
415 *p++ = ((c >> 6) & 0x3f) | 0x80;
416 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte based representation. */
419 BREAKUP_CHAR (c, charset, c1, c2);
420 lb = CHAR_LEADING_BYTE (c);
421 if (LEADING_BYTE_PRIVATE_P (lb))
422 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
424 if (EQ (charset, Vcharset_control_1))
433 /* Return the first character from a Mule-encoded string in STR,
434 assuming it's non-ASCII. Do not call this directly.
435 Use the macro charptr_emchar() instead. */
/* Decode the first character of the multi-byte text at STR.
   The first group of statements classifies the lead byte B by threshold
   (0xf8, 0xf0, 0xe0, 0xc0) and then loops LEN times, shifting the
   accumulator CH left by six bits and ORing in the low six bits of B on
   each pass.
   The second group works from leading bytes: a LEADING_BYTE_CONTROL_1
   lead yields the following byte minus 0x20; otherwise the charset is
   found with CHARSET_BY_LEADING_BYTE and the result is built with
   MAKE_CHAR from one or (for dimension-2 charsets) two position bytes.
   NOTE(review): presumably the two groups are alternatives chosen by a
   preprocessor conditional that is not visible in this excerpt; how LEN
   and the initial CH are derived is also not visible. */
438 non_ascii_charptr_emchar (const Bufbyte *str)
451 else if ( b >= 0xf8 )
456 else if ( b >= 0xf0 )
461 else if ( b >= 0xe0 )
466 else if ( b >= 0xc0 )
476 for( ; len > 0; len-- )
/* Fold six more payload bits into the accumulator. */
479 ch = ( ch << 6 ) | ( b & 0x3f );
483 Bufbyte i0 = *str, i1, i2 = 0;
486 if (i0 == LEADING_BYTE_CONTROL_1)
487 return (Emchar) (*++str - 0x20);
489 if (LEADING_BYTE_PREFIX_P (i0))
494 charset = CHARSET_BY_LEADING_BYTE (i0);
495 if (XCHARSET_DIMENSION (charset) == 2)
498 return MAKE_CHAR (charset, i1, i2);
502 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
503 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for a non-ASCII character code CH.
   CH is split into three fields with CHAR_FIELD1/2/3.  Two families of
   checks are visible:
     - one keyed on field 2: it must lie in the official or the private
       FIELD2 range, and the charset found through
       CHARSET_BY_LEADING_BYTE must not be nil;
     - one keyed on field 1: it must lie in the official or the private
       FIELD1 range, neither F2 nor F3 may be below 0x20, and the charset
       is looked up with the official or private FIELD1 offset.
   In both families the visible final result is
   XCHARSET_CHARS (charset) == 96, reached after tests on whether position
   fields equal 0x20 or 0x7F.
   With ENABLE_COMPOSITE_CHARS, codes whose leading byte is
   LEADING_BYTE_COMPOSITE are looked up in
   Vcomposite_char_char2string_hash_table.
   NOTE(review): the statement selecting between the two families, and
   the return statements of the failing tests, are on lines not visible in
   this excerpt. */
507 non_ascii_valid_char_p (Emchar ch)
511 /* Must have only lowest 19 bits set */
515 f1 = CHAR_FIELD1 (ch);
516 f2 = CHAR_FIELD2 (ch);
517 f3 = CHAR_FIELD3 (ch);
523 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
524 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
525 f2 > MAX_CHAR_FIELD2_PRIVATE)
530 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
531 f2 <= MAX_CHAR_FIELD2_PRIVATE))
535 NOTE: This takes advantage of the fact that
536 FIELD2_TO_OFFICIAL_LEADING_BYTE and
537 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
539 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
540 if (EQ (charset, Qnil))
542 return (XCHARSET_CHARS (charset) == 96);
548 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
549 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
550 f1 > MAX_CHAR_FIELD1_PRIVATE)
552 if (f2 < 0x20 || f3 < 0x20)
555 #ifdef ENABLE_COMPOSITE_CHARS
556 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
558 if (UNBOUNDP (Fgethash (make_int (ch),
559 Vcomposite_char_char2string_hash_table,
564 #endif /* ENABLE_COMPOSITE_CHARS */
566 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
567 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
570 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
572 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
575 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
577 if (EQ (charset, Qnil))
579 return (XCHARSET_CHARS (charset) == 96);
585 /************************************************************************/
586 /* Basic string functions */
587 /************************************************************************/
589 /* Copy the character pointed to by SRC into DST. Do not call this
590 directly. Use the macro charptr_copy_char() instead.
591 Return the number of bytes copied. */
/* Copy one character from SRC to DST.  The number of bytes to copy is
   taken from REP_BYTES_BY_FIRST_BYTE applied to the first source byte;
   the loop then advances both pointers once per byte.
   NOTE(review): the loop body and the return statement are on lines not
   visible in this excerpt. */
594 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
596 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
598 for (i = bytes; i; i--, dst++, src++)
604 /************************************************************************/
605 /* streams of Emchars */
606 /************************************************************************/
608 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
609 The functions below are not meant to be called directly; use
610 the macros in insdel.h. */
/* Finish reading one character from STREAM, given that its first byte CH
   has already been read.  CH is stored in a local buffer of
   MAX_EMCHAR_LEN bytes, the remaining REP_BYTES_BY_FIRST_BYTE (CH) - 1
   bytes are fetched with Lstream_getc and appended, and the buffer is
   decoded with charptr_emchar.
   Each fetched byte is checked with bufpos_checking_assert (c >= 0)
   before being narrowed to Bufbyte. */
613 Lstream_get_emchar_1 (Lstream *stream, int ch)
615 Bufbyte str[MAX_EMCHAR_LEN];
616 Bufbyte *strptr = str;
619 str[0] = (Bufbyte) ch;
621 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
623 int c = Lstream_getc (stream);
624 bufpos_checking_assert (c >= 0);
625 *++strptr = (Bufbyte) c;
627 return charptr_emchar (str);
/* Write the character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, then pass the resulting LEN bytes to Lstream_write
   and return that call's result. */
631 Lstream_fput_emchar (Lstream *stream, Emchar ch)
633 Bufbyte str[MAX_EMCHAR_LEN];
634 Bytecount len = set_charptr_emchar (str, ch);
635 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar, then hand the resulting LEN bytes to
   Lstream_unread. */
639 Lstream_funget_emchar (Lstream *stream, Emchar ch)
641 Bufbyte str[MAX_EMCHAR_LEN];
642 Bytecount len = set_charptr_emchar (str, ch);
643 Lstream_unread (stream, str, len);
647 /************************************************************************/
649 /************************************************************************/
/* Mark method registered for charset objects (see the
   DEFINE_LRECORD_IMPLEMENTATION for "charset").  Calls mark_object on the
   Lisp-valued slots of the charset held in OBJ that are listed below.
   NOTE(review): the return value, and any slot marked on a line not
   visible in this excerpt, cannot be confirmed from here. */
652 mark_charset (Lisp_Object obj)
654 Lisp_Charset *cs = XCHARSET (obj);
656 mark_object (cs->short_name);
657 mark_object (cs->long_name);
658 mark_object (cs->doc_string);
659 mark_object (cs->registry);
660 mark_object (cs->ccl_program);
662 mark_object (cs->decoding_table);
663 mark_object (cs->mother);
/* Print method registered for charset objects.  Emits
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ...attributes... reg=REGISTRY 0xUID>
   to PRINTCHARFUN, where the attribute text is formatted by the first
   sprintf below.  An error "printing unreadable object ..." is raised on a
   path whose guarding condition is not visible in this excerpt.
   NOTE(review): BUF is declared on a line not visible here; both sprintf
   calls are unbounded, so BUF must be large enough for the formatted
   text -- confirm its size. */
669 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
671 Lisp_Charset *cs = XCHARSET (obj);
675 error ("printing unreadable object #<charset %s 0x%x>",
676 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
679 write_c_string ("#<charset ", printcharfun);
680 print_internal (CHARSET_NAME (cs), printcharfun, 0);
681 write_c_string (" ", printcharfun);
682 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
683 write_c_string (" ", printcharfun);
684 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
687 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
689 CHARSET_DIMENSION (cs),
690 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
691 CHARSET_COLUMNS (cs),
692 CHARSET_GRAPHIC (cs),
694 write_c_string (buf, printcharfun);
695 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
696 sprintf (buf, " 0x%x>", cs->header.uid);
697 write_c_string (buf, printcharfun);
/* Field descriptors for Lisp_Charset: each entry tags one member as a
   Lisp object by its offset within the structure.
   NOTE(review): the terminator entry and any entries on lines not visible
   in this excerpt cannot be confirmed from here. */
700 static const struct lrecord_description charset_description[] = {
701 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
702 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Register the "charset" record type with mark_charset and print_charset
   as its mark and print methods; the three following method slots are 0. */
715 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
716 mark_charset, print_charset, 0, 0, 0,
720 /* Make a new charset. */
721 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset record, fill in its slots from the arguments,
   and register it in the lookup tables.
   Slots set from arguments: id, name, short/long name, chars, dimension,
   direction, columns, graphic, final, doc string, registry, min/max code,
   code offset, byte offset, mother and conversion.
   Slots given fixed initial values: the CCL program and the
   reverse-direction charset start as Qnil, and the decoding table starts
   as Qunbound.
   The representation length is 1 for LEADING_BYTE_ASCII, otherwise the
   dimension plus 1 or plus 2.
   The new object is entered in chlook->charset_by_attributes and
   chlook->charset_by_leading_byte (each slot asserted to be nil
   beforehand) and in Vcharset_hash_table under NAME.
   NOTE(review): the DECODING_TABLE parameter is not used on any visible
   line -- the slot is set to Qunbound instead; confirm this is intended.
   NOTE(review): the conditions choosing between dimension + 1 and
   dimension + 2, and those guarding each registration step, are on lines
   not visible in this excerpt, as is the return statement. */
723 make_charset (Charset_ID id, Lisp_Object name,
724 unsigned short chars, unsigned char dimension,
725 unsigned char columns, unsigned char graphic,
726 Bufbyte final, unsigned char direction, Lisp_Object short_name,
727 Lisp_Object long_name, Lisp_Object doc,
729 Lisp_Object decoding_table,
730 Emchar min_code, Emchar max_code,
731 Emchar code_offset, unsigned char byte_offset,
732 Lisp_Object mother, unsigned char conversion)
735 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
739 XSETCHARSET (obj, cs);
741 CHARSET_ID (cs) = id;
742 CHARSET_NAME (cs) = name;
743 CHARSET_SHORT_NAME (cs) = short_name;
744 CHARSET_LONG_NAME (cs) = long_name;
745 CHARSET_CHARS (cs) = chars;
746 CHARSET_DIMENSION (cs) = dimension;
747 CHARSET_DIRECTION (cs) = direction;
748 CHARSET_COLUMNS (cs) = columns;
749 CHARSET_GRAPHIC (cs) = graphic;
750 CHARSET_FINAL (cs) = final;
751 CHARSET_DOC_STRING (cs) = doc;
752 CHARSET_REGISTRY (cs) = reg;
753 CHARSET_CCL_PROGRAM (cs) = Qnil;
754 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
756 CHARSET_DECODING_TABLE(cs) = Qunbound;
757 CHARSET_MIN_CODE (cs) = min_code;
758 CHARSET_MAX_CODE (cs) = max_code;
759 CHARSET_CODE_OFFSET (cs) = code_offset;
760 CHARSET_BYTE_OFFSET (cs) = byte_offset;
761 CHARSET_MOTHER (cs) = mother;
762 CHARSET_CONVERSION (cs) = conversion;
766 if (id == LEADING_BYTE_ASCII)
767 CHARSET_REP_BYTES (cs) = 1;
769 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
771 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
776 /* some charsets do not have final characters. This includes
777 ASCII, Control-1, Composite, and the two faux private
779 unsigned char iso2022_type
780 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
782 if (code_offset == 0)
784 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
785 chlook->charset_by_attributes[iso2022_type][final] = obj;
789 (chlook->charset_by_attributes[iso2022_type][final][direction]));
790 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
794 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
795 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
797 /* Some charsets are "faux" and don't have names or really exist at
798 all except in the leading-byte table. */
800 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next free leading byte for a charset of the given
   DIMENSION.  Three counters in CHLOOK are visible: a general one bounded
   by MAX_LEADING_BYTE_PRIVATE, and separate one-byte and two-byte counters
   bounded by MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.
   The selected counter is compared against its bound and then
   post-incremented into LB.  When nothing is available the error
   "No more character sets free for this dimension" is raised with
   DIMENSION as its datum.
   NOTE(review): which counter applies to which DIMENSION or build
   configuration, and what happens when a bound is exceeded, are on lines
   not visible in this excerpt. */
805 get_unallocated_leading_byte (int dimension)
810 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
813 lb = chlook->next_allocated_leading_byte++;
817 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
820 lb = chlook->next_allocated_1_byte_leading_byte++;
824 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
827 lb = chlook->next_allocated_2_byte_leading_byte++;
833 ("No more character sets free for this dimension",
834 make_int (dimension));
840 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: (0xFF - 0xA1) = 94 second-byte values in the upper
   range plus (0x7F - 0x40) = 63 in the lower range. */
842 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character that the coded charset CCS explicitly defines
   for CODE_POINT.
   The decoding table is consulted one octet of CODE_POINT at a time via
   get_ccs_octet_table; a character found there is returned.  With
   HAVE_CHISE_CLIENT, a table entry equal to Qunloaded triggers
   load_char_decoding_entry_maybe.
   Otherwise, when CCS has a mother charset, the lookup is delegated to
   it according to the conversion kind:
     CONVERSION_IDENTICAL - same code point; DECODE_CHAR is used when the
                            mother is Vcharset_ucs, else this function
                            recurses.
     CONVERSION_BIG5_1/2  - the 94x94 position is linearised into I
                            (BIG5_2 adds BIG5_SAME_ROW * (0xC9 - 0xA1)),
                            then split into Big5 bytes B1 and B2 and
                            looked up in the mother.
   The literal (0xFF - 0xA1 + 0x7F - 0x40) used for the split is the same
   expression as BIG5_SAME_ROW.
   NOTE(review): the loop around get_ccs_octet_table and the value
   returned when nothing is found are on lines not visible in this
   excerpt. */
845 decode_defined_char (Lisp_Object ccs, int code_point)
847 int dim = XCHARSET_DIMENSION (ccs);
848 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
856 = get_ccs_octet_table (decoding_table, ccs,
857 (code_point >> (dim * 8)) & 255);
859 if (CHARP (decoding_table))
860 return XCHAR (decoding_table);
861 #ifdef HAVE_CHISE_CLIENT
862 if (EQ (decoding_table, Qunloaded))
864 char_id = load_char_decoding_entry_maybe (ccs, code_point);
869 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
871 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
873 if ( EQ (mother, Vcharset_ucs) )
874 return DECODE_CHAR (mother, code_point);
876 return decode_defined_char (mother, code_point);
878 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
881 = (((code_point >> 8) & 0x7F) - 33) * 94
882 + (( code_point & 0x7F) - 33);
883 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
884 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets 0..62 map to 0x40..0x7E; offsets from 63 up map to 0xA1 on. */
886 b2 += b2 < 0x3F ? 0x40 : 0x62;
887 return decode_defined_char (mother, (b1 << 8) | b2);
889 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
892 = (((code_point >> 8) & 0x7F) - 33) * 94
893 + (( code_point & 0x7F) - 33)
894 + BIG5_SAME_ROW * (0xC9 - 0xA1);
895 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
896 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets 0..62 map to 0x40..0x7E; offsets from 63 up map to 0xA1 on. */
898 b2 += b2 < 0x3F ? 0x40 : 0x62;
899 return decode_defined_char (mother, (b1 << 8) | b2);
/* Compute the character for CODE_POINT in CHARSET arithmetically, from
   the charset's parameters rather than from its decoding table.
   Three strategies are visible:
     1. XCHARSET_MAX_CODE > 0 and a mother charset exists: CODE_POINT is
        rewritten according to the conversion kind (94x60, 94x94x60,
        BIG5_1, BIG5_2), and the function recurses on the mother with
        the rewritten code plus XCHARSET_CODE_OFFSET.
     2. XCHARSET_MAX_CODE > 0 with no mother: a linear id is built from
        the byte offset, chars-per-dimension and code offset, then tested
        against the charset's min/max code.
     3. Otherwise, when the final byte is >= '0': the result is an offset
        into a block selected by (final - '0'), using 94 or 96 positions
        per dimension; MIN_CHAR_94x94 and MIN_CHAR_96x96 are the visible
        bases for the dimension-2 case.
   NOTE(review): the values returned for out-of-range input, several
   branch bodies of the 94x60 conversions, and the base constants of the
   dimension-1 case are on lines not visible in this excerpt. */
906 decode_builtin_char (Lisp_Object charset, int code_point)
908 Lisp_Object mother = XCHARSET_MOTHER (charset);
911 if ( XCHARSET_MAX_CODE (charset) > 0 )
913 if ( CHARSETP (mother) )
915 int code = code_point;
917 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
919 int row = code_point >> 8;
920 int cell = code_point & 255;
924 else if (row < 16 + 32 + 30)
925 code = (row - (16 + 32)) * 94 + cell - 33;
926 else if (row < 18 + 32 + 30)
928 else if (row < 18 + 32 + 60)
929 code = (row - (18 + 32)) * 94 + cell - 33;
931 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
933 int plane = code_point >> 16;
934 int row = (code_point >> 8) & 255;
935 int cell = code_point & 255;
939 else if (row < 16 + 32 + 30)
941 = (plane - 33) * 94 * 60
942 + (row - (16 + 32)) * 94
944 else if (row < 18 + 32 + 30)
946 else if (row < 18 + 32 + 60)
948 = (plane - 33) * 94 * 60
949 + (row - (18 + 32)) * 94
952 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
955 = (((code_point >> 8) & 0x7F) - 33) * 94
956 + (( code_point & 0x7F) - 33);
/* The divisor below is the same expression as BIG5_SAME_ROW. */
957 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
958 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
960 b2 += b2 < 0x3F ? 0x40 : 0x62;
961 code = (b1 << 8) | b2;
963 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
966 = (((code_point >> 8) & 0x7F) - 33) * 94
967 + (( code_point & 0x7F) - 33)
968 + BIG5_SAME_ROW * (0xC9 - 0xA1);
969 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
970 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
972 b2 += b2 < 0x3F ? 0x40 : 0x62;
973 code = (b1 << 8) | b2;
/* Delegate to the mother charset with the rewritten code. */
976 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* No mother: linearise CODE_POINT and range-check it. */
981 = (XCHARSET_DIMENSION (charset) == 1
983 code_point - XCHARSET_BYTE_OFFSET (charset)
985 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
986 * XCHARSET_CHARS (charset)
987 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
988 + XCHARSET_CODE_OFFSET (charset);
989 if ((cid < XCHARSET_MIN_CODE (charset))
990 || (XCHARSET_MAX_CODE (charset) < cid))
/* Charsets identified by a final byte. */
995 else if ((final = XCHARSET_FINAL (charset)) >= '0')
997 if (XCHARSET_DIMENSION (charset) == 1)
999 switch (XCHARSET_CHARS (charset))
1003 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1006 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1014 switch (XCHARSET_CHARS (charset))
1017 return MIN_CHAR_94x94
1018 + (final - '0') * 94 * 94
1019 + (((code_point >> 8) & 0x7F) - 33) * 94
1020 + ((code_point & 0x7F) - 33);
1022 return MIN_CHAR_96x96
1023 + (final - '0') * 96 * 96
1024 + (((code_point >> 8) & 0x7F) - 32) * 96
1025 + ((code_point & 0x7F) - 32);
/* Find the code point of the character CH in CHARSET.
   DEFINED_ONLY is forwarded to the recursive lookup in the mother charset,
   except that 1 is passed when CHARSET's final byte is >= '0'; a separate
   `else if (defined_only)' branch is also visible.
   Order of the visible work:
     1. The charset's encoding table, when it is a char table, is
        consulted with get_char_id_table; an integer entry satisfies the
        first test.
     2. Otherwise a code is obtained from the mother charset (recursively)
        or from CH itself when it lies between the min and max code.  The
        offset D = code - XCHARSET_CODE_OFFSET is then converted according
        to XCHARSET_CONVERSION into packed row/cell (and plane) bytes.
     3. Otherwise, for charsets with a final byte >= '0', a min code of 0
        and a code offset of 0 or equal to the min code, the position is
        computed relative to MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or
        MIN_CHAR_96x96.
   NOTE(review): an unknown conversion kind is reported with printf, i.e.
   on standard output and without a trailing newline -- confirm that is
   the intended diagnostic channel.
   NOTE(review): the return values for the failure paths and several
   branch bodies are on lines not visible in this excerpt. */
1037 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1039 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1042 if ( CHAR_TABLEP (encoding_table)
1043 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1048 Lisp_Object mother = XCHARSET_MOTHER (charset);
1049 int min = XCHARSET_MIN_CODE (charset);
1050 int max = XCHARSET_MAX_CODE (charset);
1053 if ( CHARSETP (mother) )
1055 if (XCHARSET_FINAL (charset) >= '0')
1056 code = charset_code_point (mother, ch, 1);
1058 code = charset_code_point (mother, ch, defined_only);
1060 else if (defined_only)
1062 else if ( ((max == 0) && CHARSETP (mother)
1063 && (XCHARSET_FINAL (charset) == 0))
1064 || ((min <= ch) && (ch <= max)) )
1066 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1067 || ((min <= code) && (code <= max)) )
1069 int d = code - XCHARSET_CODE_OFFSET (charset);
1071 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1073 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1075 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1080 int cell = d % 94 + 33;
1086 return (row << 8) | cell;
1088 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Split the Big5 code into its two bytes and linearise them. */
1090 int B1 = d >> 8, B2 = d & 0xFF;
1092 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1093 - (B2 < 0x7F ? 0x40 : 0x62);
1097 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1100 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1102 int B1 = d >> 8, B2 = d & 0xFF;
1104 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1105 - (B2 < 0x7F ? 0x40 : 0x62);
/* Rebase so that Big5 row 0xC9 becomes the first row. */
1109 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1110 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1113 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1114 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1115 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1116 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1117 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1119 int plane = d / (94 * 60) + 33;
1120 int row = (d % (94 * 60)) / 94;
1121 int cell = d % 94 + 33;
1127 return (plane << 16) | (row << 8) | cell;
1129 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1131 ( (d / (94 * 94) + 33) << 16)
1132 | ((d / 94 % 94 + 33) << 8)
1134 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1136 ( (d / (96 * 96) + 32) << 16)
1137 | ((d / 96 % 96 + 32) << 8)
1139 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1141 ( (d / (94 * 94 * 94) + 33) << 24)
1142 | ((d / (94 * 94) % 94 + 33) << 16)
1143 | ((d / 94 % 94 + 33) << 8)
1145 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1147 ( (d / (96 * 96 * 96) + 32) << 24)
1148 | ((d / (96 * 96) % 96 + 32) << 16)
1149 | ((d / 96 % 96 + 32) << 8)
1153 printf ("Unknown CCS-conversion %d is specified!",
1154 XCHARSET_CONVERSION (charset));
/* Charsets addressed purely by their final byte. */
1158 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1159 ( XCHARSET_MIN_CODE (charset) == 0 )
1161 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1162 (XCHARSET_CODE_OFFSET (charset)
1163 == XCHARSET_MIN_CODE (charset))
1168 if (XCHARSET_DIMENSION (charset) == 1)
1170 if (XCHARSET_CHARS (charset) == 94)
1172 if (((d = ch - (MIN_CHAR_94
1173 + (XCHARSET_FINAL (charset) - '0') * 94))
1178 else if (XCHARSET_CHARS (charset) == 96)
1180 if (((d = ch - (MIN_CHAR_96
1181 + (XCHARSET_FINAL (charset) - '0') * 96))
1189 else if (XCHARSET_DIMENSION (charset) == 2)
1191 if (XCHARSET_CHARS (charset) == 94)
1193 if (((d = ch - (MIN_CHAR_94x94
1195 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1198 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1200 else if (XCHARSET_CHARS (charset) == 96)
1202 if (((d = ch - (MIN_CHAR_96x96
1204 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1207 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map the character C to a built-in charset and a code point within it.
   The chosen charset is stored through CHARSET and the code point is the
   return value.  C is classified by range, in this order: basic Latin
   (Vcharset_ascii), control-1, Latin-1, Hebrew, Thai, halfwidth katakana,
   the UCS BMP / SMP / SIP planes, values below MIN_CHAR_94
   (Vcharset_ucs), and finally the 94, 96, 94x94 and 96x96 blocks.
   For those blocks the charset is found with CHARSET_BY_ATTRIBUTES using
   a final byte derived from C; when none is registered the result falls
   back to Vcharset_ucs.
   NOTE(review): the halfwidth-katakana branch returns the value of
   list2 (...) and does not assign *CHARSET, whereas every other visible
   branch returns an integer code point.  This looks inconsistent --
   confirm whether that branch is actually compiled.
   NOTE(review): several range tests and return statements are on lines
   not visible in this excerpt. */
1218 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1220 if (c <= MAX_CHAR_BASIC_LATIN)
1222 *charset = Vcharset_ascii;
1227 *charset = Vcharset_control_1;
1232 *charset = Vcharset_latin_iso8859_1;
1236 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1238 *charset = Vcharset_hebrew_iso8859_8;
1239 return c - MIN_CHAR_HEBREW + 0x20;
1242 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1244 *charset = Vcharset_thai_tis620;
1245 return c - MIN_CHAR_THAI + 0x20;
1248 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1249 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1251 return list2 (Vcharset_katakana_jisx0201,
1252 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1255 else if (c <= MAX_CHAR_BMP)
1257 *charset = Vcharset_ucs_bmp;
1260 else if (c <= MAX_CHAR_SMP)
1262 *charset = Vcharset_ucs_smp;
1263 return c - MIN_CHAR_SMP;
1265 else if (c <= MAX_CHAR_SIP)
1267 *charset = Vcharset_ucs_sip;
1268 return c - MIN_CHAR_SIP;
1270 else if (c < MIN_CHAR_94)
1272 *charset = Vcharset_ucs;
/* 94-character, dimension-1 block: the final byte is the block index
   plus '0'. */
1275 else if (c <= MAX_CHAR_94)
1277 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1278 ((c - MIN_CHAR_94) / 94) + '0',
1279 CHARSET_LEFT_TO_RIGHT);
1280 if (!NILP (*charset))
1281 return ((c - MIN_CHAR_94) % 94) + 33;
1284 *charset = Vcharset_ucs;
/* 96-character, dimension-1 block. */
1288 else if (c <= MAX_CHAR_96)
1290 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1291 ((c - MIN_CHAR_96) / 96) + '0',
1292 CHARSET_LEFT_TO_RIGHT);
1293 if (!NILP (*charset))
1294 return ((c - MIN_CHAR_96) % 96) + 32;
1297 *charset = Vcharset_ucs;
/* 94x94 block: row in the high byte, cell in the low byte. */
1301 else if (c <= MAX_CHAR_94x94)
1304 = CHARSET_BY_ATTRIBUTES (94, 2,
1305 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1306 CHARSET_LEFT_TO_RIGHT);
1307 if (!NILP (*charset))
1308 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1309 | (((c - MIN_CHAR_94x94) % 94) + 33);
1312 *charset = Vcharset_ucs;
/* 96x96 block. */
1316 else if (c <= MAX_CHAR_96x96)
1319 = CHARSET_BY_ATTRIBUTES (96, 2,
1320 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1321 CHARSET_LEFT_TO_RIGHT);
1322 if (!NILP (*charset))
1323 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1324 | (((c - MIN_CHAR_96x96) % 96) + 32);
1327 *charset = Vcharset_ucs;
1333 *charset = Vcharset_ucs;
1338 Lisp_Object Vdefault_coded_charset_priority_list;
1342 /************************************************************************/
1343 /* Basic charset Lisp functions */
1344 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): yields Qt when OBJECT
   satisfies CHARSETP and Qnil otherwise. */
1346 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1347 Return non-nil if OBJECT is a charset.
1351 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument).  A charset object
   is returned unchanged; anything else must pass CHECK_SYMBOL and is
   looked up in Vcharset_hash_table, with Qnil as the not-found result. */
1354 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1355 Retrieve the charset of the given name.
1356 If CHARSET-OR-NAME is a charset object, it is simply returned.
1357 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1358 nil is returned. Otherwise the associated charset object is returned.
1362 if (CHARSETP (charset_or_name))
1363 return charset_or_name;
1365 CHECK_SYMBOL (charset_or_name);
1366 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument).  Delegates to
   Ffind_charset and raises "No such charset" with NAME as the datum.
   NOTE(review): the test guarding the error and the return statement are
   on lines not visible in this excerpt. */
1369 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1370 Retrieve the charset of the given name.
1371 Same as `find-charset' except an error is signalled if there is no such
1372 charset instead of returning nil.
1376 Lisp_Object charset = Ffind_charset (name);
1379 signal_simple_error ("No such charset", name);
1383 /* We store the charsets in hash tables with the names as the key and the
1384 actual charset object as the value. Occasionally we need to use them
1385 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: carries a pointer to the list
   head that the mapper function extends. */
1386 struct charset_list_closure
1388 Lisp_Object *charset_list;
/* Hash-table mapper used by Fcharset_list: conses KEY (the hash key, not
   the charset object in VALUE) onto the front of the list whose head is
   reached through the closure argument.
   NOTE(review): the return value is on a line not visible in this
   excerpt. */
1392 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1393 void *charset_list_closure)
1395 /* This function can GC */
1396 struct charset_list_closure *chcl =
1397 (struct charset_list_closure*) charset_list_closure;
1398 Lisp_Object *charset_list = chcl->charset_list;
1400 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list protected with GCPRO1, runs add_charset_to_list_mapper over every
   entry of Vcharset_hash_table via elisp_maphash, and returns the
   accumulated list.
   NOTE(review): the matching UNGCPRO is presumably on a line not visible
   in this excerpt -- confirm. */
1404 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1405 Return a list of the names of all defined charsets.
1409 Lisp_Object charset_list = Qnil;
1410 struct gcpro gcpro1;
1411 struct charset_list_closure charset_list_closure;
1413 GCPRO1 (charset_list);
1414 charset_list_closure.charset_list = &charset_list;
1415 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1416 &charset_list_closure);
1419 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument): resolves CHARSET
   with Fget_charset and returns the XCHARSET_NAME of the result. */
1422 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1423 Return the name of charset CHARSET.
1427 return XCHARSET_NAME (Fget_charset (charset));
1430 /* #### SJT Should generic properties be allowed? */
1431 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1432 Define a new character set.
1433 This function is for use with Mule support.
1434 NAME is a symbol, the name by which the character set is normally referred.
1435 DOC-STRING is a string describing the character set.
1436 PROPS is a property list, describing the specific nature of the
1437 character set. Recognized properties are:
1439 'short-name Short version of the charset name (ex: Latin-1)
1440 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1441 'registry A regular expression matching the font registry field for
1443 'dimension Number of octets used to index a character in this charset.
1444 Either 1 or 2. Defaults to 1.
1445 If the UTF-2000 feature is enabled, 3 or 4 are also available.
1446 'columns Number of columns used to display a character in this charset.
1447 Only used in TTY mode. (Under X, the actual width of a
1448 character can be derived from the font used to display the
1449 characters.) If unspecified, defaults to the dimension
1450 (this is almost always the correct value).
1451 'chars Number of characters in each dimension (94 or 96).
1452 Defaults to 94. Note that if the dimension is 2, the
1453 character set thus described is 94x94 or 96x96.
1454 If the UTF-2000 feature is enabled, 128 or 256 are also available.
1455 'final Final byte of ISO 2022 escape sequence. Must be
1456 supplied. Each combination of (DIMENSION, CHARS) defines a
1457 separate namespace for final bytes. Note that ISO
1458 2022 restricts the final byte to the range
1459 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1460 dimension == 2. Note also that final bytes in the range
1461 0x30 - 0x3F are reserved for user-defined (not official)
1463 'graphic 0 (use left half of font on output) or 1 (use right half
1464 of font on output). Defaults to 0. For example, for
1465 a font whose registry is ISO8859-1, the left half
1466 (octets 0x20 - 0x7F) is the `ascii' character set, while
1467 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1468 character set. With 'graphic set to 0, the octets
1469 will have their high bit cleared; with it set to 1,
1470 the octets will have their high bit set.
1471 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1473 'ccl-program A compiled CCL program used to convert a character in
1474 this charset into an index into the font. This is in
1475 addition to the 'graphic property. The CCL program
1476 is passed the octets of the character, with the high
1477 bit cleared and set depending upon whether the value
1478 of the 'graphic property is 0 or 1.
1479 'mother [UTF-2000 only] Base coded-charset.
1480 'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
1481 'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
1482 'code-offset [UTF-2000 only] Offset for a code-point of a base
1484 'conversion [UTF-2000 only] Conversion for a code-point of a base
1485 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1487 (name, doc_string, props))
1489 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1490 int direction = CHARSET_LEFT_TO_RIGHT;
1491 Lisp_Object registry = Qnil;
1492 Lisp_Object charset;
1493 Lisp_Object ccl_program = Qnil;
1494 Lisp_Object short_name = Qnil, long_name = Qnil;
1495 Lisp_Object mother = Qnil;
1496 int min_code = 0, max_code = 0, code_offset = 0;
1497 int byte_offset = -1;
1500 CHECK_SYMBOL (name);
1501 if (!NILP (doc_string))
1502 CHECK_STRING (doc_string);
1504 charset = Ffind_charset (name);
1505 if (!NILP (charset))
1506 signal_simple_error ("Cannot redefine existing charset", name);
1509 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1511 if (EQ (keyword, Qshort_name))
1513 CHECK_STRING (value);
1517 else if (EQ (keyword, Qlong_name))
1519 CHECK_STRING (value);
1523 else if (EQ (keyword, Qdimension))
1526 dimension = XINT (value);
1527 if (dimension < 1 ||
1534 signal_simple_error ("Invalid value for 'dimension", value);
1537 else if (EQ (keyword, Qchars))
1540 chars = XINT (value);
1541 if (chars != 94 && chars != 96
1543 && chars != 128 && chars != 256
1546 signal_simple_error ("Invalid value for 'chars", value);
1549 else if (EQ (keyword, Qcolumns))
1552 columns = XINT (value);
1553 if (columns != 1 && columns != 2)
1554 signal_simple_error ("Invalid value for 'columns", value);
1557 else if (EQ (keyword, Qgraphic))
1560 graphic = XINT (value);
1568 signal_simple_error ("Invalid value for 'graphic", value);
1571 else if (EQ (keyword, Qregistry))
1573 CHECK_STRING (value);
1577 else if (EQ (keyword, Qdirection))
1579 if (EQ (value, Ql2r))
1580 direction = CHARSET_LEFT_TO_RIGHT;
1581 else if (EQ (value, Qr2l))
1582 direction = CHARSET_RIGHT_TO_LEFT;
1584 signal_simple_error ("Invalid value for 'direction", value);
1587 else if (EQ (keyword, Qfinal))
1589 CHECK_CHAR_COERCE_INT (value);
1590 final = XCHAR (value);
1591 if (final < '0' || final > '~')
1592 signal_simple_error ("Invalid value for 'final", value);
1596 else if (EQ (keyword, Qmother))
1598 mother = Fget_charset (value);
1601 else if (EQ (keyword, Qmin_code))
1604 min_code = XUINT (value);
1607 else if (EQ (keyword, Qmax_code))
1610 max_code = XUINT (value);
1613 else if (EQ (keyword, Qcode_offset))
1616 code_offset = XUINT (value);
1619 else if (EQ (keyword, Qconversion))
1621 if (EQ (value, Q94x60))
1622 conversion = CONVERSION_94x60;
1623 else if (EQ (value, Q94x94x60))
1624 conversion = CONVERSION_94x94x60;
1625 else if (EQ (value, Qbig5_1))
1626 conversion = CONVERSION_BIG5_1;
1627 else if (EQ (value, Qbig5_2))
1628 conversion = CONVERSION_BIG5_2;
1630 signal_simple_error ("Unrecognized conversion", value);
1634 else if (EQ (keyword, Qccl_program))
1636 struct ccl_program test_ccl;
1638 if (setup_ccl_program (&test_ccl, value) < 0)
1639 signal_simple_error ("Invalid value for 'ccl-program", value);
1640 ccl_program = value;
1644 signal_simple_error ("Unrecognized property", keyword);
1650 error ("'final must be specified");
1652 if (dimension == 2 && final > 0x5F)
1654 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1657 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1658 CHARSET_LEFT_TO_RIGHT)) ||
1659 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1660 CHARSET_RIGHT_TO_LEFT)))
1662 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1664 id = get_unallocated_leading_byte (dimension);
1666 if (NILP (doc_string))
1667 doc_string = build_string ("");
1669 if (NILP (registry))
1670 registry = build_string ("");
1672 if (NILP (short_name))
1673 XSETSTRING (short_name, XSYMBOL (name)->name);
1675 if (NILP (long_name))
1676 long_name = doc_string;
1679 columns = dimension;
1681 if (byte_offset < 0)
1685 else if (chars == 96)
1691 charset = make_charset (id, name, chars, dimension, columns, graphic,
1692 final, direction, short_name, long_name,
1693 doc_string, registry,
1694 Qnil, min_code, max_code, code_offset, byte_offset,
1695 mother, conversion);
1696 if (!NILP (ccl_program))
1697 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1701 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1703 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1704 NEW-NAME is the name of the new charset. Return the new charset.
1706 (charset, new_name))
1708 Lisp_Object new_charset = Qnil;
1709 int id, chars, dimension, columns, graphic, final;
1711 Lisp_Object registry, doc_string, short_name, long_name;
1714 charset = Fget_charset (charset);
1715 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1716 signal_simple_error ("Charset already has reverse-direction charset",
1719 CHECK_SYMBOL (new_name);
1720 if (!NILP (Ffind_charset (new_name)))
1721 signal_simple_error ("Cannot redefine existing charset", new_name);
1723 cs = XCHARSET (charset);
1725 chars = CHARSET_CHARS (cs);
1726 dimension = CHARSET_DIMENSION (cs);
1727 columns = CHARSET_COLUMNS (cs);
1728 id = get_unallocated_leading_byte (dimension);
1730 graphic = CHARSET_GRAPHIC (cs);
1731 final = CHARSET_FINAL (cs);
1732 direction = CHARSET_RIGHT_TO_LEFT;
1733 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1734 direction = CHARSET_LEFT_TO_RIGHT;
1735 doc_string = CHARSET_DOC_STRING (cs);
1736 short_name = CHARSET_SHORT_NAME (cs);
1737 long_name = CHARSET_LONG_NAME (cs);
1738 registry = CHARSET_REGISTRY (cs);
1740 new_charset = make_charset (id, new_name, chars, dimension, columns,
1741 graphic, final, direction, short_name, long_name,
1742 doc_string, registry,
1744 CHARSET_DECODING_TABLE(cs),
1745 CHARSET_MIN_CODE(cs),
1746 CHARSET_MAX_CODE(cs),
1747 CHARSET_CODE_OFFSET(cs),
1748 CHARSET_BYTE_OFFSET(cs),
1750 CHARSET_CONVERSION (cs)
1752 Qnil, 0, 0, 0, 0, Qnil, 0
1756 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1757 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1762 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1763 Define symbol ALIAS as an alias for CHARSET.
1767 CHECK_SYMBOL (alias);
1768 charset = Fget_charset (charset);
1769 return Fputhash (alias, charset, Vcharset_hash_table);
1772 /* #### Reverse direction charsets not yet implemented. */
1774 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1776 Return the reverse-direction charset parallel to CHARSET, if any.
1777 This is the charset with the same properties (in particular, the same
1778 dimension, number of characters per dimension, and final byte) as
1779 CHARSET but whose characters are displayed in the opposite direction.
1783 charset = Fget_charset (charset);
1784 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1788 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1789 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1790 If DIRECTION is omitted, both directions will be checked (left-to-right
1791 will be returned if character sets exist for both directions).
1793 (dimension, chars, final, direction))
1795 int dm, ch, fi, di = -1;
1796 Lisp_Object obj = Qnil;
1798 CHECK_INT (dimension);
1799 dm = XINT (dimension);
1800 if (dm < 1 || dm > 2)
1801 signal_simple_error ("Invalid value for DIMENSION", dimension);
1805 if (ch != 94 && ch != 96)
1806 signal_simple_error ("Invalid value for CHARS", chars);
1808 CHECK_CHAR_COERCE_INT (final);
1810 if (fi < '0' || fi > '~')
1811 signal_simple_error ("Invalid value for FINAL", final);
1813 if (EQ (direction, Ql2r))
1814 di = CHARSET_LEFT_TO_RIGHT;
1815 else if (EQ (direction, Qr2l))
1816 di = CHARSET_RIGHT_TO_LEFT;
1817 else if (!NILP (direction))
1818 signal_simple_error ("Invalid value for DIRECTION", direction);
1820 if (dm == 2 && fi > 0x5F)
1822 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1826 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1828 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1831 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1834 return XCHARSET_NAME (obj);
1838 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1839 Return short name of CHARSET.
1843 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1846 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1847 Return long name of CHARSET.
1851 return XCHARSET_LONG_NAME (Fget_charset (charset));
1854 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1855 Return description of CHARSET.
1859 return XCHARSET_DOC_STRING (Fget_charset (charset));
1862 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1863 Return dimension of CHARSET.
1867 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1870 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1871 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1872 Recognized properties are those listed in `make-charset', as well as
1873 'name and 'doc-string.
1879 charset = Fget_charset (charset);
1880 cs = XCHARSET (charset);
1882 CHECK_SYMBOL (prop);
1883 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1884 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1885 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1886 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1887 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1888 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1889 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1890 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1891 Qnil : make_char (CHARSET_FINAL (cs));
1892 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1893 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1894 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1895 if (EQ (prop, Qdirection))
1896 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1897 if (EQ (prop, Qreverse_direction_charset))
1899 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1900 /* #### Is this translation OK? If so, error checking sufficient? */
1901 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1904 if (EQ (prop, Qmother))
1905 return CHARSET_MOTHER (cs);
1906 if (EQ (prop, Qmin_code))
1907 return make_int (CHARSET_MIN_CODE (cs));
1908 if (EQ (prop, Qmax_code))
1909 return make_int (CHARSET_MAX_CODE (cs));
1911 signal_simple_error ("Unrecognized charset property name", prop);
1912 return Qnil; /* not reached */
1915 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1916 Return charset identification number of CHARSET.
1920 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1923 /* #### We need to figure out which properties we really want to
1926 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1927 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1929 (charset, ccl_program))
1931 struct ccl_program test_ccl;
1933 charset = Fget_charset (charset);
1934 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1935 signal_simple_error ("Invalid ccl-program", ccl_program);
1936 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1941 invalidate_charset_font_caches (Lisp_Object charset)
1943 /* Invalidate font cache entries for charset on all devices. */
1944 Lisp_Object devcons, concons, hash_table;
1945 DEVICE_LOOP_NO_BREAK (devcons, concons)
1947 struct device *d = XDEVICE (XCAR (devcons));
1948 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1949 if (!UNBOUNDP (hash_table))
1950 Fclrhash (hash_table);
1954 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1955 Set the 'registry property of CHARSET to REGISTRY.
1957 (charset, registry))
1959 charset = Fget_charset (charset);
1960 CHECK_STRING (registry);
1961 XCHARSET_REGISTRY (charset) = registry;
1962 invalidate_charset_font_caches (charset);
1963 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1968 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1969 Return mapping-table of CHARSET.
1973 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1976 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1977 Set mapping-table of CHARSET to TABLE.
1981 struct Lisp_Charset *cs;
1985 charset = Fget_charset (charset);
1986 cs = XCHARSET (charset);
1990 CHARSET_DECODING_TABLE(cs) = Qnil;
1993 else if (VECTORP (table))
1995 int ccs_len = CHARSET_BYTE_SIZE (cs);
1996 int ret = decoding_table_check_elements (table,
1997 CHARSET_DIMENSION (cs),
2002 signal_simple_error ("Too big table", table);
2004 signal_simple_error ("Invalid element is found", table);
2006 signal_simple_error ("Something wrong", table);
2008 CHARSET_DECODING_TABLE(cs) = Qnil;
2011 signal_error (Qwrong_type_argument,
2012 list2 (build_translated_string ("vector-or-nil-p"),
2015 byte_offset = CHARSET_BYTE_OFFSET (cs);
2016 switch (CHARSET_DIMENSION (cs))
2019 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2021 Lisp_Object c = XVECTOR_DATA(table)[i];
2024 Fput_char_attribute (c, XCHARSET_NAME (charset),
2025 make_int (i + byte_offset));
2029 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2031 Lisp_Object v = XVECTOR_DATA(table)[i];
2037 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2039 Lisp_Object c = XVECTOR_DATA(v)[j];
2043 (c, XCHARSET_NAME (charset),
2044 make_int ( ( (i + byte_offset) << 8 )
2050 Fput_char_attribute (v, XCHARSET_NAME (charset),
2051 make_int (i + byte_offset));
2058 #ifdef HAVE_CHISE_CLIENT
2059 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2060 Save mapping-table of CHARSET.
2064 struct Lisp_Charset *cs;
2065 int byte_min, byte_max;
2067 Lisp_Object db_file;
2069 charset = Fget_charset (charset);
2070 cs = XCHARSET (charset);
2072 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2073 Qsystem_char_id, 1);
2074 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2076 byte_min = CHARSET_BYTE_OFFSET (cs);
2077 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2078 switch (CHARSET_DIMENSION (cs))
2082 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2085 for (cell = byte_min; cell < byte_max; cell++)
2087 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2090 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2091 Fprin1_to_string (c, Qnil),
2098 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2101 for (row = byte_min; row < byte_max; row++)
2103 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2106 for (cell = byte_min; cell < byte_max; cell++)
2108 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2111 Fput_database (Fprin1_to_string (make_int ((row << 8)
2114 Fprin1_to_string (c, Qnil),
2122 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2125 for (plane = byte_min; plane < byte_max; plane++)
2128 = get_ccs_octet_table (table_p, charset, plane);
2131 for (row = byte_min; row < byte_max; row++)
2134 = get_ccs_octet_table (table_r, charset, row);
2137 for (cell = byte_min; cell < byte_max; cell++)
2139 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2143 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2147 Fprin1_to_string (c, Qnil),
2156 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2159 for (group = byte_min; group < byte_max; group++)
2162 = get_ccs_octet_table (table_g, charset, group);
2165 for (plane = byte_min; plane < byte_max; plane++)
2168 = get_ccs_octet_table (table_p, charset, plane);
2171 for (row = byte_min; row < byte_max; row++)
2174 = get_ccs_octet_table (table_r, charset, row);
2177 for (cell = byte_min; cell < byte_max; cell++)
2180 = get_ccs_octet_table (table_c, charset, cell);
2183 Fput_database (Fprin1_to_string
2184 (make_int (( group << 24)
2189 Fprin1_to_string (c, Qnil),
2197 return Fclose_database (db);
2200 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2201 Reset mapping-table of CCS with database file.
2205 Lisp_Object db_file;
2207 ccs = Fget_charset (ccs);
2208 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2209 Qsystem_char_id, 0);
2211 if (!NILP (Ffile_exists_p (db_file)))
2213 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2220 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2224 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2227 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2231 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2238 decoding_table_put_char (ccs, code_point, ret);
2239 Fclose_database (db);
2243 decoding_table_put_char (ccs, code_point, Qnil);
2244 Fclose_database (db);
2248 #endif /* HAVE_CHISE_CLIENT */
2249 #endif /* UTF2000 */
2252 /************************************************************************/
2253 /* Lisp primitives for working with characters */
2254 /************************************************************************/
2257 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2258 Make a character from CHARSET and code-point CODE.
2259 If DEFINED-ONLY is non-nil, a builtin character is not returned.
2260 If the corresponding character is not found, nil is returned.
2262 (charset, code, defined_only))
2266 charset = Fget_charset (charset);
2269 if (XCHARSET_GRAPHIC (charset) == 1)
2271 if (NILP (defined_only))
2272 c = DECODE_CHAR (charset, c);
2274 c = decode_defined_char (charset, c);
2275 return c >= 0 ? make_char (c) : Qnil;
2278 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2279 Make a builtin character from CHARSET and code-point CODE.
2285 charset = Fget_charset (charset);
2287 if (EQ (charset, Vcharset_latin_viscii))
2289 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2295 (ret = Fget_char_attribute (chr,
2296 Vcharset_latin_viscii_lower,
2299 charset = Vcharset_latin_viscii_lower;
2303 (ret = Fget_char_attribute (chr,
2304 Vcharset_latin_viscii_upper,
2307 charset = Vcharset_latin_viscii_upper;
2314 if (XCHARSET_GRAPHIC (charset) == 1)
2317 c = decode_builtin_char (charset, c);
2318 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2322 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2323 Make a character from CHARSET and octets ARG1 and ARG2.
2324 ARG2 is required only for characters from two-dimensional charsets.
2325 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2326 character s with caron.
2328 (charset, arg1, arg2))
2332 int lowlim, highlim;
2334 charset = Fget_charset (charset);
2335 cs = XCHARSET (charset);
2337 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2338 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2340 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2342 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2343 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2346 /* It is useful (and safe, according to Olivier Galibert) to strip
2347 the 8th bit off ARG1 and ARG2 because it allows programmers to
2348 write (make-char 'latin-iso8859-2 CODE) where CODE is the actual
2349 Latin 2 code of the character. */
2357 if (a1 < lowlim || a1 > highlim)
2358 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2360 if (CHARSET_DIMENSION (cs) == 1)
2364 ("Charset is of dimension one; second octet must be nil", arg2);
2365 return make_char (MAKE_CHAR (charset, a1, 0));
2374 a2 = XINT (arg2) & 0x7f;
2376 if (a2 < lowlim || a2 > highlim)
2377 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2379 return make_char (MAKE_CHAR (charset, a1, a2));
2382 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2383 Return the character set of CHARACTER.
2387 CHECK_CHAR_COERCE_INT (character);
2389 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2392 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2393 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2394 N defaults to 0 if omitted.
2398 Lisp_Object charset;
2401 CHECK_CHAR_COERCE_INT (character);
2403 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2405 if (NILP (n) || EQ (n, Qzero))
2406 return make_int (octet0);
2407 else if (EQ (n, make_int (1)))
2408 return make_int (octet1);
2410 signal_simple_error ("Octet number must be 0 or 1", n);
2414 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2415 Return code-point of CHARACTER in specified CHARSET.
2417 (character, charset, defined_only))
2421 CHECK_CHAR_COERCE_INT (character);
2422 charset = Fget_charset (charset);
2423 code_point = charset_code_point (charset, XCHAR (character),
2424 !NILP (defined_only));
2425 if (code_point >= 0)
2426 return make_int (code_point);
2432 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2433 Return list of charset and one or two position-codes of CHARACTER.
2437 /* This function can GC */
2438 struct gcpro gcpro1, gcpro2;
2439 Lisp_Object charset = Qnil;
2440 Lisp_Object rc = Qnil;
2448 GCPRO2 (charset, rc);
2449 CHECK_CHAR_COERCE_INT (character);
2452 code_point = ENCODE_CHAR (XCHAR (character), charset);
2453 dimension = XCHARSET_DIMENSION (charset);
2454 while (dimension > 0)
2456 rc = Fcons (make_int (code_point & 255), rc);
2460 rc = Fcons (XCHARSET_NAME (charset), rc);
2462 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2464 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2466 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2470 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2479 #ifdef ENABLE_COMPOSITE_CHARS
2480 /************************************************************************/
2481 /* composite character functions */
2482 /************************************************************************/
2485 lookup_composite_char (Bufbyte *str, int len)
2487 Lisp_Object lispstr = make_string (str, len);
2488 Lisp_Object ch = Fgethash (lispstr,
2489 Vcomposite_char_string2char_hash_table,
2495 if (composite_char_row_next >= 128)
2496 signal_simple_error ("No more composite chars available", lispstr);
2497 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2498 composite_char_col_next);
2499 Fputhash (make_char (emch), lispstr,
2500 Vcomposite_char_char2string_hash_table);
2501 Fputhash (lispstr, make_char (emch),
2502 Vcomposite_char_string2char_hash_table);
2503 composite_char_col_next++;
2504 if (composite_char_col_next >= 128)
2506 composite_char_col_next = 32;
2507 composite_char_row_next++;
2516 composite_char_string (Emchar ch)
2518 Lisp_Object str = Fgethash (make_char (ch),
2519 Vcomposite_char_char2string_hash_table,
2521 assert (!UNBOUNDP (str));
2525 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2526 Convert a string into a single composite character.
2527 The character is the result of overstriking all the characters in
2532 CHECK_STRING (string);
2533 return make_char (lookup_composite_char (XSTRING_DATA (string),
2534 XSTRING_LENGTH (string)));
2537 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2538 Return a string of the characters comprising a composite character.
2546 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2547 signal_simple_error ("Must be composite char", ch);
2548 return composite_char_string (emch);
2550 #endif /* ENABLE_COMPOSITE_CHARS */
2553 /************************************************************************/
2554 /* initialization */
2555 /************************************************************************/
2558 syms_of_mule_charset (void)
2560 INIT_LRECORD_IMPLEMENTATION (charset);
2562 DEFSUBR (Fcharsetp);
2563 DEFSUBR (Ffind_charset);
2564 DEFSUBR (Fget_charset);
2565 DEFSUBR (Fcharset_list);
2566 DEFSUBR (Fcharset_name);
2567 DEFSUBR (Fmake_charset);
2568 DEFSUBR (Fmake_reverse_direction_charset);
2569 /* DEFSUBR (Freverse_direction_charset); */
2570 DEFSUBR (Fdefine_charset_alias);
2571 DEFSUBR (Fcharset_from_attributes);
2572 DEFSUBR (Fcharset_short_name);
2573 DEFSUBR (Fcharset_long_name);
2574 DEFSUBR (Fcharset_description);
2575 DEFSUBR (Fcharset_dimension);
2576 DEFSUBR (Fcharset_property);
2577 DEFSUBR (Fcharset_id);
2578 DEFSUBR (Fset_charset_ccl_program);
2579 DEFSUBR (Fset_charset_registry);
2581 DEFSUBR (Fcharset_mapping_table);
2582 DEFSUBR (Fset_charset_mapping_table);
2583 #ifdef HAVE_CHISE_CLIENT
2584 DEFSUBR (Fsave_charset_mapping_table);
2585 DEFSUBR (Freset_charset_mapping_table);
2588 DEFSUBR (Fdecode_char);
2589 DEFSUBR (Fdecode_builtin_char);
2590 DEFSUBR (Fencode_char);
2592 DEFSUBR (Fmake_char);
2593 DEFSUBR (Fchar_charset);
2594 DEFSUBR (Fchar_octet);
2595 DEFSUBR (Fsplit_char);
2597 #ifdef ENABLE_COMPOSITE_CHARS
2598 DEFSUBR (Fmake_composite_char);
2599 DEFSUBR (Fcomposite_char_string);
2602 defsymbol (&Qcharsetp, "charsetp");
2603 defsymbol (&Qregistry, "registry");
2604 defsymbol (&Qfinal, "final");
2605 defsymbol (&Qgraphic, "graphic");
2606 defsymbol (&Qdirection, "direction");
2607 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2608 defsymbol (&Qshort_name, "short-name");
2609 defsymbol (&Qlong_name, "long-name");
2611 defsymbol (&Qmother, "mother");
2612 defsymbol (&Qmin_code, "min-code");
2613 defsymbol (&Qmax_code, "max-code");
2614 defsymbol (&Qcode_offset, "code-offset");
2615 defsymbol (&Qconversion, "conversion");
2616 defsymbol (&Q94x60, "94x60");
2617 defsymbol (&Q94x94x60, "94x94x60");
2618 defsymbol (&Qbig5_1, "big5-1");
2619 defsymbol (&Qbig5_2, "big5-2");
2622 defsymbol (&Ql2r, "l2r");
2623 defsymbol (&Qr2l, "r2l");
2625 /* Charsets, compatible with FSF 20.3
2626 Naming convention is Script-Charset[-Edition] */
2627 defsymbol (&Qascii, "ascii");
2628 defsymbol (&Qcontrol_1, "control-1");
2629 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2630 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2631 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2632 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2633 defsymbol (&Qthai_tis620, "thai-tis620");
2634 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2635 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2636 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2637 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2638 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2639 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2640 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2641 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2642 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2643 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2644 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2645 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2646 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2647 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2648 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2649 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2651 defsymbol (&Qucs, "ucs");
2652 defsymbol (&Qucs_bmp, "ucs-bmp");
2653 defsymbol (&Qucs_smp, "ucs-smp");
2654 defsymbol (&Qucs_sip, "ucs-sip");
2655 defsymbol (&Qlatin_viscii, "latin-viscii");
2656 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2657 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2658 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2659 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2660 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2661 defsymbol (&Qjis_x0208, "=jis-x0208");
2662 defsymbol (&Qchinese_big5, "chinese-big5");
2663 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2665 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2666 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2668 defsymbol (&Qcomposite, "composite");
2672 vars_of_mule_charset (void)
2679 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2680 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2682 /* Table of charsets indexed by leading byte. */
2683 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2684 chlook->charset_by_leading_byte[i] = Qnil;
2687 /* Table of charsets indexed by type/final-byte. */
2688 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2689 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2690 chlook->charset_by_attributes[i][j] = Qnil;
2692 /* Table of charsets indexed by type/final-byte/direction. */
2693 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2694 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2695 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2696 chlook->charset_by_attributes[i][j][k] = Qnil;
2700 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2702 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2703 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2707 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2708 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2709 Leading-code of private TYPE9N charset of column-width 1.
2711 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2715 Vdefault_coded_charset_priority_list = Qnil;
2716 DEFVAR_LISP ("default-coded-charset-priority-list",
2717 &Vdefault_coded_charset_priority_list /*
2718 Default order of preferred coded-character-sets.
/* Create the charset hash table and begin registering the predefined
   charsets.  This span builds Vcharset_hash_table with
   make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ) and
   then issues make_charset calls for Qucs, Qucs_bmp, Qucs_smp and
   Qucs_sip.  It ends by defining MIN/MAX_CHAR_THAI and
   MIN/MAX_CHAR_HALFWIDTH_KATAKANA as 0; the corresponding HEBREW pair
   is commented out.
   NOTE(review): each Vcharset_ucs* variable is passed to staticpro
   here, but the statement assigning the make_charset result to it is
   not visible in this span -- confirm against the full file. */
2724 complex_vars_of_mule_charset (void)
2726 staticpro (&Vcharset_hash_table);
2727 Vcharset_hash_table =
2728 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2730 /* Predefined character sets. We store them into variables for
2734 staticpro (&Vcharset_ucs);
2736 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2737 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2738 build_string ("UCS"),
2739 build_string ("UCS"),
2740 build_string ("ISO/IEC 10646"),
2742 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2743 staticpro (&Vcharset_ucs_bmp);
2745 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2746 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2747 build_string ("BMP"),
2748 build_string ("UCS-BMP"),
2749 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2751 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2752 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2753 staticpro (&Vcharset_ucs_smp);
2755 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2756 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2757 build_string ("SMP"),
2758 build_string ("UCS-SMP"),
2759 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2760 build_string ("UCS00-1"),
2761 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2762 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2763 staticpro (&Vcharset_ucs_sip);
2765 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2766 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2767 build_string ("SIP"),
2768 build_string ("UCS-SIP"),
2769 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2770 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2771 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2772 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2774 # define MIN_CHAR_THAI 0
2775 # define MAX_CHAR_THAI 0
2776 /* # define MIN_CHAR_HEBREW 0 */
2777 /* # define MAX_CHAR_HEBREW 0 */
2778 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2779 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* Register the ASCII charset under Qascii.  The three name strings
   are, in order, "ASCII", "ASCII" and "ASCII (ISO646 IRV)"; the
   registry regexp matches any iso8859-N or -ascii font registry.
   NOTE(review): the statement assigning the make_charset result to
   Vcharset_ascii is not visible in this span -- confirm. */
2781 staticpro (&Vcharset_ascii);
2783 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2784 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2785 build_string ("ASCII"),
2786 build_string ("ASCII"),
2787 build_string ("ASCII (ISO646 IRV)"),
2788 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2789 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Register the C1 control-character charset under Qcontrol_1.  The
   numeric range passed to make_charset is 0x80..0x9F, i.e. decimal
   128..159, and the descriptive string states that same range.
   NOTE(review): one string argument line of this call is not visible
   in this span -- confirm against the full file. */
2790 staticpro (&Vcharset_control_1);
2791 Vcharset_control_1 =
2792 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2793 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2794 build_string ("C1"),
2795 build_string ("Control characters"),
2796 build_string ("Control characters 128-159"),
2798 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
/* ISO 8859 Latin-1 through Latin-4.  The four registrations below
   differ only in leading byte, symbol, the character literal
   ('A' .. 'D') and the name/registry strings; every other argument
   is the same.  Each variable is handed to staticpro before it
   receives the make_charset result. */
2799 staticpro (&Vcharset_latin_iso8859_1);
2800 Vcharset_latin_iso8859_1 =
2801 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2802 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2803 build_string ("Latin-1"),
2804 build_string ("ISO8859-1 (Latin-1)"),
2805 build_string ("ISO8859-1 (Latin-1)"),
2806 build_string ("iso8859-1"),
2807 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2808 staticpro (&Vcharset_latin_iso8859_2);
2809 Vcharset_latin_iso8859_2 =
2810 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2811 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2812 build_string ("Latin-2"),
2813 build_string ("ISO8859-2 (Latin-2)"),
2814 build_string ("ISO8859-2 (Latin-2)"),
2815 build_string ("iso8859-2"),
2816 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_latin_iso8859_3);
2818 Vcharset_latin_iso8859_3 =
2819 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2820 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2821 build_string ("Latin-3"),
2822 build_string ("ISO8859-3 (Latin-3)"),
2823 build_string ("ISO8859-3 (Latin-3)"),
2824 build_string ("iso8859-3"),
2825 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_latin_iso8859_4);
2827 Vcharset_latin_iso8859_4 =
2828 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2829 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("Latin-4"),
2831 build_string ("ISO8859-4 (Latin-4)"),
2832 build_string ("ISO8859-4 (Latin-4)"),
2833 build_string ("iso8859-4"),
2834 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Thai (TIS620), Greek (ISO8859-7), Arabic (ISO8859-6) and Hebrew
   (ISO8859-8).  Thai and Greek are registered with
   CHARSET_LEFT_TO_RIGHT; Arabic and Hebrew are the only charsets in
   this group registered with CHARSET_RIGHT_TO_LEFT.  The Hebrew call
   passes literal 0 where the commented-out MIN_CHAR_HEBREW and
   MAX_CHAR_HEBREW names appear.
   NOTE(review): one argument line of the Hebrew call, between the
   registry string and the first 0, is not visible in this span --
   confirm against the full file. */
2835 staticpro (&Vcharset_thai_tis620);
2836 Vcharset_thai_tis620 =
2837 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2838 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2839 build_string ("TIS620"),
2840 build_string ("TIS620 (Thai)"),
2841 build_string ("TIS620.2529 (Thai)"),
2842 build_string ("tis620"),
2843 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2844 staticpro (&Vcharset_greek_iso8859_7);
2845 Vcharset_greek_iso8859_7 =
2846 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2847 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2848 build_string ("ISO8859-7"),
2849 build_string ("ISO8859-7 (Greek)"),
2850 build_string ("ISO8859-7 (Greek)"),
2851 build_string ("iso8859-7"),
2852 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_arabic_iso8859_6);
2854 Vcharset_arabic_iso8859_6 =
2855 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2856 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2857 build_string ("ISO8859-6"),
2858 build_string ("ISO8859-6 (Arabic)"),
2859 build_string ("ISO8859-6 (Arabic)"),
2860 build_string ("iso8859-6"),
2861 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_hebrew_iso8859_8);
2863 Vcharset_hebrew_iso8859_8 =
2864 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2865 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2866 build_string ("ISO8859-8"),
2867 build_string ("ISO8859-8 (Hebrew)"),
2868 build_string ("ISO8859-8 (Hebrew)"),
2869 build_string ("iso8859-8"),
2871 0 /* MIN_CHAR_HEBREW */,
2872 0 /* MAX_CHAR_HEBREW */, 0, 32,
2873 Qnil, CONVERSION_IDENTICAL);
/* JIS X0201 Kana and Roman, ISO8859-5 (Cyrillic) and ISO8859-9
   (Latin-5).  The two JIS X0201 registrations pass 94 and a trailing
   33 where the ISO 8859 ones pass 96 and 32, and both use the same
   registry regexp "jisx0201\\.1976".  The Roman half is the only call
   in this group whose sixth argument is 0 rather than 1. */
2874 staticpro (&Vcharset_katakana_jisx0201);
2875 Vcharset_katakana_jisx0201 =
2876 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2877 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2878 build_string ("JISX0201 Kana"),
2879 build_string ("JISX0201.1976 (Japanese Kana)"),
2880 build_string ("JISX0201.1976 Japanese Kana"),
2881 build_string ("jisx0201\\.1976"),
2882 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2883 staticpro (&Vcharset_latin_jisx0201);
2884 Vcharset_latin_jisx0201 =
2885 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2886 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2887 build_string ("JISX0201 Roman"),
2888 build_string ("JISX0201.1976 (Japanese Roman)"),
2889 build_string ("JISX0201.1976 Japanese Roman"),
2890 build_string ("jisx0201\\.1976"),
2891 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2892 staticpro (&Vcharset_cyrillic_iso8859_5);
2893 Vcharset_cyrillic_iso8859_5 =
2894 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2895 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2896 build_string ("ISO8859-5"),
2897 build_string ("ISO8859-5 (Cyrillic)"),
2898 build_string ("ISO8859-5 (Cyrillic)"),
2899 build_string ("iso8859-5"),
2900 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2901 staticpro (&Vcharset_latin_iso8859_9);
2902 Vcharset_latin_iso8859_9 =
2903 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2904 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2905 build_string ("Latin-5"),
2906 build_string ("ISO8859-9 (Latin-5)"),
2907 build_string ("ISO8859-9 (Latin-5)"),
2908 build_string ("iso8859-9"),
2909 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* The "JIS X0208 Common" charset and the JIS X0208:1978 charset.
   Vcharset_jis_x0208 is registered with the MIN/MAX_CHAR_JIS_X0208_1990
   range and CONVERSION_94x94; the 1978 charset ends with
   CONVERSION_IDENTICAL.
   NOTE(review): several argument lines of both calls (the symbol and
   size arguments of the first, and the trailing numeric arguments of
   the second) are not visible in this span -- confirm against the
   full file before editing either call. */
2911 staticpro (&Vcharset_jis_x0208);
2912 Vcharset_jis_x0208 =
2913 make_charset (LEADING_BYTE_JIS_X0208,
2915 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2916 build_string ("JIS X0208"),
2917 build_string ("JIS X0208 Common"),
2918 build_string ("JIS X0208 Common part"),
2919 build_string ("jisx0208\\.1990"),
2921 MIN_CHAR_JIS_X0208_1990,
2922 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2923 Qnil, CONVERSION_94x94);
2925 staticpro (&Vcharset_japanese_jisx0208_1978);
2926 Vcharset_japanese_jisx0208_1978 =
2927 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2928 Qjapanese_jisx0208_1978, 94, 2,
2929 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2930 build_string ("JIS X0208:1978"),
2931 build_string ("JIS X0208:1978 (Japanese)"),
2933 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2934 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2941 CONVERSION_IDENTICAL);
/* Register the GB2312 (simplified Chinese) charset under
   Qchinese_gb2312.  The three name strings are, in order, "GB2312",
   "GB2312" and "GB2312 Chinese simplified"; the second one carries no
   parenthesised suffix, so it must not end in a closing
   parenthesis. */
2942 staticpro (&Vcharset_chinese_gb2312);
2943 Vcharset_chinese_gb2312 =
2944 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2945 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2946 build_string ("GB2312"),
2947 build_string ("GB2312"),
2948 build_string ("GB2312 Chinese simplified"),
2949 build_string ("gb2312"),
2950 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Register the GB 12345 charset under Qchinese_gb12345.  The three
   name strings are, in order, "G1", "GB 12345" and "GB 12345-1990";
   the second one carries no parenthesised suffix, so it must not end
   in a closing parenthesis.  The registry regexp accepts an optional
   ".1990" before the trailing "-0". */
2951 staticpro (&Vcharset_chinese_gb12345);
2952 Vcharset_chinese_gb12345 =
2953 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2954 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2955 build_string ("G1"),
2956 build_string ("GB 12345"),
2957 build_string ("GB 12345-1990"),
2958 build_string ("GB12345\\(\\.1990\\)?-0"),
2959 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* JIS X0208:1983 and JIS X0208:1990.  The 1990 registration passes
   the literal range 0x2121 .. 0x7426 and Vcharset_jis_x0208 in the
   argument position where the inline comment records Qnil as the
   alternative, together with CONVERSION_IDENTICAL (alternative noted
   inline: CONVERSION_94x94).
   NOTE(review): the trailing numeric arguments of the 1983 call, and
   one argument line of the 1990 call just before 0x2121, are not
   visible in this span -- confirm against the full file. */
2960 staticpro (&Vcharset_japanese_jisx0208);
2961 Vcharset_japanese_jisx0208 =
2962 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2963 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2964 build_string ("JISX0208"),
2965 build_string ("JIS X0208:1983 (Japanese)"),
2966 build_string ("JIS X0208:1983 Japanese Kanji"),
2967 build_string ("jisx0208\\.1983"),
2974 CONVERSION_IDENTICAL);
2976 staticpro (&Vcharset_japanese_jisx0208_1990);
2977 Vcharset_japanese_jisx0208_1990 =
2978 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2979 Qjapanese_jisx0208_1990, 94, 2,
2980 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2981 build_string ("JISX0208-1990"),
2982 build_string ("JIS X0208:1990 (Japanese)"),
2983 build_string ("JIS X0208:1990 Japanese Kanji"),
2984 build_string ("jisx0208\\.1990"),
2986 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2987 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2988 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2989 Vcharset_jis_x0208 /* Qnil */,
2990 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* Register the KSC5601 (Korean) charset under Qkorean_ksc5601.  The
   three name strings are, in order, "KSC5601", "KSC5601 (Korean)" and
   "KSC5601 Korean Hangul and Hanja"; the parenthesised language tag
   in the second one is closed, matching the "NAME (Language)" form
   used by the neighbouring Japanese registrations. */
2992 staticpro (&Vcharset_korean_ksc5601);
2993 Vcharset_korean_ksc5601 =
2994 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2995 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2996 build_string ("KSC5601"),
2997 build_string ("KSC5601 (Korean)"),
2998 build_string ("KSC5601 Korean Hangul and Hanja"),
2999 build_string ("ksc5601"),
3000 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* JIS X0212 and CNS 11643 planes 1 and 2.  CHINESE_CNS_PLANE_RE(n)
   builds the registry regexp for a CNS plane by string-literal
   concatenation: the fixed prefix "cns11643[.-]\\(.*[.-]\\)?", then
   the plane number N (which must itself be a string literal), then
   "$".
   NOTE(review): in each CNS call the line preceding the descriptive
   string is not visible in this span -- confirm against the full
   file. */
3001 staticpro (&Vcharset_japanese_jisx0212);
3002 Vcharset_japanese_jisx0212 =
3003 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3004 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3005 build_string ("JISX0212"),
3006 build_string ("JISX0212 (Japanese)"),
3007 build_string ("JISX0212 Japanese Supplement"),
3008 build_string ("jisx0212"),
3009 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3011 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3012 staticpro (&Vcharset_chinese_cns11643_1);
3013 Vcharset_chinese_cns11643_1 =
3014 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3015 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("CNS11643-1"),
3017 build_string ("CNS11643-1 (Chinese traditional)"),
3019 ("CNS 11643 Plane 1 Chinese traditional"),
3020 build_string (CHINESE_CNS_PLANE_RE("1")),
3021 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3022 staticpro (&Vcharset_chinese_cns11643_2);
3023 Vcharset_chinese_cns11643_2 =
3024 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3025 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3026 build_string ("CNS11643-2"),
3027 build_string ("CNS11643-2 (Chinese traditional)"),
3029 ("CNS 11643 Plane 2 Chinese traditional"),
3030 build_string (CHINESE_CNS_PLANE_RE("2")),
3031 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Register the Vietnamese TCVN 5712 (VSCII-2) charset under
   Qlatin_tcvn5712.  The descriptive string names the 1993 edition,
   the same year the registry regexp accepts as an optional ".1993"
   component. */
3033 staticpro (&Vcharset_latin_tcvn5712);
3034 Vcharset_latin_tcvn5712 =
3035 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3036 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3037 build_string ("TCVN 5712"),
3038 build_string ("TCVN 5712 (VSCII-2)"),
3039 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
3040 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3041 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* VISCII lower, VISCII upper, full VISCII 1.1, and Big5.  The lower
   and upper halves are registered with 96 and the character literals
   '1' and '2'; the full VISCII and Big5 charsets are registered with
   256 and 0 in place of a character literal.  Big5 uses the
   MIN/MAX_CHAR_BIG5_CDP range.
   NOTE(review): one argument line of the Big5 call, between the
   registry string and MIN_CHAR_BIG5_CDP, is not visible in this span
   -- confirm against the full file. */
3042 staticpro (&Vcharset_latin_viscii_lower);
3043 Vcharset_latin_viscii_lower =
3044 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3045 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3046 build_string ("VISCII lower"),
3047 build_string ("VISCII lower (Vietnamese)"),
3048 build_string ("VISCII lower (Vietnamese)"),
3049 build_string ("MULEVISCII-LOWER"),
3050 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3051 staticpro (&Vcharset_latin_viscii_upper);
3052 Vcharset_latin_viscii_upper =
3053 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3054 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3055 build_string ("VISCII upper"),
3056 build_string ("VISCII upper (Vietnamese)"),
3057 build_string ("VISCII upper (Vietnamese)"),
3058 build_string ("MULEVISCII-UPPER"),
3059 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3060 staticpro (&Vcharset_latin_viscii);
3061 Vcharset_latin_viscii =
3062 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3063 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3064 build_string ("VISCII"),
3065 build_string ("VISCII 1.1 (Vietnamese)"),
3066 build_string ("VISCII 1.1 (Vietnamese)"),
3067 build_string ("VISCII1\\.1"),
3068 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3069 staticpro (&Vcharset_chinese_big5);
3070 Vcharset_chinese_big5 =
3071 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3072 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3073 build_string ("Big5"),
3074 build_string ("Big5"),
3075 build_string ("Big5 Chinese traditional"),
3076 build_string ("big5-0"),
3078 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3079 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* Ethiopic (UCS) and the two Big5 level charsets.  Ethiopic is
   registered with the literal range 0x1200 .. 0x137F.  Big5 Level-1
   and Level-2 both pass Vcharset_chinese_big5 as their second-to-last
   argument, with CONVERSION_BIG5_1 and CONVERSION_BIG5_2 respectively;
   the inline comments record "Qnil, CONVERSION_IDENTICAL" as the
   alternative.  Both level charsets share the short name "Big5" and
   the registry string "big5".
   NOTE(review): in each Big5 level call the line preceding the
   descriptive string is not visible in this span -- confirm against
   the full file. */
3081 staticpro (&Vcharset_ethiopic_ucs);
3082 Vcharset_ethiopic_ucs =
3083 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3084 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3085 build_string ("Ethiopic (UCS)"),
3086 build_string ("Ethiopic (UCS)"),
3087 build_string ("Ethiopic of UCS"),
3088 build_string ("Ethiopic-Unicode"),
3089 Qnil, 0x1200, 0x137F, 0, 0,
3090 Qnil, CONVERSION_IDENTICAL);
3092 staticpro (&Vcharset_chinese_big5_1);
3093 Vcharset_chinese_big5_1 =
3094 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3095 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3096 build_string ("Big5"),
3097 build_string ("Big5 (Level-1)"),
3099 ("Big5 Level-1 Chinese traditional"),
3100 build_string ("big5"),
3101 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3102 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3103 staticpro (&Vcharset_chinese_big5_2);
3104 Vcharset_chinese_big5_2 =
3105 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3106 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3107 build_string ("Big5"),
3108 build_string ("Big5 (Level-2)"),
3110 ("Big5 Level-2 Chinese traditional"),
3111 build_string ("big5"),
3112 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3113 Vcharset_chinese_big5, CONVERSION_BIG5_2);
/* Composite-character support, compiled only when
   ENABLE_COMPOSITE_CHARS is defined.  Registers the composite charset,
   starts both composite_char_row_next and composite_char_col_next at
   32, and creates the two lookup tables with size argument 500: the
   string-to-char table uses HASH_TABLE_EQUAL and the char-to-string
   table uses HASH_TABLE_EQ.  Unlike the charset variables above,
   these two tables are passed to staticpro after being assigned.
   NOTE(review): the final argument lines of the make_charset call are
   not visible in this span -- confirm against the full file. */
3115 #ifdef ENABLE_COMPOSITE_CHARS
3116 /* #### For simplicity, we put composite chars into a 96x96 charset.
3117 This is going to lead to problems because you can run out of
3118 room, esp. as we don't yet recycle numbers. */
3119 staticpro (&Vcharset_composite);
3120 Vcharset_composite =
3121 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3122 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3123 build_string ("Composite"),
3124 build_string ("Composite characters"),
3125 build_string ("Composite characters"),
3128 /* #### not dumped properly */
3129 composite_char_row_next = 32;
3130 composite_char_col_next = 32;
3132 Vcomposite_char_string2char_hash_table =
3133 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3134 Vcomposite_char_char2string_hash_table =
3135 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3136 staticpro (&Vcomposite_char_string2char_hash_table);
3137 staticpro (&Vcomposite_char_char2string_hash_table);
3138 #endif /* ENABLE_COMPOSITE_CHARS */