1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table: entry i is the number of bytes in the string
   representation of a character whose first byte is i.  Only first
   bytes 0x00 - 0x9f are covered (0xA0 entries).
   NOTE(review): two candidate rows for 0x80 - 0x8f appear below (one
   ending in 3, one all 2s); presumably they are alternatives selected
   by a preprocessor conditional that is not visible in this excerpt --
   confirm against the full file. */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
/* Row for 0x90 - 0x9f: fifteen 3-byte entries, then 4 bytes for 0x9f. */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Validate the elements of the decoding-table vector V for a charset of
   dimension DIM whose per-level length is CCS_LEN.  The first line is
   the forward declaration; the definition follows.
   NOTE(review): the return statements are not visible in this excerpt,
   so the exact meaning of the int result must be confirmed in the full
   file. */
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is handled as a special case first. */
157 if (XVECTOR_LENGTH (v) > ccs_len)
/* Inspect every slot of the vector. */
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Slots that are neither nil nor a character need further checking. */
164 if (!NILP (c) && !CHARP (c))
/* Recurse into the element with one dimension fewer. */
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record that CHARACTER has code point VALUE in coded charset CCS.
   VALUE may be an integer or, in the obsolete representation, a list of
   bytes; anything else signals "Invalid value for coded-charset".
   NOTE(review): several guards and the return statement are not
   visible in this excerpt. */
180 put_char_ccs_code_point (Lisp_Object character,
181 Lisp_Object ccs, Lisp_Object value)
/* The processing below is skipped only when CCS is the charset named
   Qmap_ucs and VALUE is an integer below 0xF0000. */
183 if ( !(EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 && INTP (value) && (XINT (value) < 0xF0000))
186 /* || (XCHAR (character) != XINT (value)) */ )
188 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
192 { /* obsolete representation: value must be a list of bytes */
193 Lisp_Object ret = Fcar (value);
197 signal_simple_error ("Invalid value for coded-charset", value);
/* The first list element seeds the code point. */
198 code_point = XINT (ret);
199 if (XCHARSET_GRAPHIC (ccs) == 1)
207 signal_simple_error ("Invalid value for coded-charset",
211 signal_simple_error ("Invalid value for coded-charset",
/* Each further byte j is appended as the new least-significant byte. */
214 if (XCHARSET_GRAPHIC (ccs) == 1)
216 code_point = (code_point << 8) | j;
/* Replace the list by its integer form. */
219 value = make_int (code_point);
/* Integer representation: for a charset with graphic == 1 the high bit
   of every byte is cleared (mask 0x7F7F7F7F) and VALUE is rebuilt. */
221 else if (INTP (value))
223 code_point = XINT (value);
224 if (XCHARSET_GRAPHIC (ccs) == 1)
226 code_point &= 0x7F7F7F7F;
227 value = make_int (code_point);
231 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code position currently recorded for CHARACTER in CCS,
   remove that entry from the decoding table (the guarding condition is
   not visible here), then store the new mapping. */
235 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
238 decoding_table_remove_char (ccs, XINT (cpos));
241 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS from
   both of the charset's tables.
   NOTE(review): the return statement is not visible in this excerpt. */
247 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
249 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
250 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
/* Decoding side: only when the table is a vector.  Fetch the code
   position stored for CHARACTER and delete that entry. */
252 if (VECTORP (decoding_table))
254 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
258 decoding_table_remove_char (ccs, XINT (cpos));
/* Encoding side: only when the table is a char table.  The entry for
   CHARACTER is overwritten with Qunbound. */
261 if (CHAR_TABLEP (encoding_table))
263 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
271 int leading_code_private_11;
274 Lisp_Object Qcharsetp;
276 /* Qdoc_string, Qdimension, Qchars defined in general.c */
277 Lisp_Object Qregistry, Qfinal, Qgraphic;
278 Lisp_Object Qdirection;
279 Lisp_Object Qreverse_direction_charset;
280 Lisp_Object Qleading_byte;
281 Lisp_Object Qshort_name, Qlong_name;
284 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
285 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
319 Qvietnamese_viscii_lower,
320 Qvietnamese_viscii_upper,
330 Lisp_Object Ql2r, Qr2l;
332 Lisp_Object Vcharset_hash_table;
334 /* Composite characters are characters constructed by overstriking two
335 or more regular characters.
337 1) The old Mule implementation involves storing composite characters
338 in a buffer as a tag followed by all of the actual characters
339 used to make up the composite character. I think this is a bad
340 idea; it greatly complicates code that wants to handle strings
341 one character at a time because it has to deal with the possibility
342 of great big ungainly characters. It's much more reasonable to
343 simply store an index into a table of composite characters.
345 2) The current implementation only allows for 16,384 separate
346 composite characters over the lifetime of the XEmacs process.
347 This could become a potential problem if the user
348 edited lots of different files that use composite characters.
349 Due to FSF bogosity, increasing the number of allowable
350 composite characters under Mule would decrease the number
351 of possible faces that can exist. Mule already has shrunk
352 this to 2048, and further shrinkage would become uncomfortable.
353 No such problems exist in XEmacs.
355 Composite characters could be represented as 0x80 C1 C2 C3,
356 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
357 for slightly under 2^20 (one million) composite characters
358 over the XEmacs process lifetime, and you only need to
359 increase the size of a Mule character from 19 to 21 bits.
360 Or you could use 0x80 C1 C2 C3 C4, allowing for about
361 85 million (slightly over 2^26) composite characters. */
364 /************************************************************************/
365 /* Basic Emchar functions */
366 /************************************************************************/
368 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
369 string in STR. Returns the number of bytes stored.
370 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Write the multi-byte representation of C to STR through the cursor p.
   The visible branches emit a lead byte followed by continuation bytes
   of the form 0x80 | (6 bits of C), most significant group first.
   NOTE(review): the first branch, the final else and the return are
   not visible in this excerpt. */
374 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 0x7ff: 2 bytes, lead byte 0xc0 | (c >> 6). */
389 else if ( c <= 0x7ff )
391 *p++ = (c >> 6) | 0xc0;
392 *p++ = (c & 0x3f) | 0x80;
/* Up to 0xffff: 3 bytes, lead byte 0xe0 | (c >> 12). */
394 else if ( c <= 0xffff )
396 *p++ = (c >> 12) | 0xe0;
397 *p++ = ((c >> 6) & 0x3f) | 0x80;
398 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x1fffff: 4 bytes, lead byte 0xf0 | (c >> 18). */
400 else if ( c <= 0x1fffff )
402 *p++ = (c >> 18) | 0xf0;
403 *p++ = ((c >> 12) & 0x3f) | 0x80;
404 *p++ = ((c >> 6) & 0x3f) | 0x80;
405 *p++ = (c & 0x3f) | 0x80;
/* Up to 0x3ffffff: 5 bytes, lead byte 0xf8 | (c >> 24). */
407 else if ( c <= 0x3ffffff )
409 *p++ = (c >> 24) | 0xf8;
410 *p++ = ((c >> 18) & 0x3f) | 0x80;
411 *p++ = ((c >> 12) & 0x3f) | 0x80;
412 *p++ = ((c >> 6) & 0x3f) | 0x80;
413 *p++ = (c & 0x3f) | 0x80;
/* Remaining values: 6 bytes, lead byte 0xfc | (c >> 30). */
417 *p++ = (c >> 30) | 0xfc;
418 *p++ = ((c >> 24) & 0x3f) | 0x80;
419 *p++ = ((c >> 18) & 0x3f) | 0x80;
420 *p++ = ((c >> 12) & 0x3f) | 0x80;
421 *p++ = ((c >> 6) & 0x3f) | 0x80;
422 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte representation: split C into its charset and position
   codes; a private leading byte is preceded by its prefix byte.
   NOTE(review): presumably this is the alternative to the branches
   above under a build conditional that is not visible -- confirm. */
425 BREAKUP_CHAR (c, charset, c1, c2);
426 lb = CHAR_LEADING_BYTE (c);
427 if (LEADING_BYTE_PRIVATE_P (lb))
428 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* Characters of the control-1 charset are handled specially. */
430 if (EQ (charset, Vcharset_control_1))
439 /* Return the first character from a Mule-encoded string in STR,
440 assuming it's non-ASCII. Do not call this directly.
441 Use the macro charptr_emchar() instead. */
/* Decode the first character of the multi-byte string STR.
   NOTE(review): the bodies of the lead-byte tests below are not
   visible in this excerpt; presumably each one selects the number of
   continuation bytes (len) -- confirm in the full file. */
444 non_ascii_charptr_emchar (const Bufbyte *str)
/* Classify the first byte b by descending threshold. */
457 else if ( b >= 0xf8 )
462 else if ( b >= 0xf0 )
467 else if ( b >= 0xe0 )
472 else if ( b >= 0xc0 )
/* Each remaining byte contributes its low 6 bits to ch. */
482 for( ; len > 0; len-- )
485 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte representation: i0 is the leading byte, i1 and i2 the
   position codes (i2 stays 0 unless it is assigned on a line not
   visible here). */
489 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the following byte minus 0x20. */
492 if (i0 == LEADING_BYTE_CONTROL_1)
493 return (Emchar) (*++str - 0x20);
495 if (LEADING_BYTE_PREFIX_P (i0))
/* Resolve the charset from the leading byte; a dimension-2 charset
   needs a second position code. */
500 charset = CHARSET_BY_LEADING_BYTE (i0);
501 if (XCHARSET_DIMENSION (charset) == 2)
504 return MAKE_CHAR (charset, i1, i2);
508 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
509 Do not call this directly. Use the macro valid_char_p() instead. */
513 non_ascii_valid_char_p (Emchar ch)
517 /* Must have only lowest 19 bits set */
521 f1 = CHAR_FIELD1 (ch);
522 f2 = CHAR_FIELD2 (ch);
523 f3 = CHAR_FIELD3 (ch);
529 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
530 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
531 f2 > MAX_CHAR_FIELD2_PRIVATE)
536 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
537 f2 <= MAX_CHAR_FIELD2_PRIVATE))
541 NOTE: This takes advantage of the fact that
542 FIELD2_TO_OFFICIAL_LEADING_BYTE and
543 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
545 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
546 if (EQ (charset, Qnil))
548 return (XCHARSET_CHARS (charset) == 96);
554 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
555 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
556 f1 > MAX_CHAR_FIELD1_PRIVATE)
558 if (f2 < 0x20 || f3 < 0x20)
561 #ifdef ENABLE_COMPOSITE_CHARS
562 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
564 if (UNBOUNDP (Fgethash (make_int (ch),
565 Vcomposite_char_char2string_hash_table,
570 #endif /* ENABLE_COMPOSITE_CHARS */
572 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
573 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
576 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
578 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
581 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
583 if (EQ (charset, Qnil))
585 return (XCHARSET_CHARS (charset) == 96);
591 /************************************************************************/
592 /* Basic string functions */
593 /************************************************************************/
595 /* Copy the character pointed to by SRC into DST. Do not call this
596 directly. Use the macro charptr_copy_char() instead.
597 Return the number of bytes copied. */
600 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
602 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
604 for (i = bytes; i; i--, dst++, src++)
610 /************************************************************************/
611 /* streams of Emchars */
612 /************************************************************************/
614 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
615 The functions below are not meant to be called directly; use
616 the macros in insdel.h. */
/* Finish reading one character from STREAM whose first byte CH has
   already been read, and return the decoded character. */
619 Lstream_get_emchar_1 (Lstream *stream, int ch)
621 Bufbyte str[MAX_EMCHAR_LEN];
622 Bufbyte *strptr = str;
/* The byte already in hand becomes the first byte of the buffer. */
625 str[0] = (Bufbyte) ch;
/* Read the remaining bytes; their count comes from the first byte. */
627 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
629 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc is only caught by this
   checking assert. */
630 bufpos_checking_assert (c >= 0);
631 *++strptr = (Bufbyte) c;
633 return charptr_emchar (str);
/* Encode CH into a local buffer and write the resulting bytes to
   STREAM.  Returns whatever Lstream_write returns. */
637 Lstream_fput_emchar (Lstream *stream, Emchar ch)
639 Bufbyte str[MAX_EMCHAR_LEN];
/* len is the number of bytes the encoding of CH occupies in str. */
640 Bytecount len = set_charptr_emchar (str, ch);
641 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer and push the resulting bytes back onto
   STREAM with Lstream_unread. */
645 Lstream_funget_emchar (Lstream *stream, Emchar ch)
647 Bufbyte str[MAX_EMCHAR_LEN];
/* len is the number of bytes the encoding of CH occupies in str. */
648 Bytecount len = set_charptr_emchar (str, ch);
649 Lstream_unread (stream, str, len);
653 /************************************************************************/
655 /************************************************************************/
/* Mark method for charset objects: marks the Lisp objects referenced
   from the charset OBJ.
   NOTE(review): the return statement and any conditional around the
   last two fields are not visible in this excerpt. */
658 mark_charset (Lisp_Object obj)
660 Lisp_Charset *cs = XCHARSET (obj);
/* Descriptive fields. */
662 mark_object (cs->short_name);
663 mark_object (cs->long_name);
664 mark_object (cs->doc_string);
665 mark_object (cs->registry);
666 mark_object (cs->ccl_program);
/* Decoding table and mother charset. */
668 mark_object (cs->decoding_table);
669 mark_object (cs->mother);
/* Print method for charset objects: writes an unreadable
   "#<charset ...>" description of OBJ to PRINTCHARFUN. */
675 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
677 Lisp_Charset *cs = XCHARSET (obj);
/* Error path for the unreadable object (its guarding condition is not
   visible in this excerpt). */
681 error ("printing unreadable object #<charset %s 0x%x>",
682 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
/* Name, short name, long name and doc string, separated by spaces. */
685 write_c_string ("#<charset ", printcharfun);
686 print_internal (CHARSET_NAME (cs), printcharfun, 0);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Numeric attributes are formatted into buf.
   NOTE(review): buf's declaration is not visible here and sprintf is
   unbounded; confirm the buffer is large enough for this format or
   switch to a bounded formatter. */
693 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
695 CHARSET_DIMENSION (cs),
696 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
697 CHARSET_COLUMNS (cs),
698 CHARSET_GRAPHIC (cs),
700 write_c_string (buf, printcharfun);
/* Registry, then the object's uid in hex closes the description. */
701 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
702 sprintf (buf, " 0x%x>", cs->header.uid);
703 write_c_string (buf, printcharfun);
706 static const struct lrecord_description charset_description[] = {
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
721 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
722 mark_charset, print_charset, 0, 0, 0,
726 /* Make a new charset. */
727 /* #### SJT Should generic properties be allowed? */
729 make_charset (Charset_ID id, Lisp_Object name,
730 unsigned short chars, unsigned char dimension,
731 unsigned char columns, unsigned char graphic,
732 Bufbyte final, unsigned char direction, Lisp_Object short_name,
733 Lisp_Object long_name, Lisp_Object doc,
735 Lisp_Object decoding_table,
736 Emchar min_code, Emchar max_code,
737 Emchar code_offset, unsigned char byte_offset,
738 Lisp_Object mother, unsigned char conversion)
741 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
745 XSETCHARSET (obj, cs);
747 CHARSET_ID (cs) = id;
748 CHARSET_NAME (cs) = name;
749 CHARSET_SHORT_NAME (cs) = short_name;
750 CHARSET_LONG_NAME (cs) = long_name;
751 CHARSET_CHARS (cs) = chars;
752 CHARSET_DIMENSION (cs) = dimension;
753 CHARSET_DIRECTION (cs) = direction;
754 CHARSET_COLUMNS (cs) = columns;
755 CHARSET_GRAPHIC (cs) = graphic;
756 CHARSET_FINAL (cs) = final;
757 CHARSET_DOC_STRING (cs) = doc;
758 CHARSET_REGISTRY (cs) = reg;
759 CHARSET_CCL_PROGRAM (cs) = Qnil;
760 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
762 CHARSET_DECODING_TABLE(cs) = Qunbound;
763 CHARSET_MIN_CODE (cs) = min_code;
764 CHARSET_MAX_CODE (cs) = max_code;
765 CHARSET_CODE_OFFSET (cs) = code_offset;
766 CHARSET_BYTE_OFFSET (cs) = byte_offset;
767 CHARSET_MOTHER (cs) = mother;
768 CHARSET_CONVERSION (cs) = conversion;
772 if (id == LEADING_BYTE_ASCII)
773 CHARSET_REP_BYTES (cs) = 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
777 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
782 /* some charsets do not have final characters. This includes
783 ASCII, Control-1, Composite, and the two faux private
785 unsigned char iso2022_type
786 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
788 if (code_offset == 0)
790 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
791 chlook->charset_by_attributes[iso2022_type][final] = obj;
795 (chlook->charset_by_attributes[iso2022_type][final][direction]));
796 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
800 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
801 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
803 /* Some charsets are "faux" and don't have names or really exist at
804 all except in the leading-byte table. */
806 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Three separate counters in chlook are visible; each
   is compared against its own upper limit before being
   post-incremented.
   NOTE(review): the conditions selecting between the three counters,
   and what happens when a limit is exceeded, are not visible in this
   excerpt. */
811 get_unallocated_leading_byte (int dimension)
/* General counter. */
816 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
819 lb = chlook->next_allocated_leading_byte++;
/* Counter for 1-byte private leading bytes. */
823 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
826 lb = chlook->next_allocated_1_byte_leading_byte++;
/* Counter for 2-byte private leading bytes. */
830 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
833 lb = chlook->next_allocated_2_byte_leading_byte++;
/* Arguments of the error raised when no leading byte is available. */
839 ("No more character sets free for this dimension",
840 make_int (dimension));
846 /* Number of Big5 characters that share the same first byte:
   (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157 second-byte values. */
848 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Map CODE_POINT of a derived coded charset to the corresponding code
   of its base charset, according to CONV_TYPE.
   NOTE(review): the results of several branches (including the
   identical case and the out-of-range rows) are not visible in this
   excerpt. */
851 decode_ccs_conversion (int conv_type, int code_point)
853 if ( conv_type == CONVERSION_IDENTICAL )
/* 94x60: high byte is the row, low byte the cell. */
857 if ( conv_type == CONVERSION_94x60 )
859 int row = code_point >> 8;
860 int cell = code_point & 255;
/* Rows below 78 are linearised relative to row 48, with cells counted
   from 33. */
864 else if (row < 16 + 32 + 30)
865 return (row - (16 + 32)) * 94 + cell - 33;
866 else if (row < 18 + 32 + 30)
/* Rows below 110 are linearised relative to row 50. */
868 else if (row < 18 + 32 + 60)
869 return (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60: as above, with an extra plane byte above the row. */
871 else if ( conv_type == CONVERSION_94x94x60 )
873 int plane = code_point >> 16;
874 int row = (code_point >> 8) & 255;
875 int cell = code_point & 255;
879 else if (row < 16 + 32 + 30)
881 (plane - 33) * 94 * 60
882 + (row - (16 + 32)) * 94
884 else if (row < 18 + 32 + 30)
886 else if (row < 18 + 32 + 60)
888 (plane - 33) * 94 * 60
889 + (row - (18 + 32)) * 94
/* Big5 level 1: I is the linear index of the 94x94 code (both bytes
   masked to 7 bits and counted from 33).  b1 = I / 157 + 0xA1 is the
   Big5 first byte, b2 = I % 157 the offset within that row.
   NOTE(review): the divisor literal equals BIG5_SAME_ROW; using the
   macro would keep the two in step. */
892 else if ( conv_type == CONVERSION_BIG5_1 )
895 = (((code_point >> 8) & 0x7F) - 33) * 94
896 + (( code_point & 0x7F) - 33);
897 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
898 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets below 0x3F start at 0x40; the rest are shifted by 0x62. */
900 b2 += b2 < 0x3F ? 0x40 : 0x62;
901 return (b1 << 8) | b2;
/* Big5 level 2: same computation, with the index advanced by
   BIG5_SAME_ROW * (0xC9 - 0xA1). */
903 else if ( conv_type == CONVERSION_BIG5_2 )
906 = (((code_point >> 8) & 0x7F) - 33) * 94
907 + (( code_point & 0x7F) - 33)
908 + BIG5_SAME_ROW * (0xC9 - 0xA1);
909 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
910 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
912 b2 += b2 < 0x3F ? 0x40 : 0x62;
913 return (b1 << 8) | b2;
/* Look up the character explicitly defined for CODE_POINT in coded
   charset CCS.  Unless WITHOUT_INHERITANCE is non-zero, a miss falls
   back to the mother charset.
   NOTE(review): the loop header around the table walk and the final
   return are not visible in this excerpt. */
919 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
921 int dim = XCHARSET_DIMENSION (ccs);
922 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
930 = get_ccs_octet_table (decoding_table, ccs,
931 (code_point >> (dim * 8)) & 255);
/* A character in decoding_table is the result. */
933 if (CHARP (decoding_table))
934 return XCHAR (decoding_table);
/* An entry marked Qunloaded is fetched through
   load_char_decoding_entry_maybe. */
936 if (EQ (decoding_table, Qunloaded))
938 char_id = load_char_decoding_entry_maybe (ccs, code_point);
940 #endif /* HAVE_CHISE */
/* Inheritance: convert the code point with the charset's conversion
   type, add its code offset, and retry in the mother charset.  A
   mother equal to Vcharset_ucs goes through DECODE_CHAR; any other
   mother recurses into this function. */
943 else if ( !without_inheritance
944 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
947 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
951 code += XCHARSET_CODE_OFFSET(ccs);
952 if ( EQ (mother, Vcharset_ucs) )
953 return DECODE_CHAR (mother, code, without_inheritance);
955 return decode_defined_char (mother, code,
956 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   numeric attributes alone.
   NOTE(review): the failure results and the case labels of both
   switch statements are not visible in this excerpt. */
963 decode_builtin_char (Lisp_Object charset, int code_point)
965 Lisp_Object mother = XCHARSET_MOTHER (charset);
/* Charsets with a positive max code. */
968 if ( XCHARSET_MAX_CODE (charset) > 0 )
/* With a mother charset: convert the code point, add the code offset
   and recurse into the mother. */
970 if ( CHARSETP (mother) )
973 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
978 decode_builtin_char (mother,
979 code + XCHARSET_CODE_OFFSET(charset));
986 = (XCHARSET_DIMENSION (charset) == 1
988 code_point - XCHARSET_BYTE_OFFSET (charset)
990 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
991 * XCHARSET_CHARS (charset)
992 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
993 + XCHARSET_CODE_OFFSET (charset);
/* Test whether cid lies outside [min code, max code]. */
994 if ((cid < XCHARSET_MIN_CODE (charset))
995 || (XCHARSET_MAX_CODE (charset) < cid))
/* Charsets with a final byte of '0' or above: the result is computed
   from the final byte and the 7-bit position codes. */
1000 else if ((final = XCHARSET_FINAL (charset)) >= '0')
/* Dimension 1: 94 or 96 positions per final byte, counted from 33 or
   32 respectively. */
1002 if (XCHARSET_DIMENSION (charset) == 1)
1004 switch (XCHARSET_CHARS (charset))
1008 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1011 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
/* Other dimensions: 94*94 or 96*96 positions per final byte; the high
   byte selects the row and the low byte the cell. */
1019 switch (XCHARSET_CHARS (charset))
1022 return MIN_CHAR_94x94
1023 + (final - '0') * 94 * 94
1024 + (((code_point >> 8) & 0x7F) - 33) * 94
1025 + ((code_point & 0x7F) - 33);
1027 return MIN_CHAR_96x96
1028 + (final - '0') * 96 * 96
1029 + (((code_point >> 8) & 0x7F) - 32) * 96
1030 + ((code_point & 0x7F) - 32);
/* Return the code point of character CH in CHARSET.  DEFINED_ONLY is
   passed on to the mother-charset lookup and also cuts off the
   computed fallbacks below.
   NOTE(review): the failure result and several intermediate lines
   (including the row computations) are not visible in this excerpt. */
1042 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1044 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* First consult the charset's encoding table, when it is a char table
   holding an integer for CH. */
1047 if ( CHAR_TABLEP (encoding_table)
1048 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1053 Lisp_Object mother = XCHARSET_MOTHER (charset);
1054 int min = XCHARSET_MIN_CODE (charset);
1055 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, obtain CH's code point there first; a charset
   whose final byte is '0' or above forces defined_only = 1 for that
   lookup. */
1058 if ( CHARSETP (mother) )
1060 if (XCHARSET_FINAL (charset) >= '0')
1061 code = charset_code_point (mother, ch, 1);
1063 code = charset_code_point (mother, ch, defined_only);
1065 else if (defined_only)
1067 else if ( ((max == 0) && CHARSETP (mother)
1068 && (XCHARSET_FINAL (charset) == 0))
1069 || ((min <= ch) && (ch <= max)) )
/* Accept the code when it lies in [min, max], or when there is no max
   code, a mother charset exists and the code is non-negative. */
1071 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1072 || ((min <= code) && (code <= max)) )
/* d is the code relative to the charset's code offset; the conversion
   type decides how d is split into position bytes. */
1074 int d = code - XCHARSET_CODE_OFFSET (charset);
1076 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1078 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1080 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1082 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1085 int cell = d % 94 + 33;
1091 return (row << 8) | cell;
/* Big5 level 1: rebuild the linear index I from the two Big5 bytes,
   then express it as a 94x94 row/cell pair counted from 33. */
1093 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1095 int B1 = d >> 8, B2 = d & 0xFF;
1097 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1098 - (B2 < 0x7F ? 0x40 : 0x62);
1102 return ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Big5 level 2: same, after rebasing I by
   BIG5_SAME_ROW * (0xC9 - 0xA1). */
1105 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1107 int B1 = d >> 8, B2 = d & 0xFF;
1109 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1110 - (B2 < 0x7F ? 0x40 : 0x62);
1114 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1115 return ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Two-byte grids: row in the high byte, cell in the low byte. */
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1119 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1120 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1121 return ((d / 96 + 32) << 8) | (d % 96 + 32);
/* 94x94x60: plane, row and cell packed into three bytes. */
1122 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1124 int plane = d / (94 * 60) + 33;
1125 int row = (d % (94 * 60)) / 94;
1126 int cell = d % 94 + 33;
1132 return (plane << 16) | (row << 8) | cell;
/* Three- and four-byte grids: successive quotients of d by the grid
   size, one byte each. */
1134 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1136 ( (d / (94 * 94) + 33) << 16)
1137 | ((d / 94 % 94 + 33) << 8)
1139 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1141 ( (d / (96 * 96) + 32) << 16)
1142 | ((d / 96 % 96 + 32) << 8)
1144 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1146 ( (d / (94 * 94 * 94) + 33) << 24)
1147 | ((d / (94 * 94) % 94 + 33) << 16)
1148 | ((d / 94 % 94 + 33) << 8)
1150 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1152 ( (d / (96 * 96 * 96) + 32) << 24)
1153 | ((d / (96 * 96) % 96 + 32) << 16)
1154 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): an unknown conversion type is reported with a bare
   printf (stdout, no trailing newline); consider routing this through
   the error/warning mechanism used elsewhere in this file. */
1158 printf ("Unknown CCS-conversion %d is specified!",
1159 XCHARSET_CONVERSION (charset));
1163 else if (defined_only)
/* Computed fallback for charsets with a final byte of '0' or above and
   a min code of 0: derive the code point from CH's offset from the
   base of the matching 94 / 96 / 94x94 / 96x96 range. */
1165 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1166 ( XCHARSET_MIN_CODE (charset) == 0 )
1168 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1169 (XCHARSET_CODE_OFFSET (charset)
1170 == XCHARSET_MIN_CODE (charset))
1175 if (XCHARSET_DIMENSION (charset) == 1)
1177 if (XCHARSET_CHARS (charset) == 94)
1179 if (((d = ch - (MIN_CHAR_94
1180 + (XCHARSET_FINAL (charset) - '0') * 94))
1185 else if (XCHARSET_CHARS (charset) == 96)
1187 if (((d = ch - (MIN_CHAR_96
1188 + (XCHARSET_FINAL (charset) - '0') * 96))
1196 else if (XCHARSET_DIMENSION (charset) == 2)
1198 if (XCHARSET_CHARS (charset) == 94)
1200 if (((d = ch - (MIN_CHAR_94x94
1202 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1205 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1207 else if (XCHARSET_CHARS (charset) == 96)
1209 if (((d = ch - (MIN_CHAR_96x96
1211 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1214 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C by range, store it in
   *CHARSET and return C's code point within it.
   NOTE(review): several conditions and return statements are not
   visible in this excerpt. */
1225 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
/* The lowest ranges map to ASCII, control-1 and Latin-1. */
1227 if (c <= MAX_CHAR_BASIC_LATIN)
1229 *charset = Vcharset_ascii;
1234 *charset = Vcharset_control_1;
1239 *charset = Vcharset_latin_iso8859_1;
/* Hebrew and Thai ranges: the code point is the offset from the start
   of the range plus 0x20. */
1243 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1245 *charset = Vcharset_hebrew_iso8859_8;
1246 return c - MIN_CHAR_HEBREW + 0x20;
1249 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1251 *charset = Vcharset_thai_tis620;
1252 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike its siblings this branch returns list2 (...)
   and never sets *charset; presumably it sits under a preprocessor
   guard that is not visible here -- confirm before enabling it. */
1255 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1256 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1258 return list2 (Vcharset_katakana_jisx0201,
1259 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
/* UCS planes: BMP, then SMP and SIP relative to the start of their
   plane. */
1262 else if (c <= MAX_CHAR_BMP)
1264 *charset = Vcharset_ucs_bmp;
1267 else if (c <= MAX_CHAR_SMP)
1269 *charset = Vcharset_ucs_smp;
1270 return c - MIN_CHAR_SMP;
1272 else if (c <= MAX_CHAR_SIP)
1274 *charset = Vcharset_ucs_sip;
1275 return c - MIN_CHAR_SIP;
/* Anything else below the 94-charset range is plain UCS. */
1277 else if (c < MIN_CHAR_94)
1279 *charset = Vcharset_ucs;
/* 94-character sets: the quotient by 94 selects the final byte (from
   '0'), the remainder plus 33 is the code point.  If no
   left-to-right charset is registered for that final byte, fall back
   to UCS. */
1282 else if (c <= MAX_CHAR_94)
1284 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1285 ((c - MIN_CHAR_94) / 94) + '0',
1286 CHARSET_LEFT_TO_RIGHT);
1287 if (!NILP (*charset))
1288 return ((c - MIN_CHAR_94) % 94) + 33;
1291 *charset = Vcharset_ucs;
/* 96-character sets: same scheme with 96 and base 32. */
1295 else if (c <= MAX_CHAR_96)
1297 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1298 ((c - MIN_CHAR_96) / 96) + '0',
1299 CHARSET_LEFT_TO_RIGHT);
1300 if (!NILP (*charset))
1301 return ((c - MIN_CHAR_96) % 96) + 32;
1304 *charset = Vcharset_ucs;
/* 94x94 sets: final byte from the quotient by 94*94; row in the high
   byte, cell in the low byte, both counted from 33. */
1308 else if (c <= MAX_CHAR_94x94)
1311 = CHARSET_BY_ATTRIBUTES (94, 2,
1312 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1313 CHARSET_LEFT_TO_RIGHT);
1314 if (!NILP (*charset))
1315 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1316 | (((c - MIN_CHAR_94x94) % 94) + 33);
1319 *charset = Vcharset_ucs;
/* 96x96 sets: same scheme with 96 and base 32. */
1323 else if (c <= MAX_CHAR_96x96)
1326 = CHARSET_BY_ATTRIBUTES (96, 2,
1327 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1328 CHARSET_LEFT_TO_RIGHT);
1329 if (!NILP (*charset))
1330 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1331 | (((c - MIN_CHAR_96x96) % 96) + 32);
1334 *charset = Vcharset_ucs;
/* Everything beyond the ranges above is plain UCS. */
1340 *charset = Vcharset_ucs;
1345 Lisp_Object Vdefault_coded_charset_priority_list;
1349 /************************************************************************/
1350 /* Basic charset Lisp functions */
1351 /************************************************************************/
1353 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1354 Return non-nil if OBJECT is a charset.
1358 return CHARSETP (object) ? Qt : Qnil;
1361 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1362 Retrieve the charset of the given name.
1363 If CHARSET-OR-NAME is a charset object, it is simply returned.
1364 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1365 nil is returned. Otherwise the associated charset object is returned.
1369 if (CHARSETP (charset_or_name))
1370 return charset_or_name;
1372 CHECK_SYMBOL (charset_or_name);
1373 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1376 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1377 Retrieve the charset of the given name.
1378 Same as `find-charset' except an error is signalled if there is no such
1379 charset instead of returning nil.
1383 Lisp_Object charset = Ffind_charset (name);
1386 signal_simple_error ("No such charset", name);
1390 /* We store the charsets in hash tables with the names as the key and the
1391 actual charset object as the value. Occasionally we need to use them
1392 in a list format. These routines provide us with that. */
1393 struct charset_list_closure
1395 Lisp_Object *charset_list;
/* Hash-table mapping callback: pushes KEY onto the front of the list
   that the closure points at.  VALUE is not used by the visible code.
   NOTE(review): the return statement is not visible in this excerpt. */
1399 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1400 void *charset_list_closure)
1402 /* This function can GC */
/* Recover the typed closure and the address of the list being built. */
1403 struct charset_list_closure *chcl =
1404 (struct charset_list_closure*) charset_list_closure;
1405 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend KEY to the list. */
1407 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1411 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1412 Return a list of the names of all defined charsets.
1416 Lisp_Object charset_list = Qnil;
1417 struct gcpro gcpro1;
1418 struct charset_list_closure charset_list_closure;
1420 GCPRO1 (charset_list);
1421 charset_list_closure.charset_list = &charset_list;
1422 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1423 &charset_list_closure);
1426 return charset_list;
1429 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1430 Return the name of charset CHARSET.
1434 return XCHARSET_NAME (Fget_charset (charset));
1437 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive make-charset.
   NAME must be a symbol and DOC-STRING a string or nil; it is an error
   if Ffind_charset already knows NAME.  PROPS is walked as a property
   list: each recognized key is validated and copied into a local, and
   any other key signals an Unrecognized property error.
   Defaults applied afterwards, as visible below: doc-string and
   registry become the empty string, short-name becomes the print name
   of NAME, long-name becomes the doc-string, and columns becomes the
   dimension.  The charset object is then built by make_charset and, if
   a CCL program was given, it is stored on the new charset.
   NOTE(review): the docstring spells two properties code-min and
   code-max, but the loop compares against Qmin_code and Qmax_code,
   which syms_of_mule_charset interns as min-code and max-code.
   NOTE(review): the guards around several statements (the final-byte
   checks, the id allocation, the byte_offset computation) are not
   visible in this listing -- consult the full source before editing. */
1438 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1439 Define a new character set.
1440 This function is for use with Mule support.
1441 NAME is a symbol, the name by which the character set is normally referred.
1442 DOC-STRING is a string describing the character set.
1443 PROPS is a property list, describing the specific nature of the
1444 character set. Recognized properties are:
1446 'short-name Short version of the charset name (ex: Latin-1)
1447 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1448 'registry A regular expression matching the font registry field for
1450 'dimension Number of octets used to index a character in this charset.
1451 Either 1 or 2. Defaults to 1.
1452 If UTF-2000 feature is enabled, 3 or 4 are also available.
1453 'columns Number of columns used to display a character in this charset.
1454 Only used in TTY mode. (Under X, the actual width of a
1455 character can be derived from the font used to display the
1456 characters.) If unspecified, defaults to the dimension
1457 (this is almost always the correct value).
1458 'chars Number of characters in each dimension (94 or 96).
1459 Defaults to 94. Note that if the dimension is 2, the
1460 character set thus described is 94x94 or 96x96.
1461 If UTF-2000 feature is enabled, 128 or 256 are also available.
1462 'final Final byte of ISO 2022 escape sequence. Must be
1463 supplied. Each combination of (DIMENSION, CHARS) defines a
1464 separate namespace for final bytes. Note that ISO
1465 2022 restricts the final byte to the range
1466 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1467 dimension == 2. Note also that final bytes in the range
1468 0x30 - 0x3F are reserved for user-defined (not official)
1470 'graphic 0 (use left half of font on output) or 1 (use right half
1471 of font on output). Defaults to 0. For example, for
1472 a font whose registry is ISO8859-1, the left half
1473 (octets 0x20 - 0x7F) is the `ascii' character set, while
1474 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1475 character set. With 'graphic set to 0, the octets
1476 will have their high bit cleared; with it set to 1,
1477 the octets will have their high bit set.
1478 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1480 'ccl-program A compiled CCL program used to convert a character in
1481 this charset into an index into the font. This is in
1482 addition to the 'graphic property. The CCL program
1483 is passed the octets of the character, with the high
1484 bit cleared and set depending upon whether the value
1485 of the 'graphic property is 0 or 1.
1486 'mother [UTF-2000 only] Base coded-charset.
1487 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1488 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1489 'code-offset [UTF-2000 only] Offset for a code-point of a base
1491 'conversion [UTF-2000 only] Conversion for a code-point of a base
1492 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1494 (name, doc_string, props))
1496 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1497 int direction = CHARSET_LEFT_TO_RIGHT;
1498 Lisp_Object registry = Qnil;
1499 Lisp_Object charset;
1500 Lisp_Object ccl_program = Qnil;
1501 Lisp_Object short_name = Qnil, long_name = Qnil;
1502 Lisp_Object mother = Qnil;
1503 int min_code = 0, max_code = 0, code_offset = 0;
1504 int byte_offset = -1;
1507 CHECK_SYMBOL (name);
1508 if (!NILP (doc_string))
1509 CHECK_STRING (doc_string);
1511 charset = Ffind_charset (name);
1512 if (!NILP (charset))
1513 signal_simple_error ("Cannot redefine existing charset", name);
1516 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1518 if (EQ (keyword, Qshort_name))
1520 CHECK_STRING (value);
1524 else if (EQ (keyword, Qlong_name))
1526 CHECK_STRING (value);
1530 else if (EQ (keyword, Qiso_ir))
1534 id = - XINT (value);
1538 else if (EQ (keyword, Qdimension))
1541 dimension = XINT (value);
1542 if (dimension < 1 ||
1549 signal_simple_error ("Invalid value for 'dimension", value);
1552 else if (EQ (keyword, Qchars))
1555 chars = XINT (value);
1556 if (chars != 94 && chars != 96
1558 && chars != 128 && chars != 256
1561 signal_simple_error ("Invalid value for 'chars", value);
1564 else if (EQ (keyword, Qcolumns))
1567 columns = XINT (value);
1568 if (columns != 1 && columns != 2)
1569 signal_simple_error ("Invalid value for 'columns", value);
1572 else if (EQ (keyword, Qgraphic))
1575 graphic = XINT (value);
1583 signal_simple_error ("Invalid value for 'graphic", value);
1586 else if (EQ (keyword, Qregistry))
1588 CHECK_STRING (value);
1592 else if (EQ (keyword, Qdirection))
1594 if (EQ (value, Ql2r))
1595 direction = CHARSET_LEFT_TO_RIGHT;
1596 else if (EQ (value, Qr2l))
1597 direction = CHARSET_RIGHT_TO_LEFT;
1599 signal_simple_error ("Invalid value for 'direction", value);
1602 else if (EQ (keyword, Qfinal))
1604 CHECK_CHAR_COERCE_INT (value);
1605 final = XCHAR (value);
1606 if (final < '0' || final > '~')
1607 signal_simple_error ("Invalid value for 'final", value);
1611 else if (EQ (keyword, Qmother))
1613 mother = Fget_charset (value);
1616 else if (EQ (keyword, Qmin_code))
1619 min_code = XUINT (value);
1622 else if (EQ (keyword, Qmax_code))
1625 max_code = XUINT (value);
1628 else if (EQ (keyword, Qcode_offset))
1631 code_offset = XUINT (value);
1634 else if (EQ (keyword, Qconversion))
1636 if (EQ (value, Q94x60))
1637 conversion = CONVERSION_94x60;
1638 else if (EQ (value, Q94x94x60))
1639 conversion = CONVERSION_94x94x60;
1640 else if (EQ (value, Qbig5_1))
1641 conversion = CONVERSION_BIG5_1;
1642 else if (EQ (value, Qbig5_2))
1643 conversion = CONVERSION_BIG5_2;
1645 signal_simple_error ("Unrecognized conversion", value);
1649 else if (EQ (keyword, Qccl_program))
1651 struct ccl_program test_ccl;
1653 if (setup_ccl_program (&test_ccl, value) < 0)
1654 signal_simple_error ("Invalid value for 'ccl-program", value);
1655 ccl_program = value;
1659 signal_simple_error ("Unrecognized property", keyword);
1665 error ("'final must be specified");
1667 if (dimension == 2 && final > 0x5F)
1669 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1672 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1673 CHARSET_LEFT_TO_RIGHT)) ||
1674 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1675 CHARSET_RIGHT_TO_LEFT)))
1677 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1680 id = get_unallocated_leading_byte (dimension);
1682 if (NILP (doc_string))
1683 doc_string = build_string ("");
1685 if (NILP (registry))
1686 registry = build_string ("");
1688 if (NILP (short_name))
1689 XSETSTRING (short_name, XSYMBOL (name)->name);
1691 if (NILP (long_name))
1692 long_name = doc_string;
1695 columns = dimension;
1697 if (byte_offset < 0)
1701 else if (chars == 96)
1707 charset = make_charset (id, name, chars, dimension, columns, graphic,
1708 final, direction, short_name, long_name,
1709 doc_string, registry,
1710 Qnil, min_code, max_code, code_offset, byte_offset,
1711 mother, conversion);
1712 if (!NILP (ccl_program))
1713 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive make-reverse-direction-charset.
   Signals an error if CHARSET already has a reverse-direction partner,
   if NEW-NAME is not a symbol, or if NEW-NAME already names a charset.
   Otherwise copies chars, dimension, columns, graphic, final, the doc
   string, both names and the registry from CHARSET, flips the
   direction, obtains a fresh id from get_unallocated_leading_byte and
   builds the new charset with make_charset.  The two charsets are then
   linked to each other through their reverse-direction-charset slots.
   NOTE(review): two alternative tails of the make_charset argument
   list are visible (one copying the decoding table and code range from
   CHARSET, one passing Qnil and zeros); presumably a preprocessor
   conditional not shown in this listing selects between them. */
1717 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1719 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1720 NEW-NAME is the name of the new charset. Return the new charset.
1722 (charset, new_name))
1724 Lisp_Object new_charset = Qnil;
1725 int id, chars, dimension, columns, graphic, final;
1727 Lisp_Object registry, doc_string, short_name, long_name;
1730 charset = Fget_charset (charset);
1731 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1732 signal_simple_error ("Charset already has reverse-direction charset",
1735 CHECK_SYMBOL (new_name);
1736 if (!NILP (Ffind_charset (new_name)))
1737 signal_simple_error ("Cannot redefine existing charset", new_name);
1739 cs = XCHARSET (charset);
1741 chars = CHARSET_CHARS (cs);
1742 dimension = CHARSET_DIMENSION (cs);
1743 columns = CHARSET_COLUMNS (cs);
1744 id = get_unallocated_leading_byte (dimension);
1746 graphic = CHARSET_GRAPHIC (cs);
1747 final = CHARSET_FINAL (cs);
1748 direction = CHARSET_RIGHT_TO_LEFT;
1749 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1750 direction = CHARSET_LEFT_TO_RIGHT;
1751 doc_string = CHARSET_DOC_STRING (cs);
1752 short_name = CHARSET_SHORT_NAME (cs);
1753 long_name = CHARSET_LONG_NAME (cs);
1754 registry = CHARSET_REGISTRY (cs);
1756 new_charset = make_charset (id, new_name, chars, dimension, columns,
1757 graphic, final, direction, short_name, long_name,
1758 doc_string, registry,
1760 CHARSET_DECODING_TABLE(cs),
1761 CHARSET_MIN_CODE(cs),
1762 CHARSET_MAX_CODE(cs),
1763 CHARSET_CODE_OFFSET(cs),
1764 CHARSET_BYTE_OFFSET(cs),
1766 CHARSET_CONVERSION (cs)
1768 Qnil, 0, 0, 0, 0, Qnil, 0
1772 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1773 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive define-charset-alias.
   Checks that ALIAS is a symbol, resolves CHARSET through Fget_charset
   and stores the charset object under ALIAS in Vcharset_hash_table.
   The value returned is whatever Fputhash returns. */
1778 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1779 Define symbol ALIAS as an alias for CHARSET.
1783 CHECK_SYMBOL (alias);
1784 charset = Fget_charset (charset);
1785 return Fputhash (alias, charset, Vcharset_hash_table);
1788 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive charset-reverse-direction-charset.
   Resolves CHARSET through Fget_charset and returns the content of its
   reverse-direction-charset slot unchanged.
   NOTE(review): the DEFSUBR list in syms_of_mule_charset shows no entry
   for Fcharset_reverse_direction_charset (only a commented-out
   Freverse_direction_charset) -- confirm whether this primitive is
   actually registered. */
1790 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1792 Return the reverse-direction charset parallel to CHARSET, if any.
1793 This is the charset with the same properties (in particular, the same
1794 dimension, number of characters per dimension, and final byte) as
1795 CHARSET but whose characters are displayed in the opposite direction.
1799 charset = Fget_charset (charset);
1800 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive charset-from-attributes.
   Validates its arguments and signals an error for anything else:
   DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL between the
   characters 0 and ~ (and at most 0x5F when DIMENSION is 2), and
   DIRECTION one of l2r, r2l or nil.  The charset is then looked up with
   CHARSET_BY_ATTRIBUTES; lookups for both directions are present for
   the case where no direction was given, left-to-right first.
   NOTE(review): the visible return yields XCHARSET_NAME (obj), i.e. the
   name of the charset rather than the charset object the docstring
   mentions.  The handling of a failed lookup is not visible in this
   listing -- confirm that obj is tested before the name is taken. */
1804 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1805 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1806 If DIRECTION is omitted, both directions will be checked (left-to-right
1807 will be returned if character sets exist for both directions).
1809 (dimension, chars, final, direction))
1811 int dm, ch, fi, di = -1;
1812 Lisp_Object obj = Qnil;
1814 CHECK_INT (dimension);
1815 dm = XINT (dimension);
1816 if (dm < 1 || dm > 2)
1817 signal_simple_error ("Invalid value for DIMENSION", dimension);
1821 if (ch != 94 && ch != 96)
1822 signal_simple_error ("Invalid value for CHARS", chars);
1824 CHECK_CHAR_COERCE_INT (final);
1826 if (fi < '0' || fi > '~')
1827 signal_simple_error ("Invalid value for FINAL", final);
1829 if (EQ (direction, Ql2r))
1830 di = CHARSET_LEFT_TO_RIGHT;
1831 else if (EQ (direction, Qr2l))
1832 di = CHARSET_RIGHT_TO_LEFT;
1833 else if (!NILP (direction))
1834 signal_simple_error ("Invalid value for DIRECTION", direction);
1836 if (dm == 2 && fi > 0x5F)
1838 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1842 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1844 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1847 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1850 return XCHARSET_NAME (obj);
/* Lisp primitive charset-short-name.  Resolves CHARSET through
   Fget_charset and returns its short-name slot. */
1854 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1855 Return short name of CHARSET.
1859 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive charset-long-name.  Resolves CHARSET through
   Fget_charset and returns its long-name slot. */
1862 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1863 Return long name of CHARSET.
1867 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive charset-description.  Resolves CHARSET through
   Fget_charset and returns its doc-string slot. */
1870 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1871 Return description of CHARSET.
1875 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive charset-dimension.  Resolves CHARSET through
   Fget_charset and returns its dimension as a Lisp integer. */
1878 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1879 Return dimension of CHARSET.
1883 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive charset-property.
   Resolves CHARSET, checks that PROP is a symbol and returns the slot
   that PROP names.  Integer slots are wrapped with make_int.  Special
   cases visible below: final yields nil when the stored final byte is
   0 and a character otherwise; direction yields the symbol l2r or r2l;
   reverse-direction-charset yields the name of the partner charset
   when the slot holds a charset and the raw slot value otherwise.
   Any other PROP signals an Unrecognized charset property name error.
   NOTE(review): of the properties make-charset accepts, code-offset and
   conversion have no visible branch here -- confirm against the full
   source whether that is intended. */
1886 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1887 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1888 Recognized properties are those listed in `make-charset', as well as
1889 'name and 'doc-string.
1895 charset = Fget_charset (charset);
1896 cs = XCHARSET (charset);
1898 CHECK_SYMBOL (prop);
1899 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1900 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1901 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1902 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1903 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1904 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1905 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1906 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1907 Qnil : make_char (CHARSET_FINAL (cs));
1908 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1909 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1910 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1911 if (EQ (prop, Qdirection))
1912 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1913 if (EQ (prop, Qreverse_direction_charset))
1915 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1916 /* #### Is this translation OK? If so, error checking sufficient? */
1917 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1920 if (EQ (prop, Qmother))
1921 return CHARSET_MOTHER (cs);
1922 if (EQ (prop, Qmin_code))
1923 return make_int (CHARSET_MIN_CODE (cs));
1924 if (EQ (prop, Qmax_code))
1925 return make_int (CHARSET_MAX_CODE (cs));
1927 signal_simple_error ("Unrecognized charset property name", prop);
1928 return Qnil; /* not reached */
/* Lisp primitive charset-id.  Resolves CHARSET through Fget_charset and
   returns the value of XCHARSET_LEADING_BYTE as a Lisp integer. */
1931 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1932 Return charset identification number of CHARSET.
1936 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* Lisp primitive set-charset-ccl-program.
   Resolves CHARSET, test-compiles CCL-PROGRAM into a scratch
   struct ccl_program with setup_ccl_program (a negative result signals
   an Invalid ccl-program error) and then stores CCL-PROGRAM itself, not
   the scratch structure, in the ccl-program slot of the charset. */
1939 /* #### We need to figure out which properties we really want to
1942 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1943 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1945 (charset, ccl_program))
1947 struct ccl_program test_ccl;
1949 charset = Fget_charset (charset);
1950 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1951 signal_simple_error ("Invalid ccl-program", ccl_program);
1952 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look CHARSET up in
   that device's charset_font_cache and, when an entry exists, empty the
   hash table stored there with Fclrhash.  The entry itself is kept. */
1957 invalidate_charset_font_caches (Lisp_Object charset)
1959 /* Invalidate font cache entries for charset on all devices. */
1960 Lisp_Object devcons, concons, hash_table;
1961 DEVICE_LOOP_NO_BREAK (devcons, concons)
1963 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so it marks a missing entry. */
1964 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1965 if (!UNBOUNDP (hash_table))
1966 Fclrhash (hash_table);
/* Lisp primitive set-charset-registry.
   Resolves CHARSET, checks that REGISTRY is a string and stores it in
   the registry slot.  Afterwards the per-device font caches for the
   charset are emptied with invalidate_charset_font_caches and
   face_property_was_changed is called for the font property of
   Vdefault_face with the Qglobal locale. */
1970 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1971 Set the 'registry property of CHARSET to REGISTRY.
1973 (charset, registry))
1975 charset = Fget_charset (charset);
1976 CHECK_STRING (registry);
1977 XCHARSET_REGISTRY (charset) = registry;
1978 invalidate_charset_font_caches (charset);
1979 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive charset-mapping-table.  Resolves CHARSET through
   Fget_charset and returns its decoding-table slot as stored. */
1984 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1985 Return mapping-table of CHARSET.
1989 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive set-charset-mapping-table.
   Resolves CHARSET.  TABLE is handled in three ways, as far as the
   visible lines show: one branch resets the decoding table to Qnil; a
   vector is validated with decoding_table_check_elements, whose
   failures are reported as Too big table, Invalid element is found or
   Something wrong; anything else signals wrong-type-argument with the
   predicate name vector-or-nil-p.
   The vector is then walked and each character found is given, via
   Fput_char_attribute under the name of the charset, the code point
   i + byte_offset for a one-level table, or that value shifted left by
   8 and combined with the inner index for a nested vector.
   NOTE(review): the case labels, the element tests and the return
   statement are not visible in this listing -- consult the full source
   before changing the control flow. */
1992 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1993 Set mapping-table of CHARSET to TABLE.
1997 struct Lisp_Charset *cs;
2001 charset = Fget_charset (charset);
2002 cs = XCHARSET (charset);
2006 CHARSET_DECODING_TABLE(cs) = Qnil;
2009 else if (VECTORP (table))
2011 int ccs_len = CHARSET_BYTE_SIZE (cs);
2012 int ret = decoding_table_check_elements (table,
2013 CHARSET_DIMENSION (cs),
2018 signal_simple_error ("Too big table", table);
2020 signal_simple_error ("Invalid element is found", table);
2022 signal_simple_error ("Something wrong", table);
2024 CHARSET_DECODING_TABLE(cs) = Qnil;
2027 signal_error (Qwrong_type_argument,
2028 list2 (build_translated_string ("vector-or-nil-p"),
2031 byte_offset = CHARSET_BYTE_OFFSET (cs);
2032 switch (CHARSET_DIMENSION (cs))
2035 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2037 Lisp_Object c = XVECTOR_DATA(table)[i];
2040 Fput_char_attribute (c, XCHARSET_NAME (charset),
2041 make_int (i + byte_offset));
2045 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2047 Lisp_Object v = XVECTOR_DATA(table)[i];
2053 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2055 Lisp_Object c = XVECTOR_DATA(v)[j];
2059 (c, XCHARSET_NAME (charset),
2060 make_int ( ( (i + byte_offset) << 8 )
2066 Fput_char_attribute (v, XCHARSET_NAME (charset),
2067 make_int (i + byte_offset));
/* Lisp primitive save-charset-mapping-table.
   Writes the decoding table of CHARSET to persistent storage.  With
   HAVE_LIBCHISE the target is the CCS obtained from
   default_chise_data_source by charset name, and each character goes
   through chise_ccs_set_decoded_char; otherwise a database file named
   by char_attribute_system_db_file is opened with mode w+ and each
   entry is stored with Fput_database as printed code point and printed
   character.
   Every octet position is scanned from CHARSET_BYTE_OFFSET up to, but
   not including, that offset plus CHARSET_BYTE_SIZE.  The nested loops
   below cover one to four octets and sit inside a switch on
   CHARSET_DIMENSION.  Code points are packed most significant octet
   first: group << 24, plane << 16, row << 8, then cell.
   The non-CHISE path returns the result of Fclose_database; the CHISE
   path calls chise_ccs_sync first.
   NOTE(review): the case labels and the tests that skip empty
   sub-tables or non-character cells are not visible in this listing. */
2075 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2076 Save mapping-table of CHARSET.
2080 struct Lisp_Charset *cs;
2081 int byte_min, byte_max;
2082 #ifdef HAVE_LIBCHISE
2084 #else /* HAVE_LIBCHISE */
2086 Lisp_Object db_file;
2087 #endif /* not HAVE_LIBCHISE */
2089 charset = Fget_charset (charset);
2090 cs = XCHARSET (charset);
2092 #ifdef HAVE_LIBCHISE
2093 if ( open_chise_data_source_maybe () )
2097 = chise_ds_get_ccs (default_chise_data_source,
2098 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2101 printf ("Can't open decoding-table %s\n",
2102 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2105 #else /* HAVE_LIBCHISE */
2106 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2107 Qsystem_char_id, 1);
2108 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2109 #endif /* not HAVE_LIBCHISE */
2111 byte_min = CHARSET_BYTE_OFFSET (cs);
2112 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2113 switch (CHARSET_DIMENSION (cs))
2117 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2120 for (cell = byte_min; cell < byte_max; cell++)
2122 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2126 #ifdef HAVE_LIBCHISE
2127 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2128 #else /* HAVE_LIBCHISE */
2129 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2130 Fprin1_to_string (c, Qnil),
2132 #endif /* not HAVE_LIBCHISE */
2139 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2142 for (row = byte_min; row < byte_max; row++)
2144 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2147 for (cell = byte_min; cell < byte_max; cell++)
2149 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2153 #ifdef HAVE_LIBCHISE
2154 chise_ccs_set_decoded_char
2156 (row << 8) | cell, XCHAR (c));
2157 #else /* HAVE_LIBCHISE */
2158 Fput_database (Fprin1_to_string (make_int ((row << 8)
2161 Fprin1_to_string (c, Qnil),
2163 #endif /* not HAVE_LIBCHISE */
2171 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2174 for (plane = byte_min; plane < byte_max; plane++)
2177 = get_ccs_octet_table (table_p, charset, plane);
2180 for (row = byte_min; row < byte_max; row++)
2183 = get_ccs_octet_table (table_r, charset, row);
2186 for (cell = byte_min; cell < byte_max; cell++)
2188 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2193 #ifdef HAVE_LIBCHISE
2194 chise_ccs_set_decoded_char
2199 #else /* HAVE_LIBCHISE */
2200 Fput_database (Fprin1_to_string
2201 (make_int ((plane << 16)
2205 Fprin1_to_string (c, Qnil),
2207 #endif /* not HAVE_LIBCHISE */
2216 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2219 for (group = byte_min; group < byte_max; group++)
2222 = get_ccs_octet_table (table_g, charset, group);
2225 for (plane = byte_min; plane < byte_max; plane++)
2228 = get_ccs_octet_table (table_p, charset, plane);
2231 for (row = byte_min; row < byte_max; row++)
2234 = get_ccs_octet_table (table_r, charset, row);
2237 for (cell = byte_min; cell < byte_max; cell++)
2240 = get_ccs_octet_table (table_c, charset, cell);
2244 #ifdef HAVE_LIBCHISE
2245 chise_ccs_set_decoded_char
2251 #else /* HAVE_LIBCHISE */
2252 Fput_database (Fprin1_to_string
2253 (make_int (( group << 24)
2258 Fprin1_to_string (c, Qnil),
2260 #endif /* not HAVE_LIBCHISE */
2268 #ifdef HAVE_LIBCHISE
2269 chise_ccs_sync (dt_ccs);
2271 #else /* HAVE_LIBCHISE */
2272 return Fclose_database (db);
2273 #endif /* not HAVE_LIBCHISE */
/* Lisp primitive reset-charset-mapping-table.
   Resolves CCS to a charset and tests for its backing store: under
   HAVE_LIBCHISE that is chise_ccs_setup_db returning 0 for the CCS
   fetched from default_chise_data_source, otherwise Ffile_exists_p on
   the file named by char_attribute_system_db_file.  When the store is
   found the decoding-table slot is set to Qunloaded.
   NOTE(review): presumably entries are then fetched on demand by
   load_char_decoding_entry_maybe -- confirm.  The return statements
   are not visible in this listing. */
2276 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2277 Reset mapping-table of CCS with database file.
2281 #ifdef HAVE_LIBCHISE
2282 CHISE_CCS chise_ccs;
2284 Lisp_Object db_file;
2287 ccs = Fget_charset (ccs);
2289 #ifdef HAVE_LIBCHISE
2290 if ( open_chise_data_source_maybe () )
2293 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2294 XSTRING_DATA (Fsymbol_name
2295 (XCHARSET_NAME(ccs))));
2296 if (chise_ccs == NULL)
2299 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2300 Qsystem_char_id, 0);
2304 #ifdef HAVE_LIBCHISE
2305 chise_ccs_setup_db (chise_ccs, 0) == 0
2307 !NILP (Ffile_exists_p (db_file))
2311 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetch the character for CODE_POINT of charset CCS from the backing
   store and record the outcome with decoding_table_put_char.
   Under HAVE_LIBCHISE the lookup is chise_ds_decode_char on
   default_chise_data_source, keyed by the charset name.  Otherwise the
   database file named by char_attribute_system_db_file is opened
   read-only and queried with the printed code point; the database is
   closed on both visible paths.
   In either backend a second call that records Qnil is present for the
   path on which no character was obtained.
   NOTE(review): the return type, the conditions separating the two
   paths and the return statements are not visible in this listing. */
2318 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2320 #ifdef HAVE_LIBCHISE
2321 CHISE_Char_ID char_id;
2323 if ( open_chise_data_source_maybe () )
2327 = chise_ds_decode_char (default_chise_data_source,
2328 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2331 decoding_table_put_char (ccs, code_point, make_char (char_id));
2333 decoding_table_put_char (ccs, code_point, Qnil);
2335 /* chise_ccst_close (dt_ccs); */
2337 #else /* HAVE_LIBCHISE */
2340 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2343 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2347 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2354 decoding_table_put_char (ccs, code_point, ret);
2355 Fclose_database (db);
2359 decoding_table_put_char (ccs, code_point, Qnil);
2360 Fclose_database (db);
2363 #endif /* not HAVE_LIBCHISE */
2365 #endif /* HAVE_CHISE */
2366 #endif /* UTF2000 */
2369 /************************************************************************/
2370 /* Lisp primitives for working with characters */
2371 /************************************************************************/
/* Lisp primitive decode-char.
   Resolves CHARSET, then decodes the code point with DECODE_CHAR when
   DEFINED_ONLY is nil and with decode_defined_char otherwise; both
   receive a flag telling whether WITHOUT_INHERITANCE is non-nil.
   A negative decoding result is returned as nil, anything else as a
   Lisp character.
   NOTE(review): the extraction of the integer from CODE and the
   adjustment made when the graphic property of the charset is 1 are
   not visible in this listing. */
2374 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2375 Make a character from CHARSET and code-point CODE.
2376 If DEFINED_ONLY is non-nil, builtin character is not returned.
2377 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2378 If corresponding character is not found, nil is returned.
2380 (charset, code, defined_only, without_inheritance))
2384 charset = Fget_charset (charset);
2387 if (XCHARSET_GRAPHIC (charset) == 1)
2389 if (NILP (defined_only))
2390 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2392 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2393 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive decode-builtin-char.
   Resolves CHARSET.  For Vcharset_latin_viscii the code is first
   decoded with Fdecode_char and the resulting character is queried for
   its latin-viscii-lower and latin-viscii-upper attributes; CHARSET is
   switched to whichever of those two charsets the visible assignments
   select.  The code point is then passed to decode_builtin_char.  A
   non-negative result becomes a Lisp character; otherwise the function
   falls back to Fdecode_char with both optional flags nil.
   NOTE(review): the conditions around the attribute lookups and the
   adjustment for a graphic property of 1 are not visible here. */
2396 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2397 Make a builtin character from CHARSET and code-point CODE.
2403 charset = Fget_charset (charset);
2405 if (EQ (charset, Vcharset_latin_viscii))
2407 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2413 (ret = Fget_char_attribute (chr,
2414 Vcharset_latin_viscii_lower,
2417 charset = Vcharset_latin_viscii_lower;
2421 (ret = Fget_char_attribute (chr,
2422 Vcharset_latin_viscii_upper,
2425 charset = Vcharset_latin_viscii_upper;
2432 if (XCHARSET_GRAPHIC (charset) == 1)
2435 c = decode_builtin_char (charset, c);
2437 c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
/* Lisp primitive make-char.
   Resolves CHARSET and derives the permitted octet range from it:
   0..127 for ascii, 0..31 for control-1, 0..255 when the charset has
   256 characters per dimension, 33..126 for 94 and 32..127 otherwise.
   ARG1 outside the range signals args-out-of-range.  For a charset of
   dimension one, a second octet is reported as an error by the visible
   message and the character is built from ARG1 alone.  For other
   dimensions ARG2 is masked with 0x7f, range-checked the same way and
   combined with ARG1 through MAKE_CHAR.
   NOTE(review): the conversion of ARG1 to a1 and the argument type
   checks are not visible in this listing. */
2441 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2442 Make a character from CHARSET and octets ARG1 and ARG2.
2443 ARG2 is required only for characters from two-dimensional charsets.
2444 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2445 character s with caron.
2447 (charset, arg1, arg2))
2451 int lowlim, highlim;
2453 charset = Fget_charset (charset);
2454 cs = XCHARSET (charset);
2456 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2457 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2459 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2461 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2462 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2465 /* It is useful (and safe, according to Olivier Galibert) to strip
2466 the 8th bit off ARG1 and ARG2 because it allows programmers to
2467 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2468 Latin 2 code of the character. */
2476 if (a1 < lowlim || a1 > highlim)
2477 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2479 if (CHARSET_DIMENSION (cs) == 1)
2483 ("Charset is of dimension one; second octet must be nil", arg2);
2484 return make_char (MAKE_CHAR (charset, a1, 0));
2493 a2 = XINT (arg2) & 0x7f;
2495 if (a2 < lowlim || a2 > highlim)
2496 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2498 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive char-charset.  Coerces CHARACTER with
   CHECK_CHAR_COERCE_INT, finds its charset with CHAR_CHARSET and
   returns the name of that charset, not the charset object. */
2501 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2502 Return the character set of CHARACTER.
2506 CHECK_CHAR_COERCE_INT (character);
2508 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive char-octet.
   Splits CHARACTER into its charset and two octets with BREAKUP_CHAR.
   N equal to nil or 0 returns the first octet, N equal to 1 the
   second; any other N signals an Octet number must be 0 or 1 error. */
2511 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2512 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2513 N defaults to 0 if omitted.
2517 Lisp_Object charset;
2520 CHECK_CHAR_COERCE_INT (character);
2522 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2524 if (NILP (n) || EQ (n, Qzero))
2525 return make_int (octet0);
2526 else if (EQ (n, make_int (1)))
2527 return make_int (octet1);
2529 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive encode-char.
   Coerces CHARACTER, resolves CHARSET and asks charset_code_point for
   the code point, passing whether DEFINED_ONLY is non-nil.  A
   non-negative code point is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is not
   visible in this listing. */
2533 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2534 Return code-point of CHARACTER in specified CHARSET.
2536 (character, charset, defined_only))
2540 CHECK_CHAR_COERCE_INT (character);
2541 charset = Fget_charset (charset);
2542 code_point = charset_code_point (charset, XCHAR (character),
2543 !NILP (defined_only));
2544 if (code_point >= 0)
2545 return make_int (code_point);
/* Lisp primitive split-char.
   Builds a list whose head is the name of the charset of CHARACTER and
   whose tail holds its position codes.  Two implementations are
   visible.  The first obtains a code point with ENCODE_CHAR and conses
   its low 8 bits once per dimension of the charset before prepending
   the charset name.  The second uses BREAKUP_CHAR and returns a list
   of three elements for a charset of dimension 2 and of two elements
   otherwise.
   NOTE(review): presumably a preprocessor conditional that is not
   shown selects between the two; the loop body that advances
   code_point and dimension, and the final UNGCPRO and return, are not
   visible in this listing either. */
2551 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2552 Return list of charset and one or two position-codes of CHARACTER.
2556 /* This function can GC */
2557 struct gcpro gcpro1, gcpro2;
2558 Lisp_Object charset = Qnil;
2559 Lisp_Object rc = Qnil;
2567 GCPRO2 (charset, rc);
2568 CHECK_CHAR_COERCE_INT (character);
2571 code_point = ENCODE_CHAR (XCHAR (character), charset);
2572 dimension = XCHARSET_DIMENSION (charset);
2573 while (dimension > 0)
2575 rc = Fcons (make_int (code_point & 255), rc);
2579 rc = Fcons (XCHARSET_NAME (charset), rc);
2581 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2583 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2585 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2589 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2598 #ifdef ENABLE_COMPOSITE_CHARS
2599 /************************************************************************/
2600 /* composite character functions */
2601 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path below
   builds a new character in Vcharset_composite from the next free
   row and column, records it in both composite hash tables (character
   to string and string to character) and advances the column.  When
   the column reaches 128 it wraps to 32 and the row is incremented;
   a row of 128 or more signals No more composite chars available.
   NOTE(review): the test that separates a cache hit from the
   allocation path and the return statement are not visible here. */
2604 lookup_composite_char (Bufbyte *str, int len)
2606 Lisp_Object lispstr = make_string (str, len);
2607 Lisp_Object ch = Fgethash (lispstr,
2608 Vcomposite_char_string2char_hash_table,
2614 if (composite_char_row_next >= 128)
2615 signal_simple_error ("No more composite chars available", lispstr);
2616 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2617 composite_char_col_next);
2618 Fputhash (make_char (emch), lispstr,
2619 Vcomposite_char_char2string_hash_table);
2620 Fputhash (lispstr, make_char (emch),
2621 Vcomposite_char_string2char_hash_table);
2622 composite_char_col_next++;
2623 if (composite_char_col_next >= 128)
2625 composite_char_col_next = 32;
2626 composite_char_row_next++;
/* Look CH up in Vcomposite_char_char2string_hash_table.  The assert
   makes a missing entry a fatal error, so CH must have been produced
   by lookup_composite_char beforehand. */
2635 composite_char_string (Emchar ch)
2637 Lisp_Object str = Fgethash (make_char (ch),
2638 Vcomposite_char_char2string_hash_table,
2640 assert (!UNBOUNDP (str));
/* Lisp primitive make-composite-char (declared with xxDEFUN).
   Checks that STRING is a string and returns, as a Lisp character, the
   composite character that lookup_composite_char yields for its data
   and length. */
2644 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2645 Convert a string into a single composite character.
2646 The character is the result of overstriking all the characters in
2651 CHECK_STRING (string);
2652 return make_char (lookup_composite_char (XSTRING_DATA (string),
2653 XSTRING_LENGTH (string)));
/* Lisp primitive composite-char-string (declared with xxDEFUN).
   Signals a Must be composite char error unless the leading byte of
   the character is LEADING_BYTE_COMPOSITE, then returns the string
   recorded for it by composite_char_string.
   NOTE(review): the lines that check CH and extract emch from it are
   not visible in this listing. */
2656 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2657 Return a string of the characters comprising a composite character.
2665 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2666 signal_simple_error ("Must be composite char", ch);
2667 return composite_char_string (emch);
2669 #endif /* ENABLE_COMPOSITE_CHARS */
2672 /************************************************************************/
2673 /* initialization */
2674 /************************************************************************/
/* Symbol-level initialization for this file.  Registers the charset
   lrecord implementation, declares each Lisp primitive with DEFSUBR
   and interns the symbols used as property keys, direction values,
   conversion names and charset names.
   NOTE(review): some registrations are guarded by preprocessor
   conditionals whose opening lines are not visible in this listing
   (closing markers for HAVE_CHISE and an opening for
   ENABLE_COMPOSITE_CHARS are), so the set actually compiled depends on
   the build configuration. */
2677 syms_of_mule_charset (void)
2679 INIT_LRECORD_IMPLEMENTATION (charset);
2681 DEFSUBR (Fcharsetp);
2682 DEFSUBR (Ffind_charset);
2683 DEFSUBR (Fget_charset);
2684 DEFSUBR (Fcharset_list);
2685 DEFSUBR (Fcharset_name);
2686 DEFSUBR (Fmake_charset);
2687 DEFSUBR (Fmake_reverse_direction_charset);
2688 /* DEFSUBR (Freverse_direction_charset); */
2689 DEFSUBR (Fdefine_charset_alias);
2690 DEFSUBR (Fcharset_from_attributes);
2691 DEFSUBR (Fcharset_short_name);
2692 DEFSUBR (Fcharset_long_name);
2693 DEFSUBR (Fcharset_description);
2694 DEFSUBR (Fcharset_dimension);
2695 DEFSUBR (Fcharset_property);
2696 DEFSUBR (Fcharset_id);
2697 DEFSUBR (Fset_charset_ccl_program);
2698 DEFSUBR (Fset_charset_registry);
2701 DEFSUBR (Fcharset_mapping_table);
2702 DEFSUBR (Fset_charset_mapping_table);
2704 DEFSUBR (Fsave_charset_mapping_table);
2705 DEFSUBR (Freset_charset_mapping_table);
2706 #endif /* HAVE_CHISE */
2707 DEFSUBR (Fdecode_char);
2708 DEFSUBR (Fdecode_builtin_char);
2709 DEFSUBR (Fencode_char);
2712 DEFSUBR (Fmake_char);
2713 DEFSUBR (Fchar_charset);
2714 DEFSUBR (Fchar_octet);
2715 DEFSUBR (Fsplit_char);
2717 #ifdef ENABLE_COMPOSITE_CHARS
2718 DEFSUBR (Fmake_composite_char);
2719 DEFSUBR (Fcomposite_char_string);
2722 defsymbol (&Qcharsetp, "charsetp");
2723 defsymbol (&Qregistry, "registry");
2724 defsymbol (&Qfinal, "final");
2725 defsymbol (&Qgraphic, "graphic");
2726 defsymbol (&Qdirection, "direction");
2727 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2728 defsymbol (&Qshort_name, "short-name");
2729 defsymbol (&Qlong_name, "long-name");
2730 defsymbol (&Qiso_ir, "iso-ir");
2732 defsymbol (&Qmother, "mother");
2733 defsymbol (&Qmin_code, "min-code");
2734 defsymbol (&Qmax_code, "max-code");
2735 defsymbol (&Qcode_offset, "code-offset");
2736 defsymbol (&Qconversion, "conversion");
2737 defsymbol (&Q94x60, "94x60");
2738 defsymbol (&Q94x94x60, "94x94x60");
2739 defsymbol (&Qbig5_1, "big5-1");
2740 defsymbol (&Qbig5_2, "big5-2");
2743 defsymbol (&Ql2r, "l2r");
2744 defsymbol (&Qr2l, "r2l");
2746 /* Charsets, compatible with FSF 20.3
2747 Naming convention is Script-Charset[-Edition] */
2748 defsymbol (&Qascii, "ascii");
2749 defsymbol (&Qcontrol_1, "control-1");
2750 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2751 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2752 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2753 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2754 defsymbol (&Qthai_tis620, "thai-tis620");
2755 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2756 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2757 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2758 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2759 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2760 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2761 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2762 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2763 defsymbol (&Qmap_gb2312, "=gb2312");
2764 defsymbol (&Qmap_gb12345, "=gb12345");
2765 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2766 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2767 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2768 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2769 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2771 defsymbol (&Qsystem_char_id, "system-char-id");
2772 defsymbol (&Qmap_ucs, "=ucs");
2773 defsymbol (&Qucs, "ucs");
2774 defsymbol (&Qucs_bmp, "ucs-bmp");
2775 defsymbol (&Qucs_smp, "ucs-smp");
2776 defsymbol (&Qucs_sip, "ucs-sip");
2777 defsymbol (&Qlatin_viscii, "latin-viscii");
2778 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2779 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2780 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2781 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2782 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2783 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2784 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2785 defsymbol (&Qmap_big5, "=big5");
2786 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2788 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2789 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2791 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.
   Allocates the zero-filled charset_lookup structure chlook and
   registers it with the dumper, fills the leading-byte and attribute
   lookup tables with Qnil, seeds the next-free leading byte counters,
   and declares the Lisp variables leading-code-private-11 and
   default-coded-charset-priority-list.
   NOTE(review): the attribute table is initialized twice below, once
   as a two-dimensional and once as a three-dimensional array, and two
   styles of leading-byte counter are set; presumably these are
   alternatives under preprocessor conditionals that are not visible
   in this listing. */
2795 vars_of_mule_charset (void)
2802 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2803 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2805 /* Table of charsets indexed by leading byte. */
2806 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2807 chlook->charset_by_leading_byte[i] = Qnil;
2810 /* Table of charsets indexed by type/final-byte. */
2811 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2812 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2813 chlook->charset_by_attributes[i][j] = Qnil;
2815 /* Table of charsets indexed by type/final-byte/direction. */
2816 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2817 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2818 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2819 chlook->charset_by_attributes[i][j][k] = Qnil;
2823 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2825 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2826 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2830 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2831 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2832 Leading-code of private TYPE9N charset of column-width 1.
2834 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2838 Vdefault_coded_charset_priority_list = Qnil;
2839 DEFVAR_LISP ("default-coded-charset-priority-list",
2840 &Vdefault_coded_charset_priority_list /*
2841 Default order of preferred coded-character-sets.
/* complex_vars_of_mule_charset: create the charset hash table and the
   predefined charset objects.  Each registration protects a Vcharset_*
   variable with staticpro and builds the object with make_charset.
   NOTE(review): the opening brace (2848) and the rest of the "Predefined
   character sets" comment (2854-2856) fall in numbering gaps of this
   listing, and the system-char-id call below shows three build_string
   arguments where most other registrations show four (gap at 2864) --
   confirm against the complete file. */
2847 complex_vars_of_mule_charset (void)
/* Non-weak, EQ-tested hash table; 50 is the first argument passed to
   make_lisp_hash_table. */
2849 staticpro (&Vcharset_hash_table);
2850 Vcharset_hash_table =
2851 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2853 /* Predefined character sets. We store them into variables for
2857 staticpro (&Vcharset_system_char_id);
2858 Vcharset_system_char_id =
2859 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
2860 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2861 build_string ("SCID"),
2862 build_string ("CHAR-ID"),
2863 build_string ("System char-id"),
2865 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* UCS: protect Vcharset_ucs and store in it the charset created for the
   symbol Qmap_ucs ("=ucs"), described as "ISO/IEC 10646".
   NOTE(review): only three build_string arguments are visible where most
   other registrations pass four (numbering gap at 2873) -- confirm the
   missing argument against the complete file. */
2866 staticpro (&Vcharset_ucs);
2867 Vcharset_ucs =
2868 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2869 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2870 build_string ("UCS"),
2871 build_string ("UCS"),
2872 build_string ("ISO/IEC 10646"),
2874 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* UCS Basic Multilingual Plane: protect Vcharset_ucs_bmp and store in it
   the charset created for the symbol ucs-bmp.  The fourth string is a
   regexp (apparently matched against font registries -- confirm against
   make_charset); 0 and 0xFFFF follow the Qnil after it. */
2875 staticpro (&Vcharset_ucs_bmp);
2876 Vcharset_ucs_bmp =
2877 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2878 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2879 build_string ("BMP"),
2880 build_string ("UCS-BMP"),
2881 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2882 build_string
2883 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2884 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* UCS Supplementary Multilingual Plane: protect Vcharset_ucs_smp and store
   in it the charset created for the symbol ucs-smp.  MIN_CHAR_SMP is
   passed twice and MAX_CHAR_SMP once after the Qnil that follows the
   strings. */
2885 staticpro (&Vcharset_ucs_smp);
2886 Vcharset_ucs_smp =
2887 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2888 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2889 build_string ("SMP"),
2890 build_string ("UCS-SMP"),
2891 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2892 build_string ("UCS00-1"),
2893 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2894 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
/* UCS Supplementary Ideographic Plane: protect Vcharset_ucs_sip and store
   in it the charset created for the symbol ucs-sip.  MIN_CHAR_SIP is
   passed twice and MAX_CHAR_SIP once after the Qnil that follows the
   strings. */
2895 staticpro (&Vcharset_ucs_sip);
2896 Vcharset_ucs_sip =
2897 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2898 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2899 build_string ("SIP"),
2900 build_string ("UCS-SIP"),
2901 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2902 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2903 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2904 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
/* Define the Thai and halfwidth-katakana character range macros as 0; the
   Hebrew pair is left commented out.
   NOTE(review): the indented `# define' form and the numbering gaps at
   2905 and 2912 suggest these sit inside a preprocessor conditional whose
   directives are not visible in this listing -- confirm. */
2906 # define MIN_CHAR_THAI 0
2907 # define MAX_CHAR_THAI 0
2908 /* # define MIN_CHAR_HEBREW 0 */
2909 /* # define MAX_CHAR_HEBREW 0 */
2910 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2911 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* ASCII: protect Vcharset_ascii and store in it the charset created for
   the symbol Qascii with character argument 'B'.  0 and 0x7F follow the
   Qnil after the registry regexp. */
2913 staticpro (&Vcharset_ascii);
2914 Vcharset_ascii =
2915 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2916 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2917 build_string ("ASCII"),
2918 build_string ("ASCII"),
2919 build_string ("ASCII (ISO646 IRV)"),
2920 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2921 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Control-1: protect Vcharset_control_1 and store in it the charset
   created for the symbol Qcontrol_1.
   NOTE(review): only three build_string arguments are visible (numbering
   gap at 2929), and the description says "128-191" while the values
   passed after Qnil are 0x80 and 0x9F (128 and 159) -- confirm both
   against the complete file. */
2922 staticpro (&Vcharset_control_1);
2923 Vcharset_control_1 =
2924 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2925 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2926 build_string ("C1"),
2927 build_string ("Control characters"),
2928 build_string ("Control characters 128-191"),
2930 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
/* ISO 8859 Latin-1 .. Latin-4.  Each registration protects its Vcharset_*
   variable with staticpro and stores the make_charset result in it.  The
   four calls differ only in the leading-byte constant, the symbol, the
   character argument ('A' .. 'D') and the name/registry strings. */
/* Latin-1, character argument 'A'. */
2931 staticpro (&Vcharset_latin_iso8859_1);
2932 Vcharset_latin_iso8859_1 =
2933 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2934 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2935 build_string ("Latin-1"),
2936 build_string ("ISO8859-1 (Latin-1)"),
2937 build_string ("ISO8859-1 (Latin-1)"),
2938 build_string ("iso8859-1"),
2939 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Latin-2, character argument 'B'. */
2940 staticpro (&Vcharset_latin_iso8859_2);
2941 Vcharset_latin_iso8859_2 =
2942 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2943 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2944 build_string ("Latin-2"),
2945 build_string ("ISO8859-2 (Latin-2)"),
2946 build_string ("ISO8859-2 (Latin-2)"),
2947 build_string ("iso8859-2"),
2948 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Latin-3, character argument 'C'. */
2949 staticpro (&Vcharset_latin_iso8859_3);
2950 Vcharset_latin_iso8859_3 =
2951 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2952 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2953 build_string ("Latin-3"),
2954 build_string ("ISO8859-3 (Latin-3)"),
2955 build_string ("ISO8859-3 (Latin-3)"),
2956 build_string ("iso8859-3"),
2957 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Latin-4, character argument 'D'. */
2958 staticpro (&Vcharset_latin_iso8859_4);
2959 Vcharset_latin_iso8859_4 =
2960 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2961 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2962 build_string ("Latin-4"),
2963 build_string ("ISO8859-4 (Latin-4)"),
2964 build_string ("ISO8859-4 (Latin-4)"),
2965 build_string ("iso8859-4"),
2966 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Thai TIS620, Greek ISO8859-7 and Arabic ISO8859-6.  Each registration
   protects its Vcharset_* variable and stores the make_charset result in
   it.  The Arabic call passes CHARSET_RIGHT_TO_LEFT; the other two pass
   CHARSET_LEFT_TO_RIGHT. */
/* Thai, character argument 'T'. */
2967 staticpro (&Vcharset_thai_tis620);
2968 Vcharset_thai_tis620 =
2969 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2970 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2971 build_string ("TIS620"),
2972 build_string ("TIS620 (Thai)"),
2973 build_string ("TIS620.2529 (Thai)"),
2974 build_string ("tis620"),
2975 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Greek, character argument 'F'. */
2976 staticpro (&Vcharset_greek_iso8859_7);
2977 Vcharset_greek_iso8859_7 =
2978 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2979 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2980 build_string ("ISO8859-7"),
2981 build_string ("ISO8859-7 (Greek)"),
2982 build_string ("ISO8859-7 (Greek)"),
2983 build_string ("iso8859-7"),
2984 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Arabic, character argument 'G', right-to-left. */
2985 staticpro (&Vcharset_arabic_iso8859_6);
2986 Vcharset_arabic_iso8859_6 =
2987 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2988 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2989 build_string ("ISO8859-6"),
2990 build_string ("ISO8859-6 (Arabic)"),
2991 build_string ("ISO8859-6 (Arabic)"),
2992 build_string ("iso8859-6"),
2993 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Hebrew ISO8859-8: protect Vcharset_hebrew_iso8859_8 and store in it the
   charset created with character argument 'H' and CHARSET_RIGHT_TO_LEFT.
   Literal 0s are passed where the inline comments name MIN_CHAR_HEBREW
   and MAX_CHAR_HEBREW.
   NOTE(review): one argument fewer follows the registry string than in
   the neighbouring registrations (numbering gap at 3002; the others pass
   Qnil at that position) -- confirm against the complete file. */
2994 staticpro (&Vcharset_hebrew_iso8859_8);
2995 Vcharset_hebrew_iso8859_8 =
2996 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2997 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2998 build_string ("ISO8859-8"),
2999 build_string ("ISO8859-8 (Hebrew)"),
3000 build_string ("ISO8859-8 (Hebrew)"),
3001 build_string ("iso8859-8"),
3003 0 /* MIN_CHAR_HEBREW */,
3004 0 /* MAX_CHAR_HEBREW */, 0, 32,
3005 Qnil, CONVERSION_IDENTICAL);
/* JIS X0201 Kana and Roman, Cyrillic ISO8859-5 and Latin-5 (ISO8859-9).
   Each registration protects its Vcharset_* variable and stores the
   make_charset result in it.  The two JIS X0201 calls pass 94 and a
   trailing 33; the two ISO 8859 calls pass 96 and a trailing 32.  Both
   JIS X0201 calls use the same registry regexp. */
/* JIS X0201 Kana, character argument 'I'. */
3006 staticpro (&Vcharset_katakana_jisx0201);
3007 Vcharset_katakana_jisx0201 =
3008 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3009 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3010 build_string ("JISX0201 Kana"),
3011 build_string ("JISX0201.1976 (Japanese Kana)"),
3012 build_string ("JISX0201.1976 Japanese Kana"),
3013 build_string ("jisx0201\\.1976"),
3014 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* JIS X0201 Roman, character argument 'J'. */
3015 staticpro (&Vcharset_latin_jisx0201);
3016 Vcharset_latin_jisx0201 =
3017 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3018 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3019 build_string ("JISX0201 Roman"),
3020 build_string ("JISX0201.1976 (Japanese Roman)"),
3021 build_string ("JISX0201.1976 Japanese Roman"),
3022 build_string ("jisx0201\\.1976"),
3023 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Cyrillic, character argument 'L'. */
3024 staticpro (&Vcharset_cyrillic_iso8859_5);
3025 Vcharset_cyrillic_iso8859_5 =
3026 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3027 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3028 build_string ("ISO8859-5"),
3029 build_string ("ISO8859-5 (Cyrillic)"),
3030 build_string ("ISO8859-5 (Cyrillic)"),
3031 build_string ("iso8859-5"),
3032 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Latin-5, character argument 'M'. */
3033 staticpro (&Vcharset_latin_iso8859_9);
3034 Vcharset_latin_iso8859_9 =
3035 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3036 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3037 build_string ("Latin-5"),
3038 build_string ("ISO8859-9 (Latin-5)"),
3039 build_string ("ISO8859-9 (Latin-5)"),
3040 build_string ("iso8859-9"),
3041 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* JIS X0208 common part: protect Vcharset_jis_x0208 and store in it the
   charset created for the symbol Qmap_jis_x0208, using the
   MIN/MAX_CHAR_JIS_X0208_1990 constants and CONVERSION_94x94.  This
   object is later passed as an argument when the jisx0208-1990 charset
   is created.
   NOTE(review): one argument fewer follows the registry string than in
   the neighbouring registrations (numbering gap at 3052), and gaps at
   3042 and 3056 suggest a surrounding preprocessor conditional --
   confirm against the complete file. */
3043 staticpro (&Vcharset_jis_x0208);
3044 Vcharset_jis_x0208 =
3045 make_charset (LEADING_BYTE_JIS_X0208,
3046 Qmap_jis_x0208, 94, 2,
3047 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3048 build_string ("JIS X0208"),
3049 build_string ("JIS X0208 Common"),
3050 build_string ("JIS X0208 Common part"),
3051 build_string ("jisx0208\\.1990"),
3053 MIN_CHAR_JIS_X0208_1990,
3054 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3055 Qnil, CONVERSION_94x94);
/* JIS X0208:1978 ("old JIS"): protect Vcharset_japanese_jisx0208_1978 and
   store in it the charset created for the symbol Qmap_jis_x0208_1978 with
   character argument '@'.
   NOTE(review): the call is incomplete in this listing.  The token before
   the parenthesized description string is missing (gap at 3064; the other
   registrations use build_string there), and every argument between the
   registry regexp and CONVERSION_IDENTICAL falls in the gap 3067-3072 --
   confirm against the complete file. */
3057 staticpro (&Vcharset_japanese_jisx0208_1978);
3058 Vcharset_japanese_jisx0208_1978 =
3059 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3060 Qmap_jis_x0208_1978, 94, 2,
3061 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3062 build_string ("JIS X0208:1978"),
3063 build_string ("JIS X0208:1978 (Japanese)"),
3065 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3066 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3073 CONVERSION_IDENTICAL);
/* GB2312 (simplified Chinese): protect Vcharset_chinese_gb2312 and store
   in it the charset created for the symbol Qmap_gb2312 with character
   argument 'A'. */
3074 staticpro (&Vcharset_chinese_gb2312);
3075 Vcharset_chinese_gb2312 =
3076 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3077 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3078 build_string ("GB2312"),
3079 build_string ("GB2312"),
3080 build_string ("GB2312 Chinese simplified"),
3081 build_string ("gb2312"),
3082 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* GB 12345-1990: protect Vcharset_chinese_gb12345 and store in it the
   charset created for the symbol Qmap_gb12345.  Unlike GB2312, 0 is
   passed in the character-argument position. */
3083 staticpro (&Vcharset_chinese_gb12345);
3084 Vcharset_chinese_gb12345 =
3085 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3086 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3087 build_string ("G1"),
3088 build_string ("GB 12345"),
3089 build_string ("GB 12345-1990"),
3090 build_string ("GB12345\\(\\.1990\\)?-0"),
3091 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* JIS X0208:1983: protect Vcharset_japanese_jisx0208 and store in it the
   charset created for the symbol Qmap_jis_x0208_1983 with character
   argument 'B'.
   NOTE(review): every argument between the registry regexp and
   CONVERSION_IDENTICAL falls in the numbering gap 3100-3105 of this
   listing -- confirm against the complete file. */
3092 staticpro (&Vcharset_japanese_jisx0208);
3093 Vcharset_japanese_jisx0208 =
3094 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3095 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3096 build_string ("JISX0208"),
3097 build_string ("JIS X0208:1983 (Japanese)"),
3098 build_string ("JIS X0208:1983 Japanese Kanji"),
3099 build_string ("jisx0208\\.1983"),
3106 CONVERSION_IDENTICAL);
/* JIS X0208:1990: protect Vcharset_japanese_jisx0208_1990 and store in it
   the charset created for the symbol Qmap_jis_x0208_1990.  The call passes
   literal 0x2121 and 0x7426, then 0 and 33, then Vcharset_jis_x0208 and
   CONVERSION_IDENTICAL; the inline comments record the constants (Qnil,
   CONVERSION_94x94, MIN/MAX_CHAR_JIS_X0208_1990) these literals stand in
   for.
   NOTE(review): one argument fewer follows the registry string than in
   the neighbouring registrations (numbering gap at 3117) -- confirm
   against the complete file. */
3108 staticpro (&Vcharset_japanese_jisx0208_1990);
3109 Vcharset_japanese_jisx0208_1990 =
3110 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3111 Qmap_jis_x0208_1990, 94, 2,
3112 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3113 build_string ("JISX0208-1990"),
3114 build_string ("JIS X0208:1990 (Japanese)"),
3115 build_string ("JIS X0208:1990 Japanese Kanji"),
3116 build_string ("jisx0208\\.1990"),
3118 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3119 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3120 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3121 Vcharset_jis_x0208 /* Qnil */,
3122 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* KSC5601 (Korean Hangul and Hanja): protect Vcharset_korean_ksc5601 and
   store in it the charset created for the symbol Qmap_ks_x1001 with
   character argument 'C'. */
3124 staticpro (&Vcharset_korean_ksc5601);
3125 Vcharset_korean_ksc5601 =
3126 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3127 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3128 build_string ("KSC5601"),
3129 build_string ("KSC5601 (Korean)"),
3130 build_string ("KSC5601 Korean Hangul and Hanja"),
3131 build_string ("ksc5601"),
3132 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* JIS X0212 (Japanese supplement): protect Vcharset_japanese_jisx0212 and
   store in it the charset created for the symbol Qmap_jis_x0212 with
   character argument 'D'. */
3133 staticpro (&Vcharset_japanese_jisx0212);
3134 Vcharset_japanese_jisx0212 =
3135 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3136 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3137 build_string ("JISX0212"),
3138 build_string ("JISX0212 (Japanese)"),
3139 build_string ("JISX0212 Japanese Supplement"),
3140 build_string ("jisx0212"),
3141 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* CHINESE_CNS_PLANE_RE(n) builds, by string-literal concatenation, a
   registry regexp for CNS 11643 plane N; N must be a string literal. */
3143 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
/* CNS 11643 plane 1: protect Vcharset_chinese_cns11643_1 and store in it
   the charset created for the symbol Qmap_cns11643_1 with character
   argument 'G'. */
3144 staticpro (&Vcharset_chinese_cns11643_1);
3145 Vcharset_chinese_cns11643_1 =
3146 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3147 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3148 build_string ("CNS11643-1"),
3149 build_string ("CNS11643-1 (Chinese traditional)"),
3150 build_string
3151 ("CNS 11643 Plane 1 Chinese traditional"),
3152 build_string (CHINESE_CNS_PLANE_RE("1")),
3153 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* CNS 11643 plane 2: protect Vcharset_chinese_cns11643_2 and store in it
   the charset created for the symbol Qmap_cns11643_2 with character
   argument 'H'.  The registry regexp comes from CHINESE_CNS_PLANE_RE. */
3154 staticpro (&Vcharset_chinese_cns11643_2);
3155 Vcharset_chinese_cns11643_2 =
3156 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3157 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3158 build_string ("CNS11643-2"),
3159 build_string ("CNS11643-2 (Chinese traditional)"),
3160 build_string
3161 ("CNS 11643 Plane 2 Chinese traditional"),
3162 build_string (CHINESE_CNS_PLANE_RE("2")),
3163 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Vietnamese TCVN 5712 (VSCII-2): protect Vcharset_latin_tcvn5712 and
   store in it the charset created for the symbol Qlatin_tcvn5712 with
   character argument 'Z'.  The year in the description string agrees with
   the ".1993" accepted by the registry regexp. */
3165 staticpro (&Vcharset_latin_tcvn5712);
3166 Vcharset_latin_tcvn5712 =
3167 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3168 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3169 build_string ("TCVN 5712"),
3170 build_string ("TCVN 5712 (VSCII-2)"),
3171 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
3172 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3173 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Vietnamese VISCII charsets.  Each registration protects its Vcharset_*
   variable and stores the make_charset result in it.  The lower and upper
   halves pass 96 with character arguments '1' and '2' and a trailing 32;
   the full VISCII 1.1 charset passes 256, 0 in the character-argument
   position and a trailing 0. */
/* VISCII lower, character argument '1'. */
3174 staticpro (&Vcharset_latin_viscii_lower);
3175 Vcharset_latin_viscii_lower =
3176 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3177 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3178 build_string ("VISCII lower"),
3179 build_string ("VISCII lower (Vietnamese)"),
3180 build_string ("VISCII lower (Vietnamese)"),
3181 build_string ("MULEVISCII-LOWER"),
3182 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* VISCII upper, character argument '2'. */
3183 staticpro (&Vcharset_latin_viscii_upper);
3184 Vcharset_latin_viscii_upper =
3185 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3186 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3187 build_string ("VISCII upper"),
3188 build_string ("VISCII upper (Vietnamese)"),
3189 build_string ("VISCII upper (Vietnamese)"),
3190 build_string ("MULEVISCII-UPPER"),
3191 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Full VISCII 1.1. */
3192 staticpro (&Vcharset_latin_viscii);
3193 Vcharset_latin_viscii =
3194 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3195 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3196 build_string ("VISCII"),
3197 build_string ("VISCII 1.1 (Vietnamese)"),
3198 build_string ("VISCII 1.1 (Vietnamese)"),
3199 build_string ("VISCII1\\.1"),
3200 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Big5 (traditional Chinese): protect Vcharset_chinese_big5 and store in
   it the charset created for the symbol Qmap_big5, using the
   MIN/MAX_CHAR_BIG5_CDP constants.  This object is later passed as an
   argument when the big5-1 and big5-2 charsets are created.
   NOTE(review): one argument fewer follows the registry string than in
   the neighbouring registrations (numbering gap at 3209) -- confirm
   against the complete file. */
3201 staticpro (&Vcharset_chinese_big5);
3202 Vcharset_chinese_big5 =
3203 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3204 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3205 build_string ("Big5"),
3206 build_string ("Big5"),
3207 build_string ("Big5 Chinese traditional"),
3208 build_string ("big5-0"),
3210 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3211 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* Ethiopic (UCS): protect Vcharset_ethiopic_ucs and store in it the
   charset created for the symbol Qethiopic_ucs.  0x1200 and 0x137F are
   passed after the Qnil that follows the registry string. */
3213 staticpro (&Vcharset_ethiopic_ucs);
3214 Vcharset_ethiopic_ucs =
3215 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3216 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3217 build_string ("Ethiopic (UCS)"),
3218 build_string ("Ethiopic (UCS)"),
3219 build_string ("Ethiopic of UCS"),
3220 build_string ("Ethiopic-Unicode"),
3221 Qnil, 0x1200, 0x137F, 0, 0,
3222 Qnil, CONVERSION_IDENTICAL);
/* Big5 Level-1: protect Vcharset_chinese_big5_1 and store in it the
   charset created for the symbol Qchinese_big5_1 with character argument
   '0'.  Vcharset_chinese_big5 and CONVERSION_BIG5_1 are passed in place
   of the Qnil / CONVERSION_IDENTICAL pair noted in the inline comment. */
3224 staticpro (&Vcharset_chinese_big5_1);
3225 Vcharset_chinese_big5_1 =
3226 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3227 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3228 build_string ("Big5"),
3229 build_string ("Big5 (Level-1)"),
3230 build_string
3231 ("Big5 Level-1 Chinese traditional"),
3232 build_string ("big5"),
3233 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3234 Vcharset_chinese_big5, CONVERSION_BIG5_1);
/* Big5 Level-2: protect Vcharset_chinese_big5_2 and store in it the
   charset created for the symbol Qchinese_big5_2 with character argument
   '1'.  Vcharset_chinese_big5 and CONVERSION_BIG5_2 are passed in place
   of the Qnil / CONVERSION_IDENTICAL pair noted in the inline comment. */
3235 staticpro (&Vcharset_chinese_big5_2);
3236 Vcharset_chinese_big5_2 =
3237 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3238 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3239 build_string ("Big5"),
3240 build_string ("Big5 (Level-2)"),
3241 build_string
3242 ("Big5 Level-2 Chinese traditional"),
3243 build_string ("big5"),
3244 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3245 Vcharset_chinese_big5, CONVERSION_BIG5_2);
/* Composite-character support, compiled only when ENABLE_COMPOSITE_CHARS
   is defined: create the composite charset, start the row/column
   allocation counters at 32, and create and staticpro the two hash tables
   (an EQUAL-tested string2char table and an EQ-tested char2string table,
   both non-weak, each with 500 as the first argument).
   NOTE(review): the make_charset call is incomplete in this listing --
   everything after the third build_string falls in the numbering gap
   3258-3259 -- confirm against the complete file. */
3247 #ifdef ENABLE_COMPOSITE_CHARS
3248 /* #### For simplicity, we put composite chars into a 96x96 charset.
3249 This is going to lead to problems because you can run out of
3250 room, esp. as we don't yet recycle numbers. */
3251 staticpro (&Vcharset_composite);
3252 Vcharset_composite =
3253 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3254 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3255 build_string ("Composite"),
3256 build_string ("Composite characters"),
3257 build_string ("Composite characters"),
3260 /* #### not dumped properly */
3261 composite_char_row_next = 32;
3262 composite_char_col_next = 32;
3264 Vcomposite_char_string2char_hash_table =
3265 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3266 Vcomposite_char_char2string_hash_table =
3267 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3268 staticpro (&Vcomposite_char_string2char_hash_table);
3269 staticpro (&Vcomposite_char_char2string_hash_table);
3270 #endif /* ENABLE_COMPOSITE_CHARS */