1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_ucs;
71 Lisp_Object Vcharset_ucs_bmp;
72 Lisp_Object Vcharset_ucs_smp;
73 Lisp_Object Vcharset_ucs_sip;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
163 if (!NILP (c) && !CHARP (c))
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
182 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
214 code_point = (code_point << 8) | j;
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
317 Qvietnamese_viscii_lower,
318 Qvietnamese_viscii_upper,
328 Lisp_Object Ql2r, Qr2l;
330 Lisp_Object Vcharset_hash_table;
332 /* Composite characters are characters constructed by overstriking two
333 or more regular characters.
335 1) The old Mule implementation involves storing composite characters
336 in a buffer as a tag followed by all of the actual characters
337 used to make up the composite character. I think this is a bad
338 idea; it greatly complicates code that wants to handle strings
339 one character at a time because it has to deal with the possibility
340 of great big ungainly characters. It's much more reasonable to
341 simply store an index into a table of composite characters.
343 2) The current implementation only allows for 16,384 separate
344 composite characters over the lifetime of the XEmacs process.
345 This could become a potential problem if the user
346 edited lots of different files that use composite characters.
347 Due to FSF bogosity, increasing the number of allowable
348 composite characters under Mule would decrease the number
349 of possible faces that can exist. Mule already has shrunk
350 this to 2048, and further shrinkage would become uncomfortable.
351 No such problems exist in XEmacs.
353 Composite characters could be represented as 0x80 C1 C2 C3,
354 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
355 for slightly under 2^20 (one million) composite characters
356 over the XEmacs process lifetime, and you only need to
357 increase the size of a Mule character from 19 to 21 bits.
358 Or you could use 0x80 C1 C2 C3 C4, allowing for about
359 85 million (slightly over 2^26) composite characters. */
362 /************************************************************************/
363 /* Basic Emchar functions */
364 /************************************************************************/
366 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
367 string in STR. Returns the number of bytes stored.
368 Do not call this directly. Use the macro set_charptr_emchar() instead.
372 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
387 else if ( c <= 0x7ff )
389 *p++ = (c >> 6) | 0xc0;
390 *p++ = (c & 0x3f) | 0x80;
392 else if ( c <= 0xffff )
394 *p++ = (c >> 12) | 0xe0;
395 *p++ = ((c >> 6) & 0x3f) | 0x80;
396 *p++ = (c & 0x3f) | 0x80;
398 else if ( c <= 0x1fffff )
400 *p++ = (c >> 18) | 0xf0;
401 *p++ = ((c >> 12) & 0x3f) | 0x80;
402 *p++ = ((c >> 6) & 0x3f) | 0x80;
403 *p++ = (c & 0x3f) | 0x80;
405 else if ( c <= 0x3ffffff )
407 *p++ = (c >> 24) | 0xf8;
408 *p++ = ((c >> 18) & 0x3f) | 0x80;
409 *p++ = ((c >> 12) & 0x3f) | 0x80;
410 *p++ = ((c >> 6) & 0x3f) | 0x80;
411 *p++ = (c & 0x3f) | 0x80;
415 *p++ = (c >> 30) | 0xfc;
416 *p++ = ((c >> 24) & 0x3f) | 0x80;
417 *p++ = ((c >> 18) & 0x3f) | 0x80;
418 *p++ = ((c >> 12) & 0x3f) | 0x80;
419 *p++ = ((c >> 6) & 0x3f) | 0x80;
420 *p++ = (c & 0x3f) | 0x80;
423 BREAKUP_CHAR (c, charset, c1, c2);
424 lb = CHAR_LEADING_BYTE (c);
425 if (LEADING_BYTE_PRIVATE_P (lb))
426 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
428 if (EQ (charset, Vcharset_control_1))
437 /* Return the first character from a Mule-encoded string in STR,
438 assuming it's non-ASCII. Do not call this directly.
439 Use the macro charptr_emchar() instead. */
442 non_ascii_charptr_emchar (const Bufbyte *str)
455 else if ( b >= 0xf8 )
460 else if ( b >= 0xf0 )
465 else if ( b >= 0xe0 )
470 else if ( b >= 0xc0 )
480 for( ; len > 0; len-- )
483 ch = ( ch << 6 ) | ( b & 0x3f );
487 Bufbyte i0 = *str, i1, i2 = 0;
490 if (i0 == LEADING_BYTE_CONTROL_1)
491 return (Emchar) (*++str - 0x20);
493 if (LEADING_BYTE_PREFIX_P (i0))
498 charset = CHARSET_BY_LEADING_BYTE (i0);
499 if (XCHARSET_DIMENSION (charset) == 2)
502 return MAKE_CHAR (charset, i1, i2);
506 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
507 Do not call this directly. Use the macro valid_char_p() instead. */
511 non_ascii_valid_char_p (Emchar ch)
515 /* Must have only lowest 19 bits set */
519 f1 = CHAR_FIELD1 (ch);
520 f2 = CHAR_FIELD2 (ch);
521 f3 = CHAR_FIELD3 (ch);
527 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
528 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
529 f2 > MAX_CHAR_FIELD2_PRIVATE)
534 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
535 f2 <= MAX_CHAR_FIELD2_PRIVATE))
539 NOTE: This takes advantage of the fact that
540 FIELD2_TO_OFFICIAL_LEADING_BYTE and
541 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
543 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
544 if (EQ (charset, Qnil))
546 return (XCHARSET_CHARS (charset) == 96);
552 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
553 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
554 f1 > MAX_CHAR_FIELD1_PRIVATE)
556 if (f2 < 0x20 || f3 < 0x20)
559 #ifdef ENABLE_COMPOSITE_CHARS
560 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
562 if (UNBOUNDP (Fgethash (make_int (ch),
563 Vcomposite_char_char2string_hash_table,
568 #endif /* ENABLE_COMPOSITE_CHARS */
570 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
571 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
574 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
581 if (EQ (charset, Qnil))
583 return (XCHARSET_CHARS (charset) == 96);
589 /************************************************************************/
590 /* Basic string functions */
591 /************************************************************************/
593 /* Copy the character pointed to by SRC into DST. Do not call this
594 directly. Use the macro charptr_copy_char() instead.
595 Return the number of bytes copied. */
598 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
600 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
602 for (i = bytes; i; i--, dst++, src++)
608 /************************************************************************/
609 /* streams of Emchars */
610 /************************************************************************/
612 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
613 The functions below are not meant to be called directly; use
614 the macros in insdel.h. */
617 Lstream_get_emchar_1 (Lstream *stream, int ch)
619 Bufbyte str[MAX_EMCHAR_LEN];
620 Bufbyte *strptr = str;
623 str[0] = (Bufbyte) ch;
625 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
627 int c = Lstream_getc (stream);
628 bufpos_checking_assert (c >= 0);
629 *++strptr = (Bufbyte) c;
631 return charptr_emchar (str);
635 Lstream_fput_emchar (Lstream *stream, Emchar ch)
637 Bufbyte str[MAX_EMCHAR_LEN];
638 Bytecount len = set_charptr_emchar (str, ch);
639 return Lstream_write (stream, str, len);
643 Lstream_funget_emchar (Lstream *stream, Emchar ch)
645 Bufbyte str[MAX_EMCHAR_LEN];
646 Bytecount len = set_charptr_emchar (str, ch);
647 Lstream_unread (stream, str, len);
651 /************************************************************************/
653 /************************************************************************/
656 mark_charset (Lisp_Object obj)
658 Lisp_Charset *cs = XCHARSET (obj);
660 mark_object (cs->short_name);
661 mark_object (cs->long_name);
662 mark_object (cs->doc_string);
663 mark_object (cs->registry);
664 mark_object (cs->ccl_program);
666 mark_object (cs->decoding_table);
667 mark_object (cs->mother);
673 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
675 Lisp_Charset *cs = XCHARSET (obj);
679 error ("printing unreadable object #<charset %s 0x%x>",
680 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
683 write_c_string ("#<charset ", printcharfun);
684 print_internal (CHARSET_NAME (cs), printcharfun, 0);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
691 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
693 CHARSET_DIMENSION (cs),
694 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
695 CHARSET_COLUMNS (cs),
696 CHARSET_GRAPHIC (cs),
698 write_c_string (buf, printcharfun);
699 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
700 sprintf (buf, " 0x%x>", cs->header.uid);
701 write_c_string (buf, printcharfun);
704 static const struct lrecord_description charset_description[] = {
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
719 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
720 mark_charset, print_charset, 0, 0, 0,
724 /* Make a new charset. */
725 /* #### SJT Should generic properties be allowed? */
727 make_charset (Charset_ID id, Lisp_Object name,
728 unsigned short chars, unsigned char dimension,
729 unsigned char columns, unsigned char graphic,
730 Bufbyte final, unsigned char direction, Lisp_Object short_name,
731 Lisp_Object long_name, Lisp_Object doc,
733 Lisp_Object decoding_table,
734 Emchar min_code, Emchar max_code,
735 Emchar code_offset, unsigned char byte_offset,
736 Lisp_Object mother, unsigned char conversion)
739 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
743 XSETCHARSET (obj, cs);
745 CHARSET_ID (cs) = id;
746 CHARSET_NAME (cs) = name;
747 CHARSET_SHORT_NAME (cs) = short_name;
748 CHARSET_LONG_NAME (cs) = long_name;
749 CHARSET_CHARS (cs) = chars;
750 CHARSET_DIMENSION (cs) = dimension;
751 CHARSET_DIRECTION (cs) = direction;
752 CHARSET_COLUMNS (cs) = columns;
753 CHARSET_GRAPHIC (cs) = graphic;
754 CHARSET_FINAL (cs) = final;
755 CHARSET_DOC_STRING (cs) = doc;
756 CHARSET_REGISTRY (cs) = reg;
757 CHARSET_CCL_PROGRAM (cs) = Qnil;
758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
760 CHARSET_DECODING_TABLE(cs) = Qunbound;
761 CHARSET_MIN_CODE (cs) = min_code;
762 CHARSET_MAX_CODE (cs) = max_code;
763 CHARSET_CODE_OFFSET (cs) = code_offset;
764 CHARSET_BYTE_OFFSET (cs) = byte_offset;
765 CHARSET_MOTHER (cs) = mother;
766 CHARSET_CONVERSION (cs) = conversion;
770 if (id == LEADING_BYTE_ASCII)
771 CHARSET_REP_BYTES (cs) = 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
780 /* some charsets do not have final characters. This includes
781 ASCII, Control-1, Composite, and the two faux private
783 unsigned char iso2022_type
784 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
786 if (code_offset == 0)
788 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
789 chlook->charset_by_attributes[iso2022_type][final] = obj;
793 (chlook->charset_by_attributes[iso2022_type][final][direction]));
794 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
798 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
799 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
801 /* Some charsets are "faux" and don't have names or really exist at
802 all except in the leading-byte table. */
804 Fputhash (name, obj, Vcharset_hash_table);
809 get_unallocated_leading_byte (int dimension)
814 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
817 lb = chlook->next_allocated_leading_byte++;
821 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
824 lb = chlook->next_allocated_1_byte_leading_byte++;
828 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
831 lb = chlook->next_allocated_2_byte_leading_byte++;
837 ("No more character sets free for this dimension",
838 make_int (dimension));
844 /* Number of Big5 characters which have the same code in 1st byte. */
846 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
849 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
851 int dim = XCHARSET_DIMENSION (ccs);
852 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
860 = get_ccs_octet_table (decoding_table, ccs,
861 (code_point >> (dim * 8)) & 255);
863 if (CHARP (decoding_table))
864 return XCHAR (decoding_table);
866 if (EQ (decoding_table, Qunloaded))
868 char_id = load_char_decoding_entry_maybe (ccs, code_point);
870 #endif /* HAVE_CHISE */
873 else if ( !without_inheritance
874 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
876 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
878 if ( EQ (mother, Vcharset_ucs) )
879 return DECODE_CHAR (mother, code_point, without_inheritance);
881 return decode_defined_char (mother, code_point,
882 without_inheritance);
884 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
887 = (((code_point >> 8) & 0x7F) - 33) * 94
888 + (( code_point & 0x7F) - 33);
889 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
890 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
892 b2 += b2 < 0x3F ? 0x40 : 0x62;
893 return decode_defined_char (mother, (b1 << 8) | b2,
894 without_inheritance);
896 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
899 = (((code_point >> 8) & 0x7F) - 33) * 94
900 + (( code_point & 0x7F) - 33)
901 + BIG5_SAME_ROW * (0xC9 - 0xA1);
902 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
903 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
905 b2 += b2 < 0x3F ? 0x40 : 0x62;
906 return decode_defined_char (mother, (b1 << 8) | b2,
907 without_inheritance);
914 decode_builtin_char (Lisp_Object charset, int code_point)
916 Lisp_Object mother = XCHARSET_MOTHER (charset);
919 if ( XCHARSET_MAX_CODE (charset) > 0 )
921 if ( CHARSETP (mother) )
923 int code = code_point;
925 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
927 int row = code_point >> 8;
928 int cell = code_point & 255;
932 else if (row < 16 + 32 + 30)
933 code = (row - (16 + 32)) * 94 + cell - 33;
934 else if (row < 18 + 32 + 30)
936 else if (row < 18 + 32 + 60)
937 code = (row - (18 + 32)) * 94 + cell - 33;
939 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
941 int plane = code_point >> 16;
942 int row = (code_point >> 8) & 255;
943 int cell = code_point & 255;
947 else if (row < 16 + 32 + 30)
949 = (plane - 33) * 94 * 60
950 + (row - (16 + 32)) * 94
952 else if (row < 18 + 32 + 30)
954 else if (row < 18 + 32 + 60)
956 = (plane - 33) * 94 * 60
957 + (row - (18 + 32)) * 94
960 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
963 = (((code_point >> 8) & 0x7F) - 33) * 94
964 + (( code_point & 0x7F) - 33);
965 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
966 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
968 b2 += b2 < 0x3F ? 0x40 : 0x62;
969 code = (b1 << 8) | b2;
971 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
974 = (((code_point >> 8) & 0x7F) - 33) * 94
975 + (( code_point & 0x7F) - 33)
976 + BIG5_SAME_ROW * (0xC9 - 0xA1);
977 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
978 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
980 b2 += b2 < 0x3F ? 0x40 : 0x62;
981 code = (b1 << 8) | b2;
984 decode_builtin_char (mother,
985 code + XCHARSET_CODE_OFFSET(charset));
990 = (XCHARSET_DIMENSION (charset) == 1
992 code_point - XCHARSET_BYTE_OFFSET (charset)
994 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
995 * XCHARSET_CHARS (charset)
996 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
997 + XCHARSET_CODE_OFFSET (charset);
998 if ((cid < XCHARSET_MIN_CODE (charset))
999 || (XCHARSET_MAX_CODE (charset) < cid))
1004 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1006 if (XCHARSET_DIMENSION (charset) == 1)
1008 switch (XCHARSET_CHARS (charset))
1012 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1015 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1023 switch (XCHARSET_CHARS (charset))
1026 return MIN_CHAR_94x94
1027 + (final - '0') * 94 * 94
1028 + (((code_point >> 8) & 0x7F) - 33) * 94
1029 + ((code_point & 0x7F) - 33);
1031 return MIN_CHAR_96x96
1032 + (final - '0') * 96 * 96
1033 + (((code_point >> 8) & 0x7F) - 32) * 96
1034 + ((code_point & 0x7F) - 32);
1046 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1048 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1051 if ( CHAR_TABLEP (encoding_table)
1052 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1057 Lisp_Object mother = XCHARSET_MOTHER (charset);
1058 int min = XCHARSET_MIN_CODE (charset);
1059 int max = XCHARSET_MAX_CODE (charset);
1062 if ( CHARSETP (mother) )
1064 if (XCHARSET_FINAL (charset) >= '0')
1065 code = charset_code_point (mother, ch, 1);
1067 code = charset_code_point (mother, ch, defined_only);
1069 else if (defined_only)
1071 else if ( ((max == 0) && CHARSETP (mother)
1072 && (XCHARSET_FINAL (charset) == 0))
1073 || ((min <= ch) && (ch <= max)) )
1075 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1076 || ((min <= code) && (code <= max)) )
1078 int d = code - XCHARSET_CODE_OFFSET (charset);
1080 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1082 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1084 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1086 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1089 int cell = d % 94 + 33;
1095 return (row << 8) | cell;
1097 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1099 int B1 = d >> 8, B2 = d & 0xFF;
1101 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1102 - (B2 < 0x7F ? 0x40 : 0x62);
1106 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1109 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1111 int B1 = d >> 8, B2 = d & 0xFF;
1113 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1114 - (B2 < 0x7F ? 0x40 : 0x62);
1118 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1119 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1122 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1123 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1124 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1125 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1126 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1128 int plane = d / (94 * 60) + 33;
1129 int row = (d % (94 * 60)) / 94;
1130 int cell = d % 94 + 33;
1136 return (plane << 16) | (row << 8) | cell;
1138 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1140 ( (d / (94 * 94) + 33) << 16)
1141 | ((d / 94 % 94 + 33) << 8)
1143 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1145 ( (d / (96 * 96) + 32) << 16)
1146 | ((d / 96 % 96 + 32) << 8)
1148 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1150 ( (d / (94 * 94 * 94) + 33) << 24)
1151 | ((d / (94 * 94) % 94 + 33) << 16)
1152 | ((d / 94 % 94 + 33) << 8)
1154 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1156 ( (d / (96 * 96 * 96) + 32) << 24)
1157 | ((d / (96 * 96) % 96 + 32) << 16)
1158 | ((d / 96 % 96 + 32) << 8)
1162 printf ("Unknown CCS-conversion %d is specified!",
1163 XCHARSET_CONVERSION (charset));
1167 else if (defined_only)
1169 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1170 ( XCHARSET_MIN_CODE (charset) == 0 )
1172 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1173 (XCHARSET_CODE_OFFSET (charset)
1174 == XCHARSET_MIN_CODE (charset))
1179 if (XCHARSET_DIMENSION (charset) == 1)
1181 if (XCHARSET_CHARS (charset) == 94)
1183 if (((d = ch - (MIN_CHAR_94
1184 + (XCHARSET_FINAL (charset) - '0') * 94))
1189 else if (XCHARSET_CHARS (charset) == 96)
1191 if (((d = ch - (MIN_CHAR_96
1192 + (XCHARSET_FINAL (charset) - '0') * 96))
1200 else if (XCHARSET_DIMENSION (charset) == 2)
1202 if (XCHARSET_CHARS (charset) == 94)
1204 if (((d = ch - (MIN_CHAR_94x94
1206 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1209 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1211 else if (XCHARSET_CHARS (charset) == 96)
1213 if (((d = ch - (MIN_CHAR_96x96
1215 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1218 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1229 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1231 if (c <= MAX_CHAR_BASIC_LATIN)
1233 *charset = Vcharset_ascii;
1238 *charset = Vcharset_control_1;
1243 *charset = Vcharset_latin_iso8859_1;
1247 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1249 *charset = Vcharset_hebrew_iso8859_8;
1250 return c - MIN_CHAR_HEBREW + 0x20;
1253 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1255 *charset = Vcharset_thai_tis620;
1256 return c - MIN_CHAR_THAI + 0x20;
1259 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1260 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1262 return list2 (Vcharset_katakana_jisx0201,
1263 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1266 else if (c <= MAX_CHAR_BMP)
1268 *charset = Vcharset_ucs_bmp;
1271 else if (c <= MAX_CHAR_SMP)
1273 *charset = Vcharset_ucs_smp;
1274 return c - MIN_CHAR_SMP;
1276 else if (c <= MAX_CHAR_SIP)
1278 *charset = Vcharset_ucs_sip;
1279 return c - MIN_CHAR_SIP;
1281 else if (c < MIN_CHAR_94)
1283 *charset = Vcharset_ucs;
1286 else if (c <= MAX_CHAR_94)
1288 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1289 ((c - MIN_CHAR_94) / 94) + '0',
1290 CHARSET_LEFT_TO_RIGHT);
1291 if (!NILP (*charset))
1292 return ((c - MIN_CHAR_94) % 94) + 33;
1295 *charset = Vcharset_ucs;
1299 else if (c <= MAX_CHAR_96)
1301 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1302 ((c - MIN_CHAR_96) / 96) + '0',
1303 CHARSET_LEFT_TO_RIGHT);
1304 if (!NILP (*charset))
1305 return ((c - MIN_CHAR_96) % 96) + 32;
1308 *charset = Vcharset_ucs;
1312 else if (c <= MAX_CHAR_94x94)
1315 = CHARSET_BY_ATTRIBUTES (94, 2,
1316 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1317 CHARSET_LEFT_TO_RIGHT);
1318 if (!NILP (*charset))
1319 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1320 | (((c - MIN_CHAR_94x94) % 94) + 33);
1323 *charset = Vcharset_ucs;
1327 else if (c <= MAX_CHAR_96x96)
1330 = CHARSET_BY_ATTRIBUTES (96, 2,
1331 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1332 CHARSET_LEFT_TO_RIGHT);
1333 if (!NILP (*charset))
1334 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1335 | (((c - MIN_CHAR_96x96) % 96) + 32);
1338 *charset = Vcharset_ucs;
1344 *charset = Vcharset_ucs;
1349 Lisp_Object Vdefault_coded_charset_priority_list;
1353 /************************************************************************/
1354 /* Basic charset Lisp functions */
1355 /************************************************************************/
1357 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1358 Return non-nil if OBJECT is a charset.
1362 return CHARSETP (object) ? Qt : Qnil;
1365 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1366 Retrieve the charset of the given name.
1367 If CHARSET-OR-NAME is a charset object, it is simply returned.
1368 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1369 nil is returned. Otherwise the associated charset object is returned.
1373 if (CHARSETP (charset_or_name))
1374 return charset_or_name;
1376 CHECK_SYMBOL (charset_or_name);
1377 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1380 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1381 Retrieve the charset of the given name.
1382 Same as `find-charset' except an error is signalled if there is no such
1383 charset instead of returning nil.
1387 Lisp_Object charset = Ffind_charset (name);
1390 signal_simple_error ("No such charset", name);
1394 /* We store the charsets in hash tables with the names as the key and the
1395 actual charset object as the value. Occasionally we need to use them
1396 in a list format. These routines provide us with that. */
1397 struct charset_list_closure
1399 Lisp_Object *charset_list;
1403 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1404 void *charset_list_closure)
1406 /* This function can GC */
1407 struct charset_list_closure *chcl =
1408 (struct charset_list_closure*) charset_list_closure;
1409 Lisp_Object *charset_list = chcl->charset_list;
1411 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1415 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1416 Return a list of the names of all defined charsets.
1420 Lisp_Object charset_list = Qnil;
1421 struct gcpro gcpro1;
1422 struct charset_list_closure charset_list_closure;
1424 GCPRO1 (charset_list);
1425 charset_list_closure.charset_list = &charset_list;
1426 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1427 &charset_list_closure);
1430 return charset_list;
1433 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1434 Return the name of charset CHARSET.
1438 return XCHARSET_NAME (Fget_charset (charset));
1441 /* #### SJT Should generic properties be allowed? */
1442 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1443 Define a new character set.
1444 This function is for use with Mule support.
1445 NAME is a symbol, the name by which the character set is normally referred.
1446 DOC-STRING is a string describing the character set.
1447 PROPS is a property list, describing the specific nature of the
1448 character set. Recognized properties are:
1450 'short-name Short version of the charset name (ex: Latin-1)
1451 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1452 'registry A regular expression matching the font registry field for
1454 'dimension Number of octets used to index a character in this charset.
1455 Either 1 or 2. Defaults to 1.
1456 If UTF-2000 feature is enabled, 3 or 4 are also available.
1457 'columns Number of columns used to display a character in this charset.
1458 Only used in TTY mode. (Under X, the actual width of a
1459 character can be derived from the font used to display the
1460 characters.) If unspecified, defaults to the dimension
1461 (this is almost always the correct value).
1462 'chars Number of characters in each dimension (94 or 96).
1463 Defaults to 94. Note that if the dimension is 2, the
1464 character set thus described is 94x94 or 96x96.
1465 If UTF-2000 feature is enabled, 128 or 256 are also available.
1466 'final Final byte of ISO 2022 escape sequence. Must be
1467 supplied. Each combination of (DIMENSION, CHARS) defines a
1468 separate namespace for final bytes. Note that ISO
1469 2022 restricts the final byte to the range
1470 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1471 dimension == 2. Note also that final bytes in the range
1472 0x30 - 0x3F are reserved for user-defined (not official)
1474 'graphic 0 (use left half of font on output) or 1 (use right half
1475 of font on output). Defaults to 0. For example, for
1476 a font whose registry is ISO8859-1, the left half
1477 (octets 0x20 - 0x7F) is the `ascii' character set, while
1478 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1479 character set. With 'graphic set to 0, the octets
1480 will have their high bit cleared; with it set to 1,
1481 the octets will have their high bit set.
1482 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1484 'ccl-program A compiled CCL program used to convert a character in
1485 this charset into an index into the font. This is in
1486 addition to the 'graphic property. The CCL program
1487 is passed the octets of the character, with the high
1488 bit cleared and set depending upon whether the value
1489 of the 'graphic property is 0 or 1.
1490 'mother [UTF-2000 only] Base coded-charset.
1491 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1492 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1493 'code-offset [UTF-2000 only] Offset for a code-point of a base
1495 'conversion [UTF-2000 only] Conversion for a code-point of a base
1496 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1498 (name, doc_string, props))
1500 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1501 int direction = CHARSET_LEFT_TO_RIGHT;
1502 Lisp_Object registry = Qnil;
1503 Lisp_Object charset;
1504 Lisp_Object ccl_program = Qnil;
1505 Lisp_Object short_name = Qnil, long_name = Qnil;
1506 Lisp_Object mother = Qnil;
1507 int min_code = 0, max_code = 0, code_offset = 0;
1508 int byte_offset = -1;
1511 CHECK_SYMBOL (name);
1512 if (!NILP (doc_string))
1513 CHECK_STRING (doc_string);
1515 charset = Ffind_charset (name);
1516 if (!NILP (charset))
1517 signal_simple_error ("Cannot redefine existing charset", name);
1520 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1522 if (EQ (keyword, Qshort_name))
1524 CHECK_STRING (value);
1528 else if (EQ (keyword, Qlong_name))
1530 CHECK_STRING (value);
1534 else if (EQ (keyword, Qiso_ir))
1538 id = - XINT (value);
1542 else if (EQ (keyword, Qdimension))
1545 dimension = XINT (value);
1546 if (dimension < 1 ||
1553 signal_simple_error ("Invalid value for 'dimension", value);
1556 else if (EQ (keyword, Qchars))
1559 chars = XINT (value);
1560 if (chars != 94 && chars != 96
1562 && chars != 128 && chars != 256
1565 signal_simple_error ("Invalid value for 'chars", value);
1568 else if (EQ (keyword, Qcolumns))
1571 columns = XINT (value);
1572 if (columns != 1 && columns != 2)
1573 signal_simple_error ("Invalid value for 'columns", value);
1576 else if (EQ (keyword, Qgraphic))
1579 graphic = XINT (value);
1587 signal_simple_error ("Invalid value for 'graphic", value);
1590 else if (EQ (keyword, Qregistry))
1592 CHECK_STRING (value);
1596 else if (EQ (keyword, Qdirection))
1598 if (EQ (value, Ql2r))
1599 direction = CHARSET_LEFT_TO_RIGHT;
1600 else if (EQ (value, Qr2l))
1601 direction = CHARSET_RIGHT_TO_LEFT;
1603 signal_simple_error ("Invalid value for 'direction", value);
1606 else if (EQ (keyword, Qfinal))
1608 CHECK_CHAR_COERCE_INT (value);
1609 final = XCHAR (value);
1610 if (final < '0' || final > '~')
1611 signal_simple_error ("Invalid value for 'final", value);
1615 else if (EQ (keyword, Qmother))
1617 mother = Fget_charset (value);
1620 else if (EQ (keyword, Qmin_code))
1623 min_code = XUINT (value);
1626 else if (EQ (keyword, Qmax_code))
1629 max_code = XUINT (value);
1632 else if (EQ (keyword, Qcode_offset))
1635 code_offset = XUINT (value);
1638 else if (EQ (keyword, Qconversion))
1640 if (EQ (value, Q94x60))
1641 conversion = CONVERSION_94x60;
1642 else if (EQ (value, Q94x94x60))
1643 conversion = CONVERSION_94x94x60;
1644 else if (EQ (value, Qbig5_1))
1645 conversion = CONVERSION_BIG5_1;
1646 else if (EQ (value, Qbig5_2))
1647 conversion = CONVERSION_BIG5_2;
1649 signal_simple_error ("Unrecognized conversion", value);
1653 else if (EQ (keyword, Qccl_program))
1655 struct ccl_program test_ccl;
1657 if (setup_ccl_program (&test_ccl, value) < 0)
1658 signal_simple_error ("Invalid value for 'ccl-program", value);
1659 ccl_program = value;
1663 signal_simple_error ("Unrecognized property", keyword);
1669 error ("'final must be specified");
1671 if (dimension == 2 && final > 0x5F)
1673 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1676 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1677 CHARSET_LEFT_TO_RIGHT)) ||
1678 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1679 CHARSET_RIGHT_TO_LEFT)))
1681 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1684 id = get_unallocated_leading_byte (dimension);
1686 if (NILP (doc_string))
1687 doc_string = build_string ("");
1689 if (NILP (registry))
1690 registry = build_string ("");
1692 if (NILP (short_name))
1693 XSETSTRING (short_name, XSYMBOL (name)->name);
1695 if (NILP (long_name))
1696 long_name = doc_string;
1699 columns = dimension;
1701 if (byte_offset < 0)
1705 else if (chars == 96)
1711 charset = make_charset (id, name, chars, dimension, columns, graphic,
1712 final, direction, short_name, long_name,
1713 doc_string, registry,
1714 Qnil, min_code, max_code, code_offset, byte_offset,
1715 mother, conversion);
1716 if (!NILP (ccl_program))
1717 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1721 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1723 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1724 NEW-NAME is the name of the new charset. Return the new charset.
1726 (charset, new_name))
1728 Lisp_Object new_charset = Qnil;
1729 int id, chars, dimension, columns, graphic, final;
1731 Lisp_Object registry, doc_string, short_name, long_name;
1734 charset = Fget_charset (charset);
1735 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1736 signal_simple_error ("Charset already has reverse-direction charset",
1739 CHECK_SYMBOL (new_name);
1740 if (!NILP (Ffind_charset (new_name)))
1741 signal_simple_error ("Cannot redefine existing charset", new_name);
1743 cs = XCHARSET (charset);
1745 chars = CHARSET_CHARS (cs);
1746 dimension = CHARSET_DIMENSION (cs);
1747 columns = CHARSET_COLUMNS (cs);
1748 id = get_unallocated_leading_byte (dimension);
1750 graphic = CHARSET_GRAPHIC (cs);
1751 final = CHARSET_FINAL (cs);
1752 direction = CHARSET_RIGHT_TO_LEFT;
1753 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1754 direction = CHARSET_LEFT_TO_RIGHT;
1755 doc_string = CHARSET_DOC_STRING (cs);
1756 short_name = CHARSET_SHORT_NAME (cs);
1757 long_name = CHARSET_LONG_NAME (cs);
1758 registry = CHARSET_REGISTRY (cs);
1760 new_charset = make_charset (id, new_name, chars, dimension, columns,
1761 graphic, final, direction, short_name, long_name,
1762 doc_string, registry,
1764 CHARSET_DECODING_TABLE(cs),
1765 CHARSET_MIN_CODE(cs),
1766 CHARSET_MAX_CODE(cs),
1767 CHARSET_CODE_OFFSET(cs),
1768 CHARSET_BYTE_OFFSET(cs),
1770 CHARSET_CONVERSION (cs)
1772 Qnil, 0, 0, 0, 0, Qnil, 0
1776 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1777 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1782 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1783 Define symbol ALIAS as an alias for CHARSET.
1787 CHECK_SYMBOL (alias);
1788 charset = Fget_charset (charset);
1789 return Fputhash (alias, charset, Vcharset_hash_table);
1792 /* #### Reverse direction charsets not yet implemented. */
1794 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1796 Return the reverse-direction charset parallel to CHARSET, if any.
1797 This is the charset with the same properties (in particular, the same
1798 dimension, number of characters per dimension, and final byte) as
1799 CHARSET but whose characters are displayed in the opposite direction.
1803 charset = Fget_charset (charset);
1804 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1808 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1809 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1810 If DIRECTION is omitted, both directions will be checked (left-to-right
1811 will be returned if character sets exist for both directions).
1813 (dimension, chars, final, direction))
1815 int dm, ch, fi, di = -1;
1816 Lisp_Object obj = Qnil;
1818 CHECK_INT (dimension);
1819 dm = XINT (dimension);
1820 if (dm < 1 || dm > 2)
1821 signal_simple_error ("Invalid value for DIMENSION", dimension);
1825 if (ch != 94 && ch != 96)
1826 signal_simple_error ("Invalid value for CHARS", chars);
1828 CHECK_CHAR_COERCE_INT (final);
1830 if (fi < '0' || fi > '~')
1831 signal_simple_error ("Invalid value for FINAL", final);
1833 if (EQ (direction, Ql2r))
1834 di = CHARSET_LEFT_TO_RIGHT;
1835 else if (EQ (direction, Qr2l))
1836 di = CHARSET_RIGHT_TO_LEFT;
1837 else if (!NILP (direction))
1838 signal_simple_error ("Invalid value for DIRECTION", direction);
1840 if (dm == 2 && fi > 0x5F)
1842 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1846 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1848 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1851 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1854 return XCHARSET_NAME (obj);
1858 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1859 Return short name of CHARSET.
1863 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1866 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1867 Return long name of CHARSET.
1871 return XCHARSET_LONG_NAME (Fget_charset (charset));
1874 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1875 Return description of CHARSET.
1879 return XCHARSET_DOC_STRING (Fget_charset (charset));
1882 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1883 Return dimension of CHARSET.
1887 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1890 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1891 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1892 Recognized properties are those listed in `make-charset', as well as
1893 'name and 'doc-string.
1899 charset = Fget_charset (charset);
1900 cs = XCHARSET (charset);
1902 CHECK_SYMBOL (prop);
1903 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1904 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1905 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1906 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1907 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1908 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1909 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1910 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1911 Qnil : make_char (CHARSET_FINAL (cs));
1912 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1913 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1914 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1915 if (EQ (prop, Qdirection))
1916 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1917 if (EQ (prop, Qreverse_direction_charset))
1919 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1920 /* #### Is this translation OK? If so, error checking sufficient? */
1921 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1924 if (EQ (prop, Qmother))
1925 return CHARSET_MOTHER (cs);
1926 if (EQ (prop, Qmin_code))
1927 return make_int (CHARSET_MIN_CODE (cs));
1928 if (EQ (prop, Qmax_code))
1929 return make_int (CHARSET_MAX_CODE (cs));
1931 signal_simple_error ("Unrecognized charset property name", prop);
1932 return Qnil; /* not reached */
1935 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1936 Return charset identification number of CHARSET.
1940 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1943 /* #### We need to figure out which properties we really want to
1946 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1947 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1949 (charset, ccl_program))
1951 struct ccl_program test_ccl;
1953 charset = Fget_charset (charset);
1954 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1955 signal_simple_error ("Invalid ccl-program", ccl_program);
1956 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1961 invalidate_charset_font_caches (Lisp_Object charset)
1963 /* Invalidate font cache entries for charset on all devices. */
1964 Lisp_Object devcons, concons, hash_table;
1965 DEVICE_LOOP_NO_BREAK (devcons, concons)
1967 struct device *d = XDEVICE (XCAR (devcons));
1968 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1969 if (!UNBOUNDP (hash_table))
1970 Fclrhash (hash_table);
1974 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1975 Set the 'registry property of CHARSET to REGISTRY.
1977 (charset, registry))
1979 charset = Fget_charset (charset);
1980 CHECK_STRING (registry);
1981 XCHARSET_REGISTRY (charset) = registry;
1982 invalidate_charset_font_caches (charset);
1983 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1988 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1989 Return mapping-table of CHARSET.
1993 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1996 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1997 Set mapping-table of CHARSET to TABLE.
2001 struct Lisp_Charset *cs;
2005 charset = Fget_charset (charset);
2006 cs = XCHARSET (charset);
2010 CHARSET_DECODING_TABLE(cs) = Qnil;
2013 else if (VECTORP (table))
2015 int ccs_len = CHARSET_BYTE_SIZE (cs);
2016 int ret = decoding_table_check_elements (table,
2017 CHARSET_DIMENSION (cs),
2022 signal_simple_error ("Too big table", table);
2024 signal_simple_error ("Invalid element is found", table);
2026 signal_simple_error ("Something wrong", table);
2028 CHARSET_DECODING_TABLE(cs) = Qnil;
2031 signal_error (Qwrong_type_argument,
2032 list2 (build_translated_string ("vector-or-nil-p"),
2035 byte_offset = CHARSET_BYTE_OFFSET (cs);
2036 switch (CHARSET_DIMENSION (cs))
2039 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2041 Lisp_Object c = XVECTOR_DATA(table)[i];
2044 Fput_char_attribute (c, XCHARSET_NAME (charset),
2045 make_int (i + byte_offset));
2049 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2051 Lisp_Object v = XVECTOR_DATA(table)[i];
2057 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2059 Lisp_Object c = XVECTOR_DATA(v)[j];
2063 (c, XCHARSET_NAME (charset),
2064 make_int ( ( (i + byte_offset) << 8 )
2070 Fput_char_attribute (v, XCHARSET_NAME (charset),
2071 make_int (i + byte_offset));
2079 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2080 Save mapping-table of CHARSET.
2084 struct Lisp_Charset *cs;
2085 int byte_min, byte_max;
2086 #ifdef HAVE_LIBCHISE
2088 #else /* HAVE_LIBCHISE */
2090 Lisp_Object db_file;
2091 #endif /* not HAVE_LIBCHISE */
2093 charset = Fget_charset (charset);
2094 cs = XCHARSET (charset);
2096 #ifdef HAVE_LIBCHISE
2097 if ( open_chise_data_source_maybe () )
2101 = chise_ds_get_ccs (default_chise_data_source,
2102 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2105 printf ("Can't open decoding-table %s\n",
2106 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2109 #else /* HAVE_LIBCHISE */
2110 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2111 Qsystem_char_id, 1);
2112 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2113 #endif /* not HAVE_LIBCHISE */
2115 byte_min = CHARSET_BYTE_OFFSET (cs);
2116 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2117 switch (CHARSET_DIMENSION (cs))
2121 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2124 for (cell = byte_min; cell < byte_max; cell++)
2126 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2130 #ifdef HAVE_LIBCHISE
2131 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2132 #else /* HAVE_LIBCHISE */
2133 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2134 Fprin1_to_string (c, Qnil),
2136 #endif /* not HAVE_LIBCHISE */
2143 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2146 for (row = byte_min; row < byte_max; row++)
2148 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2151 for (cell = byte_min; cell < byte_max; cell++)
2153 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2157 #ifdef HAVE_LIBCHISE
2158 chise_ccs_set_decoded_char
2160 (row << 8) | cell, XCHAR (c));
2161 #else /* HAVE_LIBCHISE */
2162 Fput_database (Fprin1_to_string (make_int ((row << 8)
2165 Fprin1_to_string (c, Qnil),
2167 #endif /* not HAVE_LIBCHISE */
2175 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2178 for (plane = byte_min; plane < byte_max; plane++)
2181 = get_ccs_octet_table (table_p, charset, plane);
2184 for (row = byte_min; row < byte_max; row++)
2187 = get_ccs_octet_table (table_r, charset, row);
2190 for (cell = byte_min; cell < byte_max; cell++)
2192 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2197 #ifdef HAVE_LIBCHISE
2198 chise_ccs_set_decoded_char
2203 #else /* HAVE_LIBCHISE */
2204 Fput_database (Fprin1_to_string
2205 (make_int ((plane << 16)
2209 Fprin1_to_string (c, Qnil),
2211 #endif /* not HAVE_LIBCHISE */
2220 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2223 for (group = byte_min; group < byte_max; group++)
2226 = get_ccs_octet_table (table_g, charset, group);
2229 for (plane = byte_min; plane < byte_max; plane++)
2232 = get_ccs_octet_table (table_p, charset, plane);
2235 for (row = byte_min; row < byte_max; row++)
2238 = get_ccs_octet_table (table_r, charset, row);
2241 for (cell = byte_min; cell < byte_max; cell++)
2244 = get_ccs_octet_table (table_c, charset, cell);
2248 #ifdef HAVE_LIBCHISE
2249 chise_ccs_set_decoded_char
2255 #else /* HAVE_LIBCHISE */
2256 Fput_database (Fprin1_to_string
2257 (make_int (( group << 24)
2262 Fprin1_to_string (c, Qnil),
2264 #endif /* not HAVE_LIBCHISE */
2272 #ifdef HAVE_LIBCHISE
2273 chise_ccs_sync (dt_ccs);
2275 #else /* HAVE_LIBCHISE */
2276 return Fclose_database (db);
2277 #endif /* not HAVE_LIBCHISE */
2280 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2281 Reset mapping-table of CCS with database file.
2285 #ifdef HAVE_LIBCHISE
2286 CHISE_CCS chise_ccs;
2288 Lisp_Object db_file;
2291 ccs = Fget_charset (ccs);
2293 #ifdef HAVE_LIBCHISE
2294 if ( open_chise_data_source_maybe () )
2297 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2298 XSTRING_DATA (Fsymbol_name
2299 (XCHARSET_NAME(ccs))));
2300 if (chise_ccs == NULL)
2303 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2304 Qsystem_char_id, 0);
2308 #ifdef HAVE_LIBCHISE
2309 chise_ccs_setup_db (chise_ccs, 0) == 0
2311 !NILP (Ffile_exists_p (db_file))
2315 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2322 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2324 #ifdef HAVE_LIBCHISE
2325 CHISE_Char_ID char_id;
2327 if ( open_chise_data_source_maybe () )
2331 = chise_ds_decode_char (default_chise_data_source,
2332 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2335 decoding_table_put_char (ccs, code_point, make_char (char_id));
2337 decoding_table_put_char (ccs, code_point, Qnil);
2339 /* chise_ccst_close (dt_ccs); */
2341 #else /* HAVE_LIBCHISE */
2344 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2347 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2351 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2358 decoding_table_put_char (ccs, code_point, ret);
2359 Fclose_database (db);
2363 decoding_table_put_char (ccs, code_point, Qnil);
2364 Fclose_database (db);
2367 #endif /* not HAVE_LIBCHISE */
2369 #endif /* HAVE_CHISE */
2370 #endif /* UTF2000 */
2373 /************************************************************************/
2374 /* Lisp primitives for working with characters */
2375 /************************************************************************/
2378 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2379 Make a character from CHARSET and code-point CODE.
2380 If DEFINED_ONLY is non-nil, builtin character is not returned.
2381 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2382 If corresponding character is not found, nil is returned.
2384 (charset, code, defined_only, without_inheritance))
2388 charset = Fget_charset (charset);
2391 if (XCHARSET_GRAPHIC (charset) == 1)
2393 if (NILP (defined_only))
2394 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2396 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2397 return c >= 0 ? make_char (c) : Qnil;
2400 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2401 Make a builtin character from CHARSET and code-point CODE.
2407 charset = Fget_charset (charset);
2409 if (EQ (charset, Vcharset_latin_viscii))
2411 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2417 (ret = Fget_char_attribute (chr,
2418 Vcharset_latin_viscii_lower,
2421 charset = Vcharset_latin_viscii_lower;
2425 (ret = Fget_char_attribute (chr,
2426 Vcharset_latin_viscii_upper,
2429 charset = Vcharset_latin_viscii_upper;
2436 if (XCHARSET_GRAPHIC (charset) == 1)
2439 c = decode_builtin_char (charset, c);
2441 c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
2445 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2446 Make a character from CHARSET and octets ARG1 and ARG2.
2447 ARG2 is required only for characters from two-dimensional charsets.
2448 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2449 character s with caron.
2451 (charset, arg1, arg2))
2455 int lowlim, highlim;
2457 charset = Fget_charset (charset);
2458 cs = XCHARSET (charset);
2460 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2461 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2463 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2465 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2466 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2469 /* It is useful (and safe, according to Olivier Galibert) to strip
2470 the 8th bit off ARG1 and ARG2 because it allows programmers to
2471 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2472 Latin 2 code of the character. */
2480 if (a1 < lowlim || a1 > highlim)
2481 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2483 if (CHARSET_DIMENSION (cs) == 1)
2487 ("Charset is of dimension one; second octet must be nil", arg2);
2488 return make_char (MAKE_CHAR (charset, a1, 0));
2497 a2 = XINT (arg2) & 0x7f;
2499 if (a2 < lowlim || a2 > highlim)
2500 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2502 return make_char (MAKE_CHAR (charset, a1, a2));
2505 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2506 Return the character set of CHARACTER.
2510 CHECK_CHAR_COERCE_INT (character);
2512 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2515 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2516 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2517 N defaults to 0 if omitted.
2521 Lisp_Object charset;
2524 CHECK_CHAR_COERCE_INT (character);
2526 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2528 if (NILP (n) || EQ (n, Qzero))
2529 return make_int (octet0);
2530 else if (EQ (n, make_int (1)))
2531 return make_int (octet1);
2533 signal_simple_error ("Octet number must be 0 or 1", n);
2537 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2538 Return code-point of CHARACTER in specified CHARSET.
2540 (character, charset, defined_only))
2544 CHECK_CHAR_COERCE_INT (character);
2545 charset = Fget_charset (charset);
2546 code_point = charset_code_point (charset, XCHAR (character),
2547 !NILP (defined_only));
2548 if (code_point >= 0)
2549 return make_int (code_point);
2555 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2556 Return list of charset and one or two position-codes of CHARACTER.
2560 /* This function can GC */
2561 struct gcpro gcpro1, gcpro2;
2562 Lisp_Object charset = Qnil;
2563 Lisp_Object rc = Qnil;
2571 GCPRO2 (charset, rc);
2572 CHECK_CHAR_COERCE_INT (character);
2575 code_point = ENCODE_CHAR (XCHAR (character), charset);
2576 dimension = XCHARSET_DIMENSION (charset);
2577 while (dimension > 0)
2579 rc = Fcons (make_int (code_point & 255), rc);
2583 rc = Fcons (XCHARSET_NAME (charset), rc);
2585 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2587 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2589 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2593 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2602 #ifdef ENABLE_COMPOSITE_CHARS
2603 /************************************************************************/
2604 /* composite character functions */
2605 /************************************************************************/
2608 lookup_composite_char (Bufbyte *str, int len)
2610 Lisp_Object lispstr = make_string (str, len);
2611 Lisp_Object ch = Fgethash (lispstr,
2612 Vcomposite_char_string2char_hash_table,
2618 if (composite_char_row_next >= 128)
2619 signal_simple_error ("No more composite chars available", lispstr);
2620 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2621 composite_char_col_next);
2622 Fputhash (make_char (emch), lispstr,
2623 Vcomposite_char_char2string_hash_table);
2624 Fputhash (lispstr, make_char (emch),
2625 Vcomposite_char_string2char_hash_table);
2626 composite_char_col_next++;
2627 if (composite_char_col_next >= 128)
2629 composite_char_col_next = 32;
2630 composite_char_row_next++;
2639 composite_char_string (Emchar ch)
2641 Lisp_Object str = Fgethash (make_char (ch),
2642 Vcomposite_char_char2string_hash_table,
2644 assert (!UNBOUNDP (str));
2648 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2649 Convert a string into a single composite character.
2650 The character is the result of overstriking all the characters in
2655 CHECK_STRING (string);
2656 return make_char (lookup_composite_char (XSTRING_DATA (string),
2657 XSTRING_LENGTH (string)));
2660 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2661 Return a string of the characters comprising a composite character.
2669 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2670 signal_simple_error ("Must be composite char", ch);
2671 return composite_char_string (emch);
2673 #endif /* ENABLE_COMPOSITE_CHARS */
2676 /************************************************************************/
2677 /* initialization */
2678 /************************************************************************/
2681 syms_of_mule_charset (void)
2683 INIT_LRECORD_IMPLEMENTATION (charset);
2685 DEFSUBR (Fcharsetp);
2686 DEFSUBR (Ffind_charset);
2687 DEFSUBR (Fget_charset);
2688 DEFSUBR (Fcharset_list);
2689 DEFSUBR (Fcharset_name);
2690 DEFSUBR (Fmake_charset);
2691 DEFSUBR (Fmake_reverse_direction_charset);
2692 /* DEFSUBR (Freverse_direction_charset); */
2693 DEFSUBR (Fdefine_charset_alias);
2694 DEFSUBR (Fcharset_from_attributes);
2695 DEFSUBR (Fcharset_short_name);
2696 DEFSUBR (Fcharset_long_name);
2697 DEFSUBR (Fcharset_description);
2698 DEFSUBR (Fcharset_dimension);
2699 DEFSUBR (Fcharset_property);
2700 DEFSUBR (Fcharset_id);
2701 DEFSUBR (Fset_charset_ccl_program);
2702 DEFSUBR (Fset_charset_registry);
2705 DEFSUBR (Fcharset_mapping_table);
2706 DEFSUBR (Fset_charset_mapping_table);
2708 DEFSUBR (Fsave_charset_mapping_table);
2709 DEFSUBR (Freset_charset_mapping_table);
2710 #endif /* HAVE_CHISE */
2711 DEFSUBR (Fdecode_char);
2712 DEFSUBR (Fdecode_builtin_char);
2713 DEFSUBR (Fencode_char);
2716 DEFSUBR (Fmake_char);
2717 DEFSUBR (Fchar_charset);
2718 DEFSUBR (Fchar_octet);
2719 DEFSUBR (Fsplit_char);
2721 #ifdef ENABLE_COMPOSITE_CHARS
2722 DEFSUBR (Fmake_composite_char);
2723 DEFSUBR (Fcomposite_char_string);
2726 defsymbol (&Qcharsetp, "charsetp");
2727 defsymbol (&Qregistry, "registry");
2728 defsymbol (&Qfinal, "final");
2729 defsymbol (&Qgraphic, "graphic");
2730 defsymbol (&Qdirection, "direction");
2731 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2732 defsymbol (&Qshort_name, "short-name");
2733 defsymbol (&Qlong_name, "long-name");
2734 defsymbol (&Qiso_ir, "iso-ir");
2736 defsymbol (&Qmother, "mother");
2737 defsymbol (&Qmin_code, "min-code");
2738 defsymbol (&Qmax_code, "max-code");
2739 defsymbol (&Qcode_offset, "code-offset");
2740 defsymbol (&Qconversion, "conversion");
2741 defsymbol (&Q94x60, "94x60");
2742 defsymbol (&Q94x94x60, "94x94x60");
2743 defsymbol (&Qbig5_1, "big5-1");
2744 defsymbol (&Qbig5_2, "big5-2");
2747 defsymbol (&Ql2r, "l2r");
2748 defsymbol (&Qr2l, "r2l");
2750 /* Charsets, compatible with FSF 20.3
2751 Naming convention is Script-Charset[-Edition] */
2752 defsymbol (&Qascii, "ascii");
2753 defsymbol (&Qcontrol_1, "control-1");
2754 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2755 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2756 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2757 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2758 defsymbol (&Qthai_tis620, "thai-tis620");
2759 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2760 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2761 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2762 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2763 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2764 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2765 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2766 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2767 defsymbol (&Qmap_gb2312, "=gb2312");
2768 defsymbol (&Qmap_gb12345, "=gb12345");
2769 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2770 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2771 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2772 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2773 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2775 defsymbol (&Qmap_ucs, "=ucs");
2776 defsymbol (&Qucs, "ucs");
2777 defsymbol (&Qucs_bmp, "ucs-bmp");
2778 defsymbol (&Qucs_smp, "ucs-smp");
2779 defsymbol (&Qucs_sip, "ucs-sip");
2780 defsymbol (&Qlatin_viscii, "latin-viscii");
2781 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2782 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2783 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2784 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2785 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2786 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2787 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2788 defsymbol (&Qmap_big5, "=big5");
2789 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2791 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2792 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2794 defsymbol (&Qcomposite, "composite");
2798 vars_of_mule_charset (void)
2805 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2806 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2808 /* Table of charsets indexed by leading byte. */
2809 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2810 chlook->charset_by_leading_byte[i] = Qnil;
2813 /* Table of charsets indexed by type/final-byte. */
2814 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2815 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2816 chlook->charset_by_attributes[i][j] = Qnil;
2818 /* Table of charsets indexed by type/final-byte/direction. */
2819 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2820 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2821 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2822 chlook->charset_by_attributes[i][j][k] = Qnil;
2826 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2828 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2829 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2833 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2834 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2835 Leading-code of private TYPE9N charset of column-width 1.
2837 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2841 Vdefault_coded_charset_priority_list = Qnil;
2842 DEFVAR_LISP ("default-coded-charset-priority-list",
2843 &Vdefault_coded_charset_priority_list /*
2844 Default order of preferred coded-character-sets.
2850 complex_vars_of_mule_charset (void)
2852 staticpro (&Vcharset_hash_table);
2853 Vcharset_hash_table =
2854 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2856 /* Predefined character sets. We store them into variables for
2860 staticpro (&Vcharset_ucs);
2862 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2863 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2864 build_string ("UCS"),
2865 build_string ("UCS"),
2866 build_string ("ISO/IEC 10646"),
2868 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2869 staticpro (&Vcharset_ucs_bmp);
2871 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2872 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2873 build_string ("BMP"),
2874 build_string ("UCS-BMP"),
2875 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2877 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2878 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2879 staticpro (&Vcharset_ucs_smp);
2881 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2882 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2883 build_string ("SMP"),
2884 build_string ("UCS-SMP"),
2885 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2886 build_string ("UCS00-1"),
2887 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2888 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2889 staticpro (&Vcharset_ucs_sip);
2891 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2892 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2893 build_string ("SIP"),
2894 build_string ("UCS-SIP"),
2895 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2896 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2897 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2898 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2900 # define MIN_CHAR_THAI 0
2901 # define MAX_CHAR_THAI 0
2902 /* # define MIN_CHAR_HEBREW 0 */
2903 /* # define MAX_CHAR_HEBREW 0 */
2904 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2905 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2907 staticpro (&Vcharset_ascii);
2909 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2910 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2911 build_string ("ASCII"),
2912 build_string ("ASCII)"),
2913 build_string ("ASCII (ISO646 IRV)"),
2914 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2915 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2916 staticpro (&Vcharset_control_1);
2917 Vcharset_control_1 =
2918 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2919 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2920 build_string ("C1"),
2921 build_string ("Control characters"),
2922 build_string ("Control characters 128-191"),
2924 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2925 staticpro (&Vcharset_latin_iso8859_1);
2926 Vcharset_latin_iso8859_1 =
2927 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2928 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2929 build_string ("Latin-1"),
2930 build_string ("ISO8859-1 (Latin-1)"),
2931 build_string ("ISO8859-1 (Latin-1)"),
2932 build_string ("iso8859-1"),
2933 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2934 staticpro (&Vcharset_latin_iso8859_2);
2935 Vcharset_latin_iso8859_2 =
2936 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2937 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2938 build_string ("Latin-2"),
2939 build_string ("ISO8859-2 (Latin-2)"),
2940 build_string ("ISO8859-2 (Latin-2)"),
2941 build_string ("iso8859-2"),
2942 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2943 staticpro (&Vcharset_latin_iso8859_3);
2944 Vcharset_latin_iso8859_3 =
2945 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2946 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2947 build_string ("Latin-3"),
2948 build_string ("ISO8859-3 (Latin-3)"),
2949 build_string ("ISO8859-3 (Latin-3)"),
2950 build_string ("iso8859-3"),
2951 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2952 staticpro (&Vcharset_latin_iso8859_4);
2953 Vcharset_latin_iso8859_4 =
2954 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2955 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2956 build_string ("Latin-4"),
2957 build_string ("ISO8859-4 (Latin-4)"),
2958 build_string ("ISO8859-4 (Latin-4)"),
2959 build_string ("iso8859-4"),
2960 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2961 staticpro (&Vcharset_thai_tis620);
2962 Vcharset_thai_tis620 =
2963 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2964 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2965 build_string ("TIS620"),
2966 build_string ("TIS620 (Thai)"),
2967 build_string ("TIS620.2529 (Thai)"),
2968 build_string ("tis620"),
2969 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2970 staticpro (&Vcharset_greek_iso8859_7);
2971 Vcharset_greek_iso8859_7 =
2972 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2973 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2974 build_string ("ISO8859-7"),
2975 build_string ("ISO8859-7 (Greek)"),
2976 build_string ("ISO8859-7 (Greek)"),
2977 build_string ("iso8859-7"),
2978 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2979 staticpro (&Vcharset_arabic_iso8859_6);
2980 Vcharset_arabic_iso8859_6 =
2981 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2982 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2983 build_string ("ISO8859-6"),
2984 build_string ("ISO8859-6 (Arabic)"),
2985 build_string ("ISO8859-6 (Arabic)"),
2986 build_string ("iso8859-6"),
2987 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2988 staticpro (&Vcharset_hebrew_iso8859_8);
2989 Vcharset_hebrew_iso8859_8 =
2990 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2991 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2992 build_string ("ISO8859-8"),
2993 build_string ("ISO8859-8 (Hebrew)"),
2994 build_string ("ISO8859-8 (Hebrew)"),
2995 build_string ("iso8859-8"),
2997 0 /* MIN_CHAR_HEBREW */,
2998 0 /* MAX_CHAR_HEBREW */, 0, 32,
2999 Qnil, CONVERSION_IDENTICAL);
3000 staticpro (&Vcharset_katakana_jisx0201);
3001 Vcharset_katakana_jisx0201 =
3002 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3003 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3004 build_string ("JISX0201 Kana"),
3005 build_string ("JISX0201.1976 (Japanese Kana)"),
3006 build_string ("JISX0201.1976 Japanese Kana"),
3007 build_string ("jisx0201\\.1976"),
3008 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3009 staticpro (&Vcharset_latin_jisx0201);
3010 Vcharset_latin_jisx0201 =
3011 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3012 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3013 build_string ("JISX0201 Roman"),
3014 build_string ("JISX0201.1976 (Japanese Roman)"),
3015 build_string ("JISX0201.1976 Japanese Roman"),
3016 build_string ("jisx0201\\.1976"),
3017 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3018 staticpro (&Vcharset_cyrillic_iso8859_5);
3019 Vcharset_cyrillic_iso8859_5 =
3020 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3021 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3022 build_string ("ISO8859-5"),
3023 build_string ("ISO8859-5 (Cyrillic)"),
3024 build_string ("ISO8859-5 (Cyrillic)"),
3025 build_string ("iso8859-5"),
3026 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3027 staticpro (&Vcharset_latin_iso8859_9);
3028 Vcharset_latin_iso8859_9 =
3029 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3030 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Latin-5"),
3032 build_string ("ISO8859-9 (Latin-5)"),
3033 build_string ("ISO8859-9 (Latin-5)"),
3034 build_string ("iso8859-9"),
3035 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3037 staticpro (&Vcharset_jis_x0208);
3038 Vcharset_jis_x0208 =
3039 make_charset (LEADING_BYTE_JIS_X0208,
3040 Qmap_jis_x0208, 94, 2,
3041 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3042 build_string ("JIS X0208"),
3043 build_string ("JIS X0208 Common"),
3044 build_string ("JIS X0208 Common part"),
3045 build_string ("jisx0208\\.1990"),
3047 MIN_CHAR_JIS_X0208_1990,
3048 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3049 Qnil, CONVERSION_94x94);
3051 staticpro (&Vcharset_japanese_jisx0208_1978);
3052 Vcharset_japanese_jisx0208_1978 =
3053 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3054 Qmap_jis_x0208_1978, 94, 2,
3055 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3056 build_string ("JIS X0208:1978"),
3057 build_string ("JIS X0208:1978 (Japanese)"),
3059 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3060 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3067 CONVERSION_IDENTICAL);
3068 staticpro (&Vcharset_chinese_gb2312);
3069 Vcharset_chinese_gb2312 =
3070 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3071 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3072 build_string ("GB2312"),
3073 build_string ("GB2312)"),
3074 build_string ("GB2312 Chinese simplified"),
3075 build_string ("gb2312"),
3076 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3077 staticpro (&Vcharset_chinese_gb12345);
3078 Vcharset_chinese_gb12345 =
3079 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3080 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3081 build_string ("G1"),
3082 build_string ("GB 12345)"),
3083 build_string ("GB 12345-1990"),
3084 build_string ("GB12345\\(\\.1990\\)?-0"),
3085 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3086 staticpro (&Vcharset_japanese_jisx0208);
3087 Vcharset_japanese_jisx0208 =
3088 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3089 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3090 build_string ("JISX0208"),
3091 build_string ("JIS X0208:1983 (Japanese)"),
3092 build_string ("JIS X0208:1983 Japanese Kanji"),
3093 build_string ("jisx0208\\.1983"),
3100 CONVERSION_IDENTICAL);
3102 staticpro (&Vcharset_japanese_jisx0208_1990);
3103 Vcharset_japanese_jisx0208_1990 =
3104 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3105 Qmap_jis_x0208_1990, 94, 2,
3106 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3107 build_string ("JISX0208-1990"),
3108 build_string ("JIS X0208:1990 (Japanese)"),
3109 build_string ("JIS X0208:1990 Japanese Kanji"),
3110 build_string ("jisx0208\\.1990"),
3112 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3113 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3114 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3115 Vcharset_jis_x0208 /* Qnil */,
3116 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3118 staticpro (&Vcharset_korean_ksc5601);
3119 Vcharset_korean_ksc5601 =
3120 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3121 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3122 build_string ("KSC5601"),
3123 build_string ("KSC5601 (Korean"),
3124 build_string ("KSC5601 Korean Hangul and Hanja"),
3125 build_string ("ksc5601"),
3126 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3127 staticpro (&Vcharset_japanese_jisx0212);
3128 Vcharset_japanese_jisx0212 =
3129 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3130 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3131 build_string ("JISX0212"),
3132 build_string ("JISX0212 (Japanese)"),
3133 build_string ("JISX0212 Japanese Supplement"),
3134 build_string ("jisx0212"),
3135 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3137 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3138 staticpro (&Vcharset_chinese_cns11643_1);
3139 Vcharset_chinese_cns11643_1 =
3140 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3141 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3142 build_string ("CNS11643-1"),
3143 build_string ("CNS11643-1 (Chinese traditional)"),
3145 ("CNS 11643 Plane 1 Chinese traditional"),
3146 build_string (CHINESE_CNS_PLANE_RE("1")),
3147 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3148 staticpro (&Vcharset_chinese_cns11643_2);
3149 Vcharset_chinese_cns11643_2 =
3150 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3151 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3152 build_string ("CNS11643-2"),
3153 build_string ("CNS11643-2 (Chinese traditional)"),
3155 ("CNS 11643 Plane 2 Chinese traditional"),
3156 build_string (CHINESE_CNS_PLANE_RE("2")),
3157 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3159 staticpro (&Vcharset_latin_tcvn5712);
3160 Vcharset_latin_tcvn5712 =
3161 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3162 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3163 build_string ("TCVN 5712"),
3164 build_string ("TCVN 5712 (VSCII-2)"),
3165 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3166 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3167 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3168 staticpro (&Vcharset_latin_viscii_lower);
3169 Vcharset_latin_viscii_lower =
3170 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3171 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3172 build_string ("VISCII lower"),
3173 build_string ("VISCII lower (Vietnamese)"),
3174 build_string ("VISCII lower (Vietnamese)"),
3175 build_string ("MULEVISCII-LOWER"),
3176 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3177 staticpro (&Vcharset_latin_viscii_upper);
3178 Vcharset_latin_viscii_upper =
3179 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3180 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("VISCII upper"),
3182 build_string ("VISCII upper (Vietnamese)"),
3183 build_string ("VISCII upper (Vietnamese)"),
3184 build_string ("MULEVISCII-UPPER"),
3185 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3186 staticpro (&Vcharset_latin_viscii);
3187 Vcharset_latin_viscii =
3188 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3189 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3190 build_string ("VISCII"),
3191 build_string ("VISCII 1.1 (Vietnamese)"),
3192 build_string ("VISCII 1.1 (Vietnamese)"),
3193 build_string ("VISCII1\\.1"),
3194 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3195 staticpro (&Vcharset_chinese_big5);
3196 Vcharset_chinese_big5 =
3197 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3198 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3199 build_string ("Big5"),
3200 build_string ("Big5"),
3201 build_string ("Big5 Chinese traditional"),
3202 build_string ("big5-0"),
3204 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3205 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3207 staticpro (&Vcharset_ethiopic_ucs);
3208 Vcharset_ethiopic_ucs =
3209 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3210 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3211 build_string ("Ethiopic (UCS)"),
3212 build_string ("Ethiopic (UCS)"),
3213 build_string ("Ethiopic of UCS"),
3214 build_string ("Ethiopic-Unicode"),
3215 Qnil, 0x1200, 0x137F, 0, 0,
3216 Qnil, CONVERSION_IDENTICAL);
3218 staticpro (&Vcharset_chinese_big5_1);
3219 Vcharset_chinese_big5_1 =
3220 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3221 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3222 build_string ("Big5"),
3223 build_string ("Big5 (Level-1)"),
3225 ("Big5 Level-1 Chinese traditional"),
3226 build_string ("big5"),
3227 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3228 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3229 staticpro (&Vcharset_chinese_big5_2);
3230 Vcharset_chinese_big5_2 =
3231 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3232 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3233 build_string ("Big5"),
3234 build_string ("Big5 (Level-2)"),
3236 ("Big5 Level-2 Chinese traditional"),
3237 build_string ("big5"),
3238 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3239 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3241 #ifdef ENABLE_COMPOSITE_CHARS
3242 /* #### For simplicity, we put composite chars into a 96x96 charset.
3243 This is going to lead to problems because you can run out of
3244 room, esp. as we don't yet recycle numbers. */
3245 staticpro (&Vcharset_composite);
3246 Vcharset_composite =
3247 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3248 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3249 build_string ("Composite"),
3250 build_string ("Composite characters"),
3251 build_string ("Composite characters"),
3254 /* #### not dumped properly */
3255 composite_char_row_next = 32;
3256 composite_char_col_next = 32;
3258 Vcomposite_char_string2char_hash_table =
3259 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3260 Vcomposite_char_char2string_hash_table =
3261 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3262 staticpro (&Vcomposite_char_string2char_hash_table);
3263 staticpro (&Vcomposite_char_char2string_hash_table);
3264 #endif /* ENABLE_COMPOSITE_CHARS */