1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
/* One global Lisp_Object per built-in charset.  Only the declarations
   appear here; no assignment to them is visible in this part of the
   file.  Several of them (Vcharset_ascii, Vcharset_control_1,
   Vcharset_latin_iso8859_1, Vcharset_hebrew_iso8859_8,
   Vcharset_thai_tis620, Vcharset_ucs, Vcharset_ucs_bmp,
   Vcharset_ucs_smp, Vcharset_ucs_sip) are returned through the
   CHARSET out-parameter of encode_builtin_char_1(). */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_ucs;
71 Lisp_Object Vcharset_ucs_bmp;
72 Lisp_Object Vcharset_ucs_smp;
73 Lisp_Object Vcharset_ucs_sip;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
/* Declarations that exist only when ENABLE_COMPOSITE_CHARS is defined:
   the composite charset object, two hash-table objects, and two
   file-local int counters (composite_char_row_next and
   composite_char_col_next).  Vcomposite_char_char2string_hash_table is
   looked up with Fgethash in non_ascii_valid_char_p(). */
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at the charset lookup structure.  Code in this file
   reads and writes its charset_by_leading_byte and
   charset_by_attributes tables (make_charset) and its
   next_allocated_* counters (get_unallocated_leading_byte). */
99 struct charset_lookup *chlook;
/* Description tables for struct charset_lookup.  The visible entry
   describes the charset_by_leading_byte member as an array of Lisp
   objects; NUM_LEADING_BYTES+4*128 appears as part of that entry --
   presumably the element count, TODO confirm against the lines not
   visible here. */
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
/* Pairs the structure size with the member description table above. */
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The table is declared with 0xA0 entries, so it is only meaningful for
   first bytes 0x00 - 0x9f. */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the two 16-entry rows below both follow the
   0x80 - 0x8f comment, and with both present the initializer would
   hold more than 0xA0 values.  They look like alternatives selected
   by a preprocessor conditional that is not visible here (they differ
   only in the last entry, 3 versus 2) -- confirm. */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of vector V, which is used as a (possibly nested)
   decoding table.

   V        vector to inspect.
   DIM      nesting depth; the recursive call passes DIM - 1.
   CCS_LEN  upper bound tested against the length of V.

   Returns int.  The actual return statements are not visible in this
   part of the file, so the meaning of the result is not documented
   here. */
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is tested for before any element is
   looked at. */
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Elements that are nil or a character need no further inspection. */
163 if (!NILP (c) && !CHARP (c))
/* Descend into the element with one dimension less.  Any guard
   between the test above and this call is not visible here. */
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Associate CHARACTER with code point VALUE in coded charset CCS.

   CHARACTER  the character object.
   CCS        the coded charset object.
   VALUE      either an integer code point or, in the obsolete form, a
              list of bytes that is folded into one integer.  Any other
              form signals "Invalid value for coded-charset".

   The table update is performed unless CCS is named Qmap_ucs and the
   character equals the value.  The return statement is not visible in
   this part of the file. */
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
182 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte of the list to the accumulated code point. */
214 code_point = (code_point << 8) | j;
/* Replace the list by its integer form. */
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear bit 7 of each of the four bytes of the code point. */
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point currently recorded for CHARACTER so that the
   old decoding-table entry can be removed before the new one is put. */
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS from
   both of the charset's tables.

   When the decoding table is a vector, the code point currently
   returned by Fget_char_attribute is removed from it.  When the
   encoding table is a char table, the entry for CHARACTER is set to
   Qunbound.  The return statement is not visible in this part of the
   file. */
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
/* File-scope globals: an int leading_code_private_11, the Q* Lisp
   objects used by this file, and the charset hash table.  Parts of
   this declaration list are not visible here (the two
   Qvietnamese_viscii_* lines are the middle of a longer declaration). */
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
317 Qvietnamese_viscii_lower,
318 Qvietnamese_viscii_upper,
328 Lisp_Object Ql2r, Qr2l;
/* Hash table keyed by charset name with the charset object as value:
   make_charset() stores into it with Fputhash and Ffind_charset reads
   it with Fgethash. */
330 Lisp_Object Vcharset_hash_table;
332 /* Composite characters are characters constructed by overstriking two
333 or more regular characters.
335 1) The old Mule implementation involves storing composite characters
336 in a buffer as a tag followed by all of the actual characters
337 used to make up the composite character. I think this is a bad
338 idea; it greatly complicates code that wants to handle strings
339 one character at a time because it has to deal with the possibility
340 of great big ungainly characters. It's much more reasonable to
341 simply store an index into a table of composite characters.
343 2) The current implementation only allows for 16,384 separate
344 composite characters over the lifetime of the XEmacs process.
345 This could become a potential problem if the user
346 edited lots of different files that use composite characters.
347 Due to FSF bogosity, increasing the number of allowable
348 composite characters under Mule would decrease the number
349 of possible faces that can exist. Mule already has shrunk
350 this to 2048, and further shrinkage would become uncomfortable.
351 No such problems exist in XEmacs.
353 Composite characters could be represented as 0x80 C1 C2 C3,
354 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
355 for slightly under 2^20 (one million) composite characters
356 over the XEmacs process lifetime, and you only need to
357 increase the size of a Mule character from 19 to 21 bits.
358 Or you could use 0x80 C1 C2 C3 C4, allowing for about
359 85 million (slightly over 2^26) composite characters. */
362 /************************************************************************/
363 /* Basic Emchar functions */
364 /************************************************************************/
366 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
367 string in STR. Returns the number of bytes stored.
368 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Store the encoded form of character C through STR.

   The first group of branches below picks the sequence length from the
   magnitude of C and writes a lead byte (0xc0, 0xe0, 0xf0, 0xf8 or
   0xfc, or'ed with the top bits of C) followed by continuation bytes
   that each carry six bits of C or'ed with 0x80.

   NOTE(review): the lines from BREAKUP_CHAR onward use a leading-byte
   based encoding instead; presumably the two forms are alternatives
   chosen by a preprocessor conditional that is not visible here --
   confirm. */
372 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: 5 bits in the lead byte, 6 in the continuation byte. */
387 else if ( c <= 0x7ff )
389 *p++ = (c >> 6) | 0xc0;
390 *p++ = (c & 0x3f) | 0x80;
/* Three bytes. */
392 else if ( c <= 0xffff )
394 *p++ = (c >> 12) | 0xe0;
395 *p++ = ((c >> 6) & 0x3f) | 0x80;
396 *p++ = (c & 0x3f) | 0x80;
/* Four bytes. */
398 else if ( c <= 0x1fffff )
400 *p++ = (c >> 18) | 0xf0;
401 *p++ = ((c >> 12) & 0x3f) | 0x80;
402 *p++ = ((c >> 6) & 0x3f) | 0x80;
403 *p++ = (c & 0x3f) | 0x80;
/* Five bytes. */
405 else if ( c <= 0x3ffffff )
407 *p++ = (c >> 24) | 0xf8;
408 *p++ = ((c >> 18) & 0x3f) | 0x80;
409 *p++ = ((c >> 12) & 0x3f) | 0x80;
410 *p++ = ((c >> 6) & 0x3f) | 0x80;
411 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything above 0x3ffffff. */
415 *p++ = (c >> 30) | 0xfc;
416 *p++ = ((c >> 24) & 0x3f) | 0x80;
417 *p++ = ((c >> 18) & 0x3f) | 0x80;
418 *p++ = ((c >> 12) & 0x3f) | 0x80;
419 *p++ = ((c >> 6) & 0x3f) | 0x80;
420 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte form: split C into its charset and position codes. */
423 BREAKUP_CHAR (c, charset, c1, c2);
424 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by a prefix byte. */
425 if (LEADING_BYTE_PRIVATE_P (lb))
426 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
428 if (EQ (charset, Vcharset_control_1))
437 /* Return the first character from a Mule-encoded string in STR,
438 assuming it's non-ASCII. Do not call this directly.
439 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.

   The first group of branches classifies the first byte B by
   descending thresholds (0xf8, 0xf0, 0xe0, 0xc0) and then folds LEN
   further bytes into CH, six bits at a time.  How LEN is set in each
   branch is not visible here.

   NOTE(review): the lines from the i0/i1/i2 declaration onward decode
   a leading-byte based form instead; presumably the two forms are
   alternatives chosen by a preprocessor conditional that is not
   visible here -- confirm. */
442 non_ascii_charptr_emchar (const Bufbyte *str)
455 else if ( b >= 0xf8 )
460 else if ( b >= 0xf0 )
465 else if ( b >= 0xe0 )
470 else if ( b >= 0xc0 )
480 for( ; len > 0; len-- )
/* Shift in the low six bits of the current byte. */
483 ch = ( ch << 6 ) | ( b & 0x3f );
487 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the byte after the leading byte, minus 0x20. */
490 if (i0 == LEADING_BYTE_CONTROL_1)
491 return (Emchar) (*++str - 0x20);
493 if (LEADING_BYTE_PREFIX_P (i0))
498 charset = CHARSET_BY_LEADING_BYTE (i0);
/* Only dimension-2 charsets use a second position byte; i2 stays 0
   otherwise. */
499 if (XCHARSET_DIMENSION (charset) == 2)
502 return MAKE_CHAR (charset, i1, i2);
506 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
507 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for character CH.

   CH is split into three fields (f1, f2, f3).  The first group of
   tests works on f2/f3 and resolves the charset from f2; the second
   group works on f1 and resolves the charset from f1.  The condition
   that chooses between the two groups, and the individual return
   statements of the range checks, are not visible in this part of the
   file. */
511 non_ascii_valid_char_p (Emchar ch)
515 /* Must have only lowest 19 bits set */
519 f1 = CHAR_FIELD1 (ch);
520 f2 = CHAR_FIELD2 (ch);
521 f3 = CHAR_FIELD3 (ch);
/* f2 must lie in the official range or in the private range. */
527 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
528 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
529 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* True when f3 is neither 0x20 nor 0x7F and f2 is outside the private
   range. */
534 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
535 f2 <= MAX_CHAR_FIELD2_PRIVATE))
539 NOTE: This takes advantage of the fact that
540 FIELD2_TO_OFFICIAL_LEADING_BYTE and
541 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
543 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
544 if (EQ (charset, Qnil))
/* Result of this path: whether the charset has 96 characters per
   dimension. */
546 return (XCHARSET_CHARS (charset) == 96);
/* f1 must lie in the official range or in the private range. */
552 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
553 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
554 f1 > MAX_CHAR_FIELD1_PRIVATE)
556 if (f2 < 0x20 || f3 < 0x20)
/* With composite characters enabled, a character whose leading byte is
   LEADING_BYTE_COMPOSITE is looked up in the char-to-string hash
   table. */
559 #ifdef ENABLE_COMPOSITE_CHARS
560 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
562 if (UNBOUNDP (Fgethash (make_int (ch),
563 Vcomposite_char_char2string_hash_table,
568 #endif /* ENABLE_COMPOSITE_CHARS */
/* True when neither f2 nor f3 is 0x20 or 0x7F and f1 is outside the
   private range. */
570 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
571 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private f1 values use different offsets to reach the
   leading byte. */
574 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
581 if (EQ (charset, Qnil))
583 return (XCHARSET_CHARS (charset) == 96);
589 /************************************************************************/
590 /* Basic string functions */
591 /************************************************************************/
593 /* Copy the character pointed to by SRC into DST. Do not call this
594 directly. Use the macro charptr_copy_char() instead.
595 Return the number of bytes copied. */
/* Copy one encoded character from SRC to DST.  The byte count comes
   from REP_BYTES_BY_FIRST_BYTE applied to the first byte of SRC; the
   loop then advances both pointers once per byte.  The loop body and
   the return statement are not visible in this part of the file. */
598 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
600 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
602 for (i = bytes; i; i--, dst++, src++)
608 /************************************************************************/
609 /* streams of Emchars */
610 /************************************************************************/
612 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
613 The functions below are not meant to be called directly; use
614 the macros in insdel.h. */
/* Finish reading one character from STREAM when its first byte CH has
   already been read.

   STREAM  stream to read the remaining bytes from.
   CH      first byte of the character.

   Returns the character decoded from the collected bytes with
   charptr_emchar(). */
617 Lstream_get_emchar_1 (Lstream *stream, int ch)
619 Bufbyte str[MAX_EMCHAR_LEN];
620 Bufbyte *strptr = str;
623 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes belong to this
   character. */
625 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
627 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc in the middle of a character is
   only caught by this assertion. */
628 bufpos_checking_assert (c >= 0);
629 *++strptr = (Bufbyte) c;
631 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar() and pass the encoded
   bytes to Lstream_write(), whose result is returned. */
635 Lstream_fput_emchar (Lstream *stream, Emchar ch)
637 Bufbyte str[MAX_EMCHAR_LEN];
638 Bytecount len = set_charptr_emchar (str, ch);
639 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar() and hand the encoded
   bytes to Lstream_unread(). */
643 Lstream_funget_emchar (Lstream *stream, Emchar ch)
645 Bufbyte str[MAX_EMCHAR_LEN];
646 Bytecount len = set_charptr_emchar (str, ch);
647 Lstream_unread (stream, str, len);
651 /************************************************************************/
653 /************************************************************************/
/* Mark method for charset objects: calls mark_object() on the
   short_name, long_name, doc_string, registry, ccl_program,
   decoding_table and mother members of the charset OBJ.  The return
   statement is not visible in this part of the file. */
656 mark_charset (Lisp_Object obj)
658 Lisp_Charset *cs = XCHARSET (obj);
660 mark_object (cs->short_name);
661 mark_object (cs->long_name);
662 mark_object (cs->doc_string);
663 mark_object (cs->registry);
664 mark_object (cs->ccl_program);
666 mark_object (cs->decoding_table);
667 mark_object (cs->mother);
/* Print method for charset objects.

   Writes "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING ..." to
   PRINTCHARFUN, then a formatted summary (dimension, direction,
   columns, graphic, final), then "reg=" followed by the registry, and
   finally the object's uid in hex.

   The error() call below raises "printing unreadable object"; the
   condition guarding it is not visible here (presumably a
   readable-printing mode -- TODO confirm).

   NOTE(review): the summary is formatted with sprintf into BUF, whose
   declaration and size are not visible here. */
673 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
675 Lisp_Charset *cs = XCHARSET (obj);
679 error ("printing unreadable object #<charset %s 0x%x>",
680 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
683 write_c_string ("#<charset ", printcharfun);
684 print_internal (CHARSET_NAME (cs), printcharfun, 0);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
691 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
693 CHARSET_DIMENSION (cs),
694 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
695 CHARSET_COLUMNS (cs),
696 CHARSET_GRAPHIC (cs),
698 write_c_string (buf, printcharfun);
699 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
700 sprintf (buf, " 0x%x>", cs->header.uid);
701 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry per
   Lisp_Object member listed below (name, doc_string, registry,
   short_name, long_name, reverse_direction_charset, ccl_program,
   decoding_table, mother).  The end of the table is not visible in
   this part of the file. */
704 static const struct lrecord_description charset_description[] = {
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Register the "charset" lrecord type with mark_charset as its marker
   and print_charset as its printer; the three following method slots
   are 0.  The remaining macro arguments are not visible in this part
   of the file. */
719 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
720 mark_charset, print_charset, 0, 0, 0,
724 /* Make a new charset. */
725 /* #### SJT Should generic properties be allowed? */
/* Allocate a charset object, fill in its fields from the arguments,
   and register it in the lookup tables.

   Fields copied from arguments: id, name, chars, dimension, columns,
   graphic, final, direction, short_name, long_name, doc, min_code,
   max_code, code_offset, byte_offset, mother, conversion.  The
   registry field is set from REG, a parameter whose declaration is
   not on a visible line.  ccl_program and reverse_direction_charset
   start out as Qnil.

   After initialization the object is entered into
   chlook->charset_by_attributes (indexed by an ISO 2022 type computed
   from DIMENSION and CHARS, and by FINAL), into
   chlook->charset_by_leading_byte (indexed by ID - MIN_LEADING_BYTE),
   and into Vcharset_hash_table under NAME.  Each table slot is
   asserted to be nil before it is written.

   NOTE(review): the DECODING_TABLE argument is not used on any
   visible line; the field is set to Qunbound instead -- confirm that
   this is intended.

   NOTE(review): the three CHARSET_REP_BYTES assignments and the two
   differently-indexed charset_by_attributes accesses look like
   alternatives chosen by conditionals that are not visible here. */
727 make_charset (Charset_ID id, Lisp_Object name,
728 unsigned short chars, unsigned char dimension,
729 unsigned char columns, unsigned char graphic,
730 Bufbyte final, unsigned char direction, Lisp_Object short_name,
731 Lisp_Object long_name, Lisp_Object doc,
733 Lisp_Object decoding_table,
734 Emchar min_code, Emchar max_code,
735 Emchar code_offset, unsigned char byte_offset,
736 Lisp_Object mother, unsigned char conversion)
739 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
743 XSETCHARSET (obj, cs);
745 CHARSET_ID (cs) = id;
746 CHARSET_NAME (cs) = name;
747 CHARSET_SHORT_NAME (cs) = short_name;
748 CHARSET_LONG_NAME (cs) = long_name;
749 CHARSET_CHARS (cs) = chars;
750 CHARSET_DIMENSION (cs) = dimension;
751 CHARSET_DIRECTION (cs) = direction;
752 CHARSET_COLUMNS (cs) = columns;
753 CHARSET_GRAPHIC (cs) = graphic;
754 CHARSET_FINAL (cs) = final;
755 CHARSET_DOC_STRING (cs) = doc;
756 CHARSET_REGISTRY (cs) = reg;
757 CHARSET_CCL_PROGRAM (cs) = Qnil;
758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
760 CHARSET_DECODING_TABLE(cs) = Qunbound;
761 CHARSET_MIN_CODE (cs) = min_code;
762 CHARSET_MAX_CODE (cs) = max_code;
763 CHARSET_CODE_OFFSET (cs) = code_offset;
764 CHARSET_BYTE_OFFSET (cs) = byte_offset;
765 CHARSET_MOTHER (cs) = mother;
766 CHARSET_CONVERSION (cs) = conversion;
/* ASCII is stored in one byte; the other visible assignments add one
   or two bytes to the charset dimension. */
770 if (id == LEADING_BYTE_ASCII)
771 CHARSET_REP_BYTES (cs) = 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
780 /* some charsets do not have final characters. This includes
781 ASCII, Control-1, Composite, and the two faux private
783 unsigned char iso2022_type
784 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
786 if (code_offset == 0)
788 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
789 chlook->charset_by_attributes[iso2022_type][final] = obj;
793 (chlook->charset_by_attributes[iso2022_type][final][direction]));
794 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
798 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
799 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
801 /* Some charsets are "faux" and don't have names or really exist at
802 all except in the leading-byte table. */
804 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION.

   Three counters in chlook are visible: next_allocated_leading_byte
   (limit MAX_LEADING_BYTE_PRIVATE), next_allocated_1_byte_leading_byte
   (limit MAX_LEADING_BYTE_PRIVATE_1) and
   next_allocated_2_byte_leading_byte (limit
   MAX_LEADING_BYTE_PRIVATE_2).  Each is compared with its limit and
   then post-incremented into LB.  The conditions that select a
   counter, and what happens when a limit is exceeded, are not visible
   here.

   The call fragment at the end passes the message "No more character
   sets free for this dimension" together with DIMENSION; the function
   name and the guarding condition are not visible. */
809 get_unallocated_leading_byte (int dimension)
814 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
817 lb = chlook->next_allocated_leading_byte++;
821 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
824 lb = chlook->next_allocated_1_byte_leading_byte++;
828 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
831 lb = chlook->next_allocated_2_byte_leading_byte++;
837 ("No more character sets free for this dimension",
838 make_int (dimension));
844 /* Number of Big5 characters which have the same code in 1st byte. */
/* (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157. */
846 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character defined for CODE_POINT in coded charset CCS.

   The decoding table of CCS is walked one octet of CODE_POINT at a
   time with get_ccs_octet_table(); when the walk ends on a character,
   that character is returned.  With HAVE_CHISE_CLIENT, an entry equal
   to Qunloaded triggers load_char_decoding_entry_maybe().

   Otherwise, when CCS has a mother charset, the code point is
   converted according to XCHARSET_CONVERSION (ccs) and looked up in
   the mother.  For an identical conversion the mother is queried with
   DECODE_CHAR when it is Vcharset_ucs and recursively otherwise.

   The loop around the table walk and the fall-through return value
   are not visible in this part of the file. */
849 decode_defined_char (Lisp_Object ccs, int code_point)
851 int dim = XCHARSET_DIMENSION (ccs);
852 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Select the sub-table for the octet of CODE_POINT at position DIM. */
860 = get_ccs_octet_table (decoding_table, ccs,
861 (code_point >> (dim * 8)) & 255);
863 if (CHARP (decoding_table))
864 return XCHAR (decoding_table);
865 #ifdef HAVE_CHISE_CLIENT
866 if (EQ (decoding_table, Qunloaded))
868 char_id = load_char_decoding_entry_maybe (ccs, code_point);
873 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
875 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
877 if ( EQ (mother, Vcharset_ucs) )
878 return DECODE_CHAR (mother, code_point);
880 return decode_defined_char (mother, code_point);
882 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 code point: both bytes are masked to
   seven bits and rebased from 33. */
885 = (((code_point >> 8) & 0x7F) - 33) * 94
886 + (( code_point & 0x7F) - 33);
/* The divisor is the same expression as BIG5_SAME_ROW. */
887 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
888 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
/* Offsets below 0x3F land at 0x40 and up; the rest land at 0xA1 and
   up (0x3F + 0x62 = 0xA1). */
890 b2 += b2 < 0x3F ? 0x40 : 0x62;
891 return decode_defined_char (mother, (b1 << 8) | b2);
893 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
/* Same as above, but the index starts after the rows for first bytes
   0xA1 up to (not including) 0xC9. */
896 = (((code_point >> 8) & 0x7F) - 33) * 94
897 + (( code_point & 0x7F) - 33)
898 + BIG5_SAME_ROW * (0xC9 - 0xA1);
899 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
900 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
902 b2 += b2 < 0x3F ? 0x40 : 0x62;
903 return decode_defined_char (mother, (b1 << 8) | b2);
/* Compute the character for CODE_POINT in CHARSET by arithmetic alone.

   Three strategies are visible:

   1. CHARSET has a positive max code and a mother charset: CODE_POINT
      is converted according to XCHARSET_CONVERSION (charset) (94x60,
      94x94x60, Big5 part 1, Big5 part 2), the charset's code offset is
      added, and the result is decoded in the mother.
   2. CHARSET has a positive max code and no mother: a character id is
      computed from the byte offset, the number of characters per
      dimension and the code offset, then compared with the charset's
      min and max codes.
   3. Otherwise, when the final byte is at least '0', the result is an
      offset from MIN_CHAR_94x94 or MIN_CHAR_96x96 (the dimension-1
      bases are not on visible lines) computed from the final byte and
      the code point bytes.

   Return values of the failing paths, and the row tests that precede
   each visible "else if (row < ...)", are not visible in this part of
   the file. */
910 decode_builtin_char (Lisp_Object charset, int code_point)
912 Lisp_Object mother = XCHARSET_MOTHER (charset);
915 if ( XCHARSET_MAX_CODE (charset) > 0 )
917 if ( CHARSETP (mother) )
919 int code = code_point;
921 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
923 int row = code_point >> 8;
924 int cell = code_point & 255;
/* Two visible row bands, [.., 16+32+30) and [.., 18+32+60), map to a
   linear code of 94 cells per row. */
928 else if (row < 16 + 32 + 30)
929 code = (row - (16 + 32)) * 94 + cell - 33;
930 else if (row < 18 + 32 + 30)
932 else if (row < 18 + 32 + 60)
933 code = (row - (18 + 32)) * 94 + cell - 33;
935 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
937 int plane = code_point >> 16;
938 int row = (code_point >> 8) & 255;
939 int cell = code_point & 255;
/* As above, with 94 * 60 codes per plane. */
943 else if (row < 16 + 32 + 30)
945 = (plane - 33) * 94 * 60
946 + (row - (16 + 32)) * 94
948 else if (row < 18 + 32 + 30)
950 else if (row < 18 + 32 + 60)
952 = (plane - 33) * 94 * 60
953 + (row - (18 + 32)) * 94
956 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 code point, turned back into two Big5
   bytes; the divisor is the same expression as BIG5_SAME_ROW. */
959 = (((code_point >> 8) & 0x7F) - 33) * 94
960 + (( code_point & 0x7F) - 33);
961 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
962 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
964 b2 += b2 < 0x3F ? 0x40 : 0x62;
965 code = (b1 << 8) | b2;
967 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
/* Same, with the index starting after the rows for first bytes 0xA1
   up to (not including) 0xC9. */
970 = (((code_point >> 8) & 0x7F) - 33) * 94
971 + (( code_point & 0x7F) - 33)
972 + BIG5_SAME_ROW * (0xC9 - 0xA1);
973 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
974 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
976 b2 += b2 < 0x3F ? 0x40 : 0x62;
977 code = (b1 << 8) | b2;
/* Decode the converted code in the mother charset. */
980 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* No mother: derive the character id directly from the code point. */
985 = (XCHARSET_DIMENSION (charset) == 1
987 code_point - XCHARSET_BYTE_OFFSET (charset)
989 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
990 * XCHARSET_CHARS (charset)
991 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
992 + XCHARSET_CODE_OFFSET (charset);
/* Test whether the id lies outside the charset's [min, max] range. */
993 if ((cid < XCHARSET_MIN_CODE (charset))
994 || (XCHARSET_MAX_CODE (charset) < cid))
999 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1001 if (XCHARSET_DIMENSION (charset) == 1)
1003 switch (XCHARSET_CHARS (charset))
1007 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1010 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1018 switch (XCHARSET_CHARS (charset))
1021 return MIN_CHAR_94x94
1022 + (final - '0') * 94 * 94
1023 + (((code_point >> 8) & 0x7F) - 33) * 94
1024 + ((code_point & 0x7F) - 33);
1026 return MIN_CHAR_96x96
1027 + (final - '0') * 96 * 96
1028 + (((code_point >> 8) & 0x7F) - 32) * 96
1029 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.

   CHARSET       coded charset to encode into.
   CH            character to encode.
   DEFINED_ONLY  tested before the range-based computation; when a
                 mother charset exists and the final byte is at least
                 '0', the mother is always queried with 1 here.

   Order of attempts visible in the code:
   1. the charset's encoding table, when it is a char table holding an
      integer for CH;
   2. the mother charset (recursively), followed by a conversion of
      the mother's code minus the code offset according to
      XCHARSET_CONVERSION (charset);
   3. for charsets with a final byte of at least '0' and min code 0,
      an offset from MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 /
      MIN_CHAR_96x96.

   The failure return value and several intermediate statements are
   not visible in this part of the file.

   NOTE(review): an unknown conversion is reported with printf() and a
   message that has no trailing newline. */
1041 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1043 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1046 if ( CHAR_TABLEP (encoding_table)
1047 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1052 Lisp_Object mother = XCHARSET_MOTHER (charset);
1053 int min = XCHARSET_MIN_CODE (charset);
1054 int max = XCHARSET_MAX_CODE (charset);
1057 if ( CHARSETP (mother) )
1059 if (XCHARSET_FINAL (charset) >= '0')
1060 code = charset_code_point (mother, ch, 1);
1062 code = charset_code_point (mother, ch, defined_only);
1064 else if (defined_only)
1066 else if ( ((max == 0) && CHARSETP (mother)
1067 && (XCHARSET_FINAL (charset) == 0))
1068 || ((min <= ch) && (ch <= max)) )
1070 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1071 || ((min <= code) && (code <= max)) )
/* D is the position of the code relative to the charset's code
   offset; every conversion below works from it. */
1073 int d = code - XCHARSET_CODE_OFFSET (charset);
1075 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1077 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1084 int cell = d % 94 + 33;
1090 return (row << 8) | cell;
1092 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Split D into the two Big5 bytes and compute their linear index. */
1094 int B1 = d >> 8, B2 = d & 0xFF;
1096 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1097 - (B2 < 0x7F ? 0x40 : 0x62);
/* Re-express the index as a 94x94 code point based at 33. */
1101 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1104 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1106 int B1 = d >> 8, B2 = d & 0xFF;
1108 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1109 - (B2 < 0x7F ? 0x40 : 0x62);
/* Part 2 indices are counted from first byte 0xC9. */
1113 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1114 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1117 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1118 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1120 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1123 int plane = d / (94 * 60) + 33;
1124 int row = (d % (94 * 60)) / 94;
1125 int cell = d % 94 + 33;
1131 return (plane << 16) | (row << 8) | cell;
1133 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1135 ( (d / (94 * 94) + 33) << 16)
1136 | ((d / 94 % 94 + 33) << 8)
1138 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1140 ( (d / (96 * 96) + 32) << 16)
1141 | ((d / 96 % 96 + 32) << 8)
1143 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1145 ( (d / (94 * 94 * 94) + 33) << 24)
1146 | ((d / (94 * 94) % 94 + 33) << 16)
1147 | ((d / 94 % 94 + 33) << 8)
1149 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1151 ( (d / (96 * 96 * 96) + 32) << 24)
1152 | ((d / (96 * 96) % 96 + 32) << 16)
1153 | ((d / 96 % 96 + 32) << 8)
1157 printf ("Unknown CCS-conversion %d is specified!",
1158 XCHARSET_CONVERSION (charset));
/* No usable table or mother: fall back to the ranges addressed through
   the final byte. */
1162 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1163 ( XCHARSET_MIN_CODE (charset) == 0 )
1165 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1166 (XCHARSET_CODE_OFFSET (charset)
1167 == XCHARSET_MIN_CODE (charset))
1172 if (XCHARSET_DIMENSION (charset) == 1)
1174 if (XCHARSET_CHARS (charset) == 94)
1176 if (((d = ch - (MIN_CHAR_94
1177 + (XCHARSET_FINAL (charset) - '0') * 94))
1182 else if (XCHARSET_CHARS (charset) == 96)
1184 if (((d = ch - (MIN_CHAR_96
1185 + (XCHARSET_FINAL (charset) - '0') * 96))
1193 else if (XCHARSET_DIMENSION (charset) == 2)
1195 if (XCHARSET_CHARS (charset) == 94)
1197 if (((d = ch - (MIN_CHAR_94x94
1199 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1202 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1204 else if (XCHARSET_CHARS (charset) == 96)
1206 if (((d = ch - (MIN_CHAR_96x96
1208 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1211 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Pick a built-in charset for character C and compute C's code point
   in it.

   C        character to encode.
   CHARSET  out-parameter; receives the chosen charset.

   The branches test C against successive upper bounds.  For the
   94 / 96 / 94x94 / 96x96 ranges the charset is looked up with
   CHARSET_BY_ATTRIBUTES using a final byte derived from C's offset in
   the range; when no such charset exists, *CHARSET is set to
   Vcharset_ucs.  Several return statements and some branch conditions
   are not visible in this part of the file.

   NOTE(review): the half-width katakana branch returns the result of
   list2(), unlike the integer expressions returned elsewhere in this
   function; it is presumably compiled out by a conditional that is
   not visible here -- confirm. */
1222 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1224 if (c <= MAX_CHAR_BASIC_LATIN)
1226 *charset = Vcharset_ascii;
1231 *charset = Vcharset_control_1;
1236 *charset = Vcharset_latin_iso8859_1;
1240 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1242 *charset = Vcharset_hebrew_iso8859_8;
1243 return c - MIN_CHAR_HEBREW + 0x20;
1246 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1248 *charset = Vcharset_thai_tis620;
1249 return c - MIN_CHAR_THAI + 0x20;
1252 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1253 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1255 return list2 (Vcharset_katakana_jisx0201,
1256 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1259 else if (c <= MAX_CHAR_BMP)
1261 *charset = Vcharset_ucs_bmp;
1264 else if (c <= MAX_CHAR_SMP)
1266 *charset = Vcharset_ucs_smp;
1267 return c - MIN_CHAR_SMP;
1269 else if (c <= MAX_CHAR_SIP)
1271 *charset = Vcharset_ucs_sip;
1272 return c - MIN_CHAR_SIP;
1274 else if (c < MIN_CHAR_94)
1276 *charset = Vcharset_ucs;
1279 else if (c <= MAX_CHAR_94)
/* Final byte = '0' + index of the 94-character block containing C. */
1281 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1282 ((c - MIN_CHAR_94) / 94) + '0',
1283 CHARSET_LEFT_TO_RIGHT);
1284 if (!NILP (*charset))
1285 return ((c - MIN_CHAR_94) % 94) + 33;
1288 *charset = Vcharset_ucs;
1292 else if (c <= MAX_CHAR_96)
/* Final byte = '0' + index of the 96-character block containing C. */
1294 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1295 ((c - MIN_CHAR_96) / 96) + '0',
1296 CHARSET_LEFT_TO_RIGHT);
1297 if (!NILP (*charset))
1298 return ((c - MIN_CHAR_96) % 96) + 32;
1301 *charset = Vcharset_ucs;
1305 else if (c <= MAX_CHAR_94x94)
1308 = CHARSET_BY_ATTRIBUTES (94, 2,
1309 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1310 CHARSET_LEFT_TO_RIGHT);
1311 if (!NILP (*charset))
/* High byte is the row, low byte the cell, both based at 33. */
1312 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1313 | (((c - MIN_CHAR_94x94) % 94) + 33);
1316 *charset = Vcharset_ucs;
1320 else if (c <= MAX_CHAR_96x96)
1323 = CHARSET_BY_ATTRIBUTES (96, 2,
1324 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1325 CHARSET_LEFT_TO_RIGHT);
1326 if (!NILP (*charset))
/* High byte is the row, low byte the cell, both based at 32. */
1327 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1328 | (((c - MIN_CHAR_96x96) % 96) + 32);
1331 *charset = Vcharset_ucs;
1337 *charset = Vcharset_ucs;
/* Global Lisp object; it is only declared in this part of the file.
   Presumably a list of coded charsets in priority order, judging by
   the name -- TODO confirm against its users. */
1342 Lisp_Object Vdefault_coded_charset_priority_list;
1346 /************************************************************************/
1347 /* Basic charset Lisp functions */
1348 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): returns Qt when
   OBJECT satisfies CHARSETP and Qnil otherwise. */
1350 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1351 Return non-nil if OBJECT is a charset.
1355 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument).  A charset
   object is returned unchanged.  Anything else must pass CHECK_SYMBOL
   and is then looked up in Vcharset_hash_table, with Qnil as the
   result when there is no entry. */
1358 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1359 Retrieve the charset of the given name.
1360 If CHARSET-OR-NAME is a charset object, it is simply returned.
1361 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1362 nil is returned. Otherwise the associated charset object is returned.
1366 if (CHARSETP (charset_or_name))
1367 return charset_or_name;
1369 CHECK_SYMBOL (charset_or_name);
1370 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument).  Delegates to
   Ffind_charset and signals "No such charset" with NAME; the test that
   guards the signal and the final return are not visible in this part
   of the file. */
1373 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1374 Retrieve the charset of the given name.
1375 Same as `find-charset' except an error is signalled if there is no such
1376 charset instead of returning nil.
1380 Lisp_Object charset = Ffind_charset (name);
1383 signal_simple_error ("No such charset", name);
1387 /* We store the charsets in hash tables with the names as the key and the
1388 actual charset object as the value. Occasionally we need to use them
1389 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash() by Fcharset_list: it carries a
   pointer to the list being built. */
1390 struct charset_list_closure
1392 Lisp_Object *charset_list;
/* Hash-table mapper: conses KEY (the hash key, not the name stored in
   VALUE) onto the front of the list referenced by the closure.  The
   return statement is not visible in this part of the file. */
1396 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1397 void *charset_list_closure)
1399 /* This function can GC */
1400 struct charset_list_closure *chcl =
1401 (struct charset_list_closure*) charset_list_closure;
1402 Lisp_Object *charset_list = chcl->charset_list;
1404 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list, protects it with GCPRO1, and lets add_charset_to_list_mapper
   cons every key of Vcharset_hash_table onto it via elisp_maphash().
   The resulting list is returned.  The matching un-protect call is not
   on a visible line. */
1408 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1409 Return a list of the names of all defined charsets.
1413 Lisp_Object charset_list = Qnil;
1414 struct gcpro gcpro1;
1415 struct charset_list_closure charset_list_closure;
1417 GCPRO1 (charset_list);
1418 charset_list_closure.charset_list = &charset_list;
1419 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1420 &charset_list_closure);
1423 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument): resolves
   CHARSET with Fget_charset and returns the XCHARSET_NAME of the
   result. */
1426 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1427 Return the name of charset CHARSET.
1431 return XCHARSET_NAME (Fget_charset (charset));
1434 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.

   Outline of the visible logic:
   - NAME must be a symbol and DOC-STRING, when non-nil, a string; a NAME
     that Ffind_charset already knows is rejected with
     "Cannot redefine existing charset".
   - PROPS is walked with EXTERNAL_PROPERTY_LIST_LOOP_3.  Each recognized
     keyword is validated and copied into a local; any other keyword
     signals "Unrecognized property".
       short-name, long-name, registry : must be strings
       iso-ir      : ID is set to the negated integer value
       dimension   : values below 1 are rejected (upper bound not shown)
       chars       : 94 or 96; 128 and 256 appear as further alternatives
       columns     : 1 or 2
       direction   : l2r or r2l
       final       : a character between '0' and '~'
       mother      : resolved through Fget_charset
       min-code, max-code, code-offset : read with XUINT
       conversion  : 94x60, 94x94x60, big5-1 or big5-2
       ccl-program : must be accepted by setup_ccl_program
   - After the loop: a final byte above 0x5F is refused for dimension 2,
     and creation fails if CHARSET_BY_ATTRIBUTES already knows a charset
     for this CHARS/DIMENSION/FINAL triple in either direction.
   - Defaults: DOC-STRING and registry fall back to "", short-name to the
     print name of NAME, long-name to DOC-STRING, and columns to the
     dimension.  An id is obtained from get_unallocated_leading_byte.
   - The charset is built by make_charset; a ccl-program, if one was
     given, is stored into the new charset afterwards.

   NOTE(review): the docstring advertises 'code-min and 'code-max, but
   the keywords compared here are Qmin_code and Qmax_code, which
   syms_of_mule_charset defines as "min-code" and "max-code".  The
   docstring also does not mention 'iso-ir.
   NOTE(review): several guards are not shown here (preprocessor
   conditionals, the test in front of the 'graphic error, the test for
   a missing 'final, and the conditions around the id allocation and
   the columns default), so exactly when those statements run should
   be confirmed.  */
1435 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1436 Define a new character set.
1437 This function is for use with Mule support.
1438 NAME is a symbol, the name by which the character set is normally referred.
1439 DOC-STRING is a string describing the character set.
1440 PROPS is a property list, describing the specific nature of the
1441 character set. Recognized properties are:
1443 'short-name Short version of the charset name (ex: Latin-1)
1444 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1445 'registry A regular expression matching the font registry field for
1447 'dimension Number of octets used to index a character in this charset.
1448 Either 1 or 2. Defaults to 1.
1449 If UTF-2000 feature is enabled, 3 or 4 are also available.
1450 'columns Number of columns used to display a character in this charset.
1451 Only used in TTY mode. (Under X, the actual width of a
1452 character can be derived from the font used to display the
1453 characters.) If unspecified, defaults to the dimension
1454 (this is almost always the correct value).
1455 'chars Number of characters in each dimension (94 or 96).
1456 Defaults to 94. Note that if the dimension is 2, the
1457 character set thus described is 94x94 or 96x96.
1458 If UTF-2000 feature is enabled, 128 or 256 are also available.
1459 'final Final byte of ISO 2022 escape sequence. Must be
1460 supplied. Each combination of (DIMENSION, CHARS) defines a
1461 separate namespace for final bytes. Note that ISO
1462 2022 restricts the final byte to the range
1463 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1464 dimension == 2. Note also that final bytes in the range
1465 0x30 - 0x3F are reserved for user-defined (not official)
1467 'graphic 0 (use left half of font on output) or 1 (use right half
1468 of font on output). Defaults to 0. For example, for
1469 a font whose registry is ISO8859-1, the left half
1470 (octets 0x20 - 0x7F) is the `ascii' character set, while
1471 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1472 character set. With 'graphic set to 0, the octets
1473 will have their high bit cleared; with it set to 1,
1474 the octets will have their high bit set.
1475 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1477 'ccl-program A compiled CCL program used to convert a character in
1478 this charset into an index into the font. This is in
1479 addition to the 'graphic property. The CCL program
1480 is passed the octets of the character, with the high
1481 bit cleared and set depending upon whether the value
1482 of the 'graphic property is 0 or 1.
1483 'mother [UTF-2000 only] Base coded-charset.
1484 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1485 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1486 'code-offset [UTF-2000 only] Offset for a code-point of a base
1488 'conversion [UTF-2000 only] Conversion for a code-point of a base
1489 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1491 (name, doc_string, props))
1493 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1494 int direction = CHARSET_LEFT_TO_RIGHT;
1495 Lisp_Object registry = Qnil;
1496 Lisp_Object charset;
1497 Lisp_Object ccl_program = Qnil;
1498 Lisp_Object short_name = Qnil, long_name = Qnil;
1499 Lisp_Object mother = Qnil;
1500 int min_code = 0, max_code = 0, code_offset = 0;
1501 int byte_offset = -1;
1504 CHECK_SYMBOL (name);
1505 if (!NILP (doc_string))
1506 CHECK_STRING (doc_string);
1508 charset = Ffind_charset (name);
1509 if (!NILP (charset))
1510 signal_simple_error ("Cannot redefine existing charset", name);
1513 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1515 if (EQ (keyword, Qshort_name))
1517 CHECK_STRING (value);
1521 else if (EQ (keyword, Qlong_name))
1523 CHECK_STRING (value);
1527 else if (EQ (keyword, Qiso_ir))
1531 id = - XINT (value);
1535 else if (EQ (keyword, Qdimension))
1538 dimension = XINT (value);
1539 if (dimension < 1 ||
1546 signal_simple_error ("Invalid value for 'dimension", value);
1549 else if (EQ (keyword, Qchars))
1552 chars = XINT (value);
1553 if (chars != 94 && chars != 96
1555 && chars != 128 && chars != 256
1558 signal_simple_error ("Invalid value for 'chars", value);
1561 else if (EQ (keyword, Qcolumns))
1564 columns = XINT (value);
1565 if (columns != 1 && columns != 2)
1566 signal_simple_error ("Invalid value for 'columns", value);
1569 else if (EQ (keyword, Qgraphic))
1572 graphic = XINT (value);
1580 signal_simple_error ("Invalid value for 'graphic", value);
1583 else if (EQ (keyword, Qregistry))
1585 CHECK_STRING (value);
1589 else if (EQ (keyword, Qdirection))
1591 if (EQ (value, Ql2r))
1592 direction = CHARSET_LEFT_TO_RIGHT;
1593 else if (EQ (value, Qr2l))
1594 direction = CHARSET_RIGHT_TO_LEFT;
1596 signal_simple_error ("Invalid value for 'direction", value);
1599 else if (EQ (keyword, Qfinal))
1601 CHECK_CHAR_COERCE_INT (value);
1602 final = XCHAR (value);
1603 if (final < '0' || final > '~')
1604 signal_simple_error ("Invalid value for 'final", value);
1608 else if (EQ (keyword, Qmother))
1610 mother = Fget_charset (value);
1613 else if (EQ (keyword, Qmin_code))
1616 min_code = XUINT (value);
1619 else if (EQ (keyword, Qmax_code))
1622 max_code = XUINT (value);
1625 else if (EQ (keyword, Qcode_offset))
1628 code_offset = XUINT (value);
1631 else if (EQ (keyword, Qconversion))
1633 if (EQ (value, Q94x60))
1634 conversion = CONVERSION_94x60;
1635 else if (EQ (value, Q94x94x60))
1636 conversion = CONVERSION_94x94x60;
1637 else if (EQ (value, Qbig5_1))
1638 conversion = CONVERSION_BIG5_1;
1639 else if (EQ (value, Qbig5_2))
1640 conversion = CONVERSION_BIG5_2;
1642 signal_simple_error ("Unrecognized conversion", value);
1646 else if (EQ (keyword, Qccl_program))
1648 struct ccl_program test_ccl;
1650 if (setup_ccl_program (&test_ccl, value) < 0)
1651 signal_simple_error ("Invalid value for 'ccl-program", value);
1652 ccl_program = value;
1656 signal_simple_error ("Unrecognized property", keyword);
1662 error ("'final must be specified");
1664 if (dimension == 2 && final > 0x5F)
1666 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1669 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1670 CHARSET_LEFT_TO_RIGHT)) ||
1671 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1672 CHARSET_RIGHT_TO_LEFT)))
1674 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1677 id = get_unallocated_leading_byte (dimension);
1679 if (NILP (doc_string))
1680 doc_string = build_string ("");
1682 if (NILP (registry))
1683 registry = build_string ("");
1685 if (NILP (short_name))
1686 XSETSTRING (short_name, XSYMBOL (name)->name);
1688 if (NILP (long_name))
1689 long_name = doc_string;
1692 columns = dimension;
1694 if (byte_offset < 0)
1698 else if (chars == 96)
1704 charset = make_charset (id, name, chars, dimension, columns, graphic,
1705 final, direction, short_name, long_name,
1706 doc_string, registry,
1707 Qnil, min_code, max_code, code_offset, byte_offset,
1708 mother, conversion);
1709 if (!NILP (ccl_program))
1710 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Create the opposite-direction twin of CHARSET under NEW-NAME.

   Visible steps:
   - CHARSET is resolved with Fget_charset; a charset that already has a
     reverse-direction partner is refused.
   - NEW-NAME must be a symbol that does not yet name a charset.
   - chars, dimension, columns, graphic, final, doc string, short and
     long name and registry are copied from the original; the direction
     is flipped (right-to-left unless the original already is
     right-to-left); a fresh id comes from get_unallocated_leading_byte.
   - After make_charset, each charset's reverse-direction slot is pointed
     at the other one.

   NOTE(review): two alternative tails for the make_charset argument
   list are visible (one copying the decoding table, code range, offsets
   and conversion from the original, one passing Qnil and zeros); they
   are presumably selected by a preprocessor conditional that is not
   shown here.  */
1714 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1716 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1717 NEW-NAME is the name of the new charset. Return the new charset.
1719 (charset, new_name))
1721 Lisp_Object new_charset = Qnil;
1722 int id, chars, dimension, columns, graphic, final;
1724 Lisp_Object registry, doc_string, short_name, long_name;
1727 charset = Fget_charset (charset);
1728 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1729 signal_simple_error ("Charset already has reverse-direction charset",
1732 CHECK_SYMBOL (new_name);
1733 if (!NILP (Ffind_charset (new_name)))
1734 signal_simple_error ("Cannot redefine existing charset", new_name);
1736 cs = XCHARSET (charset);
1738 chars = CHARSET_CHARS (cs);
1739 dimension = CHARSET_DIMENSION (cs);
1740 columns = CHARSET_COLUMNS (cs);
1741 id = get_unallocated_leading_byte (dimension);
1743 graphic = CHARSET_GRAPHIC (cs);
1744 final = CHARSET_FINAL (cs);
1745 direction = CHARSET_RIGHT_TO_LEFT;
1746 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1747 direction = CHARSET_LEFT_TO_RIGHT;
1748 doc_string = CHARSET_DOC_STRING (cs);
1749 short_name = CHARSET_SHORT_NAME (cs);
1750 long_name = CHARSET_LONG_NAME (cs);
1751 registry = CHARSET_REGISTRY (cs);
1753 new_charset = make_charset (id, new_name, chars, dimension, columns,
1754 graphic, final, direction, short_name, long_name,
1755 doc_string, registry,
1757 CHARSET_DECODING_TABLE(cs),
1758 CHARSET_MIN_CODE(cs),
1759 CHARSET_MAX_CODE(cs),
1760 CHARSET_CODE_OFFSET(cs),
1761 CHARSET_BYTE_OFFSET(cs),
1763 CHARSET_CONVERSION (cs)
1765 Qnil, 0, 0, 0, 0, Qnil, 0
1769 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1770 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Register ALIAS, which must be a symbol, as an additional key for
   CHARSET in Vcharset_hash_table.  CHARSET is resolved with Fget_charset
   first, so the value stored is the charset object itself.  The value
   returned is whatever Fputhash returns.  */
1775 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1776 Define symbol ALIAS as an alias for CHARSET.
1780 CHECK_SYMBOL (alias);
1781 charset = Fget_charset (charset);
1782 return Fputhash (alias, charset, Vcharset_hash_table);
1785 /* #### Reverse direction charsets not yet implemented. */
/* Resolve CHARSET with Fget_charset and return the content of its
   reverse-direction slot unchanged.
   NOTE(review): syms_of_mule_charset contains no active DEFSUBR for this
   function (only a commented-out DEFSUBR (Freverse_direction_charset)),
   so it does not appear to be registered as a Lisp subr.  */
1787 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1789 Return the reverse-direction charset parallel to CHARSET, if any.
1790 This is the charset with the same properties (in particular, the same
1791 dimension, number of characters per dimension, and final byte) as
1792 CHARSET but whose characters are displayed in the opposite direction.
1796 charset = Fget_charset (charset);
1797 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Look a charset up by its ISO 2022 attributes.

   Argument checks visible below:
   - DIMENSION must be an integer, 1 or 2.
   - CHARS must be 94 or 96.
   - FINAL must be a character between '0' and '~', and no greater than
     0x5F when DIMENSION is 2.
   - DIRECTION must be l2r, r2l or nil.
   The lookup uses CHARSET_BY_ATTRIBUTES, either with the requested
   direction or, per the docstring, trying left-to-right before
   right-to-left when DIRECTION is omitted.

   NOTE(review): the visible return statement yields XCHARSET_NAME (obj),
   i.e. the charset's name, whereas the docstring says "Return a
   charset".  What is returned when nothing matches is not shown here.  */
1801 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1802 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1803 If DIRECTION is omitted, both directions will be checked (left-to-right
1804 will be returned if character sets exist for both directions).
1806 (dimension, chars, final, direction))
1808 int dm, ch, fi, di = -1;
1809 Lisp_Object obj = Qnil;
1811 CHECK_INT (dimension);
1812 dm = XINT (dimension);
1813 if (dm < 1 || dm > 2)
1814 signal_simple_error ("Invalid value for DIMENSION", dimension);
1818 if (ch != 94 && ch != 96)
1819 signal_simple_error ("Invalid value for CHARS", chars);
1821 CHECK_CHAR_COERCE_INT (final);
1823 if (fi < '0' || fi > '~')
1824 signal_simple_error ("Invalid value for FINAL", final);
1826 if (EQ (direction, Ql2r))
1827 di = CHARSET_LEFT_TO_RIGHT;
1828 else if (EQ (direction, Qr2l))
1829 di = CHARSET_RIGHT_TO_LEFT;
1830 else if (!NILP (direction))
1831 signal_simple_error ("Invalid value for DIRECTION", direction);
1833 if (dm == 2 && fi > 0x5F)
1835 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1839 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1841 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1844 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1847 return XCHARSET_NAME (obj);
/* Resolve CHARSET with Fget_charset and return its short-name slot.  */
1851 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1852 Return short name of CHARSET.
1856 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its long-name slot.  */
1859 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1860 Return long name of CHARSET.
1864 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its doc-string slot,
   which is what this primitive calls the "description".  */
1867 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1868 Return description of CHARSET.
1872 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolve CHARSET with Fget_charset and return its dimension as a Lisp
   integer (make_int).  */
1875 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1876 Return dimension of CHARSET.
1880 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Property accessor behind `charset-property'.  CHARSET is resolved with
   Fget_charset and PROP must be a symbol; PROP is then compared against
   the known property symbols one after another and the matching slot is
   returned, with C integers wrapped by make_int.

   Special cases visible below:
   - 'final yields nil when the stored final byte is 0, otherwise the
     byte as a character.
   - 'direction is mapped back to the symbol l2r or r2l.
   - 'reverse-direction-charset yields the name of the linked charset
     when the slot holds a charset, and the raw slot value otherwise.
   Any other symbol signals "Unrecognized charset property name".

   NOTE(review): no branch for 'code-offset, 'conversion or 'iso-ir is
   visible, although the docstring promises every property listed in
   `make-charset'.  */
1883 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1884 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1885 Recognized properties are those listed in `make-charset', as well as
1886 'name and 'doc-string.
1892 charset = Fget_charset (charset);
1893 cs = XCHARSET (charset);
1895 CHECK_SYMBOL (prop);
1896 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1897 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1898 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1899 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1900 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1901 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1902 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1903 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1904 Qnil : make_char (CHARSET_FINAL (cs));
1905 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1906 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1907 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1908 if (EQ (prop, Qdirection))
1909 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1910 if (EQ (prop, Qreverse_direction_charset))
1912 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1913 /* #### Is this translation OK? If so, error checking sufficient? */
1914 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1917 if (EQ (prop, Qmother))
1918 return CHARSET_MOTHER (cs);
1919 if (EQ (prop, Qmin_code))
1920 return make_int (CHARSET_MIN_CODE (cs));
1921 if (EQ (prop, Qmax_code))
1922 return make_int (CHARSET_MAX_CODE (cs));
1924 signal_simple_error ("Unrecognized charset property name", prop);
1925 return Qnil; /* not reached */
/* Resolve CHARSET with Fget_charset and return, as a Lisp integer, the
   value read by XCHARSET_LEADING_BYTE; that value is what this
   primitive calls the charset identification number.  */
1928 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1929 Return charset identification number of CHARSET.
1933 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1936 /* #### We need to figure out which properties we really want to
/* Replace the ccl-program slot of CHARSET.  CHARSET is resolved with
   Fget_charset; CCL-PROGRAM is first tried out with setup_ccl_program
   on a scratch struct ccl_program, and a negative result signals
   "Invalid ccl-program" so that the slot is left untouched in that case.
   NOTE(review): the value returned to Lisp is not shown here.  */
1939 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1940 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1942 (charset, ccl_program))
1944 struct ccl_program test_ccl;
1946 charset = Fget_charset (charset);
1947 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1948 signal_simple_error ("Invalid ccl-program", ccl_program);
1949 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the per-device font cache that belongs to CHARSET.  Called by
   Fset_charset_registry after the registry of CHARSET has changed.  */
1954 invalidate_charset_font_caches (Lisp_Object charset)
1956 /* Invalidate font cache entries for charset on all devices. */
1957 Lisp_Object devcons, concons, hash_table;
1958 DEVICE_LOOP_NO_BREAK (devcons, concons)
1960 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the not-found marker, so only devices that actually hold
   a cache table for this charset are touched.  */
1961 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1962 if (!UNBOUNDP (hash_table))
1963 Fclrhash (hash_table);
/* Change the font registry of CHARSET.  CHARSET is resolved with
   Fget_charset and REGISTRY must be a string.  After the slot has been
   updated, the font caches for this charset are emptied
   (invalidate_charset_font_caches) and face_property_was_changed is
   called for the font property of Vdefault_face with the global locale.
   NOTE(review): the value returned to Lisp is not shown here.  */
1967 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1968 Set the 'registry property of CHARSET to REGISTRY.
1970 (charset, registry))
1972 charset = Fget_charset (charset);
1973 CHECK_STRING (registry);
1974 XCHARSET_REGISTRY (charset) = registry;
1975 invalidate_charset_font_caches (charset);
1976 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Resolve CHARSET with Fget_charset and return its decoding-table slot;
   that slot is what this primitive calls the mapping table.  */
1981 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1982 Return mapping-table of CHARSET.
1986 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   Visible steps:
   - CHARSET is resolved with Fget_charset.
   - Two assignments reset the charset's decoding table to Qnil.  A
     vector TABLE is first vetted by decoding_table_check_elements, whose
     failures are reported as "Too big table", "Invalid element is found"
     or "Something wrong".  A TABLE that fits neither handled case raises
     wrong-type-argument with the tag "vector-or-nil-p".
   - The table is then walked according to the charset's dimension.  Each
     entry receives, through Fput_char_attribute, an attribute named
     after the charset whose value is its code point: the index plus
     CHARSET_BYTE_OFFSET, with the outer index shifted left by 8 bits for
     entries found inside nested vectors.

   NOTE(review): the conditions selecting among these branches, the
   per-element tests inside the loops and the return value are not shown
   here -- confirm before relying on the details.  */
1989 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1990 Set mapping-table of CHARSET to TABLE.
1994 struct Lisp_Charset *cs;
1998 charset = Fget_charset (charset);
1999 cs = XCHARSET (charset);
2003 CHARSET_DECODING_TABLE(cs) = Qnil;
2006 else if (VECTORP (table))
2008 int ccs_len = CHARSET_BYTE_SIZE (cs);
2009 int ret = decoding_table_check_elements (table,
2010 CHARSET_DIMENSION (cs),
2015 signal_simple_error ("Too big table", table);
2017 signal_simple_error ("Invalid element is found", table);
2019 signal_simple_error ("Something wrong", table);
2021 CHARSET_DECODING_TABLE(cs) = Qnil;
2024 signal_error (Qwrong_type_argument,
2025 list2 (build_translated_string ("vector-or-nil-p"),
2028 byte_offset = CHARSET_BYTE_OFFSET (cs);
2029 switch (CHARSET_DIMENSION (cs))
2032 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2034 Lisp_Object c = XVECTOR_DATA(table)[i];
2037 Fput_char_attribute (c, XCHARSET_NAME (charset),
2038 make_int (i + byte_offset));
2042 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2044 Lisp_Object v = XVECTOR_DATA(table)[i];
2050 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2052 Lisp_Object c = XVECTOR_DATA(v)[j];
2056 (c, XCHARSET_NAME (charset),
2057 make_int ( ( (i + byte_offset) << 8 )
2063 Fput_char_attribute (v, XCHARSET_NAME (charset),
2064 make_int (i + byte_offset));
2071 #ifdef HAVE_CHISE_CLIENT
/* Dump the decoding table of CHARSET into its database file.

   The file name comes from char_attribute_system_db_file (charset name,
   Qsystem_char_id, 1) and the database is opened with mode "w+".
   Depending on the charset's dimension, one to four nested loops run
   every octet from CHARSET_BYTE_OFFSET up to, but excluding, that offset
   plus CHARSET_BYTE_SIZE, descending one table level per octet with
   get_ccs_octet_table.  For each cell reached, Fput_database stores the
   printed code point as key (higher octets shifted left: group << 24,
   plane << 16, row << 8) and the printed character as value.  The
   result of Fclose_database is returned.

   NOTE(review): the tests that decide whether a sub-table is descended
   into and whether a cell is written are not shown here.  */
2072 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2073 Save mapping-table of CHARSET.
2077 struct Lisp_Charset *cs;
2078 int byte_min, byte_max;
2080 Lisp_Object db_file;
2082 charset = Fget_charset (charset);
2083 cs = XCHARSET (charset);
2085 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2086 Qsystem_char_id, 1);
2087 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2089 byte_min = CHARSET_BYTE_OFFSET (cs);
2090 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2091 switch (CHARSET_DIMENSION (cs))
2095 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2098 for (cell = byte_min; cell < byte_max; cell++)
2100 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2103 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2104 Fprin1_to_string (c, Qnil),
2111 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2114 for (row = byte_min; row < byte_max; row++)
2116 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2119 for (cell = byte_min; cell < byte_max; cell++)
2121 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2124 Fput_database (Fprin1_to_string (make_int ((row << 8)
2127 Fprin1_to_string (c, Qnil),
2135 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2138 for (plane = byte_min; plane < byte_max; plane++)
2141 = get_ccs_octet_table (table_p, charset, plane);
2144 for (row = byte_min; row < byte_max; row++)
2147 = get_ccs_octet_table (table_r, charset, row);
2150 for (cell = byte_min; cell < byte_max; cell++)
2152 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2156 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2160 Fprin1_to_string (c, Qnil),
2169 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2172 for (group = byte_min; group < byte_max; group++)
2175 = get_ccs_octet_table (table_g, charset, group);
2178 for (plane = byte_min; plane < byte_max; plane++)
2181 = get_ccs_octet_table (table_p, charset, plane);
2184 for (row = byte_min; row < byte_max; row++)
2187 = get_ccs_octet_table (table_r, charset, row);
2190 for (cell = byte_min; cell < byte_max; cell++)
2193 = get_ccs_octet_table (table_c, charset, cell);
2196 Fput_database (Fprin1_to_string
2197 (make_int (( group << 24)
2202 Fprin1_to_string (c, Qnil),
2210 return Fclose_database (db);
/* Mark the decoding table of CCS as not loaded.  CCS is resolved with
   Fget_charset and its database file name is computed with
   char_attribute_system_db_file (charset name, Qsystem_char_id, 0).
   When that file exists, the decoding table is replaced by the marker
   Qunloaded.
   NOTE(review): presumably entries are then re-read from the database
   on demand (see load_char_decoding_entry_maybe); the return values are
   not shown here.  */
2213 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2214 Reset mapping-table of CCS with database file.
2218 Lisp_Object db_file;
2220 ccs = Fget_charset (ccs);
2221 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2222 Qsystem_char_id, 0);
2224 if (!NILP (Ffile_exists_p (db_file)))
2226 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Fetch the character mapped to CODE_POINT in coded charset CCS from
   the external character database and record the outcome in CCS's
   decoding table with decoding_table_put_char: the character when one
   is found, Qnil otherwise.

   Two back ends appear below, presumably selected by conditional
   compilation that is not shown here: one talks to libchise
   (chise_open_data_source, chise_open_decoding_table,
   chise_dt_get_char), the other goes through the Lisp database
   primitives (Fopen_database, Fget_database).
   NOTE(review): the return statements are not shown; verify the
   return-value convention against the callers.  */
2233 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
/* The database lives in a "char-db" directory below db_dir; the
   "../lib-src" fallback applies in a case whose test is not shown.  */
2236 Lisp_Object db_dir = Vexec_directory;
2238 CHISE_Decoding_Table *dt_ccs;
2241 DBTYPE real_subtype;
2243 CHISE_Char_ID char_id;
2246 db_dir = build_string ("../lib-src");
2247 db_dir = Fexpand_file_name (build_string ("char-db"), db_dir);
2249 status = chise_open_data_source (&ds, CHISE_DS_Berkeley_DB,
2250 XSTRING_DATA (db_dir));
2253 chise_close_data_source (&ds);
/* The decoding table is opened read-only as a hash database.  */
2257 modemask = 0755; /* rwxr-xr-x */
2258 real_subtype = DB_HASH;
2259 accessmask = DB_RDONLY;
2262 = chise_open_decoding_table (&dt_ccs, &ds,
2263 XSTRING_DATA (Fsymbol_name
2264 (XCHARSET_NAME(ccs))),
2266 accessmask, modemask);
/* Diagnostic and clean-up for a table that could not be opened.  */
2269 printf ("Can't open decoding-table %s\n",
2270 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))));
2271 chise_close_decoding_table (dt_ccs);
2272 chise_close_data_source (&ds);
2276 char_id = chise_dt_get_char (dt_ccs, code_point);
/* Trace output: charset name, code point (hex and decimal), result.  */
2278 printf ("%s's 0x%X (%d) => 0x%X\n",
2279 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))),
2280 code_point, code_point, char_id);
/* Cache the lookup result, or Qnil for "no character", in the table.  */
2283 decoding_table_put_char (ccs, code_point, make_char (char_id));
2285 decoding_table_put_char (ccs, code_point, Qnil);
2287 chise_close_decoding_table (dt_ccs);
2289 chise_close_data_source (&ds);
/* Second back end: the same lookup through the Lisp database API, with
   the printed code point as key.  */
2295 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2298 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2302 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2309 decoding_table_put_char (ccs, code_point, ret);
2310 Fclose_database (db);
2314 decoding_table_put_char (ccs, code_point, Qnil);
2315 Fclose_database (db);
2320 #endif /* HAVE_CHISE_CLIENT */
2321 #endif /* UTF2000 */
2324 /************************************************************************/
2325 /* Lisp primitives for working with characters */
2326 /************************************************************************/
/* Turn a (CHARSET, CODE) pair into a character.  CHARSET is resolved
   with Fget_charset.  With DEFINED_ONLY nil the code is decoded by
   DECODE_CHAR, otherwise by decode_defined_char; a negative result is
   reported to Lisp as nil, anything else is wrapped with make_char.
   NOTE(review): the extraction of the integer from CODE, and the
   adjustment applied when the charset's graphic value is 1, are not
   shown here.  */
2329 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2330 Make a character from CHARSET and code-point CODE.
2331 If DEFINED_ONLY is non-nil, builtin character is not returned.
2332 If corresponding character is not found, nil is returned.
2334 (charset, code, defined_only))
2338 charset = Fget_charset (charset);
2341 if (XCHARSET_GRAPHIC (charset) == 1)
2343 if (NILP (defined_only))
2344 c = DECODE_CHAR (charset, c);
2346 c = decode_defined_char (charset, c);
2347 return c >= 0 ? make_char (c) : Qnil;
/* Like `decode-char', but going through decode_builtin_char; when that
   yields a negative value the function falls back to
   Fdecode_char (charset, code, Qnil).

   The charset latin-viscii is special-cased: the character is decoded
   first, then looked up with Fget_char_attribute under
   latin-viscii-lower and latin-viscii-upper, and CHARSET is switched to
   the matching one.
   NOTE(review): the conditions of that special case, the use made of the
   attribute values and the adjustment for a graphic value of 1 are not
   shown here.  */
2350 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2351 Make a builtin character from CHARSET and code-point CODE.
2357 charset = Fget_charset (charset);
2359 if (EQ (charset, Vcharset_latin_viscii))
2361 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2367 (ret = Fget_char_attribute (chr,
2368 Vcharset_latin_viscii_lower,
2371 charset = Vcharset_latin_viscii_lower;
2375 (ret = Fget_char_attribute (chr,
2376 Vcharset_latin_viscii_upper,
2379 charset = Vcharset_latin_viscii_upper;
2386 if (XCHARSET_GRAPHIC (charset) == 1)
2389 c = decode_builtin_char (charset, c);
2390 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Build a character from CHARSET and one or two octets.

   The permitted octet range depends on the charset:
     ascii                 0 .. 127
     control-1             0 .. 31
     256-character sets    0 .. 255
     94-character sets    33 .. 126
     everything else      32 .. 127
   An octet outside the range raises args_out_of_range_3 with the
   offending argument and both limits.  For a dimension-1 charset a
   second octet is an error ("second octet must be nil") and the
   character is MAKE_CHAR (charset, a1, 0); otherwise ARG2 is masked
   with 0x7f, range-checked the same way and passed to MAKE_CHAR.

   NOTE(review): the statements that extract and mask ARG1, and the
   conditionals around the 256-character case, are not shown here.  */
2394 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2395 Make a character from CHARSET and octets ARG1 and ARG2.
2396 ARG2 is required only for characters from two-dimensional charsets.
2397 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2398 character s with caron.
2400 (charset, arg1, arg2))
2404 int lowlim, highlim;
2406 charset = Fget_charset (charset);
2407 cs = XCHARSET (charset);
2409 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2410 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2412 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2414 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2415 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2418 /* It is useful (and safe, according to Olivier Galibert) to strip
2419 the 8th bit off ARG1 and ARG2 because it allows programmers to
2420 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2421 Latin 2 code of the character. */
2429 if (a1 < lowlim || a1 > highlim)
2430 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2432 if (CHARSET_DIMENSION (cs) == 1)
2436 ("Charset is of dimension one; second octet must be nil", arg2);
2437 return make_char (MAKE_CHAR (charset, a1, 0));
2446 a2 = XINT (arg2) & 0x7f;
2448 if (a2 < lowlim || a2 > highlim)
2449 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2451 return make_char (MAKE_CHAR (charset, a1, a2));
/* CHARACTER is checked with CHECK_CHAR_COERCE_INT; the value returned
   is the NAME of the charset that CHAR_CHARSET reports for it, not the
   charset object.  */
2454 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2455 Return the character set of CHARACTER.
2459 CHECK_CHAR_COERCE_INT (character);
2461 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Split CHARACTER with BREAKUP_CHAR and return the requested octet as a
   Lisp integer: octet 0 when N is nil or 0, octet 1 when N is 1.  The
   error "Octet number must be 0 or 1" covers any other N.  */
2464 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2465 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2466 N defaults to 0 if omitted.
2470 Lisp_Object charset;
2473 CHECK_CHAR_COERCE_INT (character);
2475 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2477 if (NILP (n) || EQ (n, Qzero))
2478 return make_int (octet0);
2479 else if (EQ (n, make_int (1)))
2480 return make_int (octet1);
2482 signal_simple_error ("Octet number must be 0 or 1", n);
/* Inverse of `decode-char'.  CHARACTER is checked with
   CHECK_CHAR_COERCE_INT, CHARSET is resolved with Fget_charset, and
   charset_code_point computes the code point; its last argument is true
   exactly when DEFINED_ONLY is non-nil.  A non-negative code point is
   returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is not
   shown here.  */
2486 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2487 Return code-point of CHARACTER in specified CHARSET.
2489 (character, charset, defined_only))
2493 CHECK_CHAR_COERCE_INT (character);
2494 charset = Fget_charset (charset);
2495 code_point = charset_code_point (charset, XCHAR (character),
2496 !NILP (defined_only));
2497 if (code_point >= 0)
2498 return make_int (code_point);
/* Decompose CHARACTER into a list headed by its charset name and
   followed by its position codes.

   Two variants are visible, presumably alternatives under conditional
   compilation that is not shown here:
   - one obtains a code point with ENCODE_CHAR and, while the remaining
     dimension is positive, conses the low 8 bits of the code point onto
     the result, finally putting the charset name in front (the
     per-iteration update of the code point is not shown);
   - the other uses BREAKUP_CHAR and builds a three-element list for
     dimension-2 charsets and a two-element list otherwise.
   CHARSET and RC are GCPRO'd because the function can GC.  */
2504 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2505 Return list of charset and one or two position-codes of CHARACTER.
2509 /* This function can GC */
2510 struct gcpro gcpro1, gcpro2;
2511 Lisp_Object charset = Qnil;
2512 Lisp_Object rc = Qnil;
2520 GCPRO2 (charset, rc);
2521 CHECK_CHAR_COERCE_INT (character);
2524 code_point = ENCODE_CHAR (XCHAR (character), charset);
2525 dimension = XCHARSET_DIMENSION (charset);
2526 while (dimension > 0)
2528 rc = Fcons (make_int (code_point & 255), rc);
2532 rc = Fcons (XCHARSET_NAME (charset), rc);
2534 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2536 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2538 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2542 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2551 #ifdef ENABLE_COMPOSITE_CHARS
2552 /************************************************************************/
2553 /* composite character functions */
2554 /************************************************************************/
/* Map the byte string STR of length LEN to a composite character,
   allocating a new one when needed.

   The string is looked up in Vcomposite_char_string2char_hash_table.
   The allocation path builds the next free character of
   Vcharset_composite from composite_char_row_next and
   composite_char_col_next, records it in both hash tables
   (char -> string and string -> char), and advances the column.
   NOTE(review): the test that selects the allocation path and the
   return statement are not shown here.  */
2557 lookup_composite_char (Bufbyte *str, int len)
2559 Lisp_Object lispstr = make_string (str, len);
2560 Lisp_Object ch = Fgethash (lispstr,
2561 Vcomposite_char_string2char_hash_table,
/* Rows run out at 128; nothing more can be allocated after that.  */
2567 if (composite_char_row_next >= 128)
2568 signal_simple_error ("No more composite chars available", lispstr);
2569 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2570 composite_char_col_next);
2571 Fputhash (make_char (emch), lispstr,
2572 Vcomposite_char_char2string_hash_table);
2573 Fputhash (lispstr, make_char (emch),
2574 Vcomposite_char_string2char_hash_table);
/* Advance to the next cell, wrapping to column 32 of the next row once
   the column reaches 128.  */
2575 composite_char_col_next++;
2576 if (composite_char_col_next >= 128)
2578 composite_char_col_next = 32;
2579 composite_char_row_next++;
/* Reverse lookup for lookup_composite_char: fetch the string recorded
   for composite character CH in
   Vcomposite_char_char2string_hash_table.  A missing entry is treated as
   a programming error (assert).  */
2588 composite_char_string (Emchar ch)
2590 Lisp_Object str = Fgethash (make_char (ch),
2591 Vcomposite_char_char2string_hash_table,
2593 assert (!UNBOUNDP (str));
/* STRING must be a Lisp string; its bytes and length are handed to
   lookup_composite_char and the resulting character code is wrapped
   with make_char.
   NOTE(review): this is introduced with xxDEFUN rather than DEFUN; what
   that spelling implies for registration is not visible here.  */
2597 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2598 Convert a string into a single composite character.
2599 The character is the result of overstriking all the characters in
2604 CHECK_STRING (string);
2605 return make_char (lookup_composite_char (XSTRING_DATA (string),
2606 XSTRING_LENGTH (string)));
/* Return the string behind a composite character via
   composite_char_string.  A character whose leading byte is not
   LEADING_BYTE_COMPOSITE signals "Must be composite char".
   NOTE(review): the argument check and the extraction of the character
   code are not shown here.  */
2609 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2610 Return a string of the characters comprising a composite character.
2618 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2619 signal_simple_error ("Must be composite char", ch);
2620 return composite_char_string (emch);
2622 #endif /* ENABLE_COMPOSITE_CHARS */
2625 /************************************************************************/
2626 /* initialization */
2627 /************************************************************************/
/* Symbol-level initialization for this file: sets up the charset lrecord
   implementation, registers the Lisp primitives defined above with
   DEFSUBR, and interns the symbols used as property keywords and as
   charset names with defsymbol.
   NOTE(review): preprocessor conditionals around some of these groups
   (a conditional on HAVE_CHISE_CLIENT and one on ENABLE_COMPOSITE_CHARS
   are visible) are only partly shown here.  */
2630 syms_of_mule_charset (void)
2632 INIT_LRECORD_IMPLEMENTATION (charset);
/* Primitives for charset objects.  */
2634 DEFSUBR (Fcharsetp);
2635 DEFSUBR (Ffind_charset);
2636 DEFSUBR (Fget_charset);
2637 DEFSUBR (Fcharset_list);
2638 DEFSUBR (Fcharset_name);
2639 DEFSUBR (Fmake_charset);
2640 DEFSUBR (Fmake_reverse_direction_charset);
2641 /* DEFSUBR (Freverse_direction_charset); */
2642 DEFSUBR (Fdefine_charset_alias);
2643 DEFSUBR (Fcharset_from_attributes);
2644 DEFSUBR (Fcharset_short_name);
2645 DEFSUBR (Fcharset_long_name);
2646 DEFSUBR (Fcharset_description);
2647 DEFSUBR (Fcharset_dimension);
2648 DEFSUBR (Fcharset_property);
2649 DEFSUBR (Fcharset_id);
2650 DEFSUBR (Fset_charset_ccl_program);
2651 DEFSUBR (Fset_charset_registry);
/* Primitives for mapping (decoding) tables.  */
2653 DEFSUBR (Fcharset_mapping_table);
2654 DEFSUBR (Fset_charset_mapping_table);
2655 #ifdef HAVE_CHISE_CLIENT
2656 DEFSUBR (Fsave_charset_mapping_table);
2657 DEFSUBR (Freset_charset_mapping_table);
/* Primitives for individual characters.  */
2660 DEFSUBR (Fdecode_char);
2661 DEFSUBR (Fdecode_builtin_char);
2662 DEFSUBR (Fencode_char);
2664 DEFSUBR (Fmake_char);
2665 DEFSUBR (Fchar_charset);
2666 DEFSUBR (Fchar_octet);
2667 DEFSUBR (Fsplit_char);
2669 #ifdef ENABLE_COMPOSITE_CHARS
2670 DEFSUBR (Fmake_composite_char);
2671 DEFSUBR (Fcomposite_char_string);
/* Property keywords compared against in make-charset and
   charset-property.  */
2674 defsymbol (&Qcharsetp, "charsetp");
2675 defsymbol (&Qregistry, "registry");
2676 defsymbol (&Qfinal, "final");
2677 defsymbol (&Qgraphic, "graphic");
2678 defsymbol (&Qdirection, "direction");
2679 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2680 defsymbol (&Qshort_name, "short-name");
2681 defsymbol (&Qlong_name, "long-name");
2682 defsymbol (&Qiso_ir, "iso-ir");
/* Keywords of the mother / code-range / conversion properties, plus the
   symbols naming the individual conversions.  */
2684 defsymbol (&Qmother, "mother");
2685 defsymbol (&Qmin_code, "min-code");
2686 defsymbol (&Qmax_code, "max-code");
2687 defsymbol (&Qcode_offset, "code-offset");
2688 defsymbol (&Qconversion, "conversion");
2689 defsymbol (&Q94x60, "94x60");
2690 defsymbol (&Q94x94x60, "94x94x60");
2691 defsymbol (&Qbig5_1, "big5-1");
2692 defsymbol (&Qbig5_2, "big5-2");
/* Values of the 'direction property.  */
2695 defsymbol (&Ql2r, "l2r");
2696 defsymbol (&Qr2l, "r2l");
2698 /* Charsets, compatible with FSF 20.3
2699 Naming convention is Script-Charset[-Edition] */
2700 defsymbol (&Qascii, "ascii");
2701 defsymbol (&Qcontrol_1, "control-1");
2702 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2703 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2704 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2705 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2706 defsymbol (&Qthai_tis620, "thai-tis620");
2707 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2708 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2709 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2710 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2711 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2712 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2713 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
/* The symbols below are spelled with a leading `=' and so do not follow
   the Script-Charset pattern named in the comment above.  */
2714 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2715 defsymbol (&Qmap_gb2312, "=gb2312");
2716 defsymbol (&Qmap_gb12345, "=gb12345");
2717 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2718 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2719 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2720 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2721 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
/* UCS, VISCII and further charset names.  */
2723 defsymbol (&Qmap_ucs, "=ucs");
2724 defsymbol (&Qucs, "ucs");
2725 defsymbol (&Qucs_bmp, "ucs-bmp");
2726 defsymbol (&Qucs_smp, "ucs-smp");
2727 defsymbol (&Qucs_sip, "ucs-sip");
2728 defsymbol (&Qlatin_viscii, "latin-viscii");
2729 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2730 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2731 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2732 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2733 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2734 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2735 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2736 defsymbol (&Qmap_big5, "=big5");
2737 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2739 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2740 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2742 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the zeroed
   chlook lookup structure and registers it with
   dump_add_root_struct_ptr, fills its lookup tables with Qnil, seeds the
   counters for privately allocated leading bytes, and defines the Lisp
   variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.

   NOTE(review): the attribute table is cleared by two differently shaped
   loops (two-level and three-level indexing) and the leading-byte
   counters are seeded in two different ways; these look like
   alternatives under preprocessor conditionals that are not shown here.
   leading_code_private_11 is assigned the same value both before and
   after its DEFVAR_INT; the second assignment looks redundant.  */
2746 vars_of_mule_charset (void)
2753 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2754 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2756 /* Table of charsets indexed by leading byte. */
2757 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2758 chlook->charset_by_leading_byte[i] = Qnil;
2761 /* Table of charsets indexed by type/final-byte. */
2762 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2763 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2764 chlook->charset_by_attributes[i][j] = Qnil;
2766 /* Table of charsets indexed by type/final-byte/direction. */
2767 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2768 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2769 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2770 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Seed the counter(s) from which private leading bytes are handed out.  */
2774 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2776 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2777 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2781 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2782 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2783 Leading-code of private TYPE9N charset of column-width 1.
2785 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2789 Vdefault_coded_charset_priority_list = Qnil;
2790 DEFVAR_LISP ("default-coded-charset-priority-list",
2791 &Vdefault_coded_charset_priority_list /*
2792 Default order of preferred coded-character-sets.
/* Create the charset hash table and all predefined charset objects.
   Every charset variable is registered with staticpro and filled in
   from a make_charset call; the string arguments of each call are, in
   order, the short name, the long name, the doc string and the font
   registry regexp (as the values themselves show).  When
   ENABLE_COMPOSITE_CHARS is defined, the composite charset and its two
   hash tables are created as well.  */
2798 complex_vars_of_mule_charset (void)
2800 staticpro (&Vcharset_hash_table);
2801 Vcharset_hash_table =
2802 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2804 /* Predefined character sets. We store them into variables for
2808 staticpro (&Vcharset_ucs);
2810 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2811 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2812 build_string ("UCS"),
2813 build_string ("UCS"),
2814 build_string ("ISO/IEC 10646"),
2816 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2817 staticpro (&Vcharset_ucs_bmp);
2819 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2820 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2821 build_string ("BMP"),
2822 build_string ("UCS-BMP"),
2823 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2825 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2826 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_ucs_smp);
2829 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2830 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2831 build_string ("SMP"),
2832 build_string ("UCS-SMP"),
2833 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2834 build_string ("UCS00-1"),
2835 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2836 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2837 staticpro (&Vcharset_ucs_sip);
2839 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2840 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2841 build_string ("SIP"),
2842 build_string ("UCS-SIP"),
2843 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2844 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2845 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2846 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2848 # define MIN_CHAR_THAI 0
2849 # define MAX_CHAR_THAI 0
2850 /* # define MIN_CHAR_HEBREW 0 */
2851 /* # define MAX_CHAR_HEBREW 0 */
2852 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2853 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* ASCII.  The long name previously read "ASCII)" with a stray closing
   parenthesis; it is now plain "ASCII".  */
2855 staticpro (&Vcharset_ascii);
2857 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2858 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2859 build_string ("ASCII"),
2860 build_string ("ASCII"),
2861 build_string ("ASCII (ISO646 IRV)"),
2862 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2863 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* NOTE(review): the doc string below says "128-191" while the range
   passed in the last argument line is 0x80..0x9F (128..159).  The
   string is left untouched here; confirm which one is intended.  */
2864 staticpro (&Vcharset_control_1);
2865 Vcharset_control_1 =
2866 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2867 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2868 build_string ("C1"),
2869 build_string ("Control characters"),
2870 build_string ("Control characters 128-191"),
2872 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
/* Single-byte charsets.  They differ only in leading byte, symbol,
   final byte, direction and descriptive strings.  */
2873 staticpro (&Vcharset_latin_iso8859_1);
2874 Vcharset_latin_iso8859_1 =
2875 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2876 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2877 build_string ("Latin-1"),
2878 build_string ("ISO8859-1 (Latin-1)"),
2879 build_string ("ISO8859-1 (Latin-1)"),
2880 build_string ("iso8859-1"),
2881 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2882 staticpro (&Vcharset_latin_iso8859_2);
2883 Vcharset_latin_iso8859_2 =
2884 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2885 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2886 build_string ("Latin-2"),
2887 build_string ("ISO8859-2 (Latin-2)"),
2888 build_string ("ISO8859-2 (Latin-2)"),
2889 build_string ("iso8859-2"),
2890 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2891 staticpro (&Vcharset_latin_iso8859_3);
2892 Vcharset_latin_iso8859_3 =
2893 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2894 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("Latin-3"),
2896 build_string ("ISO8859-3 (Latin-3)"),
2897 build_string ("ISO8859-3 (Latin-3)"),
2898 build_string ("iso8859-3"),
2899 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2900 staticpro (&Vcharset_latin_iso8859_4);
2901 Vcharset_latin_iso8859_4 =
2902 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2903 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2904 build_string ("Latin-4"),
2905 build_string ("ISO8859-4 (Latin-4)"),
2906 build_string ("ISO8859-4 (Latin-4)"),
2907 build_string ("iso8859-4"),
2908 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2909 staticpro (&Vcharset_thai_tis620);
2910 Vcharset_thai_tis620 =
2911 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2912 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2913 build_string ("TIS620"),
2914 build_string ("TIS620 (Thai)"),
2915 build_string ("TIS620.2529 (Thai)"),
2916 build_string ("tis620"),
2917 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2918 staticpro (&Vcharset_greek_iso8859_7);
2919 Vcharset_greek_iso8859_7 =
2920 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2921 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2922 build_string ("ISO8859-7"),
2923 build_string ("ISO8859-7 (Greek)"),
2924 build_string ("ISO8859-7 (Greek)"),
2925 build_string ("iso8859-7"),
2926 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Arabic and Hebrew are the only charsets here created with
   CHARSET_RIGHT_TO_LEFT.  */
2927 staticpro (&Vcharset_arabic_iso8859_6);
2928 Vcharset_arabic_iso8859_6 =
2929 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2930 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2931 build_string ("ISO8859-6"),
2932 build_string ("ISO8859-6 (Arabic)"),
2933 build_string ("ISO8859-6 (Arabic)"),
2934 build_string ("iso8859-6"),
2935 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2936 staticpro (&Vcharset_hebrew_iso8859_8);
2937 Vcharset_hebrew_iso8859_8 =
2938 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2939 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2940 build_string ("ISO8859-8"),
2941 build_string ("ISO8859-8 (Hebrew)"),
2942 build_string ("ISO8859-8 (Hebrew)"),
2943 build_string ("iso8859-8"),
2945 0 /* MIN_CHAR_HEBREW */,
2946 0 /* MAX_CHAR_HEBREW */, 0, 32,
2947 Qnil, CONVERSION_IDENTICAL);
/* The two JIS X0201 halves share one registry regexp.  */
2948 staticpro (&Vcharset_katakana_jisx0201);
2949 Vcharset_katakana_jisx0201 =
2950 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2951 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2952 build_string ("JISX0201 Kana"),
2953 build_string ("JISX0201.1976 (Japanese Kana)"),
2954 build_string ("JISX0201.1976 Japanese Kana"),
2955 build_string ("jisx0201\\.1976"),
2956 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2957 staticpro (&Vcharset_latin_jisx0201);
2958 Vcharset_latin_jisx0201 =
2959 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2960 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2961 build_string ("JISX0201 Roman"),
2962 build_string ("JISX0201.1976 (Japanese Roman)"),
2963 build_string ("JISX0201.1976 Japanese Roman"),
2964 build_string ("jisx0201\\.1976"),
2965 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2966 staticpro (&Vcharset_cyrillic_iso8859_5);
2967 Vcharset_cyrillic_iso8859_5 =
2968 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2969 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2970 build_string ("ISO8859-5"),
2971 build_string ("ISO8859-5 (Cyrillic)"),
2972 build_string ("ISO8859-5 (Cyrillic)"),
2973 build_string ("iso8859-5"),
2974 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2975 staticpro (&Vcharset_latin_iso8859_9);
2976 Vcharset_latin_iso8859_9 =
2977 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2978 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2979 build_string ("Latin-5"),
2980 build_string ("ISO8859-9 (Latin-5)"),
2981 build_string ("ISO8859-9 (Latin-5)"),
2982 build_string ("iso8859-9"),
2983 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
/* Two-byte charsets.  Vcharset_jis_x0208 is created first because the
   JIS X0208:1990 call further down passes it as an argument.  */
2985 staticpro (&Vcharset_jis_x0208);
2986 Vcharset_jis_x0208 =
2987 make_charset (LEADING_BYTE_JIS_X0208,
2988 Qmap_jis_x0208, 94, 2,
2989 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2990 build_string ("JIS X0208"),
2991 build_string ("JIS X0208 Common"),
2992 build_string ("JIS X0208 Common part"),
2993 build_string ("jisx0208\\.1990"),
2995 MIN_CHAR_JIS_X0208_1990,
2996 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2997 Qnil, CONVERSION_94x94);
2999 staticpro (&Vcharset_japanese_jisx0208_1978);
3000 Vcharset_japanese_jisx0208_1978 =
3001 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3002 Qmap_jis_x0208_1978, 94, 2,
3003 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3004 build_string ("JIS X0208:1978"),
3005 build_string ("JIS X0208:1978 (Japanese)"),
3007 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3008 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3015 CONVERSION_IDENTICAL);
/* GB2312.  The long name previously read "GB2312)" with a stray
   closing parenthesis.  */
3016 staticpro (&Vcharset_chinese_gb2312);
3017 Vcharset_chinese_gb2312 =
3018 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3019 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3020 build_string ("GB2312"),
3021 build_string ("GB2312"),
3022 build_string ("GB2312 Chinese simplified"),
3023 build_string ("gb2312"),
3024 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* GB 12345.  The long name previously read "GB 12345)" with a stray
   closing parenthesis.  */
3025 staticpro (&Vcharset_chinese_gb12345);
3026 Vcharset_chinese_gb12345 =
3027 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3028 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3029 build_string ("G1"),
3030 build_string ("GB 12345"),
3031 build_string ("GB 12345-1990"),
3032 build_string ("GB12345\\(\\.1990\\)?-0"),
3033 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_japanese_jisx0208);
3035 Vcharset_japanese_jisx0208 =
3036 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3037 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3038 build_string ("JISX0208"),
3039 build_string ("JIS X0208:1983 (Japanese)"),
3040 build_string ("JIS X0208:1983 Japanese Kanji"),
3041 build_string ("jisx0208\\.1983"),
3048 CONVERSION_IDENTICAL);
/* JIS X0208:1990 passes literal values where the trailing comments
   name the macros or values they stand in for, and passes
   Vcharset_jis_x0208 where most other calls pass Qnil.  */
3050 staticpro (&Vcharset_japanese_jisx0208_1990);
3051 Vcharset_japanese_jisx0208_1990 =
3052 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3053 Qmap_jis_x0208_1990, 94, 2,
3054 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3055 build_string ("JISX0208-1990"),
3056 build_string ("JIS X0208:1990 (Japanese)"),
3057 build_string ("JIS X0208:1990 Japanese Kanji"),
3058 build_string ("jisx0208\\.1990"),
3060 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3061 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3062 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3063 Vcharset_jis_x0208 /* Qnil */,
3064 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* KSC5601.  The long name previously read "KSC5601 (Korean" without
   its closing parenthesis.  */
3066 staticpro (&Vcharset_korean_ksc5601);
3067 Vcharset_korean_ksc5601 =
3068 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3069 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3070 build_string ("KSC5601"),
3071 build_string ("KSC5601 (Korean)"),
3072 build_string ("KSC5601 Korean Hangul and Hanja"),
3073 build_string ("ksc5601"),
3074 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3075 staticpro (&Vcharset_japanese_jisx0212);
3076 Vcharset_japanese_jisx0212 =
3077 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3078 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3079 build_string ("JISX0212"),
3080 build_string ("JISX0212 (Japanese)"),
3081 build_string ("JISX0212 Japanese Supplement"),
3082 build_string ("jisx0212"),
3083 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Registry regexp for one CNS 11643 plane.  N must be a string
   literal; it is joined to the surrounding literals by adjacent
   string concatenation.  */
3085 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3086 staticpro (&Vcharset_chinese_cns11643_1);
3087 Vcharset_chinese_cns11643_1 =
3088 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3089 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3090 build_string ("CNS11643-1"),
3091 build_string ("CNS11643-1 (Chinese traditional)"),
3093 ("CNS 11643 Plane 1 Chinese traditional"),
3094 build_string (CHINESE_CNS_PLANE_RE("1")),
3095 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3096 staticpro (&Vcharset_chinese_cns11643_2);
3097 Vcharset_chinese_cns11643_2 =
3098 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3099 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("CNS11643-2"),
3101 build_string ("CNS11643-2 (Chinese traditional)"),
3103 ("CNS 11643 Plane 2 Chinese traditional"),
3104 build_string (CHINESE_CNS_PLANE_RE("2")),
3105 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* NOTE(review): the doc string below says "5712:1983" while the
   registry regexp on the next line matches ".1993".  The string is
   left untouched here; confirm the intended year.  */
3107 staticpro (&Vcharset_latin_tcvn5712);
3108 Vcharset_latin_tcvn5712 =
3109 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3110 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3111 build_string ("TCVN 5712"),
3112 build_string ("TCVN 5712 (VSCII-2)"),
3113 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3114 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3115 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3116 staticpro (&Vcharset_latin_viscii_lower);
3117 Vcharset_latin_viscii_lower =
3118 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3119 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3120 build_string ("VISCII lower"),
3121 build_string ("VISCII lower (Vietnamese)"),
3122 build_string ("VISCII lower (Vietnamese)"),
3123 build_string ("MULEVISCII-LOWER"),
3124 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3125 staticpro (&Vcharset_latin_viscii_upper);
3126 Vcharset_latin_viscii_upper =
3127 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3128 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3129 build_string ("VISCII upper"),
3130 build_string ("VISCII upper (Vietnamese)"),
3131 build_string ("VISCII upper (Vietnamese)"),
3132 build_string ("MULEVISCII-UPPER"),
3133 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3134 staticpro (&Vcharset_latin_viscii);
3135 Vcharset_latin_viscii =
3136 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3137 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3138 build_string ("VISCII"),
3139 build_string ("VISCII 1.1 (Vietnamese)"),
3140 build_string ("VISCII 1.1 (Vietnamese)"),
3141 build_string ("VISCII1\\.1"),
3142 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Vcharset_chinese_big5 is created before the two Big5 level charsets
   below, both of which pass it as an argument.  */
3143 staticpro (&Vcharset_chinese_big5);
3144 Vcharset_chinese_big5 =
3145 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3146 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3147 build_string ("Big5"),
3148 build_string ("Big5"),
3149 build_string ("Big5 Chinese traditional"),
3150 build_string ("big5-0"),
3152 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3153 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3155 staticpro (&Vcharset_ethiopic_ucs);
3156 Vcharset_ethiopic_ucs =
3157 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3158 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3159 build_string ("Ethiopic (UCS)"),
3160 build_string ("Ethiopic (UCS)"),
3161 build_string ("Ethiopic of UCS"),
3162 build_string ("Ethiopic-Unicode"),
3163 Qnil, 0x1200, 0x137F, 0, 0,
3164 Qnil, CONVERSION_IDENTICAL);
/* Big5 level 1 and level 2 pass Vcharset_chinese_big5 together with
   CONVERSION_BIG5_1 / CONVERSION_BIG5_2 in place of the
   Qnil, CONVERSION_IDENTICAL pair kept in the trailing comments.  */
3166 staticpro (&Vcharset_chinese_big5_1);
3167 Vcharset_chinese_big5_1 =
3168 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3169 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3170 build_string ("Big5"),
3171 build_string ("Big5 (Level-1)"),
3173 ("Big5 Level-1 Chinese traditional"),
3174 build_string ("big5"),
3175 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3176 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3177 staticpro (&Vcharset_chinese_big5_2);
3178 Vcharset_chinese_big5_2 =
3179 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3180 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3181 build_string ("Big5"),
3182 build_string ("Big5 (Level-2)"),
3184 ("Big5 Level-2 Chinese traditional"),
3185 build_string ("big5"),
3186 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3187 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3189 #ifdef ENABLE_COMPOSITE_CHARS
3190 /* #### For simplicity, we put composite chars into a 96x96 charset.
3191 This is going to lead to problems because you can run out of
3192 room, esp. as we don't yet recycle numbers. */
3193 staticpro (&Vcharset_composite);
3194 Vcharset_composite =
3195 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3196 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3197 build_string ("Composite"),
3198 build_string ("Composite characters"),
3199 build_string ("Composite characters"),
3202 /* #### not dumped properly */
3203 composite_char_row_next = 32;
3204 composite_char_col_next = 32;
/* Two tables for composite characters: string -> char is created with
   HASH_TABLE_EQUAL, char -> string with HASH_TABLE_EQ.  */
3206 Vcomposite_char_string2char_hash_table =
3207 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3208 Vcomposite_char_char2string_hash_table =
3209 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3210 staticpro (&Vcomposite_char_string2char_hash_table);
3211 staticpro (&Vcomposite_char_char2string_hash_table);
3212 #endif /* ENABLE_COMPOSITE_CHARS */