1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_ucs;
71 Lisp_Object Vcharset_ucs_bmp;
72 Lisp_Object Vcharset_ucs_smp;
73 Lisp_Object Vcharset_ucs_sip;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
163 if (!NILP (c) && !CHARP (c))
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
182 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
214 code_point = (code_point << 8) | j;
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
281 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
282 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
299 Qjapanese_jisx0208_1978,
316 Qvietnamese_viscii_lower,
317 Qvietnamese_viscii_upper,
327 Lisp_Object Ql2r, Qr2l;
329 Lisp_Object Vcharset_hash_table;
331 /* Composite characters are characters constructed by overstriking two
332 or more regular characters.
334 1) The old Mule implementation involves storing composite characters
335 in a buffer as a tag followed by all of the actual characters
336 used to make up the composite character. I think this is a bad
337 idea; it greatly complicates code that wants to handle strings
338 one character at a time because it has to deal with the possibility
339 of great big ungainly characters. It's much more reasonable to
340 simply store an index into a table of composite characters.
342 2) The current implementation only allows for 16,384 separate
343 composite characters over the lifetime of the XEmacs process.
344 This could become a potential problem if the user
345 edited lots of different files that use composite characters.
346 Due to FSF bogosity, increasing the number of allowable
347 composite characters under Mule would decrease the number
348 of possible faces that can exist. Mule already has shrunk
349 this to 2048, and further shrinkage would become uncomfortable.
350 No such problems exist in XEmacs.
352 Composite characters could be represented as 0x80 C1 C2 C3,
353 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
354 for slightly under 2^20 (one million) composite characters
355 over the XEmacs process lifetime, and you only need to
356 increase the size of a Mule character from 19 to 21 bits.
357 Or you could use 0x80 C1 C2 C3 C4, allowing for about
358 85 million (slightly over 2^26) composite characters. */
361 /************************************************************************/
362 /* Basic Emchar functions */
363 /************************************************************************/
365 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
366 string in STR. Returns the number of bytes stored.
367 Do not call this directly. Use the macro set_charptr_emchar() instead.
371 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
386 else if ( c <= 0x7ff )
388 *p++ = (c >> 6) | 0xc0;
389 *p++ = (c & 0x3f) | 0x80;
391 else if ( c <= 0xffff )
393 *p++ = (c >> 12) | 0xe0;
394 *p++ = ((c >> 6) & 0x3f) | 0x80;
395 *p++ = (c & 0x3f) | 0x80;
397 else if ( c <= 0x1fffff )
399 *p++ = (c >> 18) | 0xf0;
400 *p++ = ((c >> 12) & 0x3f) | 0x80;
401 *p++ = ((c >> 6) & 0x3f) | 0x80;
402 *p++ = (c & 0x3f) | 0x80;
404 else if ( c <= 0x3ffffff )
406 *p++ = (c >> 24) | 0xf8;
407 *p++ = ((c >> 18) & 0x3f) | 0x80;
408 *p++ = ((c >> 12) & 0x3f) | 0x80;
409 *p++ = ((c >> 6) & 0x3f) | 0x80;
410 *p++ = (c & 0x3f) | 0x80;
414 *p++ = (c >> 30) | 0xfc;
415 *p++ = ((c >> 24) & 0x3f) | 0x80;
416 *p++ = ((c >> 18) & 0x3f) | 0x80;
417 *p++ = ((c >> 12) & 0x3f) | 0x80;
418 *p++ = ((c >> 6) & 0x3f) | 0x80;
419 *p++ = (c & 0x3f) | 0x80;
422 BREAKUP_CHAR (c, charset, c1, c2);
423 lb = CHAR_LEADING_BYTE (c);
424 if (LEADING_BYTE_PRIVATE_P (lb))
425 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
427 if (EQ (charset, Vcharset_control_1))
436 /* Return the first character from a Mule-encoded string in STR,
437 assuming it's non-ASCII. Do not call this directly.
438 Use the macro charptr_emchar() instead. */
441 non_ascii_charptr_emchar (const Bufbyte *str)
454 else if ( b >= 0xf8 )
459 else if ( b >= 0xf0 )
464 else if ( b >= 0xe0 )
469 else if ( b >= 0xc0 )
479 for( ; len > 0; len-- )
482 ch = ( ch << 6 ) | ( b & 0x3f );
486 Bufbyte i0 = *str, i1, i2 = 0;
489 if (i0 == LEADING_BYTE_CONTROL_1)
490 return (Emchar) (*++str - 0x20);
492 if (LEADING_BYTE_PREFIX_P (i0))
497 charset = CHARSET_BY_LEADING_BYTE (i0);
498 if (XCHARSET_DIMENSION (charset) == 2)
501 return MAKE_CHAR (charset, i1, i2);
505 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
506 Do not call this directly. Use the macro valid_char_p() instead. */
510 non_ascii_valid_char_p (Emchar ch)
514 /* Must have only lowest 19 bits set */
518 f1 = CHAR_FIELD1 (ch);
519 f2 = CHAR_FIELD2 (ch);
520 f3 = CHAR_FIELD3 (ch);
526 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
527 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
528 f2 > MAX_CHAR_FIELD2_PRIVATE)
533 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
534 f2 <= MAX_CHAR_FIELD2_PRIVATE))
538 NOTE: This takes advantage of the fact that
539 FIELD2_TO_OFFICIAL_LEADING_BYTE and
540 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
542 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
543 if (EQ (charset, Qnil))
545 return (XCHARSET_CHARS (charset) == 96);
551 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
552 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
553 f1 > MAX_CHAR_FIELD1_PRIVATE)
555 if (f2 < 0x20 || f3 < 0x20)
558 #ifdef ENABLE_COMPOSITE_CHARS
559 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
561 if (UNBOUNDP (Fgethash (make_int (ch),
562 Vcomposite_char_char2string_hash_table,
567 #endif /* ENABLE_COMPOSITE_CHARS */
569 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
570 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
573 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
575 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
578 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
580 if (EQ (charset, Qnil))
582 return (XCHARSET_CHARS (charset) == 96);
588 /************************************************************************/
589 /* Basic string functions */
590 /************************************************************************/
592 /* Copy the character pointed to by SRC into DST. Do not call this
593 directly. Use the macro charptr_copy_char() instead.
594 Return the number of bytes copied. */
597 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
599 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
601 for (i = bytes; i; i--, dst++, src++)
607 /************************************************************************/
608 /* streams of Emchars */
609 /************************************************************************/
611 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
612 The functions below are not meant to be called directly; use
613 the macros in insdel.h. */
616 Lstream_get_emchar_1 (Lstream *stream, int ch)
618 Bufbyte str[MAX_EMCHAR_LEN];
619 Bufbyte *strptr = str;
622 str[0] = (Bufbyte) ch;
624 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
626 int c = Lstream_getc (stream);
627 bufpos_checking_assert (c >= 0);
628 *++strptr = (Bufbyte) c;
630 return charptr_emchar (str);
634 Lstream_fput_emchar (Lstream *stream, Emchar ch)
636 Bufbyte str[MAX_EMCHAR_LEN];
637 Bytecount len = set_charptr_emchar (str, ch);
638 return Lstream_write (stream, str, len);
642 Lstream_funget_emchar (Lstream *stream, Emchar ch)
644 Bufbyte str[MAX_EMCHAR_LEN];
645 Bytecount len = set_charptr_emchar (str, ch);
646 Lstream_unread (stream, str, len);
650 /************************************************************************/
652 /************************************************************************/
655 mark_charset (Lisp_Object obj)
657 Lisp_Charset *cs = XCHARSET (obj);
659 mark_object (cs->short_name);
660 mark_object (cs->long_name);
661 mark_object (cs->doc_string);
662 mark_object (cs->registry);
663 mark_object (cs->ccl_program);
665 mark_object (cs->decoding_table);
666 mark_object (cs->mother);
672 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
674 Lisp_Charset *cs = XCHARSET (obj);
678 error ("printing unreadable object #<charset %s 0x%x>",
679 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
682 write_c_string ("#<charset ", printcharfun);
683 print_internal (CHARSET_NAME (cs), printcharfun, 0);
684 write_c_string (" ", printcharfun);
685 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
686 write_c_string (" ", printcharfun);
687 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
688 write_c_string (" ", printcharfun);
689 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
690 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
692 CHARSET_DIMENSION (cs),
693 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
694 CHARSET_COLUMNS (cs),
695 CHARSET_GRAPHIC (cs),
697 write_c_string (buf, printcharfun);
698 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
699 sprintf (buf, " 0x%x>", cs->header.uid);
700 write_c_string (buf, printcharfun);
703 static const struct lrecord_description charset_description[] = {
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
718 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
719 mark_charset, print_charset, 0, 0, 0,
723 /* Make a new charset. */
724 /* #### SJT Should generic properties be allowed? */
726 make_charset (Charset_ID id, Lisp_Object name,
727 unsigned short chars, unsigned char dimension,
728 unsigned char columns, unsigned char graphic,
729 Bufbyte final, unsigned char direction, Lisp_Object short_name,
730 Lisp_Object long_name, Lisp_Object doc,
732 Lisp_Object decoding_table,
733 Emchar min_code, Emchar max_code,
734 Emchar code_offset, unsigned char byte_offset,
735 Lisp_Object mother, unsigned char conversion)
738 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
742 XSETCHARSET (obj, cs);
744 CHARSET_ID (cs) = id;
745 CHARSET_NAME (cs) = name;
746 CHARSET_SHORT_NAME (cs) = short_name;
747 CHARSET_LONG_NAME (cs) = long_name;
748 CHARSET_CHARS (cs) = chars;
749 CHARSET_DIMENSION (cs) = dimension;
750 CHARSET_DIRECTION (cs) = direction;
751 CHARSET_COLUMNS (cs) = columns;
752 CHARSET_GRAPHIC (cs) = graphic;
753 CHARSET_FINAL (cs) = final;
754 CHARSET_DOC_STRING (cs) = doc;
755 CHARSET_REGISTRY (cs) = reg;
756 CHARSET_CCL_PROGRAM (cs) = Qnil;
757 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
759 CHARSET_DECODING_TABLE(cs) = Qunbound;
760 CHARSET_MIN_CODE (cs) = min_code;
761 CHARSET_MAX_CODE (cs) = max_code;
762 CHARSET_CODE_OFFSET (cs) = code_offset;
763 CHARSET_BYTE_OFFSET (cs) = byte_offset;
764 CHARSET_MOTHER (cs) = mother;
765 CHARSET_CONVERSION (cs) = conversion;
769 if (id == LEADING_BYTE_ASCII)
770 CHARSET_REP_BYTES (cs) = 1;
772 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
774 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
779 /* some charsets do not have final characters. This includes
780 ASCII, Control-1, Composite, and the two faux private
782 unsigned char iso2022_type
783 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
785 if (code_offset == 0)
787 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
788 chlook->charset_by_attributes[iso2022_type][final] = obj;
792 (chlook->charset_by_attributes[iso2022_type][final][direction]));
793 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
797 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
798 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
800 /* Some charsets are "faux" and don't have names or really exist at
801 all except in the leading-byte table. */
803 Fputhash (name, obj, Vcharset_hash_table);
808 get_unallocated_leading_byte (int dimension)
813 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
816 lb = chlook->next_allocated_leading_byte++;
820 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
823 lb = chlook->next_allocated_1_byte_leading_byte++;
827 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
830 lb = chlook->next_allocated_2_byte_leading_byte++;
836 ("No more character sets free for this dimension",
837 make_int (dimension));
843 /* Number of Big5 characters which have the same code in 1st byte. */
845 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
848 decode_defined_char (Lisp_Object ccs, int code_point)
850 int dim = XCHARSET_DIMENSION (ccs);
851 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
859 = get_ccs_octet_table (decoding_table, ccs,
860 (code_point >> (dim * 8)) & 255);
862 if (CHARP (decoding_table))
863 return XCHAR (decoding_table);
864 #ifdef HAVE_CHISE_CLIENT
865 if (EQ (decoding_table, Qunloaded))
867 char_id = load_char_decoding_entry_maybe (ccs, code_point);
872 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
874 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
876 if ( EQ (mother, Vcharset_ucs) )
877 return DECODE_CHAR (mother, code_point);
879 return decode_defined_char (mother, code_point);
881 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
884 = (((code_point >> 8) & 0x7F) - 33) * 94
885 + (( code_point & 0x7F) - 33);
886 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
887 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
889 b2 += b2 < 0x3F ? 0x40 : 0x62;
890 return decode_defined_char (mother, (b1 << 8) | b2);
892 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
895 = (((code_point >> 8) & 0x7F) - 33) * 94
896 + (( code_point & 0x7F) - 33)
897 + BIG5_SAME_ROW * (0xC9 - 0xA1);
898 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
899 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
901 b2 += b2 < 0x3F ? 0x40 : 0x62;
902 return decode_defined_char (mother, (b1 << 8) | b2);
909 decode_builtin_char (Lisp_Object charset, int code_point)
911 Lisp_Object mother = XCHARSET_MOTHER (charset);
914 if ( XCHARSET_MAX_CODE (charset) > 0 )
916 if ( CHARSETP (mother) )
918 int code = code_point;
920 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
922 int row = code_point >> 8;
923 int cell = code_point & 255;
927 else if (row < 16 + 32 + 30)
928 code = (row - (16 + 32)) * 94 + cell - 33;
929 else if (row < 18 + 32 + 30)
931 else if (row < 18 + 32 + 60)
932 code = (row - (18 + 32)) * 94 + cell - 33;
934 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
936 int plane = code_point >> 16;
937 int row = (code_point >> 8) & 255;
938 int cell = code_point & 255;
942 else if (row < 16 + 32 + 30)
944 = (plane - 33) * 94 * 60
945 + (row - (16 + 32)) * 94
947 else if (row < 18 + 32 + 30)
949 else if (row < 18 + 32 + 60)
951 = (plane - 33) * 94 * 60
952 + (row - (18 + 32)) * 94
955 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
958 = (((code_point >> 8) & 0x7F) - 33) * 94
959 + (( code_point & 0x7F) - 33);
960 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
961 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
963 b2 += b2 < 0x3F ? 0x40 : 0x62;
964 code = (b1 << 8) | b2;
966 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
969 = (((code_point >> 8) & 0x7F) - 33) * 94
970 + (( code_point & 0x7F) - 33)
971 + BIG5_SAME_ROW * (0xC9 - 0xA1);
972 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
973 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
975 b2 += b2 < 0x3F ? 0x40 : 0x62;
976 code = (b1 << 8) | b2;
979 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
984 = (XCHARSET_DIMENSION (charset) == 1
986 code_point - XCHARSET_BYTE_OFFSET (charset)
988 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
989 * XCHARSET_CHARS (charset)
990 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
991 + XCHARSET_CODE_OFFSET (charset);
992 if ((cid < XCHARSET_MIN_CODE (charset))
993 || (XCHARSET_MAX_CODE (charset) < cid))
998 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1000 if (XCHARSET_DIMENSION (charset) == 1)
1002 switch (XCHARSET_CHARS (charset))
1006 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1009 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1017 switch (XCHARSET_CHARS (charset))
1020 return MIN_CHAR_94x94
1021 + (final - '0') * 94 * 94
1022 + (((code_point >> 8) & 0x7F) - 33) * 94
1023 + ((code_point & 0x7F) - 33);
1025 return MIN_CHAR_96x96
1026 + (final - '0') * 96 * 96
1027 + (((code_point >> 8) & 0x7F) - 32) * 96
1028 + ((code_point & 0x7F) - 32);
1040 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1042 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1045 if ( CHAR_TABLEP (encoding_table)
1046 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1051 Lisp_Object mother = XCHARSET_MOTHER (charset);
1052 int min = XCHARSET_MIN_CODE (charset);
1053 int max = XCHARSET_MAX_CODE (charset);
1056 if ( CHARSETP (mother) )
1058 if (XCHARSET_FINAL (charset) >= '0')
1059 code = charset_code_point (mother, ch, 1);
1061 code = charset_code_point (mother, ch, defined_only);
1063 else if (defined_only)
1065 else if ( ((max == 0) && CHARSETP (mother)
1066 && (XCHARSET_FINAL (charset) == 0))
1067 || ((min <= ch) && (ch <= max)) )
1069 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1070 || ((min <= code) && (code <= max)) )
1072 int d = code - XCHARSET_CODE_OFFSET (charset);
1074 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1076 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1078 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1080 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1083 int cell = d % 94 + 33;
1089 return (row << 8) | cell;
1091 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1093 int B1 = d >> 8, B2 = d & 0xFF;
1095 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1096 - (B2 < 0x7F ? 0x40 : 0x62);
1100 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1103 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1105 int B1 = d >> 8, B2 = d & 0xFF;
1107 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1108 - (B2 < 0x7F ? 0x40 : 0x62);
1112 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1113 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1116 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1117 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1118 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1119 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1120 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1122 int plane = d / (94 * 60) + 33;
1123 int row = (d % (94 * 60)) / 94;
1124 int cell = d % 94 + 33;
1130 return (plane << 16) | (row << 8) | cell;
1132 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1134 ( (d / (94 * 94) + 33) << 16)
1135 | ((d / 94 % 94 + 33) << 8)
1137 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1139 ( (d / (96 * 96) + 32) << 16)
1140 | ((d / 96 % 96 + 32) << 8)
1142 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1144 ( (d / (94 * 94 * 94) + 33) << 24)
1145 | ((d / (94 * 94) % 94 + 33) << 16)
1146 | ((d / 94 % 94 + 33) << 8)
1148 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1150 ( (d / (96 * 96 * 96) + 32) << 24)
1151 | ((d / (96 * 96) % 96 + 32) << 16)
1152 | ((d / 96 % 96 + 32) << 8)
1156 printf ("Unknown CCS-conversion %d is specified!",
1157 XCHARSET_CONVERSION (charset));
1161 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1162 ( XCHARSET_MIN_CODE (charset) == 0 )
1164 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1165 (XCHARSET_CODE_OFFSET (charset)
1166 == XCHARSET_MIN_CODE (charset))
1171 if (XCHARSET_DIMENSION (charset) == 1)
1173 if (XCHARSET_CHARS (charset) == 94)
1175 if (((d = ch - (MIN_CHAR_94
1176 + (XCHARSET_FINAL (charset) - '0') * 94))
1181 else if (XCHARSET_CHARS (charset) == 96)
1183 if (((d = ch - (MIN_CHAR_96
1184 + (XCHARSET_FINAL (charset) - '0') * 96))
1192 else if (XCHARSET_DIMENSION (charset) == 2)
1194 if (XCHARSET_CHARS (charset) == 94)
1196 if (((d = ch - (MIN_CHAR_94x94
1198 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1201 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1203 else if (XCHARSET_CHARS (charset) == 96)
1205 if (((d = ch - (MIN_CHAR_96x96
1207 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1210 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1221 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1223 if (c <= MAX_CHAR_BASIC_LATIN)
1225 *charset = Vcharset_ascii;
1230 *charset = Vcharset_control_1;
1235 *charset = Vcharset_latin_iso8859_1;
1239 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1241 *charset = Vcharset_hebrew_iso8859_8;
1242 return c - MIN_CHAR_HEBREW + 0x20;
1245 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1247 *charset = Vcharset_thai_tis620;
1248 return c - MIN_CHAR_THAI + 0x20;
1251 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1252 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1254 return list2 (Vcharset_katakana_jisx0201,
1255 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1258 else if (c <= MAX_CHAR_BMP)
1260 *charset = Vcharset_ucs_bmp;
1263 else if (c <= MAX_CHAR_SMP)
1265 *charset = Vcharset_ucs_smp;
1266 return c - MIN_CHAR_SMP;
1268 else if (c <= MAX_CHAR_SIP)
1270 *charset = Vcharset_ucs_sip;
1271 return c - MIN_CHAR_SIP;
1273 else if (c < MIN_CHAR_94)
1275 *charset = Vcharset_ucs;
1278 else if (c <= MAX_CHAR_94)
1280 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1281 ((c - MIN_CHAR_94) / 94) + '0',
1282 CHARSET_LEFT_TO_RIGHT);
1283 if (!NILP (*charset))
1284 return ((c - MIN_CHAR_94) % 94) + 33;
1287 *charset = Vcharset_ucs;
1291 else if (c <= MAX_CHAR_96)
1293 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1294 ((c - MIN_CHAR_96) / 96) + '0',
1295 CHARSET_LEFT_TO_RIGHT);
1296 if (!NILP (*charset))
1297 return ((c - MIN_CHAR_96) % 96) + 32;
1300 *charset = Vcharset_ucs;
1304 else if (c <= MAX_CHAR_94x94)
1307 = CHARSET_BY_ATTRIBUTES (94, 2,
1308 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1309 CHARSET_LEFT_TO_RIGHT);
1310 if (!NILP (*charset))
1311 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1312 | (((c - MIN_CHAR_94x94) % 94) + 33);
1315 *charset = Vcharset_ucs;
1319 else if (c <= MAX_CHAR_96x96)
1322 = CHARSET_BY_ATTRIBUTES (96, 2,
1323 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1324 CHARSET_LEFT_TO_RIGHT);
1325 if (!NILP (*charset))
1326 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1327 | (((c - MIN_CHAR_96x96) % 96) + 32);
1330 *charset = Vcharset_ucs;
1336 *charset = Vcharset_ucs;
1341 Lisp_Object Vdefault_coded_charset_priority_list;
1345 /************************************************************************/
1346 /* Basic charset Lisp functions */
1347 /************************************************************************/
1349 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1350 Return non-nil if OBJECT is a charset.
1354 return CHARSETP (object) ? Qt : Qnil;
1357 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1358 Retrieve the charset of the given name.
1359 If CHARSET-OR-NAME is a charset object, it is simply returned.
1360 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1361 nil is returned. Otherwise the associated charset object is returned.
1365 if (CHARSETP (charset_or_name))
1366 return charset_or_name;
1368 CHECK_SYMBOL (charset_or_name);
1369 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1372 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1373 Retrieve the charset of the given name.
1374 Same as `find-charset' except an error is signalled if there is no such
1375 charset instead of returning nil.
1379 Lisp_Object charset = Ffind_charset (name);
1382 signal_simple_error ("No such charset", name);
1386 /* We store the charsets in hash tables with the names as the key and the
1387 actual charset object as the value. Occasionally we need to use them
1388 in a list format. These routines provide us with that. */
1389 struct charset_list_closure
1391 Lisp_Object *charset_list;
1395 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1396 void *charset_list_closure)
1398 /* This function can GC */
1399 struct charset_list_closure *chcl =
1400 (struct charset_list_closure*) charset_list_closure;
1401 Lisp_Object *charset_list = chcl->charset_list;
1403 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1407 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1408 Return a list of the names of all defined charsets.
1412 Lisp_Object charset_list = Qnil;
1413 struct gcpro gcpro1;
1414 struct charset_list_closure charset_list_closure;
1416 GCPRO1 (charset_list);
1417 charset_list_closure.charset_list = &charset_list;
1418 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1419 &charset_list_closure);
1422 return charset_list;
1425 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1426 Return the name of charset CHARSET.
1430 return XCHARSET_NAME (Fget_charset (charset));
1433 /* #### SJT Should generic properties be allowed? */
1434 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1435 Define a new character set.
1436 This function is for use with Mule support.
1437 NAME is a symbol, the name by which the character set is normally referred.
1438 DOC-STRING is a string describing the character set.
1439 PROPS is a property list, describing the specific nature of the
1440 character set. Recognized properties are:
1442 'short-name Short version of the charset name (ex: Latin-1)
1443 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1444 'registry A regular expression matching the font registry field for
1446 'dimension Number of octets used to index a character in this charset.
1447 Either 1 or 2. Defaults to 1.
1448 If UTF-2000 feature is enabled, 3 or 4 are also available.
1449 'columns Number of columns used to display a character in this charset.
1450 Only used in TTY mode. (Under X, the actual width of a
1451 character can be derived from the font used to display the
1452 characters.) If unspecified, defaults to the dimension
1453 (this is almost always the correct value).
1454 'chars Number of characters in each dimension (94 or 96).
1455 Defaults to 94. Note that if the dimension is 2, the
1456 character set thus described is 94x94 or 96x96.
1457 If UTF-2000 feature is enabled, 128 or 256 are also available.
1458 'final Final byte of ISO 2022 escape sequence. Must be
1459 supplied. Each combination of (DIMENSION, CHARS) defines a
1460 separate namespace for final bytes. Note that ISO
1461 2022 restricts the final byte to the range
1462 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1463 dimension == 2. Note also that final bytes in the range
1464 0x30 - 0x3F are reserved for user-defined (not official)
1466 'graphic 0 (use left half of font on output) or 1 (use right half
1467 of font on output). Defaults to 0. For example, for
1468 a font whose registry is ISO8859-1, the left half
1469 (octets 0x20 - 0x7F) is the `ascii' character set, while
1470 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1471 character set. With 'graphic set to 0, the octets
1472 will have their high bit cleared; with it set to 1,
1473 the octets will have their high bit set.
1474 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1476 'ccl-program A compiled CCL program used to convert a character in
1477 this charset into an index into the font. This is in
1478 addition to the 'graphic property. The CCL program
1479 is passed the octets of the character, with the high
1480 bit cleared and set depending upon whether the value
1481 of the 'graphic property is 0 or 1.
1482 'mother [UTF-2000 only] Base coded-charset.
1483 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1484 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1485 'code-offset [UTF-2000 only] Offset for a code-point of a base
1487 'conversion [UTF-2000 only] Conversion for a code-point of a base
1488 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1490 (name, doc_string, props))
1492 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1493 int direction = CHARSET_LEFT_TO_RIGHT;
1494 Lisp_Object registry = Qnil;
1495 Lisp_Object charset;
1496 Lisp_Object ccl_program = Qnil;
1497 Lisp_Object short_name = Qnil, long_name = Qnil;
1498 Lisp_Object mother = Qnil;
1499 int min_code = 0, max_code = 0, code_offset = 0;
1500 int byte_offset = -1;
1503 CHECK_SYMBOL (name);
1504 if (!NILP (doc_string))
1505 CHECK_STRING (doc_string);
1507 charset = Ffind_charset (name);
1508 if (!NILP (charset))
1509 signal_simple_error ("Cannot redefine existing charset", name);
1512 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1514 if (EQ (keyword, Qshort_name))
1516 CHECK_STRING (value);
1520 else if (EQ (keyword, Qlong_name))
1522 CHECK_STRING (value);
1526 else if (EQ (keyword, Qdimension))
1529 dimension = XINT (value);
1530 if (dimension < 1 ||
1537 signal_simple_error ("Invalid value for 'dimension", value);
1540 else if (EQ (keyword, Qchars))
1543 chars = XINT (value);
1544 if (chars != 94 && chars != 96
1546 && chars != 128 && chars != 256
1549 signal_simple_error ("Invalid value for 'chars", value);
1552 else if (EQ (keyword, Qcolumns))
1555 columns = XINT (value);
1556 if (columns != 1 && columns != 2)
1557 signal_simple_error ("Invalid value for 'columns", value);
1560 else if (EQ (keyword, Qgraphic))
1563 graphic = XINT (value);
1571 signal_simple_error ("Invalid value for 'graphic", value);
1574 else if (EQ (keyword, Qregistry))
1576 CHECK_STRING (value);
1580 else if (EQ (keyword, Qdirection))
1582 if (EQ (value, Ql2r))
1583 direction = CHARSET_LEFT_TO_RIGHT;
1584 else if (EQ (value, Qr2l))
1585 direction = CHARSET_RIGHT_TO_LEFT;
1587 signal_simple_error ("Invalid value for 'direction", value);
1590 else if (EQ (keyword, Qfinal))
1592 CHECK_CHAR_COERCE_INT (value);
1593 final = XCHAR (value);
1594 if (final < '0' || final > '~')
1595 signal_simple_error ("Invalid value for 'final", value);
1599 else if (EQ (keyword, Qmother))
1601 mother = Fget_charset (value);
1604 else if (EQ (keyword, Qmin_code))
1607 min_code = XUINT (value);
1610 else if (EQ (keyword, Qmax_code))
1613 max_code = XUINT (value);
1616 else if (EQ (keyword, Qcode_offset))
1619 code_offset = XUINT (value);
1622 else if (EQ (keyword, Qconversion))
1624 if (EQ (value, Q94x60))
1625 conversion = CONVERSION_94x60;
1626 else if (EQ (value, Q94x94x60))
1627 conversion = CONVERSION_94x94x60;
1628 else if (EQ (value, Qbig5_1))
1629 conversion = CONVERSION_BIG5_1;
1630 else if (EQ (value, Qbig5_2))
1631 conversion = CONVERSION_BIG5_2;
1633 signal_simple_error ("Unrecognized conversion", value);
1637 else if (EQ (keyword, Qccl_program))
1639 struct ccl_program test_ccl;
1641 if (setup_ccl_program (&test_ccl, value) < 0)
1642 signal_simple_error ("Invalid value for 'ccl-program", value);
1643 ccl_program = value;
1647 signal_simple_error ("Unrecognized property", keyword);
1653 error ("'final must be specified");
1655 if (dimension == 2 && final > 0x5F)
1657 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1660 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1661 CHARSET_LEFT_TO_RIGHT)) ||
1662 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1663 CHARSET_RIGHT_TO_LEFT)))
1665 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1667 id = get_unallocated_leading_byte (dimension);
1669 if (NILP (doc_string))
1670 doc_string = build_string ("");
1672 if (NILP (registry))
1673 registry = build_string ("");
1675 if (NILP (short_name))
1676 XSETSTRING (short_name, XSYMBOL (name)->name);
1678 if (NILP (long_name))
1679 long_name = doc_string;
1682 columns = dimension;
1684 if (byte_offset < 0)
1688 else if (chars == 96)
1694 charset = make_charset (id, name, chars, dimension, columns, graphic,
1695 final, direction, short_name, long_name,
1696 doc_string, registry,
1697 Qnil, min_code, max_code, code_offset, byte_offset,
1698 mother, conversion);
1699 if (!NILP (ccl_program))
1700 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1704 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1706 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1707 NEW-NAME is the name of the new charset. Return the new charset.
1709 (charset, new_name))
1711 Lisp_Object new_charset = Qnil;
1712 int id, chars, dimension, columns, graphic, final;
1714 Lisp_Object registry, doc_string, short_name, long_name;
1717 charset = Fget_charset (charset);
1718 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1719 signal_simple_error ("Charset already has reverse-direction charset",
1722 CHECK_SYMBOL (new_name);
1723 if (!NILP (Ffind_charset (new_name)))
1724 signal_simple_error ("Cannot redefine existing charset", new_name);
1726 cs = XCHARSET (charset);
1728 chars = CHARSET_CHARS (cs);
1729 dimension = CHARSET_DIMENSION (cs);
1730 columns = CHARSET_COLUMNS (cs);
1731 id = get_unallocated_leading_byte (dimension);
1733 graphic = CHARSET_GRAPHIC (cs);
1734 final = CHARSET_FINAL (cs);
1735 direction = CHARSET_RIGHT_TO_LEFT;
1736 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1737 direction = CHARSET_LEFT_TO_RIGHT;
1738 doc_string = CHARSET_DOC_STRING (cs);
1739 short_name = CHARSET_SHORT_NAME (cs);
1740 long_name = CHARSET_LONG_NAME (cs);
1741 registry = CHARSET_REGISTRY (cs);
1743 new_charset = make_charset (id, new_name, chars, dimension, columns,
1744 graphic, final, direction, short_name, long_name,
1745 doc_string, registry,
1747 CHARSET_DECODING_TABLE(cs),
1748 CHARSET_MIN_CODE(cs),
1749 CHARSET_MAX_CODE(cs),
1750 CHARSET_CODE_OFFSET(cs),
1751 CHARSET_BYTE_OFFSET(cs),
1753 CHARSET_CONVERSION (cs)
1755 Qnil, 0, 0, 0, 0, Qnil, 0
1759 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1760 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1765 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1766 Define symbol ALIAS as an alias for CHARSET.
1770 CHECK_SYMBOL (alias);
1771 charset = Fget_charset (charset);
1772 return Fputhash (alias, charset, Vcharset_hash_table);
1775 /* #### Reverse direction charsets not yet implemented. */
1777 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1779 Return the reverse-direction charset parallel to CHARSET, if any.
1780 This is the charset with the same properties (in particular, the same
1781 dimension, number of characters per dimension, and final byte) as
1782 CHARSET but whose characters are displayed in the opposite direction.
1786 charset = Fget_charset (charset);
1787 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1791 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1792 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1793 If DIRECTION is omitted, both directions will be checked (left-to-right
1794 will be returned if character sets exist for both directions).
1796 (dimension, chars, final, direction))
1798 int dm, ch, fi, di = -1;
1799 Lisp_Object obj = Qnil;
1801 CHECK_INT (dimension);
1802 dm = XINT (dimension);
1803 if (dm < 1 || dm > 2)
1804 signal_simple_error ("Invalid value for DIMENSION", dimension);
1808 if (ch != 94 && ch != 96)
1809 signal_simple_error ("Invalid value for CHARS", chars);
1811 CHECK_CHAR_COERCE_INT (final);
1813 if (fi < '0' || fi > '~')
1814 signal_simple_error ("Invalid value for FINAL", final);
1816 if (EQ (direction, Ql2r))
1817 di = CHARSET_LEFT_TO_RIGHT;
1818 else if (EQ (direction, Qr2l))
1819 di = CHARSET_RIGHT_TO_LEFT;
1820 else if (!NILP (direction))
1821 signal_simple_error ("Invalid value for DIRECTION", direction);
1823 if (dm == 2 && fi > 0x5F)
1825 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1829 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1831 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1834 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1837 return XCHARSET_NAME (obj);
1841 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1842 Return short name of CHARSET.
1846 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1849 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1850 Return long name of CHARSET.
1854 return XCHARSET_LONG_NAME (Fget_charset (charset));
1857 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1858 Return description of CHARSET.
1862 return XCHARSET_DOC_STRING (Fget_charset (charset));
1865 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1866 Return dimension of CHARSET.
1870 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1873 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1874 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1875 Recognized properties are those listed in `make-charset', as well as
1876 'name and 'doc-string.
1882 charset = Fget_charset (charset);
1883 cs = XCHARSET (charset);
1885 CHECK_SYMBOL (prop);
1886 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1887 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1888 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1889 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1890 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1891 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1892 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1893 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1894 Qnil : make_char (CHARSET_FINAL (cs));
1895 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1896 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1897 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1898 if (EQ (prop, Qdirection))
1899 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1900 if (EQ (prop, Qreverse_direction_charset))
1902 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1903 /* #### Is this translation OK? If so, error checking sufficient? */
1904 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1907 if (EQ (prop, Qmother))
1908 return CHARSET_MOTHER (cs);
1909 if (EQ (prop, Qmin_code))
1910 return make_int (CHARSET_MIN_CODE (cs));
1911 if (EQ (prop, Qmax_code))
1912 return make_int (CHARSET_MAX_CODE (cs));
1914 signal_simple_error ("Unrecognized charset property name", prop);
1915 return Qnil; /* not reached */
1918 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1919 Return charset identification number of CHARSET.
1923 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1926 /* #### We need to figure out which properties we really want to
1929 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1930 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1932 (charset, ccl_program))
1934 struct ccl_program test_ccl;
1936 charset = Fget_charset (charset);
1937 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1938 signal_simple_error ("Invalid ccl-program", ccl_program);
1939 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1944 invalidate_charset_font_caches (Lisp_Object charset)
1946 /* Invalidate font cache entries for charset on all devices. */
1947 Lisp_Object devcons, concons, hash_table;
1948 DEVICE_LOOP_NO_BREAK (devcons, concons)
1950 struct device *d = XDEVICE (XCAR (devcons));
1951 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1952 if (!UNBOUNDP (hash_table))
1953 Fclrhash (hash_table);
1957 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1958 Set the 'registry property of CHARSET to REGISTRY.
1960 (charset, registry))
1962 charset = Fget_charset (charset);
1963 CHECK_STRING (registry);
1964 XCHARSET_REGISTRY (charset) = registry;
1965 invalidate_charset_font_caches (charset);
1966 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1971 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1972 Return mapping-table of CHARSET.
1976 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1979 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1980 Set mapping-table of CHARSET to TABLE.
1984 struct Lisp_Charset *cs;
1988 charset = Fget_charset (charset);
1989 cs = XCHARSET (charset);
1993 CHARSET_DECODING_TABLE(cs) = Qnil;
1996 else if (VECTORP (table))
1998 int ccs_len = CHARSET_BYTE_SIZE (cs);
1999 int ret = decoding_table_check_elements (table,
2000 CHARSET_DIMENSION (cs),
2005 signal_simple_error ("Too big table", table);
2007 signal_simple_error ("Invalid element is found", table);
2009 signal_simple_error ("Something wrong", table);
2011 CHARSET_DECODING_TABLE(cs) = Qnil;
2014 signal_error (Qwrong_type_argument,
2015 list2 (build_translated_string ("vector-or-nil-p"),
2018 byte_offset = CHARSET_BYTE_OFFSET (cs);
2019 switch (CHARSET_DIMENSION (cs))
2022 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2024 Lisp_Object c = XVECTOR_DATA(table)[i];
2027 Fput_char_attribute (c, XCHARSET_NAME (charset),
2028 make_int (i + byte_offset));
2032 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2034 Lisp_Object v = XVECTOR_DATA(table)[i];
2040 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2042 Lisp_Object c = XVECTOR_DATA(v)[j];
2046 (c, XCHARSET_NAME (charset),
2047 make_int ( ( (i + byte_offset) << 8 )
2053 Fput_char_attribute (v, XCHARSET_NAME (charset),
2054 make_int (i + byte_offset));
2061 #ifdef HAVE_CHISE_CLIENT
2062 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2063 Save mapping-table of CHARSET.
2067 struct Lisp_Charset *cs;
2068 int byte_min, byte_max;
2070 Lisp_Object db_file;
2072 charset = Fget_charset (charset);
2073 cs = XCHARSET (charset);
2075 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2076 Qsystem_char_id, 1);
2077 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2079 byte_min = CHARSET_BYTE_OFFSET (cs);
2080 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2081 switch (CHARSET_DIMENSION (cs))
2085 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2088 for (cell = byte_min; cell < byte_max; cell++)
2090 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2093 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2094 Fprin1_to_string (c, Qnil),
2101 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2104 for (row = byte_min; row < byte_max; row++)
2106 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2109 for (cell = byte_min; cell < byte_max; cell++)
2111 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2114 Fput_database (Fprin1_to_string (make_int ((row << 8)
2117 Fprin1_to_string (c, Qnil),
2125 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2128 for (plane = byte_min; plane < byte_max; plane++)
2131 = get_ccs_octet_table (table_p, charset, plane);
2134 for (row = byte_min; row < byte_max; row++)
2137 = get_ccs_octet_table (table_r, charset, row);
2140 for (cell = byte_min; cell < byte_max; cell++)
2142 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2146 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2150 Fprin1_to_string (c, Qnil),
2159 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2162 for (group = byte_min; group < byte_max; group++)
2165 = get_ccs_octet_table (table_g, charset, group);
2168 for (plane = byte_min; plane < byte_max; plane++)
2171 = get_ccs_octet_table (table_p, charset, plane);
2174 for (row = byte_min; row < byte_max; row++)
2177 = get_ccs_octet_table (table_r, charset, row);
2180 for (cell = byte_min; cell < byte_max; cell++)
2183 = get_ccs_octet_table (table_c, charset, cell);
2186 Fput_database (Fprin1_to_string
2187 (make_int (( group << 24)
2192 Fprin1_to_string (c, Qnil),
2200 return Fclose_database (db);
2203 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2204 Reset mapping-table of CCS with database file.
2208 Lisp_Object db_file;
2210 ccs = Fget_charset (ccs);
2211 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2212 Qsystem_char_id, 0);
2214 if (!NILP (Ffile_exists_p (db_file)))
2216 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2223 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2226 Lisp_Object db_dir = Vexec_directory;
2228 CHISE_Decoding_Table *dt_ccs;
2231 DBTYPE real_subtype;
2233 CHISE_Char_ID char_id;
2236 db_dir = build_string ("../lib-src");
2237 db_dir = Fexpand_file_name (build_string ("char-db"), db_dir);
2239 status = chise_open_data_source (&ds, CHISE_DS_Berkeley_DB,
2240 XSTRING_DATA (db_dir));
2243 chise_close_data_source (&ds);
2247 modemask = 0755; /* rwxr-xr-x */
2248 real_subtype = DB_HASH;
2249 accessmask = DB_RDONLY;
2252 = chise_open_decoding_table (&dt_ccs, &ds,
2253 XSTRING_DATA (Fsymbol_name
2254 (XCHARSET_NAME(ccs))),
2256 accessmask, modemask);
2259 printf ("Can't open decoding-table %s\n",
2260 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))));
2261 chise_close_decoding_table (dt_ccs);
2262 chise_close_data_source (&ds);
2266 char_id = chise_dt_get_char (dt_ccs, code_point);
2268 printf ("%s's 0x%X (%d) => 0x%X\n",
2269 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))),
2270 code_point, code_point, char_id);
2273 decoding_table_put_char (ccs, code_point, make_char (char_id));
2275 decoding_table_put_char (ccs, code_point, Qnil);
2277 chise_close_decoding_table (dt_ccs);
2279 chise_close_data_source (&ds);
2285 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2288 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2292 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2299 decoding_table_put_char (ccs, code_point, ret);
2300 Fclose_database (db);
2304 decoding_table_put_char (ccs, code_point, Qnil);
2305 Fclose_database (db);
2310 #endif /* HAVE_CHISE_CLIENT */
2311 #endif /* UTF2000 */
2314 /************************************************************************/
2315 /* Lisp primitives for working with characters */
2316 /************************************************************************/
2319 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2320 Make a character from CHARSET and code-point CODE.
2321 If DEFINED_ONLY is non-nil, builtin character is not returned.
2322 If corresponding character is not found, nil is returned.
2324 (charset, code, defined_only))
2328 charset = Fget_charset (charset);
2331 if (XCHARSET_GRAPHIC (charset) == 1)
2333 if (NILP (defined_only))
2334 c = DECODE_CHAR (charset, c);
2336 c = decode_defined_char (charset, c);
2337 return c >= 0 ? make_char (c) : Qnil;
2340 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2341 Make a builtin character from CHARSET and code-point CODE.
2347 charset = Fget_charset (charset);
2349 if (EQ (charset, Vcharset_latin_viscii))
2351 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2357 (ret = Fget_char_attribute (chr,
2358 Vcharset_latin_viscii_lower,
2361 charset = Vcharset_latin_viscii_lower;
2365 (ret = Fget_char_attribute (chr,
2366 Vcharset_latin_viscii_upper,
2369 charset = Vcharset_latin_viscii_upper;
2376 if (XCHARSET_GRAPHIC (charset) == 1)
2379 c = decode_builtin_char (charset, c);
2380 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2384 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2385 Make a character from CHARSET and octets ARG1 and ARG2.
2386 ARG2 is required only for characters from two-dimensional charsets.
2387 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2388 character s with caron.
2390 (charset, arg1, arg2))
2394 int lowlim, highlim;
2396 charset = Fget_charset (charset);
2397 cs = XCHARSET (charset);
2399 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2400 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2402 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2404 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2405 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2408 /* It is useful (and safe, according to Olivier Galibert) to strip
2409 the 8th bit off ARG1 and ARG2 because it allows programmers to
2410 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2411 Latin 2 code of the character. */
2419 if (a1 < lowlim || a1 > highlim)
2420 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2422 if (CHARSET_DIMENSION (cs) == 1)
2426 ("Charset is of dimension one; second octet must be nil", arg2);
2427 return make_char (MAKE_CHAR (charset, a1, 0));
2436 a2 = XINT (arg2) & 0x7f;
2438 if (a2 < lowlim || a2 > highlim)
2439 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2441 return make_char (MAKE_CHAR (charset, a1, a2));
2444 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2445 Return the character set of CHARACTER.
2449 CHECK_CHAR_COERCE_INT (character);
2451 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2454 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2455 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2456 N defaults to 0 if omitted.
2460 Lisp_Object charset;
2463 CHECK_CHAR_COERCE_INT (character);
2465 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2467 if (NILP (n) || EQ (n, Qzero))
2468 return make_int (octet0);
2469 else if (EQ (n, make_int (1)))
2470 return make_int (octet1);
2472 signal_simple_error ("Octet number must be 0 or 1", n);
2476 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2477 Return code-point of CHARACTER in specified CHARSET.
2479 (character, charset, defined_only))
2483 CHECK_CHAR_COERCE_INT (character);
2484 charset = Fget_charset (charset);
2485 code_point = charset_code_point (charset, XCHAR (character),
2486 !NILP (defined_only));
2487 if (code_point >= 0)
2488 return make_int (code_point);
2494 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2495 Return list of charset and one or two position-codes of CHARACTER.
2499 /* This function can GC */
2500 struct gcpro gcpro1, gcpro2;
2501 Lisp_Object charset = Qnil;
2502 Lisp_Object rc = Qnil;
2510 GCPRO2 (charset, rc);
2511 CHECK_CHAR_COERCE_INT (character);
2514 code_point = ENCODE_CHAR (XCHAR (character), charset);
2515 dimension = XCHARSET_DIMENSION (charset);
2516 while (dimension > 0)
2518 rc = Fcons (make_int (code_point & 255), rc);
2522 rc = Fcons (XCHARSET_NAME (charset), rc);
2524 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2526 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2528 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2532 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2541 #ifdef ENABLE_COMPOSITE_CHARS
2542 /************************************************************************/
2543 /* composite character functions */
2544 /************************************************************************/
2547 lookup_composite_char (Bufbyte *str, int len)
2549 Lisp_Object lispstr = make_string (str, len);
2550 Lisp_Object ch = Fgethash (lispstr,
2551 Vcomposite_char_string2char_hash_table,
2557 if (composite_char_row_next >= 128)
2558 signal_simple_error ("No more composite chars available", lispstr);
2559 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2560 composite_char_col_next);
2561 Fputhash (make_char (emch), lispstr,
2562 Vcomposite_char_char2string_hash_table);
2563 Fputhash (lispstr, make_char (emch),
2564 Vcomposite_char_string2char_hash_table);
2565 composite_char_col_next++;
2566 if (composite_char_col_next >= 128)
2568 composite_char_col_next = 32;
2569 composite_char_row_next++;
2578 composite_char_string (Emchar ch)
2580 Lisp_Object str = Fgethash (make_char (ch),
2581 Vcomposite_char_char2string_hash_table,
2583 assert (!UNBOUNDP (str));
2587 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2588 Convert a string into a single composite character.
2589 The character is the result of overstriking all the characters in
2594 CHECK_STRING (string);
2595 return make_char (lookup_composite_char (XSTRING_DATA (string),
2596 XSTRING_LENGTH (string)));
2599 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2600 Return a string of the characters comprising a composite character.
2608 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2609 signal_simple_error ("Must be composite char", ch);
2610 return composite_char_string (emch);
2612 #endif /* ENABLE_COMPOSITE_CHARS */
2615 /************************************************************************/
2616 /* initialization */
2617 /************************************************************************/
2620 syms_of_mule_charset (void)
2622 INIT_LRECORD_IMPLEMENTATION (charset);
2624 DEFSUBR (Fcharsetp);
2625 DEFSUBR (Ffind_charset);
2626 DEFSUBR (Fget_charset);
2627 DEFSUBR (Fcharset_list);
2628 DEFSUBR (Fcharset_name);
2629 DEFSUBR (Fmake_charset);
2630 DEFSUBR (Fmake_reverse_direction_charset);
2631 /* DEFSUBR (Freverse_direction_charset); */
2632 DEFSUBR (Fdefine_charset_alias);
2633 DEFSUBR (Fcharset_from_attributes);
2634 DEFSUBR (Fcharset_short_name);
2635 DEFSUBR (Fcharset_long_name);
2636 DEFSUBR (Fcharset_description);
2637 DEFSUBR (Fcharset_dimension);
2638 DEFSUBR (Fcharset_property);
2639 DEFSUBR (Fcharset_id);
2640 DEFSUBR (Fset_charset_ccl_program);
2641 DEFSUBR (Fset_charset_registry);
2643 DEFSUBR (Fcharset_mapping_table);
2644 DEFSUBR (Fset_charset_mapping_table);
2645 #ifdef HAVE_CHISE_CLIENT
2646 DEFSUBR (Fsave_charset_mapping_table);
2647 DEFSUBR (Freset_charset_mapping_table);
2650 DEFSUBR (Fdecode_char);
2651 DEFSUBR (Fdecode_builtin_char);
2652 DEFSUBR (Fencode_char);
2654 DEFSUBR (Fmake_char);
2655 DEFSUBR (Fchar_charset);
2656 DEFSUBR (Fchar_octet);
2657 DEFSUBR (Fsplit_char);
2659 #ifdef ENABLE_COMPOSITE_CHARS
2660 DEFSUBR (Fmake_composite_char);
2661 DEFSUBR (Fcomposite_char_string);
2664 defsymbol (&Qcharsetp, "charsetp");
2665 defsymbol (&Qregistry, "registry");
2666 defsymbol (&Qfinal, "final");
2667 defsymbol (&Qgraphic, "graphic");
2668 defsymbol (&Qdirection, "direction");
2669 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2670 defsymbol (&Qshort_name, "short-name");
2671 defsymbol (&Qlong_name, "long-name");
2673 defsymbol (&Qmother, "mother");
2674 defsymbol (&Qmin_code, "min-code");
2675 defsymbol (&Qmax_code, "max-code");
2676 defsymbol (&Qcode_offset, "code-offset");
2677 defsymbol (&Qconversion, "conversion");
2678 defsymbol (&Q94x60, "94x60");
2679 defsymbol (&Q94x94x60, "94x94x60");
2680 defsymbol (&Qbig5_1, "big5-1");
2681 defsymbol (&Qbig5_2, "big5-2");
2684 defsymbol (&Ql2r, "l2r");
2685 defsymbol (&Qr2l, "r2l");
2687 /* Charsets, compatible with FSF 20.3
2688 Naming convention is Script-Charset[-Edition] */
2689 defsymbol (&Qascii, "ascii");
2690 defsymbol (&Qcontrol_1, "control-1");
2691 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2692 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2693 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2694 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2695 defsymbol (&Qthai_tis620, "thai-tis620");
2696 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2697 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2698 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2699 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2700 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2701 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2702 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2703 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2704 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2705 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2706 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2707 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2708 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2709 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2710 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2712 defsymbol (&Qmap_ucs, "=ucs");
2713 defsymbol (&Qucs, "ucs");
2714 defsymbol (&Qucs_bmp, "ucs-bmp");
2715 defsymbol (&Qucs_smp, "ucs-smp");
2716 defsymbol (&Qucs_sip, "ucs-sip");
2717 defsymbol (&Qlatin_viscii, "latin-viscii");
2718 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2719 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2720 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2721 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2722 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2723 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2724 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2725 defsymbol (&Qchinese_big5, "chinese-big5");
2726 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2728 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2729 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2731 defsymbol (&Qcomposite, "composite");
2735 vars_of_mule_charset (void)
2742 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2743 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2745 /* Table of charsets indexed by leading byte. */
2746 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2747 chlook->charset_by_leading_byte[i] = Qnil;
2750 /* Table of charsets indexed by type/final-byte. */
2751 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2752 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2753 chlook->charset_by_attributes[i][j] = Qnil;
2755 /* Table of charsets indexed by type/final-byte/direction. */
2756 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2757 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2758 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2759 chlook->charset_by_attributes[i][j][k] = Qnil;
2763 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2765 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2766 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2770 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2771 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2772 Leading-code of private TYPE9N charset of column-width 1.
2774 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2778 Vdefault_coded_charset_priority_list = Qnil;
2779 DEFVAR_LISP ("default-coded-charset-priority-list",
2780 &Vdefault_coded_charset_priority_list /*
2781 Default order of preferred coded-character-sets.
2787 complex_vars_of_mule_charset (void)
2789 staticpro (&Vcharset_hash_table);
2790 Vcharset_hash_table =
2791 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2793 /* Predefined character sets. We store them into variables for
2797 staticpro (&Vcharset_ucs);
2799 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2800 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2801 build_string ("UCS"),
2802 build_string ("UCS"),
2803 build_string ("ISO/IEC 10646"),
2805 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2806 staticpro (&Vcharset_ucs_bmp);
2808 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2809 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2810 build_string ("BMP"),
2811 build_string ("UCS-BMP"),
2812 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2814 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2815 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2816 staticpro (&Vcharset_ucs_smp);
2818 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2819 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2820 build_string ("SMP"),
2821 build_string ("UCS-SMP"),
2822 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2823 build_string ("UCS00-1"),
2824 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2825 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_ucs_sip);
2828 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2829 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2830 build_string ("SIP"),
2831 build_string ("UCS-SIP"),
2832 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2833 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2834 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2835 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2837 # define MIN_CHAR_THAI 0
2838 # define MAX_CHAR_THAI 0
2839 /* # define MIN_CHAR_HEBREW 0 */
2840 /* # define MAX_CHAR_HEBREW 0 */
2841 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2842 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2844 staticpro (&Vcharset_ascii);
2846 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2847 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2848 build_string ("ASCII"),
2849 build_string ("ASCII)"),
2850 build_string ("ASCII (ISO646 IRV)"),
2851 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2852 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_control_1);
2854 Vcharset_control_1 =
2855 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2856 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2857 build_string ("C1"),
2858 build_string ("Control characters"),
2859 build_string ("Control characters 128-191"),
2861 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_latin_iso8859_1);
2863 Vcharset_latin_iso8859_1 =
2864 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2865 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2866 build_string ("Latin-1"),
2867 build_string ("ISO8859-1 (Latin-1)"),
2868 build_string ("ISO8859-1 (Latin-1)"),
2869 build_string ("iso8859-1"),
2870 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2871 staticpro (&Vcharset_latin_iso8859_2);
2872 Vcharset_latin_iso8859_2 =
2873 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2874 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2875 build_string ("Latin-2"),
2876 build_string ("ISO8859-2 (Latin-2)"),
2877 build_string ("ISO8859-2 (Latin-2)"),
2878 build_string ("iso8859-2"),
2879 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2880 staticpro (&Vcharset_latin_iso8859_3);
2881 Vcharset_latin_iso8859_3 =
2882 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2883 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2884 build_string ("Latin-3"),
2885 build_string ("ISO8859-3 (Latin-3)"),
2886 build_string ("ISO8859-3 (Latin-3)"),
2887 build_string ("iso8859-3"),
2888 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2889 staticpro (&Vcharset_latin_iso8859_4);
2890 Vcharset_latin_iso8859_4 =
2891 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2892 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2893 build_string ("Latin-4"),
2894 build_string ("ISO8859-4 (Latin-4)"),
2895 build_string ("ISO8859-4 (Latin-4)"),
2896 build_string ("iso8859-4"),
2897 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2898 staticpro (&Vcharset_thai_tis620);
2899 Vcharset_thai_tis620 =
2900 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2901 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2902 build_string ("TIS620"),
2903 build_string ("TIS620 (Thai)"),
2904 build_string ("TIS620.2529 (Thai)"),
2905 build_string ("tis620"),
2906 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2907 staticpro (&Vcharset_greek_iso8859_7);
2908 Vcharset_greek_iso8859_7 =
2909 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2910 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2911 build_string ("ISO8859-7"),
2912 build_string ("ISO8859-7 (Greek)"),
2913 build_string ("ISO8859-7 (Greek)"),
2914 build_string ("iso8859-7"),
2915 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2916 staticpro (&Vcharset_arabic_iso8859_6);
2917 Vcharset_arabic_iso8859_6 =
2918 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2919 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2920 build_string ("ISO8859-6"),
2921 build_string ("ISO8859-6 (Arabic)"),
2922 build_string ("ISO8859-6 (Arabic)"),
2923 build_string ("iso8859-6"),
2924 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2925 staticpro (&Vcharset_hebrew_iso8859_8);
2926 Vcharset_hebrew_iso8859_8 =
2927 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2928 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2929 build_string ("ISO8859-8"),
2930 build_string ("ISO8859-8 (Hebrew)"),
2931 build_string ("ISO8859-8 (Hebrew)"),
2932 build_string ("iso8859-8"),
2934 0 /* MIN_CHAR_HEBREW */,
2935 0 /* MAX_CHAR_HEBREW */, 0, 32,
2936 Qnil, CONVERSION_IDENTICAL);
2937 staticpro (&Vcharset_katakana_jisx0201);
2938 Vcharset_katakana_jisx0201 =
2939 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2940 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2941 build_string ("JISX0201 Kana"),
2942 build_string ("JISX0201.1976 (Japanese Kana)"),
2943 build_string ("JISX0201.1976 Japanese Kana"),
2944 build_string ("jisx0201\\.1976"),
2945 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2946 staticpro (&Vcharset_latin_jisx0201);
2947 Vcharset_latin_jisx0201 =
2948 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2949 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2950 build_string ("JISX0201 Roman"),
2951 build_string ("JISX0201.1976 (Japanese Roman)"),
2952 build_string ("JISX0201.1976 Japanese Roman"),
2953 build_string ("jisx0201\\.1976"),
2954 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2955 staticpro (&Vcharset_cyrillic_iso8859_5);
2956 Vcharset_cyrillic_iso8859_5 =
2957 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2958 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2959 build_string ("ISO8859-5"),
2960 build_string ("ISO8859-5 (Cyrillic)"),
2961 build_string ("ISO8859-5 (Cyrillic)"),
2962 build_string ("iso8859-5"),
2963 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2964 staticpro (&Vcharset_latin_iso8859_9);
2965 Vcharset_latin_iso8859_9 =
2966 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2967 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2968 build_string ("Latin-5"),
2969 build_string ("ISO8859-9 (Latin-5)"),
2970 build_string ("ISO8859-9 (Latin-5)"),
2971 build_string ("iso8859-9"),
2972 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2974 staticpro (&Vcharset_jis_x0208);
2975 Vcharset_jis_x0208 =
2976 make_charset (LEADING_BYTE_JIS_X0208,
2977 Qmap_jis_x0208, 94, 2,
2978 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2979 build_string ("JIS X0208"),
2980 build_string ("JIS X0208 Common"),
2981 build_string ("JIS X0208 Common part"),
2982 build_string ("jisx0208\\.1990"),
2984 MIN_CHAR_JIS_X0208_1990,
2985 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2986 Qnil, CONVERSION_94x94);
2988 staticpro (&Vcharset_japanese_jisx0208_1978);
2989 Vcharset_japanese_jisx0208_1978 =
2990 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2991 Qjapanese_jisx0208_1978, 94, 2,
2992 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2993 build_string ("JIS X0208:1978"),
2994 build_string ("JIS X0208:1978 (Japanese)"),
2996 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2997 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3004 CONVERSION_IDENTICAL);
3005 staticpro (&Vcharset_chinese_gb2312);
3006 Vcharset_chinese_gb2312 =
3007 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3008 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3009 build_string ("GB2312"),
3010 build_string ("GB2312)"),
3011 build_string ("GB2312 Chinese simplified"),
3012 build_string ("gb2312"),
3013 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3014 staticpro (&Vcharset_chinese_gb12345);
3015 Vcharset_chinese_gb12345 =
3016 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3017 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3018 build_string ("G1"),
3019 build_string ("GB 12345)"),
3020 build_string ("GB 12345-1990"),
3021 build_string ("GB12345\\(\\.1990\\)?-0"),
3022 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3023 staticpro (&Vcharset_japanese_jisx0208);
3024 Vcharset_japanese_jisx0208 =
3025 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3026 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3027 build_string ("JISX0208"),
3028 build_string ("JIS X0208:1983 (Japanese)"),
3029 build_string ("JIS X0208:1983 Japanese Kanji"),
3030 build_string ("jisx0208\\.1983"),
3037 CONVERSION_IDENTICAL);
3039 staticpro (&Vcharset_japanese_jisx0208_1990);
3040 Vcharset_japanese_jisx0208_1990 =
3041 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3042 Qmap_jis_x0208_1990, 94, 2,
3043 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3044 build_string ("JISX0208-1990"),
3045 build_string ("JIS X0208:1990 (Japanese)"),
3046 build_string ("JIS X0208:1990 Japanese Kanji"),
3047 build_string ("jisx0208\\.1990"),
3049 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3050 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3051 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3052 Vcharset_jis_x0208 /* Qnil */,
3053 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3055 staticpro (&Vcharset_korean_ksc5601);
3056 Vcharset_korean_ksc5601 =
3057 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3058 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3059 build_string ("KSC5601"),
3060 build_string ("KSC5601 (Korean"),
3061 build_string ("KSC5601 Korean Hangul and Hanja"),
3062 build_string ("ksc5601"),
3063 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3064 staticpro (&Vcharset_japanese_jisx0212);
3065 Vcharset_japanese_jisx0212 =
3066 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3067 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3068 build_string ("JISX0212"),
3069 build_string ("JISX0212 (Japanese)"),
3070 build_string ("JISX0212 Japanese Supplement"),
3071 build_string ("jisx0212"),
3072 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3074 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3075 staticpro (&Vcharset_chinese_cns11643_1);
3076 Vcharset_chinese_cns11643_1 =
3077 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3078 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3079 build_string ("CNS11643-1"),
3080 build_string ("CNS11643-1 (Chinese traditional)"),
3082 ("CNS 11643 Plane 1 Chinese traditional"),
3083 build_string (CHINESE_CNS_PLANE_RE("1")),
3084 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3085 staticpro (&Vcharset_chinese_cns11643_2);
3086 Vcharset_chinese_cns11643_2 =
3087 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3088 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3089 build_string ("CNS11643-2"),
3090 build_string ("CNS11643-2 (Chinese traditional)"),
3092 ("CNS 11643 Plane 2 Chinese traditional"),
3093 build_string (CHINESE_CNS_PLANE_RE("2")),
3094 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3096 staticpro (&Vcharset_latin_tcvn5712);
3097 Vcharset_latin_tcvn5712 =
3098 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3099 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("TCVN 5712"),
3101 build_string ("TCVN 5712 (VSCII-2)"),
3102 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3103 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3104 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3105 staticpro (&Vcharset_latin_viscii_lower);
3106 Vcharset_latin_viscii_lower =
3107 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3108 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3109 build_string ("VISCII lower"),
3110 build_string ("VISCII lower (Vietnamese)"),
3111 build_string ("VISCII lower (Vietnamese)"),
3112 build_string ("MULEVISCII-LOWER"),
3113 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3114 staticpro (&Vcharset_latin_viscii_upper);
3115 Vcharset_latin_viscii_upper =
3116 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3117 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3118 build_string ("VISCII upper"),
3119 build_string ("VISCII upper (Vietnamese)"),
3120 build_string ("VISCII upper (Vietnamese)"),
3121 build_string ("MULEVISCII-UPPER"),
3122 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3123 staticpro (&Vcharset_latin_viscii);
3124 Vcharset_latin_viscii =
3125 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3126 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3127 build_string ("VISCII"),
3128 build_string ("VISCII 1.1 (Vietnamese)"),
3129 build_string ("VISCII 1.1 (Vietnamese)"),
3130 build_string ("VISCII1\\.1"),
3131 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3132 staticpro (&Vcharset_chinese_big5);
3133 Vcharset_chinese_big5 =
3134 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3135 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3136 build_string ("Big5"),
3137 build_string ("Big5"),
3138 build_string ("Big5 Chinese traditional"),
3139 build_string ("big5-0"),
3141 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3142 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3144 staticpro (&Vcharset_ethiopic_ucs);
3145 Vcharset_ethiopic_ucs =
3146 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3147 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3148 build_string ("Ethiopic (UCS)"),
3149 build_string ("Ethiopic (UCS)"),
3150 build_string ("Ethiopic of UCS"),
3151 build_string ("Ethiopic-Unicode"),
3152 Qnil, 0x1200, 0x137F, 0, 0,
3153 Qnil, CONVERSION_IDENTICAL);
3155 staticpro (&Vcharset_chinese_big5_1);
3156 Vcharset_chinese_big5_1 =
3157 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3158 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3159 build_string ("Big5"),
3160 build_string ("Big5 (Level-1)"),
3162 ("Big5 Level-1 Chinese traditional"),
3163 build_string ("big5"),
3164 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3165 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3166 staticpro (&Vcharset_chinese_big5_2);
3167 Vcharset_chinese_big5_2 =
3168 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3169 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3170 build_string ("Big5"),
3171 build_string ("Big5 (Level-2)"),
3173 ("Big5 Level-2 Chinese traditional"),
3174 build_string ("big5"),
3175 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3176 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3178 #ifdef ENABLE_COMPOSITE_CHARS
3179 /* #### For simplicity, we put composite chars into a 96x96 charset.
3180 This is going to lead to problems because you can run out of
3181 room, esp. as we don't yet recycle numbers. */
3182 staticpro (&Vcharset_composite);
3183 Vcharset_composite =
3184 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3185 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3186 build_string ("Composite"),
3187 build_string ("Composite characters"),
3188 build_string ("Composite characters"),
3191 /* #### not dumped properly */
3192 composite_char_row_next = 32;
3193 composite_char_col_next = 32;
3195 Vcomposite_char_string2char_hash_table =
3196 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3197 Vcomposite_char_char2string_hash_table =
3198 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3199 staticpro (&Vcomposite_char_string2char_hash_table);
3200 staticpro (&Vcomposite_char_char2string_hash_table);
3201 #endif /* ENABLE_COMPOSITE_CHARS */