1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
150 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
151 if (!char_byte_table_equal (cte1->property[i],
152 cte2->property[i], depth + 1))
157 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
163 char_byte_table_hash (Lisp_Object obj, int depth)
165 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
167 return internal_array_hash (cte->property, 256, depth);
170 static const struct lrecord_description char_byte_table_description[] = {
171 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
175 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
176 mark_char_byte_table,
177 internal_object_printer,
178 0, char_byte_table_equal,
179 char_byte_table_hash,
180 char_byte_table_description,
181 struct Lisp_Char_Byte_Table);
185 make_char_byte_table (Lisp_Object initval)
189 struct Lisp_Char_Byte_Table *cte =
190 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
191 &lrecord_char_byte_table);
193 for (i = 0; i < 256; i++)
194 cte->property[i] = initval;
196 XSETCHAR_BYTE_TABLE (obj, cte);
201 copy_char_byte_table (Lisp_Object entry)
203 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
206 struct Lisp_Char_Byte_Table *ctenew =
207 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
208 &lrecord_char_byte_table);
210 for (i = 0; i < 256; i++)
212 Lisp_Object new = cte->property[i];
213 if (CHAR_BYTE_TABLE_P (new))
214 ctenew->property[i] = copy_char_byte_table (new);
216 ctenew->property[i] = new;
219 XSETCHAR_BYTE_TABLE (obj, ctenew);
223 #define make_char_code_table(initval) make_char_byte_table(initval)
226 get_char_code_table (Emchar ch, Lisp_Object table)
228 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
229 Lisp_Object ret = cpt->property [ch >> 24];
231 if (CHAR_BYTE_TABLE_P (ret))
232 cpt = XCHAR_BYTE_TABLE (ret);
236 ret = cpt->property [(unsigned char) (ch >> 16)];
237 if (CHAR_BYTE_TABLE_P (ret))
238 cpt = XCHAR_BYTE_TABLE (ret);
242 ret = cpt->property [(unsigned char) (ch >> 8)];
243 if (CHAR_BYTE_TABLE_P (ret))
244 cpt = XCHAR_BYTE_TABLE (ret);
248 return cpt->property [(unsigned char) ch];
252 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
254 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
255 Lisp_Object ret = cpt1->property[ch >> 24];
257 if (CHAR_BYTE_TABLE_P (ret))
259 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
261 ret = cpt2->property[(unsigned char)(ch >> 16)];
262 if (CHAR_BYTE_TABLE_P (ret))
264 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
266 ret = cpt3->property[(unsigned char)(ch >> 8)];
267 if (CHAR_BYTE_TABLE_P (ret))
269 struct Lisp_Char_Byte_Table* cpt4
270 = XCHAR_BYTE_TABLE (ret);
272 cpt4->property[(unsigned char)ch] = value;
274 else if (!EQ (ret, value))
276 Lisp_Object cpt4 = make_char_byte_table (ret);
278 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
279 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
282 else if (!EQ (ret, value))
284 Lisp_Object cpt3 = make_char_byte_table (ret);
285 Lisp_Object cpt4 = make_char_byte_table (ret);
287 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
288 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
290 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
293 else if (!EQ (ret, value))
295 Lisp_Object cpt2 = make_char_byte_table (ret);
296 Lisp_Object cpt3 = make_char_byte_table (ret);
297 Lisp_Object cpt4 = make_char_byte_table (ret);
299 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
300 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
301 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
302 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
307 Lisp_Object Vutf_2000_version;
311 int leading_code_private_11;
314 Lisp_Object Qcharsetp;
316 /* Qdoc_string, Qdimension, Qchars defined in general.c */
317 Lisp_Object Qregistry, Qfinal, Qgraphic;
318 Lisp_Object Qdirection;
319 Lisp_Object Qreverse_direction_charset;
320 Lisp_Object Qleading_byte;
321 Lisp_Object Qshort_name, Qlong_name;
337 Qjapanese_jisx0208_1978,
349 Qvietnamese_viscii_lower,
350 Qvietnamese_viscii_upper,
358 Lisp_Object Ql2r, Qr2l;
360 Lisp_Object Vcharset_hash_table;
362 static Charset_ID next_allocated_1_byte_leading_byte;
363 static Charset_ID next_allocated_2_byte_leading_byte;
365 /* Composite characters are characters constructed by overstriking two
366 or more regular characters.
368 1) The old Mule implementation involves storing composite characters
369 in a buffer as a tag followed by all of the actual characters
370 used to make up the composite character. I think this is a bad
371 idea; it greatly complicates code that wants to handle strings
372 one character at a time because it has to deal with the possibility
373 of great big ungainly characters. It's much more reasonable to
374 simply store an index into a table of composite characters.
376 2) The current implementation only allows for 16,384 separate
377 composite characters over the lifetime of the XEmacs process.
378 This could become a potential problem if the user
379 edited lots of different files that use composite characters.
380 Due to FSF bogosity, increasing the number of allowable
381 composite characters under Mule would decrease the number
382 of possible faces that can exist. Mule already has shrunk
383 this to 2048, and further shrinkage would become uncomfortable.
384 No such problems exist in XEmacs.
386 Composite characters could be represented as 0x80 C1 C2 C3,
387 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
388 for slightly under 2^20 (one million) composite characters
389 over the XEmacs process lifetime, and you only need to
390 increase the size of a Mule character from 19 to 21 bits.
391 Or you could use 0x80 C1 C2 C3 C4, allowing for about
392 85 million (slightly over 2^26) composite characters. */
395 /************************************************************************/
396 /* Basic Emchar functions */
397 /************************************************************************/
399 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
400 string in STR. Returns the number of bytes stored.
401 Do not call this directly. Use the macro set_charptr_emchar() instead.
405 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
420 else if ( c <= 0x7ff )
422 *p++ = (c >> 6) | 0xc0;
423 *p++ = (c & 0x3f) | 0x80;
425 else if ( c <= 0xffff )
427 *p++ = (c >> 12) | 0xe0;
428 *p++ = ((c >> 6) & 0x3f) | 0x80;
429 *p++ = (c & 0x3f) | 0x80;
431 else if ( c <= 0x1fffff )
433 *p++ = (c >> 18) | 0xf0;
434 *p++ = ((c >> 12) & 0x3f) | 0x80;
435 *p++ = ((c >> 6) & 0x3f) | 0x80;
436 *p++ = (c & 0x3f) | 0x80;
438 else if ( c <= 0x3ffffff )
440 *p++ = (c >> 24) | 0xf8;
441 *p++ = ((c >> 18) & 0x3f) | 0x80;
442 *p++ = ((c >> 12) & 0x3f) | 0x80;
443 *p++ = ((c >> 6) & 0x3f) | 0x80;
444 *p++ = (c & 0x3f) | 0x80;
448 *p++ = (c >> 30) | 0xfc;
449 *p++ = ((c >> 24) & 0x3f) | 0x80;
450 *p++ = ((c >> 18) & 0x3f) | 0x80;
451 *p++ = ((c >> 12) & 0x3f) | 0x80;
452 *p++ = ((c >> 6) & 0x3f) | 0x80;
453 *p++ = (c & 0x3f) | 0x80;
456 BREAKUP_CHAR (c, charset, c1, c2);
457 lb = CHAR_LEADING_BYTE (c);
458 if (LEADING_BYTE_PRIVATE_P (lb))
459 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
461 if (EQ (charset, Vcharset_control_1))
470 /* Return the first character from a Mule-encoded string in STR,
471 assuming it's non-ASCII. Do not call this directly.
472 Use the macro charptr_emchar() instead. */
475 non_ascii_charptr_emchar (CONST Bufbyte *str)
488 else if ( b >= 0xf8 )
493 else if ( b >= 0xf0 )
498 else if ( b >= 0xe0 )
503 else if ( b >= 0xc0 )
513 for( ; len > 0; len-- )
516 ch = ( ch << 6 ) | ( b & 0x3f );
520 Bufbyte i0 = *str, i1, i2 = 0;
523 if (i0 == LEADING_BYTE_CONTROL_1)
524 return (Emchar) (*++str - 0x20);
526 if (LEADING_BYTE_PREFIX_P (i0))
531 charset = CHARSET_BY_LEADING_BYTE (i0);
532 if (XCHARSET_DIMENSION (charset) == 2)
535 return MAKE_CHAR (charset, i1, i2);
539 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
540 Do not call this directly. Use the macro valid_char_p() instead. */
544 non_ascii_valid_char_p (Emchar ch)
548 /* Must have only lowest 19 bits set */
552 f1 = CHAR_FIELD1 (ch);
553 f2 = CHAR_FIELD2 (ch);
554 f3 = CHAR_FIELD3 (ch);
560 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
561 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
562 f2 > MAX_CHAR_FIELD2_PRIVATE)
567 if (f3 != 0x20 && f3 != 0x7F)
571 NOTE: This takes advantage of the fact that
572 FIELD2_TO_OFFICIAL_LEADING_BYTE and
573 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
575 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
576 return (XCHARSET_CHARS (charset) == 96);
582 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
583 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
584 f1 > MAX_CHAR_FIELD1_PRIVATE)
586 if (f2 < 0x20 || f3 < 0x20)
589 #ifdef ENABLE_COMPOSITE_CHARS
590 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
592 if (UNBOUNDP (Fgethash (make_int (ch),
593 Vcomposite_char_char2string_hash_table,
598 #endif /* ENABLE_COMPOSITE_CHARS */
600 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
603 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
605 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
608 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
610 return (XCHARSET_CHARS (charset) == 96);
616 /************************************************************************/
617 /* Basic string functions */
618 /************************************************************************/
620 /* Copy the character pointed to by PTR into STR, assuming it's
621 non-ASCII. Do not call this directly. Use the macro
622 charptr_copy_char() instead. */
625 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
627 Bufbyte *strptr = str;
629 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
631 /* Notice fallthrough. */
633 case 6: *++strptr = *ptr++;
634 case 5: *++strptr = *ptr++;
636 case 4: *++strptr = *ptr++;
637 case 3: *++strptr = *ptr++;
638 case 2: *++strptr = *ptr;
643 return strptr + 1 - str;
647 /************************************************************************/
648 /* streams of Emchars */
649 /************************************************************************/
651 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
652 The functions below are not meant to be called directly; use
653 the macros in insdel.h. */
656 Lstream_get_emchar_1 (Lstream *stream, int ch)
658 Bufbyte str[MAX_EMCHAR_LEN];
659 Bufbyte *strptr = str;
661 str[0] = (Bufbyte) ch;
662 switch (REP_BYTES_BY_FIRST_BYTE (ch))
664 /* Notice fallthrough. */
667 ch = Lstream_getc (stream);
669 *++strptr = (Bufbyte) ch;
671 ch = Lstream_getc (stream);
673 *++strptr = (Bufbyte) ch;
676 ch = Lstream_getc (stream);
678 *++strptr = (Bufbyte) ch;
680 ch = Lstream_getc (stream);
682 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
691 return charptr_emchar (str);
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
716 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
718 struct Lisp_Charset *cs = XCHARSET (obj);
720 markobj (cs->short_name);
721 markobj (cs->long_name);
722 markobj (cs->doc_string);
723 markobj (cs->registry);
724 markobj (cs->ccl_program);
726 markobj (cs->decoding_table);
727 markobj (cs->encoding_table);
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 struct Lisp_Charset *cs = XCHARSET (obj);
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
751 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
752 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
753 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
754 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
769 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
774 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
775 mark_charset, print_charset, 0, 0, 0,
777 struct Lisp_Charset);
779 /* Make a new charset. */
782 make_charset (Charset_ID id, Lisp_Object name,
783 unsigned char type, unsigned char columns, unsigned char graphic,
784 Bufbyte final, unsigned char direction, Lisp_Object short_name,
785 Lisp_Object long_name, Lisp_Object doc,
787 Lisp_Object decoding_table,
788 Emchar ucs_min, Emchar ucs_max,
789 Emchar code_offset, unsigned char byte_offset)
792 struct Lisp_Charset *cs =
793 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
794 XSETCHARSET (obj, cs);
796 CHARSET_ID (cs) = id;
797 CHARSET_NAME (cs) = name;
798 CHARSET_SHORT_NAME (cs) = short_name;
799 CHARSET_LONG_NAME (cs) = long_name;
800 CHARSET_DIRECTION (cs) = direction;
801 CHARSET_TYPE (cs) = type;
802 CHARSET_COLUMNS (cs) = columns;
803 CHARSET_GRAPHIC (cs) = graphic;
804 CHARSET_FINAL (cs) = final;
805 CHARSET_DOC_STRING (cs) = doc;
806 CHARSET_REGISTRY (cs) = reg;
807 CHARSET_CCL_PROGRAM (cs) = Qnil;
808 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
810 CHARSET_DECODING_TABLE(cs) = Qnil;
811 CHARSET_ENCODING_TABLE(cs) = Qnil;
812 CHARSET_UCS_MIN(cs) = ucs_min;
813 CHARSET_UCS_MAX(cs) = ucs_max;
814 CHARSET_CODE_OFFSET(cs) = code_offset;
815 CHARSET_BYTE_OFFSET(cs) = byte_offset;
818 switch (CHARSET_TYPE (cs))
820 case CHARSET_TYPE_94:
821 CHARSET_DIMENSION (cs) = 1;
822 CHARSET_CHARS (cs) = 94;
824 case CHARSET_TYPE_96:
825 CHARSET_DIMENSION (cs) = 1;
826 CHARSET_CHARS (cs) = 96;
828 case CHARSET_TYPE_94X94:
829 CHARSET_DIMENSION (cs) = 2;
830 CHARSET_CHARS (cs) = 94;
832 case CHARSET_TYPE_96X96:
833 CHARSET_DIMENSION (cs) = 2;
834 CHARSET_CHARS (cs) = 96;
837 case CHARSET_TYPE_128:
838 CHARSET_DIMENSION (cs) = 1;
839 CHARSET_CHARS (cs) = 128;
841 case CHARSET_TYPE_128X128:
842 CHARSET_DIMENSION (cs) = 2;
843 CHARSET_CHARS (cs) = 128;
845 case CHARSET_TYPE_256:
846 CHARSET_DIMENSION (cs) = 1;
847 CHARSET_CHARS (cs) = 256;
849 case CHARSET_TYPE_256X256:
850 CHARSET_DIMENSION (cs) = 2;
851 CHARSET_CHARS (cs) = 256;
857 if (id == LEADING_BYTE_ASCII)
858 CHARSET_REP_BYTES (cs) = 1;
860 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
862 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
867 /* some charsets do not have final characters. This includes
868 ASCII, Control-1, Composite, and the two faux private
871 if (code_offset == 0)
873 assert (NILP (charset_by_attributes[type][final]));
874 charset_by_attributes[type][final] = obj;
877 assert (NILP (charset_by_attributes[type][final][direction]));
878 charset_by_attributes[type][final][direction] = obj;
882 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
883 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
886 /* official leading byte */
887 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
890 /* Some charsets are "faux" and don't have names or really exist at
891 all except in the leading-byte table. */
893 Fputhash (name, obj, Vcharset_hash_table);
898 get_unallocated_leading_byte (int dimension)
904 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
907 lb = next_allocated_1_byte_leading_byte++;
911 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
914 lb = next_allocated_2_byte_leading_byte++;
919 ("No more character sets free for this dimension",
920 make_int (dimension));
927 charset_get_byte1 (Lisp_Object charset, Emchar ch)
932 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
934 Lisp_Object value = get_char_code_table (ch, table);
938 Emchar code = XINT (value);
942 else if (code < (1 << 16))
944 else if (code < (1 << 24))
950 if ((XCHARSET_UCS_MIN (charset) <= ch)
951 && (ch <= XCHARSET_UCS_MAX (charset)))
952 return (ch - XCHARSET_UCS_MIN (charset)
953 + XCHARSET_CODE_OFFSET (charset))
954 / (XCHARSET_DIMENSION (charset) == 1 ?
957 XCHARSET_DIMENSION (charset) == 2 ?
958 XCHARSET_CHARS (charset)
960 XCHARSET_DIMENSION (charset) == 3 ?
961 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
963 XCHARSET_CHARS (charset)
964 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
965 + XCHARSET_BYTE_OFFSET (charset);
966 else if (XCHARSET_CODE_OFFSET (charset) == 0)
968 if (XCHARSET_DIMENSION (charset) == 1)
970 if (XCHARSET_CHARS (charset) == 94)
972 if (((d = ch - (MIN_CHAR_94
973 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
977 else if (XCHARSET_CHARS (charset) == 96)
979 if (((d = ch - (MIN_CHAR_96
980 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
987 else if (XCHARSET_DIMENSION (charset) == 2)
989 if (XCHARSET_CHARS (charset) == 94)
991 if (((d = ch - (MIN_CHAR_94x94
992 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
995 return (d / 94) + 33;
997 else if (XCHARSET_CHARS (charset) == 96)
999 if (((d = ch - (MIN_CHAR_96x96
1000 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1003 return (d / 96) + 32;
1011 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1013 if (XCHARSET_DIMENSION (charset) == 1)
1019 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1021 Lisp_Object value = get_char_code_table (ch, table);
1025 Emchar code = XINT (value);
1027 if (code < (1 << 16))
1028 return (unsigned char)code;
1029 else if (code < (1 << 24))
1030 return (unsigned char)(code >> 16);
1032 return (unsigned char)(code >> 24);
1035 if ((XCHARSET_UCS_MIN (charset) <= ch)
1036 && (ch <= XCHARSET_UCS_MAX (charset)))
1037 return ((ch - XCHARSET_UCS_MIN (charset)
1038 + XCHARSET_CODE_OFFSET (charset))
1039 / (XCHARSET_DIMENSION (charset) == 2 ?
1042 XCHARSET_DIMENSION (charset) == 3 ?
1043 XCHARSET_CHARS (charset)
1045 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1046 % XCHARSET_CHARS (charset)
1047 + XCHARSET_BYTE_OFFSET (charset);
1048 else if (XCHARSET_CHARS (charset) == 94)
1049 return (MIN_CHAR_94x94
1050 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1051 && (ch < MIN_CHAR_94x94
1052 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1053 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1054 else /* if (XCHARSET_CHARS (charset) == 96) */
1055 return (MIN_CHAR_96x96
1056 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1057 && (ch < MIN_CHAR_96x96
1058 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1059 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1063 Lisp_Object Vdefault_coded_charset_priority_list;
1064 Lisp_Object Vdefault_coded_charset_priority_list_for_font;
1068 /************************************************************************/
1069 /* Basic charset Lisp functions */
1070 /************************************************************************/
1072 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1073 Return non-nil if OBJECT is a charset.
1077 return CHARSETP (object) ? Qt : Qnil;
1080 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1081 Retrieve the charset of the given name.
1082 If CHARSET-OR-NAME is a charset object, it is simply returned.
1083 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1084 nil is returned. Otherwise the associated charset object is returned.
1088 if (CHARSETP (charset_or_name))
1089 return charset_or_name;
1091 CHECK_SYMBOL (charset_or_name);
1092 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1095 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1096 Retrieve the charset of the given name.
1097 Same as `find-charset' except an error is signalled if there is no such
1098 charset instead of returning nil.
1102 Lisp_Object charset = Ffind_charset (name);
1105 signal_simple_error ("No such charset", name);
1109 /* We store the charsets in hash tables with the names as the key and the
1110 actual charset object as the value. Occasionally we need to use them
1111 in a list format. These routines provide us with that. */
1112 struct charset_list_closure
1114 Lisp_Object *charset_list;
1118 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1119 void *charset_list_closure)
1121 /* This function can GC */
1122 struct charset_list_closure *chcl =
1123 (struct charset_list_closure*) charset_list_closure;
1124 Lisp_Object *charset_list = chcl->charset_list;
1126 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1130 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1131 Return a list of the names of all defined charsets.
1135 Lisp_Object charset_list = Qnil;
1136 struct gcpro gcpro1;
1137 struct charset_list_closure charset_list_closure;
1139 GCPRO1 (charset_list);
1140 charset_list_closure.charset_list = &charset_list;
1141 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1142 &charset_list_closure);
1145 return charset_list;
1148 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1149 Return the name of the given charset.
1153 return XCHARSET_NAME (Fget_charset (charset));
1156 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1157 Define a new character set.
1158 This function is for use with Mule support.
1159 NAME is a symbol, the name by which the character set is normally referred.
1160 DOC-STRING is a string describing the character set.
1161 PROPS is a property list, describing the specific nature of the
1162 character set. Recognized properties are:
1164 'short-name Short version of the charset name (ex: Latin-1)
1165 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1166 'registry A regular expression matching the font registry field for
1168 'dimension Number of octets used to index a character in this charset.
1169 Either 1 or 2. Defaults to 1.
1170 'columns Number of columns used to display a character in this charset.
1171 Only used in TTY mode. (Under X, the actual width of a
1172 character can be derived from the font used to display the
1173 characters.) If unspecified, defaults to the dimension
1174 (this is almost always the correct value).
1175 'chars Number of characters in each dimension (94 or 96).
1176 Defaults to 94. Note that if the dimension is 2, the
1177 character set thus described is 94x94 or 96x96.
1178 'final Final byte of ISO 2022 escape sequence. Must be
1179 supplied. Each combination of (DIMENSION, CHARS) defines a
1180 separate namespace for final bytes. Note that ISO
1181 2022 restricts the final byte to the range
1182 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1183 dimension == 2. Note also that final bytes in the range
1184 0x30 - 0x3F are reserved for user-defined (not official)
1186 'graphic 0 (use left half of font on output) or 1 (use right half
1187 of font on output). Defaults to 0. For example, for
1188 a font whose registry is ISO8859-1, the left half
1189 (octets 0x20 - 0x7F) is the `ascii' character set, while
1190 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1191 character set. With 'graphic set to 0, the octets
1192 will have their high bit cleared; with it set to 1,
1193 the octets will have their high bit set.
1194 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1196 'ccl-program A compiled CCL program used to convert a character in
1197 this charset into an index into the font. This is in
1198 addition to the 'graphic property. The CCL program
1199 is passed the octets of the character, with the high
1200 bit cleared and set depending upon whether the value
1201 of the 'graphic property is 0 or 1.
1203 (name, doc_string, props))
1205 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1206 int direction = CHARSET_LEFT_TO_RIGHT;
1208 Lisp_Object registry = Qnil;
1209 Lisp_Object charset;
1210 Lisp_Object rest, keyword, value;
1211 Lisp_Object ccl_program = Qnil;
1212 Lisp_Object short_name = Qnil, long_name = Qnil;
1214 unsigned char byte_offset = 0;
1217 CHECK_SYMBOL (name);
1218 if (!NILP (doc_string))
1219 CHECK_STRING (doc_string);
1221 charset = Ffind_charset (name);
1222 if (!NILP (charset))
1223 signal_simple_error ("Cannot redefine existing charset", name);
1225 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1227 if (EQ (keyword, Qshort_name))
1229 CHECK_STRING (value);
1233 if (EQ (keyword, Qlong_name))
1235 CHECK_STRING (value);
1239 else if (EQ (keyword, Qdimension))
1242 dimension = XINT (value);
1243 if (dimension < 1 || dimension > 2)
1244 signal_simple_error ("Invalid value for 'dimension", value);
1247 else if (EQ (keyword, Qchars))
1250 chars = XINT (value);
1251 if (chars != 94 && chars != 96)
1252 signal_simple_error ("Invalid value for 'chars", value);
1255 else if (EQ (keyword, Qcolumns))
1258 columns = XINT (value);
1259 if (columns != 1 && columns != 2)
1260 signal_simple_error ("Invalid value for 'columns", value);
1263 else if (EQ (keyword, Qgraphic))
1266 graphic = XINT (value);
1268 if (graphic < 0 || graphic > 2)
1270 if (graphic < 0 || graphic > 1)
1272 signal_simple_error ("Invalid value for 'graphic", value);
1275 else if (EQ (keyword, Qregistry))
1277 CHECK_STRING (value);
1281 else if (EQ (keyword, Qdirection))
1283 if (EQ (value, Ql2r))
1284 direction = CHARSET_LEFT_TO_RIGHT;
1285 else if (EQ (value, Qr2l))
1286 direction = CHARSET_RIGHT_TO_LEFT;
1288 signal_simple_error ("Invalid value for 'direction", value);
1291 else if (EQ (keyword, Qfinal))
1293 CHECK_CHAR_COERCE_INT (value);
1294 final = XCHAR (value);
1295 if (final < '0' || final > '~')
1296 signal_simple_error ("Invalid value for 'final", value);
1299 else if (EQ (keyword, Qccl_program))
1301 CHECK_VECTOR (value);
1302 ccl_program = value;
1306 signal_simple_error ("Unrecognized property", keyword);
1310 error ("'final must be specified");
1311 if (dimension == 2 && final > 0x5F)
1313 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1317 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1319 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1321 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1322 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1324 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1331 /* id = CHARSET_ID_OFFSET_94 + final; */
1332 id = get_unallocated_leading_byte (dimension);
1334 else if (chars == 96)
1336 id = get_unallocated_leading_byte (dimension);
1343 else if (dimension == 2)
1347 id = get_unallocated_leading_byte (dimension);
1349 else if (chars == 96)
1351 id = get_unallocated_leading_byte (dimension);
1365 else if (chars == 96)
1368 id = get_unallocated_leading_byte (dimension);
1371 if (NILP (doc_string))
1372 doc_string = build_string ("");
1374 if (NILP (registry))
1375 registry = build_string ("");
1377 if (NILP (short_name))
1378 XSETSTRING (short_name, XSYMBOL (name)->name);
1380 if (NILP (long_name))
1381 long_name = doc_string;
1384 columns = dimension;
1385 charset = make_charset (id, name, type, columns, graphic,
1386 final, direction, short_name, long_name,
1387 doc_string, registry,
1388 Qnil, 0, 0, 0, byte_offset);
1389 if (!NILP (ccl_program))
1390 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1394 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1396 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1397 NEW-NAME is the name of the new charset. Return the new charset.
1399 (charset, new_name))
1401 Lisp_Object new_charset = Qnil;
1402 int id, dimension, columns, graphic, final;
1403 int direction, type;
1404 Lisp_Object registry, doc_string, short_name, long_name;
1405 struct Lisp_Charset *cs;
1407 charset = Fget_charset (charset);
1408 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1409 signal_simple_error ("Charset already has reverse-direction charset",
1412 CHECK_SYMBOL (new_name);
1413 if (!NILP (Ffind_charset (new_name)))
1414 signal_simple_error ("Cannot redefine existing charset", new_name);
1416 cs = XCHARSET (charset);
1418 type = CHARSET_TYPE (cs);
1419 columns = CHARSET_COLUMNS (cs);
1420 dimension = CHARSET_DIMENSION (cs);
1421 id = get_unallocated_leading_byte (dimension);
1423 graphic = CHARSET_GRAPHIC (cs);
1424 final = CHARSET_FINAL (cs);
1425 direction = CHARSET_RIGHT_TO_LEFT;
1426 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1427 direction = CHARSET_LEFT_TO_RIGHT;
1428 doc_string = CHARSET_DOC_STRING (cs);
1429 short_name = CHARSET_SHORT_NAME (cs);
1430 long_name = CHARSET_LONG_NAME (cs);
1431 registry = CHARSET_REGISTRY (cs);
1433 new_charset = make_charset (id, new_name, type, columns,
1434 graphic, final, direction, short_name, long_name,
1435 doc_string, registry,
1437 CHARSET_DECODING_TABLE(cs),
1438 CHARSET_UCS_MIN(cs),
1439 CHARSET_UCS_MAX(cs),
1440 CHARSET_CODE_OFFSET(cs),
1441 CHARSET_BYTE_OFFSET(cs)
1447 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1448 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1453 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1454 Define symbol ALIAS as an alias for CHARSET.
1458 CHECK_SYMBOL (alias);
1459 charset = Fget_charset (charset);
1460 return Fputhash (alias, charset, Vcharset_hash_table);
1463 /* #### Reverse direction charsets not yet implemented. */
1465 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1467 Return the reverse-direction charset parallel to CHARSET, if any.
1468 This is the charset with the same properties (in particular, the same
1469 dimension, number of characters per dimension, and final byte) as
1470 CHARSET but whose characters are displayed in the opposite direction.
1474 charset = Fget_charset (charset);
1475 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1479 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1480 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1481 If DIRECTION is omitted, both directions will be checked (left-to-right
1482 will be returned if character sets exist for both directions).
1484 (dimension, chars, final, direction))
1486 int dm, ch, fi, di = -1;
1488 Lisp_Object obj = Qnil;
1490 CHECK_INT (dimension);
1491 dm = XINT (dimension);
1492 if (dm < 1 || dm > 2)
1493 signal_simple_error ("Invalid value for DIMENSION", dimension);
1497 if (ch != 94 && ch != 96)
1498 signal_simple_error ("Invalid value for CHARS", chars);
1500 CHECK_CHAR_COERCE_INT (final);
1502 if (fi < '0' || fi > '~')
1503 signal_simple_error ("Invalid value for FINAL", final);
1505 if (EQ (direction, Ql2r))
1506 di = CHARSET_LEFT_TO_RIGHT;
1507 else if (EQ (direction, Qr2l))
1508 di = CHARSET_RIGHT_TO_LEFT;
1509 else if (!NILP (direction))
1510 signal_simple_error ("Invalid value for DIRECTION", direction);
1512 if (dm == 2 && fi > 0x5F)
1514 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1517 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1519 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1523 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1525 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1528 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1531 return XCHARSET_NAME (obj);
1535 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1536 Return short name of CHARSET.
1540 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1543 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1544 Return long name of CHARSET.
1548 return XCHARSET_LONG_NAME (Fget_charset (charset));
1551 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1552 Return description of CHARSET.
1556 return XCHARSET_DOC_STRING (Fget_charset (charset));
1559 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1560 Return dimension of CHARSET.
1564 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1567 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1568 Return property PROP of CHARSET.
1569 Recognized properties are those listed in `make-charset', as well as
1570 'name and 'doc-string.
1574 struct Lisp_Charset *cs;
1576 charset = Fget_charset (charset);
1577 cs = XCHARSET (charset);
1579 CHECK_SYMBOL (prop);
1580 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1581 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1582 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1583 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1584 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1585 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1586 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1587 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1588 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1589 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1590 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1591 if (EQ (prop, Qdirection))
1592 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1593 if (EQ (prop, Qreverse_direction_charset))
1595 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1599 return XCHARSET_NAME (obj);
1601 signal_simple_error ("Unrecognized charset property name", prop);
1602 return Qnil; /* not reached */
1605 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1606 Return charset identification number of CHARSET.
1610 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1613 /* #### We need to figure out which properties we really want to
1616 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1617 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1619 (charset, ccl_program))
1621 charset = Fget_charset (charset);
1622 CHECK_VECTOR (ccl_program);
1623 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1628 invalidate_charset_font_caches (Lisp_Object charset)
1630 /* Invalidate font cache entries for charset on all devices. */
1631 Lisp_Object devcons, concons, hash_table;
1632 DEVICE_LOOP_NO_BREAK (devcons, concons)
1634 struct device *d = XDEVICE (XCAR (devcons));
1635 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1636 if (!UNBOUNDP (hash_table))
1637 Fclrhash (hash_table);
1641 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1642 Set the 'registry property of CHARSET to REGISTRY.
1644 (charset, registry))
1646 charset = Fget_charset (charset);
1647 CHECK_STRING (registry);
1648 XCHARSET_REGISTRY (charset) = registry;
1649 invalidate_charset_font_caches (charset);
1650 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1655 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1656 Return mapping-table of CHARSET.
1660 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1663 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1664 Set mapping-table of CHARSET to TABLE.
1668 struct Lisp_Charset *cs;
1669 Lisp_Object old_table;
1672 charset = Fget_charset (charset);
1673 cs = XCHARSET (charset);
1675 if (EQ (table, Qnil))
1677 CHARSET_DECODING_TABLE(cs) = table;
1678 CHARSET_ENCODING_TABLE(cs) = Qnil;
1681 else if (VECTORP (table))
1683 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1684 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1685 old_table = CHARSET_ENCODING_TABLE(cs);
1686 CHARSET_DECODING_TABLE(cs) = table;
1689 signal_error (Qwrong_type_argument,
1690 list2 (build_translated_string ("vector-or-nil-p"),
1692 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1694 switch (CHARSET_DIMENSION (cs))
1697 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1698 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1700 Lisp_Object c = XVECTOR_DATA(table)[i];
1703 put_char_code_table (XCHAR (c),
1704 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1705 CHARSET_ENCODING_TABLE(cs));
1709 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1710 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1712 Lisp_Object v = XVECTOR_DATA(table)[i];
1718 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1720 CHARSET_DECODING_TABLE(cs) = old_table;
1721 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1723 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1725 Lisp_Object c = XVECTOR_DATA(v)[j];
1730 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1731 | (j + CHARSET_BYTE_OFFSET (cs))),
1732 CHARSET_ENCODING_TABLE(cs));
1736 put_char_code_table (XCHAR (v),
1737 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1738 CHARSET_ENCODING_TABLE(cs));
1747 /************************************************************************/
1748 /* Lisp primitives for working with characters */
1749 /************************************************************************/
1751 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1752 Make a character from CHARSET and octets ARG1 and ARG2.
1753 ARG2 is required only for characters from two-dimensional charsets.
1754 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1755 character s with caron.
1757 (charset, arg1, arg2))
1759 struct Lisp_Charset *cs;
1761 int lowlim, highlim;
1763 charset = Fget_charset (charset);
1764 cs = XCHARSET (charset);
1766 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1767 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1769 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1771 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1772 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1775 /* It is useful (and safe, according to Olivier Galibert) to strip
1776 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1777 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1778 Latin 2 code of the character. */
1786 if (a1 < lowlim || a1 > highlim)
1787 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1789 if (CHARSET_DIMENSION (cs) == 1)
1793 ("Charset is of dimension one; second octet must be nil", arg2);
1794 return make_char (MAKE_CHAR (charset, a1, 0));
1803 a2 = XINT (arg2) & 0x7f;
1805 if (a2 < lowlim || a2 > highlim)
1806 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1808 return make_char (MAKE_CHAR (charset, a1, a2));
1811 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1812 Return the character set of char CH.
1816 CHECK_CHAR_COERCE_INT (ch);
1818 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1821 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1822 Return list of charset and one or two position-codes of CHAR.
1826 /* This function can GC */
1827 struct gcpro gcpro1, gcpro2;
1828 Lisp_Object charset = Qnil;
1829 Lisp_Object rc = Qnil;
1832 GCPRO2 (charset, rc);
1833 CHECK_CHAR_COERCE_INT (character);
1835 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1837 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1839 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1843 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1851 #ifdef ENABLE_COMPOSITE_CHARS
1852 /************************************************************************/
1853 /* composite character functions */
1854 /************************************************************************/
1857 lookup_composite_char (Bufbyte *str, int len)
1859 Lisp_Object lispstr = make_string (str, len);
1860 Lisp_Object ch = Fgethash (lispstr,
1861 Vcomposite_char_string2char_hash_table,
1867 if (composite_char_row_next >= 128)
1868 signal_simple_error ("No more composite chars available", lispstr);
1869 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1870 composite_char_col_next);
1871 Fputhash (make_char (emch), lispstr,
1872 Vcomposite_char_char2string_hash_table);
1873 Fputhash (lispstr, make_char (emch),
1874 Vcomposite_char_string2char_hash_table);
1875 composite_char_col_next++;
1876 if (composite_char_col_next >= 128)
1878 composite_char_col_next = 32;
1879 composite_char_row_next++;
1888 composite_char_string (Emchar ch)
1890 Lisp_Object str = Fgethash (make_char (ch),
1891 Vcomposite_char_char2string_hash_table,
1893 assert (!UNBOUNDP (str));
1897 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1898 Convert a string into a single composite character.
1899 The character is the result of overstriking all the characters in
1904 CHECK_STRING (string);
1905 return make_char (lookup_composite_char (XSTRING_DATA (string),
1906 XSTRING_LENGTH (string)));
1909 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1910 Return a string of the characters comprising a composite character.
1918 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1919 signal_simple_error ("Must be composite char", ch);
1920 return composite_char_string (emch);
1922 #endif /* ENABLE_COMPOSITE_CHARS */
1925 /************************************************************************/
1926 /* initialization */
1927 /************************************************************************/
1930 syms_of_mule_charset (void)
1932 DEFSUBR (Fcharsetp);
1933 DEFSUBR (Ffind_charset);
1934 DEFSUBR (Fget_charset);
1935 DEFSUBR (Fcharset_list);
1936 DEFSUBR (Fcharset_name);
1937 DEFSUBR (Fmake_charset);
1938 DEFSUBR (Fmake_reverse_direction_charset);
1939 /* DEFSUBR (Freverse_direction_charset); */
1940 DEFSUBR (Fdefine_charset_alias);
1941 DEFSUBR (Fcharset_from_attributes);
1942 DEFSUBR (Fcharset_short_name);
1943 DEFSUBR (Fcharset_long_name);
1944 DEFSUBR (Fcharset_description);
1945 DEFSUBR (Fcharset_dimension);
1946 DEFSUBR (Fcharset_property);
1947 DEFSUBR (Fcharset_id);
1948 DEFSUBR (Fset_charset_ccl_program);
1949 DEFSUBR (Fset_charset_registry);
1951 DEFSUBR (Fcharset_mapping_table);
1952 DEFSUBR (Fset_charset_mapping_table);
1955 DEFSUBR (Fmake_char);
1956 DEFSUBR (Fchar_charset);
1957 DEFSUBR (Fsplit_char);
1959 #ifdef ENABLE_COMPOSITE_CHARS
1960 DEFSUBR (Fmake_composite_char);
1961 DEFSUBR (Fcomposite_char_string);
1964 defsymbol (&Qcharsetp, "charsetp");
1965 defsymbol (&Qregistry, "registry");
1966 defsymbol (&Qfinal, "final");
1967 defsymbol (&Qgraphic, "graphic");
1968 defsymbol (&Qdirection, "direction");
1969 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1970 defsymbol (&Qshort_name, "short-name");
1971 defsymbol (&Qlong_name, "long-name");
1973 defsymbol (&Ql2r, "l2r");
1974 defsymbol (&Qr2l, "r2l");
1976 /* Charsets, compatible with FSF 20.3
1977 Naming convention is Script-Charset[-Edition] */
1978 defsymbol (&Qascii, "ascii");
1979 defsymbol (&Qcontrol_1, "control-1");
1980 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1981 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1982 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1983 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1984 defsymbol (&Qthai_tis620, "thai-tis620");
1985 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1986 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1987 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1988 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1989 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1990 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1991 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1992 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1993 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1994 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1995 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1996 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1997 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1998 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2000 defsymbol (&Qucs_bmp, "ucs-bmp");
2001 defsymbol (&Qlatin_viscii, "latin-viscii");
2002 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2003 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2004 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2005 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2006 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2007 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2009 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2010 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2012 defsymbol (&Qcomposite, "composite");
2016 vars_of_mule_charset (void)
2023 /* Table of charsets indexed by leading byte. */
2024 for (i = 0; i < countof (charset_by_leading_byte); i++)
2025 charset_by_leading_byte[i] = Qnil;
2028 /* Table of charsets indexed by type/final-byte. */
2029 for (i = 0; i < countof (charset_by_attributes); i++)
2030 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2031 charset_by_attributes[i][j] = Qnil;
2033 /* Table of charsets indexed by type/final-byte/direction. */
2034 for (i = 0; i < countof (charset_by_attributes); i++)
2035 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2036 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2037 charset_by_attributes[i][j][k] = Qnil;
2040 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2042 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2044 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2048 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2049 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2050 Leading-code of private TYPE9N charset of column-width 1.
2052 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2056 Vutf_2000_version = build_string("0.8 (Kami)");
2057 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2058 Version number of UTF-2000.
2061 Vdefault_coded_charset_priority_list = Qnil;
2062 DEFVAR_LISP ("default-coded-charset-priority-list",
2063 &Vdefault_coded_charset_priority_list /*
2064 Default order of preferred coded-character-set.
2067 Vdefault_coded_charset_priority_list_for_font = Qnil;
2068 DEFVAR_LISP ("default-coded-charset-priority-list-for-font",
2069 &Vdefault_coded_charset_priority_list_for_font /*
2070 Default order of preferred coded-character-set for font.
2076 complex_vars_of_mule_charset (void)
2078 staticpro (&Vcharset_hash_table);
2079 Vcharset_hash_table =
2080 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2082 /* Predefined character sets. We store them into variables for
2087 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2088 CHARSET_TYPE_256X256, 1, 2, 0,
2089 CHARSET_LEFT_TO_RIGHT,
2090 build_string ("BMP"),
2091 build_string ("BMP"),
2092 build_string ("BMP"),
2094 Qnil, 0, 0xFFFF, 0, 0);
2096 # define MIN_CHAR_THAI 0
2097 # define MAX_CHAR_THAI 0
2098 # define MIN_CHAR_GREEK 0
2099 # define MAX_CHAR_GREEK 0
2100 # define MIN_CHAR_HEBREW 0
2101 # define MAX_CHAR_HEBREW 0
2102 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2103 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2104 # define MIN_CHAR_CYRILLIC 0
2105 # define MAX_CHAR_CYRILLIC 0
2108 make_charset (LEADING_BYTE_ASCII, Qascii,
2109 CHARSET_TYPE_94, 1, 0, 'B',
2110 CHARSET_LEFT_TO_RIGHT,
2111 build_string ("ASCII"),
2112 build_string ("ASCII)"),
2113 build_string ("ASCII (ISO646 IRV)"),
2114 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2115 Qnil, 0, 0x7F, 0, 0);
2116 Vcharset_control_1 =
2117 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2118 CHARSET_TYPE_94, 1, 1, 0,
2119 CHARSET_LEFT_TO_RIGHT,
2120 build_string ("C1"),
2121 build_string ("Control characters"),
2122 build_string ("Control characters 128-191"),
2124 Qnil, 0x80, 0x9F, 0, 0);
2125 Vcharset_latin_iso8859_1 =
2126 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2127 CHARSET_TYPE_96, 1, 1, 'A',
2128 CHARSET_LEFT_TO_RIGHT,
2129 build_string ("Latin-1"),
2130 build_string ("ISO8859-1 (Latin-1)"),
2131 build_string ("ISO8859-1 (Latin-1)"),
2132 build_string ("iso8859-1"),
2133 Qnil, 0xA0, 0xFF, 0, 32);
2134 Vcharset_latin_iso8859_2 =
2135 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2136 CHARSET_TYPE_96, 1, 1, 'B',
2137 CHARSET_LEFT_TO_RIGHT,
2138 build_string ("Latin-2"),
2139 build_string ("ISO8859-2 (Latin-2)"),
2140 build_string ("ISO8859-2 (Latin-2)"),
2141 build_string ("iso8859-2"),
2143 Vcharset_latin_iso8859_3 =
2144 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2145 CHARSET_TYPE_96, 1, 1, 'C',
2146 CHARSET_LEFT_TO_RIGHT,
2147 build_string ("Latin-3"),
2148 build_string ("ISO8859-3 (Latin-3)"),
2149 build_string ("ISO8859-3 (Latin-3)"),
2150 build_string ("iso8859-3"),
2152 Vcharset_latin_iso8859_4 =
2153 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2154 CHARSET_TYPE_96, 1, 1, 'D',
2155 CHARSET_LEFT_TO_RIGHT,
2156 build_string ("Latin-4"),
2157 build_string ("ISO8859-4 (Latin-4)"),
2158 build_string ("ISO8859-4 (Latin-4)"),
2159 build_string ("iso8859-4"),
2161 Vcharset_thai_tis620 =
2162 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2163 CHARSET_TYPE_96, 1, 1, 'T',
2164 CHARSET_LEFT_TO_RIGHT,
2165 build_string ("TIS620"),
2166 build_string ("TIS620 (Thai)"),
2167 build_string ("TIS620.2529 (Thai)"),
2168 build_string ("tis620"),
2169 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2170 Vcharset_greek_iso8859_7 =
2171 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2172 CHARSET_TYPE_96, 1, 1, 'F',
2173 CHARSET_LEFT_TO_RIGHT,
2174 build_string ("ISO8859-7"),
2175 build_string ("ISO8859-7 (Greek)"),
2176 build_string ("ISO8859-7 (Greek)"),
2177 build_string ("iso8859-7"),
2178 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2179 Vcharset_arabic_iso8859_6 =
2180 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2181 CHARSET_TYPE_96, 1, 1, 'G',
2182 CHARSET_RIGHT_TO_LEFT,
2183 build_string ("ISO8859-6"),
2184 build_string ("ISO8859-6 (Arabic)"),
2185 build_string ("ISO8859-6 (Arabic)"),
2186 build_string ("iso8859-6"),
2188 Vcharset_hebrew_iso8859_8 =
2189 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2190 CHARSET_TYPE_96, 1, 1, 'H',
2191 CHARSET_RIGHT_TO_LEFT,
2192 build_string ("ISO8859-8"),
2193 build_string ("ISO8859-8 (Hebrew)"),
2194 build_string ("ISO8859-8 (Hebrew)"),
2195 build_string ("iso8859-8"),
2196 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2197 Vcharset_katakana_jisx0201 =
2198 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2199 CHARSET_TYPE_94, 1, 1, 'I',
2200 CHARSET_LEFT_TO_RIGHT,
2201 build_string ("JISX0201 Kana"),
2202 build_string ("JISX0201.1976 (Japanese Kana)"),
2203 build_string ("JISX0201.1976 Japanese Kana"),
2204 build_string ("jisx0201\\.1976"),
2206 MIN_CHAR_HALFWIDTH_KATAKANA,
2207 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2208 Vcharset_latin_jisx0201 =
2209 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2210 CHARSET_TYPE_94, 1, 0, 'J',
2211 CHARSET_LEFT_TO_RIGHT,
2212 build_string ("JISX0201 Roman"),
2213 build_string ("JISX0201.1976 (Japanese Roman)"),
2214 build_string ("JISX0201.1976 Japanese Roman"),
2215 build_string ("jisx0201\\.1976"),
2217 Vcharset_cyrillic_iso8859_5 =
2218 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2219 CHARSET_TYPE_96, 1, 1, 'L',
2220 CHARSET_LEFT_TO_RIGHT,
2221 build_string ("ISO8859-5"),
2222 build_string ("ISO8859-5 (Cyrillic)"),
2223 build_string ("ISO8859-5 (Cyrillic)"),
2224 build_string ("iso8859-5"),
2225 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2226 Vcharset_latin_iso8859_9 =
2227 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2228 CHARSET_TYPE_96, 1, 1, 'M',
2229 CHARSET_LEFT_TO_RIGHT,
2230 build_string ("Latin-5"),
2231 build_string ("ISO8859-9 (Latin-5)"),
2232 build_string ("ISO8859-9 (Latin-5)"),
2233 build_string ("iso8859-9"),
2235 Vcharset_japanese_jisx0208_1978 =
2236 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2237 CHARSET_TYPE_94X94, 2, 0, '@',
2238 CHARSET_LEFT_TO_RIGHT,
2239 build_string ("JIS X0208:1978"),
2240 build_string ("JIS X0208:1978 (Japanese)"),
2242 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2243 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2245 Vcharset_chinese_gb2312 =
2246 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2247 CHARSET_TYPE_94X94, 2, 0, 'A',
2248 CHARSET_LEFT_TO_RIGHT,
2249 build_string ("GB2312"),
2250 build_string ("GB2312)"),
2251 build_string ("GB2312 Chinese simplified"),
2252 build_string ("gb2312"),
2254 Vcharset_japanese_jisx0208 =
2255 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2256 CHARSET_TYPE_94X94, 2, 0, 'B',
2257 CHARSET_LEFT_TO_RIGHT,
2258 build_string ("JISX0208"),
2259 build_string ("JIS X0208:1983 (Japanese)"),
2260 build_string ("JIS X0208:1983 Japanese Kanji"),
2261 build_string ("jisx0208\\.1983"),
2263 Vcharset_korean_ksc5601 =
2264 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2265 CHARSET_TYPE_94X94, 2, 0, 'C',
2266 CHARSET_LEFT_TO_RIGHT,
2267 build_string ("KSC5601"),
2268 build_string ("KSC5601 (Korean"),
2269 build_string ("KSC5601 Korean Hangul and Hanja"),
2270 build_string ("ksc5601"),
2272 Vcharset_japanese_jisx0212 =
2273 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2274 CHARSET_TYPE_94X94, 2, 0, 'D',
2275 CHARSET_LEFT_TO_RIGHT,
2276 build_string ("JISX0212"),
2277 build_string ("JISX0212 (Japanese)"),
2278 build_string ("JISX0212 Japanese Supplement"),
2279 build_string ("jisx0212"),
2282 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2283 Vcharset_chinese_cns11643_1 =
2284 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2285 CHARSET_TYPE_94X94, 2, 0, 'G',
2286 CHARSET_LEFT_TO_RIGHT,
2287 build_string ("CNS11643-1"),
2288 build_string ("CNS11643-1 (Chinese traditional)"),
2290 ("CNS 11643 Plane 1 Chinese traditional"),
2291 build_string (CHINESE_CNS_PLANE_RE("1")),
2293 Vcharset_chinese_cns11643_2 =
2294 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2295 CHARSET_TYPE_94X94, 2, 0, 'H',
2296 CHARSET_LEFT_TO_RIGHT,
2297 build_string ("CNS11643-2"),
2298 build_string ("CNS11643-2 (Chinese traditional)"),
2300 ("CNS 11643 Plane 2 Chinese traditional"),
2301 build_string (CHINESE_CNS_PLANE_RE("2")),
2304 Vcharset_latin_viscii_lower =
2305 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2306 CHARSET_TYPE_96, 1, 1, '1',
2307 CHARSET_LEFT_TO_RIGHT,
2308 build_string ("VISCII lower"),
2309 build_string ("VISCII lower (Vietnamese)"),
2310 build_string ("VISCII lower (Vietnamese)"),
2311 build_string ("MULEVISCII-LOWER"),
2313 Vcharset_latin_viscii_upper =
2314 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2315 CHARSET_TYPE_96, 1, 1, '2',
2316 CHARSET_LEFT_TO_RIGHT,
2317 build_string ("VISCII upper"),
2318 build_string ("VISCII upper (Vietnamese)"),
2319 build_string ("VISCII upper (Vietnamese)"),
2320 build_string ("MULEVISCII-UPPER"),
2322 Vcharset_latin_viscii =
2323 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2324 CHARSET_TYPE_256, 1, 2, 0,
2325 CHARSET_LEFT_TO_RIGHT,
2326 build_string ("VISCII"),
2327 build_string ("VISCII 1.1 (Vietnamese)"),
2328 build_string ("VISCII 1.1 (Vietnamese)"),
2329 build_string ("VISCII1\\.1"),
2331 Vcharset_hiragana_jisx0208 =
2332 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2333 CHARSET_TYPE_94X94, 2, 0, 'B',
2334 CHARSET_LEFT_TO_RIGHT,
2335 build_string ("Hiragana"),
2336 build_string ("Hiragana of JIS X0208"),
2337 build_string ("Japanese Hiragana of JIS X0208"),
2338 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2339 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2340 (0x24 - 33) * 94 + (0x21 - 33), 33);
2341 Vcharset_katakana_jisx0208 =
2342 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2343 CHARSET_TYPE_94X94, 2, 0, 'B',
2344 CHARSET_LEFT_TO_RIGHT,
2345 build_string ("Katakana"),
2346 build_string ("Katakana of JIS X0208"),
2347 build_string ("Japanese Katakana of JIS X0208"),
2348 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2349 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2350 (0x25 - 33) * 94 + (0x21 - 33), 33);
2352 Vcharset_chinese_big5_1 =
2353 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2354 CHARSET_TYPE_94X94, 2, 0, '0',
2355 CHARSET_LEFT_TO_RIGHT,
2356 build_string ("Big5"),
2357 build_string ("Big5 (Level-1)"),
2359 ("Big5 Level-1 Chinese traditional"),
2360 build_string ("big5"),
2362 Vcharset_chinese_big5_2 =
2363 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2364 CHARSET_TYPE_94X94, 2, 0, '1',
2365 CHARSET_LEFT_TO_RIGHT,
2366 build_string ("Big5"),
2367 build_string ("Big5 (Level-2)"),
2369 ("Big5 Level-2 Chinese traditional"),
2370 build_string ("big5"),
2373 #ifdef ENABLE_COMPOSITE_CHARS
2374 /* #### For simplicity, we put composite chars into a 96x96 charset.
2375 This is going to lead to problems because you can run out of
2376 room, esp. as we don't yet recycle numbers. */
2377 Vcharset_composite =
2378 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2379 CHARSET_TYPE_96X96, 2, 0, 0,
2380 CHARSET_LEFT_TO_RIGHT,
2381 build_string ("Composite"),
2382 build_string ("Composite characters"),
2383 build_string ("Composite characters"),
2386 composite_char_row_next = 32;
2387 composite_char_col_next = 32;
2389 Vcomposite_char_string2char_hash_table =
2390 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2391 Vcomposite_char_char2string_hash_table =
2392 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2393 staticpro (&Vcomposite_char_string2char_hash_table);
2394 staticpro (&Vcomposite_char_char2string_hash_table);
2395 #endif /* ENABLE_COMPOSITE_CHARS */