1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* One global Lisp_Object per built-in charset.  The assignments that
   populate these variables are not visible in this part of the file. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the other way. */
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
/* Lookup tables filled in by make_charset: charset_by_leading_byte is
   stored at index (id - MIN_LEADING_BYTE); charset_by_attributes is stored
   at [type][final] or at [type][final][direction].
   NOTE(review): two declarations of charset_by_attributes with different
   ranks are visible below.  Presumably they are the two arms of a
   preprocessor conditional that this listing does not show -- confirm. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* NOTE(review): the array has 0xA0 (160) slots, yet eleven rows of sixteen
   initialisers (176 values) are visible.  The two rows that follow the
   "0x80 - 0x8f" comment are presumably alternatives selected by a
   preprocessor conditional not shown here (one ends in 3, the other is all
   2s) -- confirm.  The closing of the initialiser is not visible either.
   make_charset also writes rep_bytes_by_first_byte[id] at run time. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark routine for char-byte-table objects: calls MARKOBJ on each of the
   256 entries of the table's property array.  It is passed to
   DEFINE_LRECORD_IMPLEMENTATION for this type.  The return type and return
   statement are not visible in this listing. */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Structural comparison of two char-byte-tables, slot by slot over all 256
   entries.  When the slot in OBJ1 and the slot in OBJ2 are both
   char-byte-tables the comparison recurses with DEPTH + 1; the other
   visible comparison uses internal_equal, also with DEPTH + 1.
   The else arms and the return statements are not visible in this listing,
   so the exact result for mismatched slot kinds cannot be stated here. */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash routine for char-byte-tables: hashes the 256-entry property array
   with internal_array_hash, passing DEPTH through unchanged. */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* lrecord description for struct Lisp_Char_Byte_Table: a single entry
   covering the 256 Lisp_Object slots that start at the `property' member.
   The terminating entry and closing brace are not visible here. */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Defines the lrecord implementation for char-byte-tables, wiring in the
   mark, equal and hash routines and the description table defined above,
   with internal_object_printer as the printer.  Note that the type-name
   string is "char-code-table" while the C identifier is char_byte_table. */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table and set every one of its 256 property
   slots to INITVAL.  The new record is wrapped into `obj' with
   XSETCHAR_BYTE_TABLE; the return statement is not visible here
   (presumably `obj' is returned -- confirm). */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a freshly allocated table.  A slot
   that itself holds a char-byte-table is copied by a recursive call; the
   other visible assignment stores the slot value as-is.
   NOTE(review): the `else' separating the two assignments and the return
   statement are not visible in this listing. */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table is created exactly like a char-byte-table: this macro
   simply forwards to make_char_byte_table. */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in TABLE.  The table is walked
   one byte of CH at a time: bits 24 and up, then bits 16-23, then bits
   8-15, and finally the low byte.  At each level, an entry that is itself
   a char-byte-table is descended into.
   The lines that handle a non-table entry at the first three levels are
   not visible here (presumably that entry is returned directly --
   confirm).
   NOTE(review): unlike the later levels, the first index (ch >> 24) is not
   narrowed to unsigned char; whether it can leave the 0..255 range depends
   on the range of Emchar, which is not shown here. */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in TABLE.  The table is walked by the same
   four byte indices that get_char_code_table uses.  Where sub-tables
   already exist down to the last level, the low-byte slot is overwritten.
   Where a level holds a non-table entry that is not EQ to VALUE, the
   missing lower levels are created with make_char_byte_table, each filled
   with that old entry, and linked in before VALUE is stored in the
   low-byte slot.  When the existing entry is already EQ to VALUE no
   visible code runs, so no sub-tables are created.
   NOTE(review): the line starting `XCHAR_BYTE_TABLE(cpt3)->property[...]'
   in the second branch has lost its continuation line in this listing. */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
/* File-scope Lisp_Object variables and leading-byte allocation counters.
   The three next_allocated_* counters are read and post-incremented by
   get_unallocated_leading_byte.
   NOTE(review): the bare identifiers Qjapanese_jisx0208_1978,
   Qvietnamese_viscii_lower and Qvietnamese_viscii_upper look like
   continuation lines of a declaration list whose first line is not visible
   in this listing. */
311 Lisp_Object Vutf_2000_version;
315 int leading_code_private_11;
318 Lisp_Object Qcharsetp;
320 /* Qdoc_string, Qdimension, Qchars defined in general.c */
321 Lisp_Object Qregistry, Qfinal, Qgraphic;
322 Lisp_Object Qdirection;
323 Lisp_Object Qreverse_direction_charset;
324 Lisp_Object Qleading_byte;
325 Lisp_Object Qshort_name, Qlong_name;
341 Qjapanese_jisx0208_1978,
353 Qvietnamese_viscii_lower,
354 Qvietnamese_viscii_upper,
362 Lisp_Object Ql2r, Qr2l;
364 Lisp_Object Vcharset_hash_table;
367 static Charset_ID next_allocated_leading_byte;
369 static Charset_ID next_allocated_1_byte_leading_byte;
370 static Charset_ID next_allocated_2_byte_leading_byte;
373 /* Composite characters are characters constructed by overstriking two
374 or more regular characters.
376 1) The old Mule implementation involves storing composite characters
377 in a buffer as a tag followed by all of the actual characters
378 used to make up the composite character. I think this is a bad
379 idea; it greatly complicates code that wants to handle strings
380 one character at a time because it has to deal with the possibility
381 of great big ungainly characters. It's much more reasonable to
382 simply store an index into a table of composite characters.
384 2) The current implementation only allows for 16,384 separate
385 composite characters over the lifetime of the XEmacs process.
386 This could become a potential problem if the user
387 edited lots of different files that use composite characters.
388 Due to FSF bogosity, increasing the number of allowable
389 composite characters under Mule would decrease the number
390 of possible faces that can exist. Mule already has shrunk
391 this to 2048, and further shrinkage would become uncomfortable.
392 No such problems exist in XEmacs.
394 Composite characters could be represented as 0x80 C1 C2 C3,
395 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
396 for slightly under 2^20 (one million) composite characters
397 over the XEmacs process lifetime, and you only need to
398 increase the size of a Mule character from 19 to 21 bits.
399 Or you could use 0x80 C1 C2 C3 C4, allowing for about
400 85 million (slightly over 2^26) composite characters. */
403 /************************************************************************/
404 /* Basic Emchar functions */
405 /************************************************************************/
407 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
408 string in STR. Returns the number of bytes stored.
409 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encode the non-ASCII character C into the buffer STR.
   Two encodings are visible, presumably on opposite arms of a preprocessor
   conditional that this listing does not show -- confirm:
   1. A UTF-8-style variable-length form chosen by the magnitude of C:
      2 bytes up to 0x7ff, 3 up to 0xffff, 4 up to 0x1fffff, 5 up to
      0x3ffffff, and 6 bytes otherwise.  The first byte carries the marker
      0xc0, 0xe0, 0xf0, 0xf8 or 0xfc; each following byte is 0x80 plus six
      payload bits.
   2. A leading-byte form: C is split with BREAKUP_CHAR, and a private
      leading byte is preceded by PRIVATE_LEADING_BYTE_PREFIX (lb).
   The first branch of the chain, the stores of the remaining bytes in the
   second form, and the return statement are not visible. */
413 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
428 else if ( c <= 0x7ff )
430 *p++ = (c >> 6) | 0xc0;
431 *p++ = (c & 0x3f) | 0x80;
433 else if ( c <= 0xffff )
435 *p++ = (c >> 12) | 0xe0;
436 *p++ = ((c >> 6) & 0x3f) | 0x80;
437 *p++ = (c & 0x3f) | 0x80;
439 else if ( c <= 0x1fffff )
441 *p++ = (c >> 18) | 0xf0;
442 *p++ = ((c >> 12) & 0x3f) | 0x80;
443 *p++ = ((c >> 6) & 0x3f) | 0x80;
444 *p++ = (c & 0x3f) | 0x80;
446 else if ( c <= 0x3ffffff )
448 *p++ = (c >> 24) | 0xf8;
449 *p++ = ((c >> 18) & 0x3f) | 0x80;
450 *p++ = ((c >> 12) & 0x3f) | 0x80;
451 *p++ = ((c >> 6) & 0x3f) | 0x80;
452 *p++ = (c & 0x3f) | 0x80;
456 *p++ = (c >> 30) | 0xfc;
457 *p++ = ((c >> 24) & 0x3f) | 0x80;
458 *p++ = ((c >> 18) & 0x3f) | 0x80;
459 *p++ = ((c >> 12) & 0x3f) | 0x80;
460 *p++ = ((c >> 6) & 0x3f) | 0x80;
461 *p++ = (c & 0x3f) | 0x80;
464 BREAKUP_CHAR (c, charset, c1, c2);
465 lb = CHAR_LEADING_BYTE (c);
466 if (LEADING_BYTE_PRIVATE_P (lb))
467 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
469 if (EQ (charset, Vcharset_control_1))
478 /* Return the first character from a Mule-encoded string in STR,
479 assuming it's non-ASCII. Do not call this directly.
480 Use the macro charptr_emchar() instead. */
/* Decode the first character of the encoded string STR.
   Two decoders are visible, presumably alternatives under a preprocessor
   conditional not shown here -- confirm:
   1. The first byte is classified by the thresholds 0xf8, 0xf0, 0xe0 and
      0xc0 (the bodies of those branches are not visible); a loop then
      runs `len' times, shifting `ch' left by six bits and OR-ing in the
      low six bits of a byte each time.
   2. A leading-byte decoder: LEADING_BYTE_CONTROL_1 yields the next byte
      minus 0x20; otherwise the charset is found from the leading byte and
      the result is built with MAKE_CHAR, a second position byte being
      involved when the charset has dimension 2. */
483 non_ascii_charptr_emchar (CONST Bufbyte *str)
496 else if ( b >= 0xf8 )
501 else if ( b >= 0xf0 )
506 else if ( b >= 0xe0 )
511 else if ( b >= 0xc0 )
521 for( ; len > 0; len-- )
524 ch = ( ch << 6 ) | ( b & 0x3f );
528 Bufbyte i0 = *str, i1, i2 = 0;
531 if (i0 == LEADING_BYTE_CONTROL_1)
532 return (Emchar) (*++str - 0x20);
534 if (LEADING_BYTE_PREFIX_P (i0))
539 charset = CHARSET_BY_LEADING_BYTE (i0);
540 if (XCHARSET_DIMENSION (charset) == 2)
543 return MAKE_CHAR (charset, i1, i2);
547 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
548 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII Emchar.  CH is split into three fields
   with CHAR_FIELD1, CHAR_FIELD2 and CHAR_FIELD3.
   Visible checks: f2 (and further down f1) must lie in the official or the
   private field range; position values 0x20 and 0x7F are special-cased,
   and for them the visible result is whether the charset found through
   the leading byte has 96 characters; with composite chars enabled, a
   composite character is looked up in
   Vcomposite_char_char2string_hash_table.
   Most return statements and the condition that selects the one-dimension
   path over the two-dimension path are not visible in this listing.
   NOTE(review): the three lines beginning "NOTE: This takes advantage"
   have lost their comment delimiters in this listing. */
552 non_ascii_valid_char_p (Emchar ch)
556 /* Must have only lowest 19 bits set */
560 f1 = CHAR_FIELD1 (ch);
561 f2 = CHAR_FIELD2 (ch);
562 f3 = CHAR_FIELD3 (ch);
568 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
569 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
570 f2 > MAX_CHAR_FIELD2_PRIVATE)
575 if (f3 != 0x20 && f3 != 0x7F)
579 NOTE: This takes advantage of the fact that
580 FIELD2_TO_OFFICIAL_LEADING_BYTE and
581 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
583 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
584 return (XCHARSET_CHARS (charset) == 96);
590 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
591 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
592 f1 > MAX_CHAR_FIELD1_PRIVATE)
594 if (f2 < 0x20 || f3 < 0x20)
597 #ifdef ENABLE_COMPOSITE_CHARS
598 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
600 if (UNBOUNDP (Fgethash (make_int (ch),
601 Vcomposite_char_char2string_hash_table,
606 #endif /* ENABLE_COMPOSITE_CHARS */
608 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
611 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
613 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
616 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
618 return (XCHARSET_CHARS (charset) == 96);
624 /************************************************************************/
625 /* Basic string functions */
626 /************************************************************************/
628 /* Copy the character pointed to by PTR into STR, assuming it's
629 non-ASCII. Do not call this directly. Use the macro
630 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR.  The switch on
   REP_BYTES_BY_FIRST_BYTE falls through from the longest case down to
   case 2, copying one trailing byte per case.  The value returned is
   strptr + 1 - str, i.e. one more than the number of times strptr was
   advanced.
   NOTE(review): no statement that copies the first byte into *strptr
   before the switch reads it is visible here; presumably that line is
   missing from this listing -- confirm. */
633 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
635 Bufbyte *strptr = str;
637 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
639 /* Notice fallthrough. */
641 case 6: *++strptr = *ptr++;
642 case 5: *++strptr = *ptr++;
644 case 4: *++strptr = *ptr++;
645 case 3: *++strptr = *ptr++;
646 case 2: *++strptr = *ptr;
651 return strptr + 1 - str;
655 /************************************************************************/
656 /* streams of Emchars */
657 /************************************************************************/
659 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
660 The functions below are not meant to be called directly; use
661 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM when its first byte CH
   has already been read.  CH is stored as the first byte of a local
   buffer; the remaining bytes, whose count is selected by
   REP_BYTES_BY_FIRST_BYTE (ch), are fetched with Lstream_getc and
   appended.  The assembled bytes are then decoded with charptr_emchar.
   The case labels and any checks on the value returned by Lstream_getc
   are not visible in this listing. */
664 Lstream_get_emchar_1 (Lstream *stream, int ch)
666 Bufbyte str[MAX_EMCHAR_LEN];
667 Bufbyte *strptr = str;
669 str[0] = (Bufbyte) ch;
670 switch (REP_BYTES_BY_FIRST_BYTE (ch))
672 /* Notice fallthrough. */
675 ch = Lstream_getc (stream);
677 *++strptr = (Bufbyte) ch;
679 ch = Lstream_getc (stream);
681 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
688 ch = Lstream_getc (stream);
690 *++strptr = (Bufbyte) ch;
692 ch = Lstream_getc (stream);
694 *++strptr = (Bufbyte) ch;
699 return charptr_emchar (str);
/* Write the character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, then hand the resulting bytes to Lstream_write and
   return whatever Lstream_write returns. */
703 Lstream_fput_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar and pass the resulting bytes to
   Lstream_unread. */
711 Lstream_funget_emchar (Lstream *stream, Emchar ch)
713 Bufbyte str[MAX_EMCHAR_LEN];
714 Bytecount len = set_charptr_emchar (str, ch);
715 Lstream_unread (stream, str, len);
719 /************************************************************************/
721 /************************************************************************/
/* Mark routine for charset objects: calls MARKOBJ on the short_name,
   long_name, doc_string, registry, ccl_program, decoding_table and
   encoding_table members.  The return statement is not visible here, so
   how the remaining Lisp members (e.g. the name) are kept alive cannot be
   stated from this listing. */
724 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
726 struct Lisp_Charset *cs = XCHARSET (obj);
728 markobj (cs->short_name);
729 markobj (cs->long_name);
730 markobj (cs->doc_string);
731 markobj (cs->registry);
732 markobj (cs->ccl_program);
734 markobj (cs->decoding_table);
735 markobj (cs->encoding_table);
/* Printer for charset objects.  Output has the form
   #<charset NAME SHORT-NAME LONG-NAME DOC-STRING TYPE DIR cols=N gN
   final='C' reg=REGISTRY 0xUID>, assembled from write_c_string,
   print_internal and two sprintf calls into `buf'.
   An error "printing unreadable object" is raised under a condition that
   is not visible here (presumably when readable printing was requested).
   NOTE(review): the declaration of `buf' is not visible; both sprintf
   calls are unbounded, so its size must cover the longest formatted
   text -- confirm. */
741 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
743 struct Lisp_Charset *cs = XCHARSET (obj);
747 error ("printing unreadable object #<charset %s 0x%x>",
748 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
751 write_c_string ("#<charset ", printcharfun);
752 print_internal (CHARSET_NAME (cs), printcharfun, 0);
753 write_c_string (" ", printcharfun);
754 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
755 write_c_string (" ", printcharfun);
756 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
757 write_c_string (" ", printcharfun);
758 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
759 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
760 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
761 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
762 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
764 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
765 CHARSET_COLUMNS (cs),
766 CHARSET_GRAPHIC (cs),
768 write_c_string (buf, printcharfun);
769 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
770 sprintf (buf, " 0x%x>", cs->header.uid);
771 write_c_string (buf, printcharfun);
/* lrecord description for struct Lisp_Charset: 7 Lisp_Object slots
   starting at the `name' member and 2 starting at `decoding_table'.
   The terminating entry and closing brace are not visible here. */
774 static const struct lrecord_description charset_description[] = {
775 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
777 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Defines the lrecord implementation for charset objects with mark_charset
   and print_charset; the next three arguments are 0.  One argument line
   (presumably the description table) is not visible in this listing. */
782 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
783 mark_charset, print_charset, 0, 0, 0,
785 struct Lisp_Charset);
787 /* Make a new charset. */
/* Allocate a charset record, fill in its fields from the arguments, and
   register it in the lookup tables.
   - DIMENSION and CHARS are derived from TYPE by the switch below.
   - REP_BYTES is 1 for LEADING_BYTE_ASCII; two other assignments
     (dimension + 1 and dimension + 2) are visible, but the conditions
     selecting between them are not.
   - The object is stored in charset_by_attributes (under an assertion
     that the slot was nil), in charset_by_leading_byte at
     id - MIN_LEADING_BYTE, in rep_bytes_by_first_byte[id], and in
     Vcharset_hash_table under NAME.  Several of the guarding conditions
     are not visible in this listing.
   NOTE(review): the visible code sets both the decoding and the encoding
   table to Qnil and never stores the DECODING_TABLE argument -- confirm
   whether that is intended.
   NOTE(review): `reg' is used below but its parameter line is not visible,
   and the comment beginning "some charsets do not have final characters"
   has no visible terminator. */
790 make_charset (Charset_ID id, Lisp_Object name,
791 unsigned char type, unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar ucs_min, Emchar ucs_max,
797 Emchar code_offset, unsigned char byte_offset)
800 struct Lisp_Charset *cs =
801 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
802 XSETCHARSET (obj, cs);
804 CHARSET_ID (cs) = id;
805 CHARSET_NAME (cs) = name;
806 CHARSET_SHORT_NAME (cs) = short_name;
807 CHARSET_LONG_NAME (cs) = long_name;
808 CHARSET_DIRECTION (cs) = direction;
809 CHARSET_TYPE (cs) = type;
810 CHARSET_COLUMNS (cs) = columns;
811 CHARSET_GRAPHIC (cs) = graphic;
812 CHARSET_FINAL (cs) = final;
813 CHARSET_DOC_STRING (cs) = doc;
814 CHARSET_REGISTRY (cs) = reg;
815 CHARSET_CCL_PROGRAM (cs) = Qnil;
816 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
818 CHARSET_DECODING_TABLE(cs) = Qnil;
819 CHARSET_ENCODING_TABLE(cs) = Qnil;
820 CHARSET_UCS_MIN(cs) = ucs_min;
821 CHARSET_UCS_MAX(cs) = ucs_max;
822 CHARSET_CODE_OFFSET(cs) = code_offset;
823 CHARSET_BYTE_OFFSET(cs) = byte_offset;
826 switch (CHARSET_TYPE (cs))
828 case CHARSET_TYPE_94:
829 CHARSET_DIMENSION (cs) = 1;
830 CHARSET_CHARS (cs) = 94;
832 case CHARSET_TYPE_96:
833 CHARSET_DIMENSION (cs) = 1;
834 CHARSET_CHARS (cs) = 96;
836 case CHARSET_TYPE_94X94:
837 CHARSET_DIMENSION (cs) = 2;
838 CHARSET_CHARS (cs) = 94;
840 case CHARSET_TYPE_96X96:
841 CHARSET_DIMENSION (cs) = 2;
842 CHARSET_CHARS (cs) = 96;
845 case CHARSET_TYPE_128:
846 CHARSET_DIMENSION (cs) = 1;
847 CHARSET_CHARS (cs) = 128;
849 case CHARSET_TYPE_128X128:
850 CHARSET_DIMENSION (cs) = 2;
851 CHARSET_CHARS (cs) = 128;
853 case CHARSET_TYPE_256:
854 CHARSET_DIMENSION (cs) = 1;
855 CHARSET_CHARS (cs) = 256;
857 case CHARSET_TYPE_256X256:
858 CHARSET_DIMENSION (cs) = 2;
859 CHARSET_CHARS (cs) = 256;
865 if (id == LEADING_BYTE_ASCII)
866 CHARSET_REP_BYTES (cs) = 1;
868 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
870 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
875 /* some charsets do not have final characters. This includes
876 ASCII, Control-1, Composite, and the two faux private
879 if (code_offset == 0)
881 assert (NILP (charset_by_attributes[type][final]));
882 charset_by_attributes[type][final] = obj;
885 assert (NILP (charset_by_attributes[type][final][direction]));
886 charset_by_attributes[type][final][direction] = obj;
890 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
891 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
894 /* official leading byte */
895 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
898 /* Some charsets are "faux" and don't have names or really exist at
899 all except in the leading-byte table. */
901 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the given
   DIMENSION.  Three counters are visible: a single shared one, and
   separate ones for 1-byte and 2-byte charsets.  Each is compared with
   its MAX_LEADING_BYTE_PRIVATE* limit and otherwise post-incremented into
   `lb'.  Which counter is used when is decided by lines not visible here.
   When no value is available the error "No more character sets free for
   this dimension" is raised with DIMENSION as its datum. */
906 get_unallocated_leading_byte (int dimension)
911 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
914 lb = next_allocated_leading_byte++;
918 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
921 lb = next_allocated_1_byte_leading_byte++;
925 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
928 lb = next_allocated_2_byte_leading_byte++;
934 ("No more character sets free for this dimension",
935 make_int (dimension));
/* Compute the first position byte of character CH within CHARSET.
   Visible strategies, tried in this order:
   1. If the charset has an encoding table, CH is looked up with
      get_char_code_table and the integer found is split by magnitude
      (thresholds 1 << 16 and 1 << 24); the return expressions are not
      visible in this listing.
   2. If CH lies within [UCS_MIN, UCS_MAX], the result is
      (ch - UCS_MIN + CODE_OFFSET) / divisor + BYTE_OFFSET, where the
      divisor is CHARS for dimension 2, CHARS squared for dimension 3 and
      CHARS cubed otherwise; the dimension-1 divisor is not visible.
   3. If CODE_OFFSET is 0, the offset d of CH from the base of the
      charset's block (MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or
      MIN_CHAR_96x96, advanced by FINAL - '0' blocks) is computed; for
      94x94 the result is d / 94 + 33 and for 96x96 it is d / 96 + 32.
      The upper-bound tests and the dimension-1 results are not visible. */
942 charset_get_byte1 (Lisp_Object charset, Emchar ch)
947 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
949 Lisp_Object value = get_char_code_table (ch, table);
953 Emchar code = XINT (value);
957 else if (code < (1 << 16))
959 else if (code < (1 << 24))
965 if ((XCHARSET_UCS_MIN (charset) <= ch)
966 && (ch <= XCHARSET_UCS_MAX (charset)))
967 return (ch - XCHARSET_UCS_MIN (charset)
968 + XCHARSET_CODE_OFFSET (charset))
969 / (XCHARSET_DIMENSION (charset) == 1 ?
972 XCHARSET_DIMENSION (charset) == 2 ?
973 XCHARSET_CHARS (charset)
975 XCHARSET_DIMENSION (charset) == 3 ?
976 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
978 XCHARSET_CHARS (charset)
979 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
980 + XCHARSET_BYTE_OFFSET (charset);
981 else if (XCHARSET_CODE_OFFSET (charset) == 0)
983 if (XCHARSET_DIMENSION (charset) == 1)
985 if (XCHARSET_CHARS (charset) == 94)
987 if (((d = ch - (MIN_CHAR_94
988 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
992 else if (XCHARSET_CHARS (charset) == 96)
994 if (((d = ch - (MIN_CHAR_96
995 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1002 else if (XCHARSET_DIMENSION (charset) == 2)
1004 if (XCHARSET_CHARS (charset) == 94)
1006 if (((d = ch - (MIN_CHAR_94x94
1007 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1010 return (d / 94) + 33;
1012 else if (XCHARSET_CHARS (charset) == 96)
1014 if (((d = ch - (MIN_CHAR_96x96
1015 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1018 return (d / 96) + 32;
/* Compute the second position byte of character CH within CHARSET.
   A dimension-1 charset is handled first (its result is not visible).
   1. With an encoding table, the code found for CH yields: its low byte
      when below 1 << 16, bits 16-23 when below 1 << 24, and bits 24-31
      otherwise.
   2. Within [UCS_MIN, UCS_MAX]: ((ch - UCS_MIN + CODE_OFFSET) / divisor)
      modulo CHARS, plus BYTE_OFFSET; the divisor is CHARS for dimension 3
      and CHARS squared in the last arm; the dimension-2 divisor is not
      visible.
   3. Otherwise, for a 94-char charset: if CH lies inside the 94*94 block
      selected by FINAL - '0', the result is
      ((ch - MIN_CHAR_94x94) % 94) + 33, else 0.  The 96-char case is
      analogous with 96 and + 32.
   NOTE(review): in case 1 the shifts for codes of 1 << 16 and above
   (>> 16, >> 24) select the top byte of the code, which is what one would
   expect for the FIRST position byte; charset_get_byte1's return
   expressions are not visible for comparison -- confirm that >> 8 / >> 16
   was not intended here. */
1026 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1028 if (XCHARSET_DIMENSION (charset) == 1)
1034 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1036 Lisp_Object value = get_char_code_table (ch, table);
1040 Emchar code = XINT (value);
1042 if (code < (1 << 16))
1043 return (unsigned char)code;
1044 else if (code < (1 << 24))
1045 return (unsigned char)(code >> 16);
1047 return (unsigned char)(code >> 24);
1050 if ((XCHARSET_UCS_MIN (charset) <= ch)
1051 && (ch <= XCHARSET_UCS_MAX (charset)))
1052 return ((ch - XCHARSET_UCS_MIN (charset)
1053 + XCHARSET_CODE_OFFSET (charset))
1054 / (XCHARSET_DIMENSION (charset) == 2 ?
1057 XCHARSET_DIMENSION (charset) == 3 ?
1058 XCHARSET_CHARS (charset)
1060 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1061 % XCHARSET_CHARS (charset)
1062 + XCHARSET_BYTE_OFFSET (charset);
1063 else if (XCHARSET_CHARS (charset) == 94)
1064 return (MIN_CHAR_94x94
1065 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1066 && (ch < MIN_CHAR_94x94
1067 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1068 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1069 else /* if (XCHARSET_CHARS (charset) == 96) */
1070 return (MIN_CHAR_96x96
1071 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1072 && (ch < MIN_CHAR_96x96
1073 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1074 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
/* Declared here; not read or written anywhere in the visible part of this
   file. */
1078 Lisp_Object Vdefault_coded_charset_priority_list;
1082 /************************************************************************/
1083 /* Basic charset Lisp functions */
1084 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): the visible body
   returns Qt when CHARSETP holds for OBJECT and Qnil otherwise.  The end
   of the doc comment and the argument list are not visible here. */
1086 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1087 Return non-nil if OBJECT is a charset.
1091 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': a charset object is returned unchanged;
   anything else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the not-found value.  The end of the
   doc comment and the argument list are not visible here. */
1094 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1095 Retrieve the charset of the given name.
1096 If CHARSET-OR-NAME is a charset object, it is simply returned.
1097 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1098 nil is returned. Otherwise the associated charset object is returned.
1102 if (CHARSETP (charset_or_name))
1103 return charset_or_name;
1105 CHECK_SYMBOL (charset_or_name);
1106 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': looks NAME up with Ffind_charset and
   signals "No such charset" with NAME as datum; the test guarding the
   signal and the return statement are not visible here (presumably the
   signal fires when the lookup yields nil -- confirm). */
1109 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1110 Retrieve the charset of the given name.
1111 Same as `find-charset' except an error is signalled if there is no such
1112 charset instead of returning nil.
1116 Lisp_Object charset = Ffind_charset (name);
1119 signal_simple_error ("No such charset", name);
1123 /* We store the charsets in hash tables with the names as the key and the
1124 actual charset object as the value. Occasionally we need to use them
1125 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash by Fcharset_list: it carries a
   pointer to the list variable that add_charset_to_list_mapper pushes
   onto. */
1126 struct charset_list_closure
1128 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list: conses the name of
   the charset VALUE onto the front of the list that the closure points
   to.  KEY is not used in the visible lines; the return statement is not
   visible. */
1132 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1133 void *charset_list_closure)
1135 /* This function can GC */
1136 struct charset_list_closure *chcl =
1137 (struct charset_list_closure*) charset_list_closure;
1138 Lisp_Object *charset_list = chcl->charset_list;
1140 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list' (no arguments): starts from an empty list
   protected with GCPRO1, runs add_charset_to_list_mapper over
   Vcharset_hash_table through elisp_maphash, and returns the accumulated
   list of charset names.  The matching UNGCPRO is not visible in this
   listing. */
1144 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1145 Return a list of the names of all defined charsets.
1149 Lisp_Object charset_list = Qnil;
1150 struct gcpro gcpro1;
1151 struct charset_list_closure charset_list_closure;
1153 GCPRO1 (charset_list);
1154 charset_list_closure.charset_list = &charset_list;
1155 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1156 &charset_list_closure);
1159 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET through Fget_charset
   and returns the name stored in the resulting charset object. */
1162 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1163 Return the name of the given charset.
1167 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset' (exactly three arguments: NAME, DOC-STRING,
   PROPS).  Visible flow:
   - NAME must be a symbol not yet known to Ffind_charset; a non-nil
     DOC-STRING must be a string.
   - PROPS is walked as a property list; each recognised keyword is
     validated (dimension 1..2, chars 94 or 96, columns 1 or 2, final
     within '0'..'~', direction l2r or r2l, and so on).
   - A final byte is required, and must not exceed 0x5F for dimension 2.
   - The (type, final) pair must be unused in both directions.
   - A leading byte is taken from get_unallocated_leading_byte, missing
     strings get defaults, and make_charset is called with Qnil, 0, 0, 0
     and byte_offset as its last five arguments.
   NOTE(review): the test for `long-name' begins with a plain `if' rather
   than `else if', so it starts a new chain.  A `short-name' keyword
   therefore appears to fall through that second chain to the final
   "Unrecognized property" error.  The `else' lines are not visible in
   this listing -- confirm against the complete source.
   NOTE(review): two different range checks for `graphic' are visible
   (0..2 and 0..1), presumably alternatives under a preprocessor
   conditional not shown here.  The end of the doc comment and several
   declarations (e.g. `type') are also not visible. */
1170 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1171 Define a new character set.
1172 This function is for use with Mule support.
1173 NAME is a symbol, the name by which the character set is normally referred.
1174 DOC-STRING is a string describing the character set.
1175 PROPS is a property list, describing the specific nature of the
1176 character set. Recognized properties are:
1178 'short-name Short version of the charset name (ex: Latin-1)
1179 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1180 'registry A regular expression matching the font registry field for
1182 'dimension Number of octets used to index a character in this charset.
1183 Either 1 or 2. Defaults to 1.
1184 'columns Number of columns used to display a character in this charset.
1185 Only used in TTY mode. (Under X, the actual width of a
1186 character can be derived from the font used to display the
1187 characters.) If unspecified, defaults to the dimension
1188 (this is almost always the correct value).
1189 'chars Number of characters in each dimension (94 or 96).
1190 Defaults to 94. Note that if the dimension is 2, the
1191 character set thus described is 94x94 or 96x96.
1192 'final Final byte of ISO 2022 escape sequence. Must be
1193 supplied. Each combination of (DIMENSION, CHARS) defines a
1194 separate namespace for final bytes. Note that ISO
1195 2022 restricts the final byte to the range
1196 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1197 dimension == 2. Note also that final bytes in the range
1198 0x30 - 0x3F are reserved for user-defined (not official)
1200 'graphic 0 (use left half of font on output) or 1 (use right half
1201 of font on output). Defaults to 0. For example, for
1202 a font whose registry is ISO8859-1, the left half
1203 (octets 0x20 - 0x7F) is the `ascii' character set, while
1204 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1205 character set. With 'graphic set to 0, the octets
1206 will have their high bit cleared; with it set to 1,
1207 the octets will have their high bit set.
1208 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1210 'ccl-program A compiled CCL program used to convert a character in
1211 this charset into an index into the font. This is in
1212 addition to the 'graphic property. The CCL program
1213 is passed the octets of the character, with the high
1214 bit cleared and set depending upon whether the value
1215 of the 'graphic property is 0 or 1.
1217 (name, doc_string, props))
1219 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1220 int direction = CHARSET_LEFT_TO_RIGHT;
1222 Lisp_Object registry = Qnil;
1223 Lisp_Object charset;
1224 Lisp_Object rest, keyword, value;
1225 Lisp_Object ccl_program = Qnil;
1226 Lisp_Object short_name = Qnil, long_name = Qnil;
1228 Emchar code_offset = 0;
1229 unsigned char byte_offset = 0;
1232 CHECK_SYMBOL (name);
1233 if (!NILP (doc_string))
1234 CHECK_STRING (doc_string);
1236 charset = Ffind_charset (name);
1237 if (!NILP (charset))
1238 signal_simple_error ("Cannot redefine existing charset", name);
1240 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1242 if (EQ (keyword, Qshort_name))
1244 CHECK_STRING (value);
1248 if (EQ (keyword, Qlong_name))
1250 CHECK_STRING (value);
1254 else if (EQ (keyword, Qdimension))
1257 dimension = XINT (value);
1258 if (dimension < 1 || dimension > 2)
1259 signal_simple_error ("Invalid value for 'dimension", value);
1262 else if (EQ (keyword, Qchars))
1265 chars = XINT (value);
1266 if (chars != 94 && chars != 96)
1267 signal_simple_error ("Invalid value for 'chars", value);
1270 else if (EQ (keyword, Qcolumns))
1273 columns = XINT (value);
1274 if (columns != 1 && columns != 2)
1275 signal_simple_error ("Invalid value for 'columns", value);
1278 else if (EQ (keyword, Qgraphic))
1281 graphic = XINT (value);
1283 if (graphic < 0 || graphic > 2)
1285 if (graphic < 0 || graphic > 1)
1287 signal_simple_error ("Invalid value for 'graphic", value);
1290 else if (EQ (keyword, Qregistry))
1292 CHECK_STRING (value);
1296 else if (EQ (keyword, Qdirection))
1298 if (EQ (value, Ql2r))
1299 direction = CHARSET_LEFT_TO_RIGHT;
1300 else if (EQ (value, Qr2l))
1301 direction = CHARSET_RIGHT_TO_LEFT;
1303 signal_simple_error ("Invalid value for 'direction", value);
1306 else if (EQ (keyword, Qfinal))
1308 CHECK_CHAR_COERCE_INT (value);
1309 final = XCHAR (value);
1310 if (final < '0' || final > '~')
1311 signal_simple_error ("Invalid value for 'final", value);
1314 else if (EQ (keyword, Qccl_program))
1316 CHECK_VECTOR (value);
1317 ccl_program = value;
1321 signal_simple_error ("Unrecognized property", keyword);
1325 error ("'final must be specified");
1326 if (dimension == 2 && final > 0x5F)
1328 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1332 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1334 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1336 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1337 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1339 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1341 id = get_unallocated_leading_byte (dimension);
1343 if (NILP (doc_string))
1344 doc_string = build_string ("");
1346 if (NILP (registry))
1347 registry = build_string ("");
1349 if (NILP (short_name))
1350 XSETSTRING (short_name, XSYMBOL (name)->name);
1352 if (NILP (long_name))
1353 long_name = doc_string;
1356 columns = dimension;
1357 charset = make_charset (id, name, type, columns, graphic,
1358 final, direction, short_name, long_name,
1359 doc_string, registry,
1360 Qnil, 0, 0, 0, byte_offset);
1361 if (!NILP (ccl_program))
1362 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* DEFUN for the Lisp function `make-reverse-direction-charset'.
   Builds a new charset named NEW-NAME that copies CHARSET's type,
   columns, graphic, final byte, names, doc string, registry and
   table/offset fields, but with the opposite direction, and links the
   two charsets to each other.  Signals an error if CHARSET already has
   a reverse-direction charset or if NEW-NAME already names a charset. */
1366 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1368 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1369 NEW-NAME is the name of the new charset. Return the new charset.
1371 (charset, new_name))
1373 Lisp_Object new_charset = Qnil;
1374 int id, dimension, columns, graphic, final;
1375 int direction, type;
1376 Lisp_Object registry, doc_string, short_name, long_name;
1377 struct Lisp_Charset *cs;
1379 charset = Fget_charset (charset);
/* A charset may have at most one reverse-direction partner. */
1380 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1381 signal_simple_error ("Charset already has reverse-direction charset",
1384 CHECK_SYMBOL (new_name);
1385 if (!NILP (Ffind_charset (new_name)))
1386 signal_simple_error ("Cannot redefine existing charset", new_name);
1388 cs = XCHARSET (charset);
1390 type = CHARSET_TYPE (cs);
1391 columns = CHARSET_COLUMNS (cs);
1392 dimension = CHARSET_DIMENSION (cs);
/* The new charset gets its own id, obtained for the same dimension. */
1393 id = get_unallocated_leading_byte (dimension);
1395 graphic = CHARSET_GRAPHIC (cs);
1396 final = CHARSET_FINAL (cs);
/* Flip the direction: default to right-to-left, and use left-to-right
   when the original is itself right-to-left. */
1397 direction = CHARSET_RIGHT_TO_LEFT;
1398 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1399 direction = CHARSET_LEFT_TO_RIGHT;
1400 doc_string = CHARSET_DOC_STRING (cs);
1401 short_name = CHARSET_SHORT_NAME (cs);
1402 long_name = CHARSET_LONG_NAME (cs);
1403 registry = CHARSET_REGISTRY (cs);
1405 new_charset = make_charset (id, new_name, type, columns,
1406 graphic, final, direction, short_name, long_name,
1407 doc_string, registry,
1409 CHARSET_DECODING_TABLE(cs),
1410 CHARSET_UCS_MIN(cs),
1411 CHARSET_UCS_MAX(cs),
1412 CHARSET_CODE_OFFSET(cs),
1413 CHARSET_BYTE_OFFSET(cs)
/* Cross-link the pair so each records the other as its
   reverse-direction charset. */
1419 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1420 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* DEFUN for the Lisp function `define-charset-alias'.  Checks that
   ALIAS is a symbol, resolves CHARSET through Fget_charset, stores the
   resulting charset under ALIAS in Vcharset_hash_table and returns the
   value of that Fputhash call. */
1425 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1426 Define symbol ALIAS as an alias for CHARSET.
1430 CHECK_SYMBOL (alias);
1431 charset = Fget_charset (charset);
1432 return Fputhash (alias, charset, Vcharset_hash_table);
1435 /* #### Reverse direction charsets not yet implemented. */
/* DEFUN for the Lisp function `charset-reverse-direction-charset'.
   Resolves CHARSET through Fget_charset and returns the contents of its
   reverse-direction-charset slot unchanged. */
1437 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1439 Return the reverse-direction charset parallel to CHARSET, if any.
1440 This is the charset with the same properties (in particular, the same
1441 dimension, number of characters per dimension, and final byte) as
1442 CHARSET but whose characters are displayed in the opposite direction.
1446 charset = Fget_charset (charset);
1447 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* DEFUN for the Lisp function `charset-from-attributes'.
   Validation visible below: DIMENSION must be 1 or 2, CHARS must be 94
   or 96, FINAL must lie between '0' and '~' (and be at most 0x5F when
   DIMENSION is 2), and DIRECTION must be `l2r', `r2l' or nil.  The
   charset type is derived from DIMENSION and CHARS, and the lookup goes
   through CHARSET_BY_ATTRIBUTES; `di' stays -1 when no direction was
   given.
   NOTE(review): the conditions selecting between the two `type = ...'
   assignments and between the three lookups are not visible in this
   listing.  The one visible return statement yields XCHARSET_NAME (obj),
   i.e. the charset's name rather than the charset object -- confirm
   against the docstring. */
1451 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1452 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1453 If DIRECTION is omitted, both directions will be checked (left-to-right
1454 will be returned if character sets exist for both directions).
1456 (dimension, chars, final, direction))
1458 int dm, ch, fi, di = -1;
1460 Lisp_Object obj = Qnil;
1462 CHECK_INT (dimension);
1463 dm = XINT (dimension);
1464 if (dm < 1 || dm > 2)
1465 signal_simple_error ("Invalid value for DIMENSION", dimension);
1469 if (ch != 94 && ch != 96)
1470 signal_simple_error ("Invalid value for CHARS", chars);
1472 CHECK_CHAR_COERCE_INT (final);
1474 if (fi < '0' || fi > '~')
1475 signal_simple_error ("Invalid value for FINAL", final);
1477 if (EQ (direction, Ql2r))
1478 di = CHARSET_LEFT_TO_RIGHT;
1479 else if (EQ (direction, Qr2l))
1480 di = CHARSET_RIGHT_TO_LEFT;
1481 else if (!NILP (direction))
1482 signal_simple_error ("Invalid value for DIRECTION", direction);
1484 if (dm == 2 && fi > 0x5F)
1486 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1489 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1491 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1495 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1497 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1500 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1503 return XCHARSET_NAME (obj);
/* DEFUN for the Lisp function `charset-short-name': resolves CHARSET
   through Fget_charset and returns its short-name slot. */
1507 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1508 Return short name of CHARSET.
1512 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* DEFUN for the Lisp function `charset-long-name': resolves CHARSET
   through Fget_charset and returns its long-name slot. */
1515 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1516 Return long name of CHARSET.
1520 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* DEFUN for the Lisp function `charset-description': resolves CHARSET
   through Fget_charset and returns its doc-string slot. */
1523 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1524 Return description of CHARSET.
1528 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* DEFUN for the Lisp function `charset-dimension': resolves CHARSET
   through Fget_charset and returns its dimension as a Lisp integer. */
1531 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1532 Return dimension of CHARSET.
1536 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* DEFUN for the Lisp function `charset-property'.
   Resolves CHARSET, checks that PROP is a symbol, and compares PROP
   against each known property symbol in turn.  Integer-valued slots
   (dimension, columns, graphic, chars) are returned through make_int,
   the final byte through make_char, and the direction as the symbol
   `l2r' or `r2l'.  `reverse-direction-charset' is also accepted here
   although the docstring does not mention it.  Any other PROP signals
   "Unrecognized charset property name".
   NOTE(review): for `reverse-direction-charset' the visible return
   yields XCHARSET_NAME (obj); the guard around it is not visible in
   this listing. */
1539 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1540 Return property PROP of CHARSET.
1541 Recognized properties are those listed in `make-charset', as well as
1542 'name and 'doc-string.
1546 struct Lisp_Charset *cs;
1548 charset = Fget_charset (charset);
1549 cs = XCHARSET (charset);
1551 CHECK_SYMBOL (prop);
1552 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1553 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1554 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1555 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1556 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1557 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1558 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1559 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1560 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1561 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1562 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1563 if (EQ (prop, Qdirection))
1564 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1565 if (EQ (prop, Qreverse_direction_charset))
1567 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1571 return XCHARSET_NAME (obj);
1573 signal_simple_error ("Unrecognized charset property name", prop);
1574 return Qnil; /* not reached */
/* DEFUN for the Lisp function `charset-id': resolves CHARSET through
   Fget_charset and returns its leading byte as a Lisp integer, so the
   "identification number" is the value of XCHARSET_LEADING_BYTE. */
1577 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1578 Return charset identification number of CHARSET.
1582 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1585 /* #### We need to figure out which properties we really want to
/* DEFUN for the Lisp function `set-charset-ccl-program'.  Resolves
   CHARSET through Fget_charset, requires CCL-PROGRAM to be a vector
   (CHECK_VECTOR), and stores it in the charset's ccl-program slot. */
1588 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1589 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1591 (charset, ccl_program))
1593 charset = Fget_charset (charset);
1594 CHECK_VECTOR (ccl_program);
1595 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Helper: for every device visited by DEVICE_LOOP_NO_BREAK, look up
   CHARSET in that device's charset_font_cache and clear the hash table
   found there, if any. */
1600 invalidate_charset_font_caches (Lisp_Object charset)
1602 /* Invalidate font cache entries for charset on all devices. */
1603 Lisp_Object devcons, concons, hash_table;
1604 DEVICE_LOOP_NO_BREAK (devcons, concons)
1606 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is passed as the default, so an unbound result means this
   device has no cache entry for CHARSET and there is nothing to clear. */
1607 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1608 if (!UNBOUNDP (hash_table))
1609 Fclrhash (hash_table);
/* DEFUN for the Lisp function `set-charset-registry'.  Resolves
   CHARSET, requires REGISTRY to be a string, stores it in the charset's
   registry slot, then calls invalidate_charset_font_caches for the
   charset and face_property_was_changed for the default face's font
   property (Vdefault_face, Qfont, Qglobal). */
1613 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1614 Set the 'registry property of CHARSET to REGISTRY.
1616 (charset, registry))
1618 charset = Fget_charset (charset);
1619 CHECK_STRING (registry);
1620 XCHARSET_REGISTRY (charset) = registry;
1621 invalidate_charset_font_caches (charset);
1622 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* DEFUN for the Lisp function `charset-mapping-table': resolves CHARSET
   through Fget_charset and returns its decoding-table slot, so the
   "mapping-table" is the value of XCHARSET_DECODING_TABLE. */
1627 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1628 Return mapping-table of CHARSET.
1632 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* DEFUN for the Lisp function `set-charset-mapping-table'.
   TABLE must be nil or a vector; anything else signals
   Qwrong_type_argument.  With nil, the decoding table is set to nil and
   the encoding table is cleared.  With a vector no longer than
   CHARSET_CHARS, the vector becomes the decoding table and a fresh
   encoding table is built from it with put_char_code_table.
   NOTE(review): braces, `case' labels and several conditions are not
   visible in this listing; the comments below describe only the
   statements that are. */
1635 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1636 Set mapping-table of CHARSET to TABLE.
1640 struct Lisp_Charset *cs;
1641 Lisp_Object old_table;
1644 charset = Fget_charset (charset);
1645 cs = XCHARSET (charset);
1647 if (EQ (table, Qnil))
1649 CHARSET_DECODING_TABLE(cs) = table;
1650 CHARSET_ENCODING_TABLE(cs) = Qnil;
1653 else if (VECTORP (table))
1655 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1656 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1657 old_table = CHARSET_ENCODING_TABLE(cs);
1658 CHARSET_DECODING_TABLE(cs) = table;
1661 signal_error (Qwrong_type_argument,
1662 list2 (build_translated_string ("vector-or-nil-p"),
1664 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
/* Rebuild the encoding table (character -> code) from the new decoding
   table; how it is walked depends on the charset's dimension. */
1666 switch (CHARSET_DIMENSION (cs))
/* Flat walk: the character at index i is recorded under the code
   i + CHARSET_BYTE_OFFSET. */
1669 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1670 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1672 Lisp_Object c = XVECTOR_DATA(table)[i];
1675 put_char_code_table (XCHAR (c),
1676 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1677 CHARSET_ENCODING_TABLE(cs));
/* Two-level walk: an element v at index i may itself be a vector of
   characters, indexed by j. */
1681 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1682 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1684 Lisp_Object v = XVECTOR_DATA(table)[i];
1690 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
/* NOTE(review): old_table was read from CHARSET_ENCODING_TABLE above
   but is written back to CHARSET_DECODING_TABLE here -- confirm which
   slot was meant to be saved and restored. */
1692 CHARSET_DECODING_TABLE(cs) = old_table;
1693 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1695 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1697 Lisp_Object c = XVECTOR_DATA(v)[j];
/* Two-octet code: the row (i + offset) goes in the high byte and the
   column (j + offset) in the low byte. */
1702 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1703 | (j + CHARSET_BYTE_OFFSET (cs))),
1704 CHARSET_ENCODING_TABLE(cs));
/* Element used directly as a character: recorded under a single-octet
   code, as in the flat walk. */
1708 put_char_code_table (XCHAR (v),
1709 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1710 CHARSET_ENCODING_TABLE(cs));
1719 /************************************************************************/
1720 /* Lisp primitives for working with characters */
1721 /************************************************************************/
/* DEFUN for the Lisp function `make-char'.
   The accepted octet range [lowlim, highlim] depends on the charset:
   0..127 for ascii, 0..31 for control-1, 0..255 for a 256-character
   charset, 33..126 for a 94-character charset and 32..127 otherwise.
   Octets outside the range signal args_out_of_range_3.  For a
   dimension-one charset only ARG1 is passed to MAKE_CHAR (the second
   octet is 0); otherwise both octets are passed. */
1723 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1724 Make a character from CHARSET and octets ARG1 and ARG2.
1725 ARG2 is required only for characters from two-dimensional charsets.
1726 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1727 character s with caron.
1729 (charset, arg1, arg2))
1731 struct Lisp_Charset *cs;
1733 int lowlim, highlim;
1735 charset = Fget_charset (charset);
1736 cs = XCHARSET (charset);
1738 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1739 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1741 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1743 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1744 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1747 /* It is useful (and safe, according to Olivier Galibert) to strip
1748 the 8th bit off ARG1 and ARG2 because it allows programmers to
1749 write (make-char 'latin-iso8859-2 CODE) where CODE is the actual
1750 Latin 2 code of the character. */
/* Range-check the first octet. */
1758 if (a1 < lowlim || a1 > highlim)
1759 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1761 if (CHARSET_DIMENSION (cs) == 1)
1765 ("Charset is of dimension one; second octet must be nil", arg2);
1766 return make_char (MAKE_CHAR (charset, a1, 0));
/* Only the low 7 bits of ARG2 are kept before the range check. */
1775 a2 = XINT (arg2) & 0x7f;
1777 if (a2 < lowlim || a2 > highlim)
1778 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1780 return make_char (MAKE_CHAR (charset, a1, a2));
/* DEFUN for the Lisp function `char-charset'.  Coerces CH to a
   character (CHECK_CHAR_COERCE_INT) and returns XCHARSET_NAME of the
   charset found by CHAR_CHARSET, i.e. the charset's name. */
1783 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1784 Return the character set of char CH.
1788 CHECK_CHAR_COERCE_INT (ch);
1790 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* DEFUN for the Lisp function `split-char'.  Breaks CHARACTER into its
   charset and position codes with BREAKUP_CHAR and builds a list headed
   by the charset's name: three elements (name, c1, c2) in the branch
   taken when the charset's dimension is 2, two elements (name, c1) in
   the other visible assignment. */
1793 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1794 Return list of charset and one or two position-codes of CHAR.
1798 /* This function can GC */
1799 struct gcpro gcpro1, gcpro2;
1800 Lisp_Object charset = Qnil;
1801 Lisp_Object rc = Qnil;
/* charset and rc are registered with GCPRO2 because, as noted above,
   this function can GC. */
1804 GCPRO2 (charset, rc);
1805 CHECK_CHAR_COERCE_INT (character);
/* charset, c1 and c2 are passed to BREAKUP_CHAR and used below as the
   character's charset and position codes. */
1807 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1809 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1811 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1815 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1823 #ifdef ENABLE_COMPOSITE_CHARS
1824 /************************************************************************/
1825 /* composite character functions */
1826 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible allocation path
   builds a new character in Vcharset_composite from the next free
   (row, column) pair, records it in both hash tables (char -> string
   and string -> char) and advances the column, wrapping to column 32
   of the next row once the column reaches 128.  Running out of rows
   (row >= 128) signals "No more composite chars available".
   NOTE(review): the condition that chooses between reusing an existing
   entry and allocating, and the return statement, are not visible in
   this listing. */
1829 lookup_composite_char (Bufbyte *str, int len)
1831 Lisp_Object lispstr = make_string (str, len);
1832 Lisp_Object ch = Fgethash (lispstr,
1833 Vcomposite_char_string2char_hash_table,
1839 if (composite_char_row_next >= 128)
1840 signal_simple_error ("No more composite chars available", lispstr);
1841 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1842 composite_char_col_next);
1843 Fputhash (make_char (emch), lispstr,
1844 Vcomposite_char_char2string_hash_table);
1845 Fputhash (lispstr, make_char (emch),
1846 Vcomposite_char_string2char_hash_table);
1847 composite_char_col_next++;
1848 if (composite_char_col_next >= 128)
1850 composite_char_col_next = 32;
1851 composite_char_row_next++;
/* Look up the composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that an
   entry exists, so callers must pass a character that was registered
   in that table. */
1860 composite_char_string (Emchar ch)
1862 Lisp_Object str = Fgethash (make_char (ch),
1863 Vcomposite_char_char2string_hash_table,
1865 assert (!UNBOUNDP (str));
/* Definition (spelled xxDEFUN here) of `make-composite-char'.  Requires
   STRING to be a Lisp string and returns, as a Lisp character, the
   result of lookup_composite_char on the string's data and length. */
1869 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1870 Convert a string into a single composite character.
1871 The character is the result of overstriking all the characters in
1876 CHECK_STRING (string);
1877 return make_char (lookup_composite_char (XSTRING_DATA (string),
1878 XSTRING_LENGTH (string)));
/* Definition (spelled xxDEFUN here) of `composite-char-string'.
   Signals "Must be composite char" unless the character's leading byte
   is LEADING_BYTE_COMPOSITE; otherwise returns the string obtained from
   composite_char_string. */
1881 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1882 Return a string of the characters comprising a composite character.
1890 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1891 signal_simple_error ("Must be composite char", ch);
1892 return composite_char_string (emch);
1894 #endif /* ENABLE_COMPOSITE_CHARS */
1897 /************************************************************************/
1898 /* initialization */
1899 /************************************************************************/
/* Symbol-table initialization for this file: registers every DEFUN'd
   function with DEFSUBR and creates, with defsymbol, the symbols used
   as property keywords, direction values and predefined charset
   names. */
1902 syms_of_mule_charset (void)
/* Charset functions. */
1904 DEFSUBR (Fcharsetp);
1905 DEFSUBR (Ffind_charset);
1906 DEFSUBR (Fget_charset);
1907 DEFSUBR (Fcharset_list);
1908 DEFSUBR (Fcharset_name);
1909 DEFSUBR (Fmake_charset);
1910 DEFSUBR (Fmake_reverse_direction_charset);
1911 /* DEFSUBR (Freverse_direction_charset); */
1912 DEFSUBR (Fdefine_charset_alias);
1913 DEFSUBR (Fcharset_from_attributes);
1914 DEFSUBR (Fcharset_short_name);
1915 DEFSUBR (Fcharset_long_name);
1916 DEFSUBR (Fcharset_description);
1917 DEFSUBR (Fcharset_dimension);
1918 DEFSUBR (Fcharset_property);
1919 DEFSUBR (Fcharset_id);
1920 DEFSUBR (Fset_charset_ccl_program);
1921 DEFSUBR (Fset_charset_registry);
1923 DEFSUBR (Fcharset_mapping_table);
1924 DEFSUBR (Fset_charset_mapping_table);
/* Character functions. */
1927 DEFSUBR (Fmake_char);
1928 DEFSUBR (Fchar_charset);
1929 DEFSUBR (Fsplit_char);
1931 #ifdef ENABLE_COMPOSITE_CHARS
1932 DEFSUBR (Fmake_composite_char);
1933 DEFSUBR (Fcomposite_char_string);
/* Property keywords. */
1936 defsymbol (&Qcharsetp, "charsetp");
1937 defsymbol (&Qregistry, "registry");
1938 defsymbol (&Qfinal, "final");
1939 defsymbol (&Qgraphic, "graphic");
1940 defsymbol (&Qdirection, "direction");
1941 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1942 defsymbol (&Qshort_name, "short-name");
1943 defsymbol (&Qlong_name, "long-name");
/* Values of the `direction' property. */
1945 defsymbol (&Ql2r, "l2r");
1946 defsymbol (&Qr2l, "r2l");
1948 /* Charsets, compatible with FSF 20.3
1949 Naming convention is Script-Charset[-Edition] */
1950 defsymbol (&Qascii, "ascii");
1951 defsymbol (&Qcontrol_1, "control-1");
1952 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1953 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1954 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1955 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1956 defsymbol (&Qthai_tis620, "thai-tis620");
1957 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1958 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1959 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1960 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1961 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1962 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1963 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1964 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1965 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1966 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1967 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1968 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1969 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1970 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1972 defsymbol (&Qucs_bmp, "ucs-bmp");
1973 defsymbol (&Qlatin_viscii, "latin-viscii");
1974 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
1975 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
1976 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
1977 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
1978 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
1979 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
1981 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1982 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1984 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file: sets every entry of the
   charset lookup tables to Qnil, seeds the counters used for
   allocating private leading bytes, and defines the Lisp variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list'.
   NOTE(review): charset_by_attributes is cleared below once with two
   indices and once with three, and two different sets of
   next_allocated_* counters are assigned; these are presumably
   alternatives selected by preprocessor conditionals that are not
   visible in this listing -- confirm against the full file. */
1988 vars_of_mule_charset (void)
1995 /* Table of charsets indexed by leading byte. */
1996 for (i = 0; i < countof (charset_by_leading_byte); i++)
1997 charset_by_leading_byte[i] = Qnil;
2000 /* Table of charsets indexed by type/final-byte. */
2001 for (i = 0; i < countof (charset_by_attributes); i++)
2002 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2003 charset_by_attributes[i][j] = Qnil;
2005 /* Table of charsets indexed by type/final-byte/direction. */
2006 for (i = 0; i < countof (charset_by_attributes); i++)
2007 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2008 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2009 charset_by_attributes[i][j][k] = Qnil;
/* Allocation of leading bytes for new charsets starts at the first
   private value. */
2013 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2015 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2016 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2020 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2021 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2022 Leading-code of private TYPE9N charset of column-width 1.
2024 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2028 Vutf_2000_version = build_string("0.9 (Kyūhōji)");
2029 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2030 Version number of UTF-2000.
2033 Vdefault_coded_charset_priority_list = Qnil;
2034 DEFVAR_LISP ("default-coded-charset-priority-list",
2035 &Vdefault_coded_charset_priority_list /*
2036 Default order of preferred coded-character-sets.
2042 complex_vars_of_mule_charset (void)
2044 staticpro (&Vcharset_hash_table);
2045 Vcharset_hash_table =
2046 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2048 /* Predefined character sets. We store them into variables for
2053 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2054 CHARSET_TYPE_256X256, 1, 2, 0,
2055 CHARSET_LEFT_TO_RIGHT,
2056 build_string ("BMP"),
2057 build_string ("BMP"),
2058 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2059 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2060 Qnil, 0, 0xFFFF, 0, 0);
2062 # define MIN_CHAR_THAI 0
2063 # define MAX_CHAR_THAI 0
2064 # define MIN_CHAR_GREEK 0
2065 # define MAX_CHAR_GREEK 0
2066 # define MIN_CHAR_HEBREW 0
2067 # define MAX_CHAR_HEBREW 0
2068 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2069 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2070 # define MIN_CHAR_CYRILLIC 0
2071 # define MAX_CHAR_CYRILLIC 0
2074 make_charset (LEADING_BYTE_ASCII, Qascii,
2075 CHARSET_TYPE_94, 1, 0, 'B',
2076 CHARSET_LEFT_TO_RIGHT,
2077 build_string ("ASCII"),
2078 build_string ("ASCII)"),
2079 build_string ("ASCII (ISO646 IRV)"),
2080 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2081 Qnil, 0, 0x7F, 0, 0);
2082 Vcharset_control_1 =
2083 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2084 CHARSET_TYPE_94, 1, 1, 0,
2085 CHARSET_LEFT_TO_RIGHT,
2086 build_string ("C1"),
2087 build_string ("Control characters"),
2088 build_string ("Control characters 128-191"),
2090 Qnil, 0x80, 0x9F, 0, 0);
2091 Vcharset_latin_iso8859_1 =
2092 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2093 CHARSET_TYPE_96, 1, 1, 'A',
2094 CHARSET_LEFT_TO_RIGHT,
2095 build_string ("Latin-1"),
2096 build_string ("ISO8859-1 (Latin-1)"),
2097 build_string ("ISO8859-1 (Latin-1)"),
2098 build_string ("iso8859-1"),
2099 Qnil, 0xA0, 0xFF, 0, 32);
2100 Vcharset_latin_iso8859_2 =
2101 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2102 CHARSET_TYPE_96, 1, 1, 'B',
2103 CHARSET_LEFT_TO_RIGHT,
2104 build_string ("Latin-2"),
2105 build_string ("ISO8859-2 (Latin-2)"),
2106 build_string ("ISO8859-2 (Latin-2)"),
2107 build_string ("iso8859-2"),
2109 Vcharset_latin_iso8859_3 =
2110 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2111 CHARSET_TYPE_96, 1, 1, 'C',
2112 CHARSET_LEFT_TO_RIGHT,
2113 build_string ("Latin-3"),
2114 build_string ("ISO8859-3 (Latin-3)"),
2115 build_string ("ISO8859-3 (Latin-3)"),
2116 build_string ("iso8859-3"),
2118 Vcharset_latin_iso8859_4 =
2119 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2120 CHARSET_TYPE_96, 1, 1, 'D',
2121 CHARSET_LEFT_TO_RIGHT,
2122 build_string ("Latin-4"),
2123 build_string ("ISO8859-4 (Latin-4)"),
2124 build_string ("ISO8859-4 (Latin-4)"),
2125 build_string ("iso8859-4"),
2127 Vcharset_thai_tis620 =
2128 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2129 CHARSET_TYPE_96, 1, 1, 'T',
2130 CHARSET_LEFT_TO_RIGHT,
2131 build_string ("TIS620"),
2132 build_string ("TIS620 (Thai)"),
2133 build_string ("TIS620.2529 (Thai)"),
2134 build_string ("tis620"),
2135 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2136 Vcharset_greek_iso8859_7 =
2137 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2138 CHARSET_TYPE_96, 1, 1, 'F',
2139 CHARSET_LEFT_TO_RIGHT,
2140 build_string ("ISO8859-7"),
2141 build_string ("ISO8859-7 (Greek)"),
2142 build_string ("ISO8859-7 (Greek)"),
2143 build_string ("iso8859-7"),
2144 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2145 Vcharset_arabic_iso8859_6 =
2146 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2147 CHARSET_TYPE_96, 1, 1, 'G',
2148 CHARSET_RIGHT_TO_LEFT,
2149 build_string ("ISO8859-6"),
2150 build_string ("ISO8859-6 (Arabic)"),
2151 build_string ("ISO8859-6 (Arabic)"),
2152 build_string ("iso8859-6"),
2154 Vcharset_hebrew_iso8859_8 =
2155 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2156 CHARSET_TYPE_96, 1, 1, 'H',
2157 CHARSET_RIGHT_TO_LEFT,
2158 build_string ("ISO8859-8"),
2159 build_string ("ISO8859-8 (Hebrew)"),
2160 build_string ("ISO8859-8 (Hebrew)"),
2161 build_string ("iso8859-8"),
2162 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2163 Vcharset_katakana_jisx0201 =
2164 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2165 CHARSET_TYPE_94, 1, 1, 'I',
2166 CHARSET_LEFT_TO_RIGHT,
2167 build_string ("JISX0201 Kana"),
2168 build_string ("JISX0201.1976 (Japanese Kana)"),
2169 build_string ("JISX0201.1976 Japanese Kana"),
2170 build_string ("jisx0201\\.1976"),
2172 MIN_CHAR_HALFWIDTH_KATAKANA,
2173 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2174 Vcharset_latin_jisx0201 =
2175 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2176 CHARSET_TYPE_94, 1, 0, 'J',
2177 CHARSET_LEFT_TO_RIGHT,
2178 build_string ("JISX0201 Roman"),
2179 build_string ("JISX0201.1976 (Japanese Roman)"),
2180 build_string ("JISX0201.1976 Japanese Roman"),
2181 build_string ("jisx0201\\.1976"),
2183 Vcharset_cyrillic_iso8859_5 =
2184 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2185 CHARSET_TYPE_96, 1, 1, 'L',
2186 CHARSET_LEFT_TO_RIGHT,
2187 build_string ("ISO8859-5"),
2188 build_string ("ISO8859-5 (Cyrillic)"),
2189 build_string ("ISO8859-5 (Cyrillic)"),
2190 build_string ("iso8859-5"),
2191 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2192 Vcharset_latin_iso8859_9 =
2193 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2194 CHARSET_TYPE_96, 1, 1, 'M',
2195 CHARSET_LEFT_TO_RIGHT,
2196 build_string ("Latin-5"),
2197 build_string ("ISO8859-9 (Latin-5)"),
2198 build_string ("ISO8859-9 (Latin-5)"),
2199 build_string ("iso8859-9"),
2201 Vcharset_japanese_jisx0208_1978 =
2202 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2203 CHARSET_TYPE_94X94, 2, 0, '@',
2204 CHARSET_LEFT_TO_RIGHT,
2205 build_string ("JIS X0208:1978"),
2206 build_string ("JIS X0208:1978 (Japanese)"),
2208 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2209 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2211 Vcharset_chinese_gb2312 =
2212 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2213 CHARSET_TYPE_94X94, 2, 0, 'A',
2214 CHARSET_LEFT_TO_RIGHT,
2215 build_string ("GB2312"),
2216 build_string ("GB2312)"),
2217 build_string ("GB2312 Chinese simplified"),
2218 build_string ("gb2312"),
2220 Vcharset_japanese_jisx0208 =
2221 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2222 CHARSET_TYPE_94X94, 2, 0, 'B',
2223 CHARSET_LEFT_TO_RIGHT,
2224 build_string ("JISX0208"),
2225 build_string ("JIS X0208:1983 (Japanese)"),
2226 build_string ("JIS X0208:1983 Japanese Kanji"),
2227 build_string ("jisx0208\\.1983"),
2229 Vcharset_korean_ksc5601 =
2230 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2231 CHARSET_TYPE_94X94, 2, 0, 'C',
2232 CHARSET_LEFT_TO_RIGHT,
2233 build_string ("KSC5601"),
2234 build_string ("KSC5601 (Korean"),
2235 build_string ("KSC5601 Korean Hangul and Hanja"),
2236 build_string ("ksc5601"),
2238 Vcharset_japanese_jisx0212 =
2239 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2240 CHARSET_TYPE_94X94, 2, 0, 'D',
2241 CHARSET_LEFT_TO_RIGHT,
2242 build_string ("JISX0212"),
2243 build_string ("JISX0212 (Japanese)"),
2244 build_string ("JISX0212 Japanese Supplement"),
2245 build_string ("jisx0212"),
2248 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2249 Vcharset_chinese_cns11643_1 =
2250 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2251 CHARSET_TYPE_94X94, 2, 0, 'G',
2252 CHARSET_LEFT_TO_RIGHT,
2253 build_string ("CNS11643-1"),
2254 build_string ("CNS11643-1 (Chinese traditional)"),
2256 ("CNS 11643 Plane 1 Chinese traditional"),
2257 build_string (CHINESE_CNS_PLANE_RE("1")),
2259 Vcharset_chinese_cns11643_2 =
2260 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2261 CHARSET_TYPE_94X94, 2, 0, 'H',
2262 CHARSET_LEFT_TO_RIGHT,
2263 build_string ("CNS11643-2"),
2264 build_string ("CNS11643-2 (Chinese traditional)"),
2266 ("CNS 11643 Plane 2 Chinese traditional"),
2267 build_string (CHINESE_CNS_PLANE_RE("2")),
2270 Vcharset_latin_viscii_lower =
2271 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2272 CHARSET_TYPE_96, 1, 1, '1',
2273 CHARSET_LEFT_TO_RIGHT,
2274 build_string ("VISCII lower"),
2275 build_string ("VISCII lower (Vietnamese)"),
2276 build_string ("VISCII lower (Vietnamese)"),
2277 build_string ("MULEVISCII-LOWER"),
2279 Vcharset_latin_viscii_upper =
2280 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2281 CHARSET_TYPE_96, 1, 1, '2',
2282 CHARSET_LEFT_TO_RIGHT,
2283 build_string ("VISCII upper"),
2284 build_string ("VISCII upper (Vietnamese)"),
2285 build_string ("VISCII upper (Vietnamese)"),
2286 build_string ("MULEVISCII-UPPER"),
2288 Vcharset_latin_viscii =
2289 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2290 CHARSET_TYPE_256, 1, 2, 0,
2291 CHARSET_LEFT_TO_RIGHT,
2292 build_string ("VISCII"),
2293 build_string ("VISCII 1.1 (Vietnamese)"),
2294 build_string ("VISCII 1.1 (Vietnamese)"),
2295 build_string ("VISCII1\\.1"),
2297 Vcharset_hiragana_jisx0208 =
2298 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2299 CHARSET_TYPE_94X94, 2, 0, 'B',
2300 CHARSET_LEFT_TO_RIGHT,
2301 build_string ("Hiragana"),
2302 build_string ("Hiragana of JIS X0208"),
2303 build_string ("Japanese Hiragana of JIS X0208"),
2304 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2305 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2306 (0x24 - 33) * 94 + (0x21 - 33), 33);
2307 Vcharset_katakana_jisx0208 =
2308 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2309 CHARSET_TYPE_94X94, 2, 0, 'B',
2310 CHARSET_LEFT_TO_RIGHT,
2311 build_string ("Katakana"),
2312 build_string ("Katakana of JIS X0208"),
2313 build_string ("Japanese Katakana of JIS X0208"),
2314 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2315 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2316 (0x25 - 33) * 94 + (0x21 - 33), 33);
2318 Vcharset_chinese_big5_1 =
2319 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2320 CHARSET_TYPE_94X94, 2, 0, '0',
2321 CHARSET_LEFT_TO_RIGHT,
2322 build_string ("Big5"),
2323 build_string ("Big5 (Level-1)"),
2325 ("Big5 Level-1 Chinese traditional"),
2326 build_string ("big5"),
2328 Vcharset_chinese_big5_2 =
2329 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2330 CHARSET_TYPE_94X94, 2, 0, '1',
2331 CHARSET_LEFT_TO_RIGHT,
2332 build_string ("Big5"),
2333 build_string ("Big5 (Level-2)"),
2335 ("Big5 Level-2 Chinese traditional"),
2336 build_string ("big5"),
2339 #ifdef ENABLE_COMPOSITE_CHARS
2340 /* #### For simplicity, we put composite chars into a 96x96 charset.
2341 This is going to lead to problems because you can run out of
2342 room, esp. as we don't yet recycle numbers. */
2343 Vcharset_composite =
2344 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2345 CHARSET_TYPE_96X96, 2, 0, 0,
2346 CHARSET_LEFT_TO_RIGHT,
2347 build_string ("Composite"),
2348 build_string ("Composite characters"),
2349 build_string ("Composite characters"),
2352 composite_char_row_next = 32;
2353 composite_char_col_next = 32;
2355 Vcomposite_char_string2char_hash_table =
2356 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2357 Vcomposite_char_char2string_hash_table =
2358 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2359 staticpro (&Vcomposite_char_string2char_hash_table);
2360 staticpro (&Vcomposite_char_char2string_hash_table);
2361 #endif /* ENABLE_COMPOSITE_CHARS */