1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps strings representing
75 composed chars to their equivalent chars; one goes the other way. */
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two definitions of charset_by_attributes with different
   shapes follow (one without and one with a trailing direction index).
   Presumably a preprocessor conditional that is not visible in this
   excerpt selects exactly one of them -- confirm against the full file. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* Initial contents of the byte-length table, one entry per possible first
   byte 0x00 - 0x9f.  The table is not const: make_charset() stores
   CHARSET_REP_BYTES of a new charset into the slot for its id.
   NOTE(review): rows 116 and 118 both sit under the 0x80 - 0x8f heading;
   with both present the initializer would exceed 0xA0 entries, so
   presumably only one of them is compiled (conditional not visible
   here) -- confirm. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method registered for char-byte-table objects (see the
   DEFINE_LRECORD_IMPLEMENTATION below): calls MARKOBJ on each of the 256
   `property' slots of OBJ.  The return type and return statement are not
   visible in this excerpt. */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method for char-byte-table objects.  Walks the 256 slots of
   OBJ1 and OBJ2 in parallel.  A pair of slots that both hold nested
   tables is compared by recursing with DEPTH + 1; other pairs are
   compared with internal_equal().
   NOTE(review): the else branches and return statements are not visible
   here, so the exact result for a table/non-table mismatch cannot be
   confirmed from this excerpt. */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-table objects: hashes the 256 `property'
   slots of OBJ with internal_array_hash(), passing DEPTH through. */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* Layout description for struct Lisp_Char_Byte_Table: 256 Lisp_Object
   slots starting at the `property' member.  The terminating entry and
   closing brace are not visible in this excerpt. */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Registers the char-byte-table lrecord type with the mark, equal and
   hash methods and the description defined above, using the generic
   internal_object_printer for printing.  Note that the type-name string
   is "char-code-table" while the C identifier is char_byte_table. */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table and set all 256 slots to INITVAL.  The
   new record is wrapped into the Lisp_Object `obj'; the declaration of
   `obj' and the return statement are not visible in this excerpt. */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a newly allocated table.  Slots
   that themselves hold char-byte-tables are copied recursively; all other
   slot values are copied as-is (shared with the original). */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
/* Nested table: recurse so the copy does not share sub-tables. */
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table is created as a single top-level char-byte-table
   whose slots all hold INITVAL. */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in the char-code-table TABLE.
   The table is indexed one byte of CH at a time, most significant byte
   first.  At each level, a slot that holds a nested char-byte-table is
   descended into.
   NOTE(review): the branches taken when a slot is not a table are not
   visible in this excerpt; presumably the slot value is returned
   directly -- confirm. */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
/* NOTE(review): unlike the lower levels, this top-level index is not cast
   to unsigned char; confirm CH can never make `ch >> 24' fall outside
   0..255 here. */
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in the char-code-table TABLE.
   The table is walked one byte of CH at a time, most significant byte
   first.  Where a level already holds a nested table, it is descended
   into.  Where a level holds a plain value different from VALUE, the
   missing lower-level tables are created with every slot initialized to
   that old value, so all other characters under that slot keep it, and
   only the slot for CH receives VALUE.  When the plain value is already
   EQ to VALUE nothing needs to change. */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
/* NOTE(review): this read indexes with `ch >> 24' uncast, while the
   matching store at the end of the function casts to unsigned char. */
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
/* All four levels exist: store directly in the leaf table. */
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
/* Third level held a plain value: create the leaf table. */
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
/* Second level held a plain value: create levels three and four. */
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
/* Top level held a plain value: create levels two, three and four. */
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vutf_2000_version;
315 int leading_code_private_11;
318 Lisp_Object Qcharsetp;
320 /* Qdoc_string, Qdimension, Qchars defined in general.c */
321 Lisp_Object Qregistry, Qfinal, Qgraphic;
322 Lisp_Object Qdirection;
323 Lisp_Object Qreverse_direction_charset;
324 Lisp_Object Qleading_byte;
325 Lisp_Object Qshort_name, Qlong_name;
341 Qjapanese_jisx0208_1978,
353 Qvietnamese_viscii_lower,
354 Qvietnamese_viscii_upper,
362 Lisp_Object Ql2r, Qr2l;
364 Lisp_Object Vcharset_hash_table;
366 static Charset_ID next_allocated_1_byte_leading_byte;
367 static Charset_ID next_allocated_2_byte_leading_byte;
369 /* Composite characters are characters constructed by overstriking two
370 or more regular characters.
372 1) The old Mule implementation involves storing composite characters
373 in a buffer as a tag followed by all of the actual characters
374 used to make up the composite character. I think this is a bad
375 idea; it greatly complicates code that wants to handle strings
376 one character at a time because it has to deal with the possibility
377 of great big ungainly characters. It's much more reasonable to
378 simply store an index into a table of composite characters.
380 2) The current implementation only allows for 16,384 separate
381 composite characters over the lifetime of the XEmacs process.
382 This could become a potential problem if the user
383 edited lots of different files that use composite characters.
384 Due to FSF bogosity, increasing the number of allowable
385 composite characters under Mule would decrease the number
386 of possible faces that can exist. Mule already has shrunk
387 this to 2048, and further shrinkage would become uncomfortable.
388 No such problems exist in XEmacs.
390 Composite characters could be represented as 0x80 C1 C2 C3,
391 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
392 for slightly under 2^20 (one million) composite characters
393 over the XEmacs process lifetime, and you only need to
394 increase the size of a Mule character from 19 to 21 bits.
395 Or you could use 0x80 C1 C2 C3 C4, allowing for about
396 85 million (slightly over 2^26) composite characters. */
399 /************************************************************************/
400 /* Basic Emchar functions */
401 /************************************************************************/
403 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
404 string in STR. Returns the number of bytes stored.
405 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encode character C into STR.  Two encodings are visible in this body.
   The first writes a variable-length sequence through `p': a lead byte
   tagged 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc according to the magnitude of C,
   followed by continuation bytes that each carry six bits of C tagged
   with 0x80.  The second splits C with BREAKUP_CHAR and emits leading
   bytes, with an extra prefix byte for private leading bytes.
   NOTE(review): presumably a preprocessor conditional that is not visible
   in this excerpt selects between the two -- confirm. */
409 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two-byte form. */
424 else if ( c <= 0x7ff )
426 *p++ = (c >> 6) | 0xc0;
427 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form. */
429 else if ( c <= 0xffff )
431 *p++ = (c >> 12) | 0xe0;
432 *p++ = ((c >> 6) & 0x3f) | 0x80;
433 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form. */
435 else if ( c <= 0x1fffff )
437 *p++ = (c >> 18) | 0xf0;
438 *p++ = ((c >> 12) & 0x3f) | 0x80;
439 *p++ = ((c >> 6) & 0x3f) | 0x80;
440 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form. */
442 else if ( c <= 0x3ffffff )
444 *p++ = (c >> 24) | 0xf8;
445 *p++ = ((c >> 18) & 0x3f) | 0x80;
446 *p++ = ((c >> 12) & 0x3f) | 0x80;
447 *p++ = ((c >> 6) & 0x3f) | 0x80;
448 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form for everything larger. */
452 *p++ = (c >> 30) | 0xfc;
453 *p++ = ((c >> 24) & 0x3f) | 0x80;
454 *p++ = ((c >> 18) & 0x3f) | 0x80;
455 *p++ = ((c >> 12) & 0x3f) | 0x80;
456 *p++ = ((c >> 6) & 0x3f) | 0x80;
457 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte based encoding. */
460 BREAKUP_CHAR (c, charset, c1, c2);
461 lb = CHAR_LEADING_BYTE (c);
462 if (LEADING_BYTE_PRIVATE_P (lb))
463 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
465 if (EQ (charset, Vcharset_control_1))
474 /* Return the first character from a Mule-encoded string in STR,
475 assuming it's non-ASCII. Do not call this directly.
476 Use the macro charptr_emchar() instead. */
/* Decode the first character of the encoded string STR.  Two decoders
   are visible in this body.  The first classifies the lead byte `b'
   against the thresholds 0xf8, 0xf0, 0xe0 and 0xc0, then folds `len'
   following bytes into `ch' six bits at a time.  The second reads a
   leading byte i0, handles LEADING_BYTE_CONTROL_1 and prefix bytes
   specially, looks the charset up by leading byte and builds the result
   with MAKE_CHAR.
   NOTE(review): presumably selected by a preprocessor conditional that
   is not visible in this excerpt -- confirm. */
479 non_ascii_charptr_emchar (CONST Bufbyte *str)
492 else if ( b >= 0xf8 )
497 else if ( b >= 0xf0 )
502 else if ( b >= 0xe0 )
507 else if ( b >= 0xc0 )
517 for( ; len > 0; len-- )
/* Append the low six bits of the current byte. */
520 ch = ( ch << 6 ) | ( b & 0x3f );
524 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the next byte minus 0x20. */
527 if (i0 == LEADING_BYTE_CONTROL_1)
528 return (Emchar) (*++str - 0x20);
530 if (LEADING_BYTE_PREFIX_P (i0))
535 charset = CHARSET_BY_LEADING_BYTE (i0);
536 if (XCHARSET_DIMENSION (charset) == 2)
539 return MAKE_CHAR (charset, i1, i2);
543 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
544 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for a non-ASCII Emchar CH.  CH is split into three
   fields f1, f2, f3 with CHAR_FIELD1/2/3.  The visible checks
   range-test f2 (and, further down, f1) against the official and private
   field bounds, reject field values below 0x20, and treat the positions
   0x20 and 0x7F specially: where the result depends on them, the answer
   is whether the charset found via CHARSET_BY_LEADING_BYTE has 96
   characters per dimension.  With ENABLE_COMPOSITE_CHARS, composite
   characters are looked up in Vcomposite_char_char2string_hash_table.
   NOTE(review): most return statements and the conditions selecting
   between the f2-based and f1-based halves are not visible in this
   excerpt. */
548 non_ascii_valid_char_p (Emchar ch)
552 /* Must have only lowest 19 bits set */
556 f1 = CHAR_FIELD1 (ch);
557 f2 = CHAR_FIELD2 (ch);
558 f3 = CHAR_FIELD3 (ch);
564 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
565 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
566 f2 > MAX_CHAR_FIELD2_PRIVATE)
571 if (f3 != 0x20 && f3 != 0x7F)
575 NOTE: This takes advantage of the fact that
576 FIELD2_TO_OFFICIAL_LEADING_BYTE and
577 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
579 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
580 return (XCHARSET_CHARS (charset) == 96);
586 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
587 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
588 f1 > MAX_CHAR_FIELD1_PRIVATE)
590 if (f2 < 0x20 || f3 < 0x20)
593 #ifdef ENABLE_COMPOSITE_CHARS
594 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
596 if (UNBOUNDP (Fgethash (make_int (ch),
597 Vcomposite_char_char2string_hash_table,
602 #endif /* ENABLE_COMPOSITE_CHARS */
604 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
607 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
609 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
612 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
614 return (XCHARSET_CHARS (charset) == 96);
620 /************************************************************************/
621 /* Basic string functions */
622 /************************************************************************/
624 /* Copy the character pointed to by PTR into STR, assuming it's
625 non-ASCII. Do not call this directly. Use the macro
626 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR and return the number of
   bytes written (strptr + 1 - str).  The switch enters at the case that
   matches the representation length and falls through, copying one
   further byte per case.
   NOTE(review): the switch dispatches on *strptr, i.e. the destination's
   first byte; the statement that stores the first byte of PTR there is
   not visible in this excerpt -- confirm it precedes the switch. */
629 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
631 Bufbyte *strptr = str;
633 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
635 /* Notice fallthrough. */
637 case 6: *++strptr = *ptr++;
638 case 5: *++strptr = *ptr++;
640 case 4: *++strptr = *ptr++;
641 case 3: *++strptr = *ptr++;
642 case 2: *++strptr = *ptr;
647 return strptr + 1 - str;
651 /************************************************************************/
652 /* streams of Emchars */
653 /************************************************************************/
655 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
656 The functions below are not meant to be called directly; use
657 the macros in insdel.h. */
/* Finish reading one character from STREAM, given its already-read first
   byte CH.  CH is stored in a local buffer, the remaining bytes (as many
   as REP_BYTES_BY_FIRST_BYTE (ch) calls for) are fetched with
   Lstream_getc() and appended, and the buffer is decoded with
   charptr_emchar().
   NOTE(review): the case labels and whatever checks follow each
   Lstream_getc() call are not visible in this excerpt. */
660 Lstream_get_emchar_1 (Lstream *stream, int ch)
662 Bufbyte str[MAX_EMCHAR_LEN];
663 Bufbyte *strptr = str;
665 str[0] = (Bufbyte) ch;
666 switch (REP_BYTES_BY_FIRST_BYTE (ch))
668 /* Notice fallthrough. */
671 ch = Lstream_getc (stream);
673 *++strptr = (Bufbyte) ch;
675 ch = Lstream_getc (stream);
677 *++strptr = (Bufbyte) ch;
680 ch = Lstream_getc (stream);
682 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
688 ch = Lstream_getc (stream);
690 *++strptr = (Bufbyte) ch;
695 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar() and pass the resulting bytes to Lstream_write(),
   whose result is returned. */
699 Lstream_fput_emchar (Lstream *stream, Emchar ch)
701 Bufbyte str[MAX_EMCHAR_LEN];
702 Bytecount len = set_charptr_emchar (str, ch);
703 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer with
   set_charptr_emchar() and hand the bytes to Lstream_unread(). */
707 Lstream_funget_emchar (Lstream *stream, Emchar ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bytecount len = set_charptr_emchar (str, ch);
711 Lstream_unread (stream, str, len);
715 /************************************************************************/
717 /************************************************************************/
/* Mark method registered for charset objects: calls MARKOBJ on the
   short_name, long_name, doc_string, registry, ccl_program,
   decoding_table and encoding_table members of OBJ.
   NOTE(review): the return statement is not visible here, and members
   such as `name' are not marked in the visible lines -- presumably one is
   returned for the caller to mark; confirm. */
720 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
722 struct Lisp_Charset *cs = XCHARSET (obj);
724 markobj (cs->short_name);
725 markobj (cs->long_name);
726 markobj (cs->doc_string);
727 markobj (cs->registry);
728 markobj (cs->ccl_program);
730 markobj (cs->decoding_table);
731 markobj (cs->encoding_table);
/* Print method for charset objects.  Writes to PRINTCHARFUN, in order:
   "#<charset ", the name, short name, long name and doc string, a
   formatted summary (type, direction, columns, graphic, final byte), the
   registry, and finally the object's uid in hex followed by ">".
   NOTE(review): the condition guarding the "printing unreadable object"
   error and the declaration (size) of `buf' used with sprintf are not
   visible in this excerpt. */
737 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
739 struct Lisp_Charset *cs = XCHARSET (obj);
743 error ("printing unreadable object #<charset %s 0x%x>",
744 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
747 write_c_string ("#<charset ", printcharfun);
748 print_internal (CHARSET_NAME (cs), printcharfun, 0);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
753 write_c_string (" ", printcharfun);
754 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Type is rendered as "94", "96", "94x94" or a fallback string that is
   not visible here; direction as "l2r" or "r2l". */
755 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
756 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
757 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
758 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
760 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
761 CHARSET_COLUMNS (cs),
762 CHARSET_GRAPHIC (cs),
764 write_c_string (buf, printcharfun);
765 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
766 sprintf (buf, " 0x%x>", cs->header.uid);
767 write_c_string (buf, printcharfun);
/* Layout description for struct Lisp_Charset: a run of 7 Lisp_Object
   members starting at `name', and a run of 2 starting at
   `decoding_table'.  The terminating entry and closing brace are not
   visible in this excerpt. */
770 static const struct lrecord_description charset_description[] = {
771 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
773 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Registers the charset lrecord type with mark_charset and print_charset;
   the three following method slots are 0.  The line carrying the
   description argument is not visible in this excerpt. */
778 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
779 mark_charset, print_charset, 0, 0, 0,
781 struct Lisp_Charset);
783 /* Make a new charset.
   Allocates a struct Lisp_Charset, fills its fields from the arguments,
   derives CHARSET_DIMENSION and CHARSET_CHARS from TYPE, computes
   CHARSET_REP_BYTES, and registers the object in
   charset_by_attributes, charset_by_leading_byte,
   rep_bytes_by_first_byte and Vcharset_hash_table.
   NOTE(review): the DECODING_TABLE argument is accepted but the visible
   code sets CHARSET_DECODING_TABLE to Qnil; the parameter named `reg'
   used below is declared on a line not visible in this excerpt.  Several
   alternative statements below (e.g. the two REP_BYTES formulas and the
   two charset_by_attributes stores) are presumably selected by
   preprocessor conditionals that are also not visible -- confirm. */
786 make_charset (Charset_ID id, Lisp_Object name,
787 unsigned char type, unsigned char columns, unsigned char graphic,
788 Bufbyte final, unsigned char direction, Lisp_Object short_name,
789 Lisp_Object long_name, Lisp_Object doc,
791 Lisp_Object decoding_table,
792 Emchar ucs_min, Emchar ucs_max,
793 Emchar code_offset, unsigned char byte_offset)
796 struct Lisp_Charset *cs =
797 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
798 XSETCHARSET (obj, cs);
800 CHARSET_ID (cs) = id;
801 CHARSET_NAME (cs) = name;
802 CHARSET_SHORT_NAME (cs) = short_name;
803 CHARSET_LONG_NAME (cs) = long_name;
804 CHARSET_DIRECTION (cs) = direction;
805 CHARSET_TYPE (cs) = type;
806 CHARSET_COLUMNS (cs) = columns;
807 CHARSET_GRAPHIC (cs) = graphic;
808 CHARSET_FINAL (cs) = final;
809 CHARSET_DOC_STRING (cs) = doc;
810 CHARSET_REGISTRY (cs) = reg;
811 CHARSET_CCL_PROGRAM (cs) = Qnil;
812 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
814 CHARSET_DECODING_TABLE(cs) = Qnil;
815 CHARSET_ENCODING_TABLE(cs) = Qnil;
816 CHARSET_UCS_MIN(cs) = ucs_min;
817 CHARSET_UCS_MAX(cs) = ucs_max;
818 CHARSET_CODE_OFFSET(cs) = code_offset;
819 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension and characters-per-dimension from the type code. */
822 switch (CHARSET_TYPE (cs))
824 case CHARSET_TYPE_94:
825 CHARSET_DIMENSION (cs) = 1;
826 CHARSET_CHARS (cs) = 94;
828 case CHARSET_TYPE_96:
829 CHARSET_DIMENSION (cs) = 1;
830 CHARSET_CHARS (cs) = 96;
832 case CHARSET_TYPE_94X94:
833 CHARSET_DIMENSION (cs) = 2;
834 CHARSET_CHARS (cs) = 94;
836 case CHARSET_TYPE_96X96:
837 CHARSET_DIMENSION (cs) = 2;
838 CHARSET_CHARS (cs) = 96;
841 case CHARSET_TYPE_128:
842 CHARSET_DIMENSION (cs) = 1;
843 CHARSET_CHARS (cs) = 128;
845 case CHARSET_TYPE_128X128:
846 CHARSET_DIMENSION (cs) = 2;
847 CHARSET_CHARS (cs) = 128;
849 case CHARSET_TYPE_256:
850 CHARSET_DIMENSION (cs) = 1;
851 CHARSET_CHARS (cs) = 256;
853 case CHARSET_TYPE_256X256:
854 CHARSET_DIMENSION (cs) = 2;
855 CHARSET_CHARS (cs) = 256;
/* ASCII is stored in one byte; other charsets need the dimension plus
   one or two extra bytes (the selecting condition is not visible). */
861 if (id == LEADING_BYTE_ASCII)
862 CHARSET_REP_BYTES (cs) = 1;
864 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
866 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
871 /* some charsets do not have final characters. This includes
872 ASCII, Control-1, Composite, and the two faux private
875 if (code_offset == 0)
877 assert (NILP (charset_by_attributes[type][final]));
878 charset_by_attributes[type][final] = obj;
881 assert (NILP (charset_by_attributes[type][final][direction]));
882 charset_by_attributes[type][final][direction] = obj;
886 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
887 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
890 /* official leading byte */
891 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
894 /* Some charsets are "faux" and don't have names or really exist at
895 all except in the leading-byte table. */
897 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  A separate counter is kept per dimension
   (next_allocated_1_byte_leading_byte / next_allocated_2_byte_leading_byte);
   each is compared against its MAX_LEADING_BYTE_PRIVATE_n limit before
   being post-incremented.  Signals "No more character sets free for this
   dimension" when exhausted.
   NOTE(review): the branch structure tying these statements together and
   the return statement are not visible in this excerpt. */
902 get_unallocated_leading_byte (int dimension)
908 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
911 lb = next_allocated_1_byte_leading_byte++;
915 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
918 lb = next_allocated_2_byte_leading_byte++;
923 ("No more character sets free for this dimension",
924 make_int (dimension));
/* Compute the first position byte of character CH within CHARSET.
   Three sources are tried in the visible code:
   1. the charset's encoding table, if it is not Qnil, looked up with
      get_char_code_table();
   2. the charset's [UCS_MIN, UCS_MAX] range: the offset of CH in that
      range plus CODE_OFFSET is divided by a power of CHARSET_CHARS that
      depends on the dimension, then BYTE_OFFSET is added;
   3. when CODE_OFFSET is 0, a block of the MIN_CHAR_94 / MIN_CHAR_96 /
      MIN_CHAR_94x94 / MIN_CHAR_96x96 code ranges selected by the
      charset's final byte relative to '0'.
   NOTE(review): the return statements of the encoding-table branch, the
   upper-bound halves of the range tests and the fall-through result are
   not visible in this excerpt. */
931 charset_get_byte1 (Lisp_Object charset, Emchar ch)
936 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
938 Lisp_Object value = get_char_code_table (ch, table);
942 Emchar code = XINT (value);
946 else if (code < (1 << 16))
948 else if (code < (1 << 24))
954 if ((XCHARSET_UCS_MIN (charset) <= ch)
955 && (ch <= XCHARSET_UCS_MAX (charset)))
956 return (ch - XCHARSET_UCS_MIN (charset)
957 + XCHARSET_CODE_OFFSET (charset))
958 / (XCHARSET_DIMENSION (charset) == 1 ?
961 XCHARSET_DIMENSION (charset) == 2 ?
962 XCHARSET_CHARS (charset)
964 XCHARSET_DIMENSION (charset) == 3 ?
965 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
967 XCHARSET_CHARS (charset)
968 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
969 + XCHARSET_BYTE_OFFSET (charset);
970 else if (XCHARSET_CODE_OFFSET (charset) == 0)
972 if (XCHARSET_DIMENSION (charset) == 1)
974 if (XCHARSET_CHARS (charset) == 94)
976 if (((d = ch - (MIN_CHAR_94
977 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
981 else if (XCHARSET_CHARS (charset) == 96)
983 if (((d = ch - (MIN_CHAR_96
984 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
991 else if (XCHARSET_DIMENSION (charset) == 2)
993 if (XCHARSET_CHARS (charset) == 94)
995 if (((d = ch - (MIN_CHAR_94x94
996 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
/* Row index within the 94x94 block, biased by 33. */
999 return (d / 94) + 33;
1001 else if (XCHARSET_CHARS (charset) == 96)
1003 if (((d = ch - (MIN_CHAR_96x96
1004 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
/* Row index within the 96x96 block, biased by 32. */
1007 return (d / 96) + 32;
/* Compute the second position byte of character CH within CHARSET.
   Dimension-1 charsets are handled first (the result of that branch is
   not visible in this excerpt).  Otherwise the encoding table is
   consulted if present, then the [UCS_MIN, UCS_MAX] range, and finally
   the 94x94 or 96x96 block selected by the charset's final byte; in the
   last case 0 is returned when CH lies outside the block. */
1015 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1017 if (XCHARSET_DIMENSION (charset) == 1)
1023 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1025 Lisp_Object value = get_char_code_table (ch, table);
1029 Emchar code = XINT (value);
/* NOTE(review): for codes below 1<<16 the low byte is returned, but for
   wider codes bits 16-23 (or 24-31) are returned rather than the byte
   following the top one -- confirm this is the intended "byte 2". */
1031 if (code < (1 << 16))
1032 return (unsigned char)code;
1033 else if (code < (1 << 24))
1034 return (unsigned char)(code >> 16);
1036 return (unsigned char)(code >> 24);
1039 if ((XCHARSET_UCS_MIN (charset) <= ch)
1040 && (ch <= XCHARSET_UCS_MAX (charset)))
1041 return ((ch - XCHARSET_UCS_MIN (charset)
1042 + XCHARSET_CODE_OFFSET (charset))
1043 / (XCHARSET_DIMENSION (charset) == 2 ?
1046 XCHARSET_DIMENSION (charset) == 3 ?
1047 XCHARSET_CHARS (charset)
1049 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1050 % XCHARSET_CHARS (charset)
1051 + XCHARSET_BYTE_OFFSET (charset);
/* Column index within the 94x94 block, biased by 33. */
1052 else if (XCHARSET_CHARS (charset) == 94)
1053 return (MIN_CHAR_94x94
1054 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1055 && (ch < MIN_CHAR_94x94
1056 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1057 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1058 else /* if (XCHARSET_CHARS (charset) == 96) */
1059 return (MIN_CHAR_96x96
1060 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1061 && (ch < MIN_CHAR_96x96
1062 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1063 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1067 Lisp_Object Vdefault_coded_charset_priority_list;
1071 /************************************************************************/
1072 /* Basic charset Lisp functions */
1073 /************************************************************************/
/* Lisp primitive `charsetp': returns Qt when OBJECT satisfies CHARSETP,
   Qnil otherwise.  NOTE(review): the end of the doc comment and the
   argument list are not visible in this excerpt. */
1075 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1076 Return non-nil if OBJECT is a charset.
1080 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': returns a charset object unchanged;
   otherwise checks that the argument is a symbol and looks it up in
   Vcharset_hash_table, yielding Qnil when absent.  NOTE(review): the end
   of the doc comment and the argument list are not visible in this
   excerpt. */
1083 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1084 Retrieve the charset of the given name.
1085 If CHARSET-OR-NAME is a charset object, it is simply returned.
1086 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1087 nil is returned. Otherwise the associated charset object is returned.
1091 if (CHARSETP (charset_or_name))
1092 return charset_or_name;
1094 CHECK_SYMBOL (charset_or_name);
1095 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': wraps Ffind_charset() and signals
   "No such charset" with NAME as data.  NOTE(review): the condition
   guarding the error and the return statement are not visible in this
   excerpt. */
1098 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1099 Retrieve the charset of the given name.
1100 Same as `find-charset' except an error is signalled if there is no such
1101 charset instead of returning nil.
1105 Lisp_Object charset = Ffind_charset (name);
1108 signal_simple_error ("No such charset", name);
1112 /* We store the charsets in hash tables with the names as the key and the
1113 actual charset object as the value. Occasionally we need to use them
1114 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash() to add_charset_to_list_mapper():
   points at the list variable being accumulated. */
1115 struct charset_list_closure
1117 Lisp_Object *charset_list;
/* elisp_maphash() callback used by Fcharset_list: conses the name of the
   charset VALUE onto the list referenced by the closure.  KEY is not
   used in the visible lines. */
1121 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1122 void *charset_list_closure)
1124 /* This function can GC */
1125 struct charset_list_closure *chcl =
1126 (struct charset_list_closure*) charset_list_closure;
1127 Lisp_Object *charset_list = chcl->charset_list;
1129 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list': maps add_charset_to_list_mapper() over
   Vcharset_hash_table, accumulating charset names into a GC-protected
   local list, and returns that list.  NOTE(review): the matching UNGCPRO
   is not visible in this excerpt. */
1133 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1134 Return a list of the names of all defined charsets.
1138 Lisp_Object charset_list = Qnil;
1139 struct gcpro gcpro1;
1140 struct charset_list_closure charset_list_closure;
1142 GCPRO1 (charset_list);
1143 charset_list_closure.charset_list = &charset_list;
1144 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1145 &charset_list_closure);
1148 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset()
   (which signals an error for unknown charsets) and returns its name. */
1151 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1152 Return the name of the given charset.
1156 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.  Validates NAME and DOC-STRING, refuses
   to redefine an existing charset, parses the PROPS property list into
   local settings, derives the charset type from dimension and chars,
   rejects a DIMENSION/CHARS/FINAL combination that is already taken,
   chooses an id, fills in defaults for doc string, registry, short name,
   long name and columns, and finally builds the object with
   make_charset().  Many short lines of this definition (braces, `else',
   preprocessor conditionals, several assignments) are not visible in
   this excerpt. */
1159 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1160 Define a new character set.
1161 This function is for use with Mule support.
1162 NAME is a symbol, the name by which the character set is normally referred.
1163 DOC-STRING is a string describing the character set.
1164 PROPS is a property list, describing the specific nature of the
1165 character set. Recognized properties are:
1167 'short-name Short version of the charset name (ex: Latin-1)
1168 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1169 'registry A regular expression matching the font registry field for
1171 'dimension Number of octets used to index a character in this charset.
1172 Either 1 or 2. Defaults to 1.
1173 'columns Number of columns used to display a character in this charset.
1174 Only used in TTY mode. (Under X, the actual width of a
1175 character can be derived from the font used to display the
1176 characters.) If unspecified, defaults to the dimension
1177 (this is almost always the correct value).
1178 'chars Number of characters in each dimension (94 or 96).
1179 Defaults to 94. Note that if the dimension is 2, the
1180 character set thus described is 94x94 or 96x96.
1181 'final Final byte of ISO 2022 escape sequence. Must be
1182 supplied. Each combination of (DIMENSION, CHARS) defines a
1183 separate namespace for final bytes. Note that ISO
1184 2022 restricts the final byte to the range
1185 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1186 dimension == 2. Note also that final bytes in the range
1187 0x30 - 0x3F are reserved for user-defined (not official)
1189 'graphic 0 (use left half of font on output) or 1 (use right half
1190 of font on output). Defaults to 0. For example, for
1191 a font whose registry is ISO8859-1, the left half
1192 (octets 0x20 - 0x7F) is the `ascii' character set, while
1193 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1194 character set. With 'graphic set to 0, the octets
1195 will have their high bit cleared; with it set to 1,
1196 the octets will have their high bit set.
1197 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1199 'ccl-program A compiled CCL program used to convert a character in
1200 this charset into an index into the font. This is in
1201 addition to the 'graphic property. The CCL program
1202 is passed the octets of the character, with the high
1203 bit cleared and set depending upon whether the value
1204 of the 'graphic property is 0 or 1.
1206 (name, doc_string, props))
1208 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1209 int direction = CHARSET_LEFT_TO_RIGHT;
1211 Lisp_Object registry = Qnil;
1212 Lisp_Object charset;
1213 Lisp_Object rest, keyword, value;
1214 Lisp_Object ccl_program = Qnil;
1215 Lisp_Object short_name = Qnil, long_name = Qnil;
1217 Emchar code_offset = 0;
1218 unsigned char byte_offset = 0;
1221 CHECK_SYMBOL (name);
1222 if (!NILP (doc_string))
1223 CHECK_STRING (doc_string);
1225 charset = Ffind_charset (name);
1226 if (!NILP (charset))
1227 signal_simple_error ("Cannot redefine existing charset", name);
1229 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1231 if (EQ (keyword, Qshort_name))
1233 CHECK_STRING (value);
// NOTE(review): this is a plain `if', not `else if'.  A 'short-name
// keyword handled above therefore also runs through the chain that starts
// here; if the "Unrecognized property" error below is that chain's final
// else (the `else' line is not visible in this excerpt), every use of
// 'short-name would signal an error.  Confirm and chain with `else if'.
1237 if (EQ (keyword, Qlong_name))
1239 CHECK_STRING (value);
1243 else if (EQ (keyword, Qdimension))
1246 dimension = XINT (value);
1247 if (dimension < 1 || dimension > 2)
1248 signal_simple_error ("Invalid value for 'dimension", value);
1251 else if (EQ (keyword, Qchars))
1254 chars = XINT (value);
1255 if (chars != 94 && chars != 96)
1256 signal_simple_error ("Invalid value for 'chars", value);
1259 else if (EQ (keyword, Qcolumns))
1262 columns = XINT (value);
1263 if (columns != 1 && columns != 2)
1264 signal_simple_error ("Invalid value for 'columns", value);
1267 else if (EQ (keyword, Qgraphic))
1270 graphic = XINT (value);
// NOTE(review): two alternative range checks follow; presumably selected
// by a preprocessor conditional that is not visible here.
1272 if (graphic < 0 || graphic > 2)
1274 if (graphic < 0 || graphic > 1)
1276 signal_simple_error ("Invalid value for 'graphic", value);
1279 else if (EQ (keyword, Qregistry))
1281 CHECK_STRING (value);
1285 else if (EQ (keyword, Qdirection))
1287 if (EQ (value, Ql2r))
1288 direction = CHARSET_LEFT_TO_RIGHT;
1289 else if (EQ (value, Qr2l))
1290 direction = CHARSET_RIGHT_TO_LEFT;
1292 signal_simple_error ("Invalid value for 'direction", value);
1295 else if (EQ (keyword, Qfinal))
1297 CHECK_CHAR_COERCE_INT (value);
1298 final = XCHAR (value);
1299 if (final < '0' || final > '~')
1300 signal_simple_error ("Invalid value for 'final", value);
1303 else if (EQ (keyword, Qccl_program))
1305 CHECK_VECTOR (value);
1306 ccl_program = value;
1310 signal_simple_error ("Unrecognized property", keyword);
1314 error ("'final must be specified");
1315 if (dimension == 2 && final > 0x5F)
1317 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1321 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1323 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1325 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1326 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1328 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1335 if (code_offset == 0)
1336 id = CHARSET_ID_OFFSET_94 + final;
1338 id = get_unallocated_leading_byte (dimension);
1340 else if (chars == 96)
1342 if (code_offset == 0)
1343 id = CHARSET_ID_OFFSET_96 + final;
1345 id = get_unallocated_leading_byte (dimension);
1352 else if (dimension == 2)
1356 if (code_offset == 0)
1357 id = CHARSET_ID_OFFSET_94x94 + final;
1359 id = get_unallocated_leading_byte (dimension);
1361 else if (chars == 96)
1363 id = get_unallocated_leading_byte (dimension);
1378 else if (chars == 96)
1382 id = get_unallocated_leading_byte (dimension);
1385 if (NILP (doc_string))
1386 doc_string = build_string ("");
1388 if (NILP (registry))
1389 registry = build_string ("");
1391 if (NILP (short_name))
1392 XSETSTRING (short_name, XSYMBOL (name)->name);
1394 if (NILP (long_name))
1395 long_name = doc_string;
1398 columns = dimension;
1399 charset = make_charset (id, name, type, columns, graphic,
1400 final, direction, short_name, long_name,
1401 doc_string, registry,
1402 Qnil, 0, 0, 0, byte_offset);
1403 if (!NILP (ccl_program))
1404 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Fmake_reverse_direction_charset -- Lisp primitive
   make-reverse-direction-charset.

   CHARSET is resolved with Fget_charset and NEW-NAME must be a symbol
   that Ffind_charset does not already know.  An error is signalled if
   CHARSET already has a reverse-direction charset.  The new charset is
   built by make_charset from the type, columns, graphic, final, short
   name, long name, doc string, registry, decoding table, UCS min/max,
   code offset and byte offset of the original, with a leading byte
   obtained from get_unallocated_leading_byte and the opposite
   direction.  Afterwards each of the two charsets is stored in the
   reverse-direction-charset slot of the other.  */
1408 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1410 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1411 NEW-NAME is the name of the new charset. Return the new charset.
1413 (charset, new_name))
1415 Lisp_Object new_charset = Qnil;
1416 int id, dimension, columns, graphic, final;
1417 int direction, type;
1418 Lisp_Object registry, doc_string, short_name, long_name;
1419 struct Lisp_Charset *cs;
1421 charset = Fget_charset (charset);
/* A charset may have at most one reverse-direction partner.  */
1422 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1423 signal_simple_error ("Charset already has reverse-direction charset",
1426 CHECK_SYMBOL (new_name);
1427 if (!NILP (Ffind_charset (new_name)))
1428 signal_simple_error ("Cannot redefine existing charset", new_name);
1430 cs = XCHARSET (charset);
/* Copy the attributes of the original charset.  */
1432 type = CHARSET_TYPE (cs);
1433 columns = CHARSET_COLUMNS (cs);
1434 dimension = CHARSET_DIMENSION (cs);
1435 id = get_unallocated_leading_byte (dimension);
1437 graphic = CHARSET_GRAPHIC (cs);
1438 final = CHARSET_FINAL (cs);
/* Flip the direction: default to right-to-left, and switch to
   left-to-right when the original is itself right-to-left.  */
1439 direction = CHARSET_RIGHT_TO_LEFT;
1440 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1441 direction = CHARSET_LEFT_TO_RIGHT;
1442 doc_string = CHARSET_DOC_STRING (cs);
1443 short_name = CHARSET_SHORT_NAME (cs);
1444 long_name = CHARSET_LONG_NAME (cs);
1445 registry = CHARSET_REGISTRY (cs);
1447 new_charset = make_charset (id, new_name, type, columns,
1448 graphic, final, direction, short_name, long_name,
1449 doc_string, registry,
1451 CHARSET_DECODING_TABLE(cs),
1452 CHARSET_UCS_MIN(cs),
1453 CHARSET_UCS_MAX(cs),
1454 CHARSET_CODE_OFFSET(cs),
1455 CHARSET_BYTE_OFFSET(cs)
/* Link the original and the new charset to one another.  */
1461 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1462 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Fdefine_charset_alias -- Lisp primitive define-charset-alias.

   ALIAS must be a symbol.  CHARSET is resolved with Fget_charset and
   the resulting charset is stored in Vcharset_hash_table under the key
   ALIAS.  The return value is whatever Fputhash returns.  */
1467 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1468 Define symbol ALIAS as an alias for CHARSET.
1472 CHECK_SYMBOL (alias);
1473 charset = Fget_charset (charset);
1474 return Fputhash (alias, charset, Vcharset_hash_table);
1477 /* #### Reverse direction charsets not yet implemented. */
/* Fcharset_reverse_direction_charset -- accessor for the
   reverse-direction-charset slot.

   Resolves CHARSET with Fget_charset and returns the value of
   XCHARSET_REVERSE_DIRECTION_CHARSET for it.

   NOTE(review): syms_of_mule_charset contains no DEFSUBR for this
   function (only a commented-out DEFSUBR (Freverse_direction_charset)),
   so it is presumably not exported to Lisp at present -- confirm.  */
1479 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1481 Return the reverse-direction charset parallel to CHARSET, if any.
1482 This is the charset with the same properties (in particular, the same
1483 dimension, number of characters per dimension, and final byte) as
1484 CHARSET but whose characters are displayed in the opposite direction.
1488 charset = Fget_charset (charset);
1489 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Fcharset_from_attributes -- Lisp primitive charset-from-attributes.

   Argument checks visible here:
     DIMENSION  integer, must be 1 or 2.
     CHARS      must be 94 or 96.
     FINAL      character (or integer coerced to one) between 0x30 (the
                character 0) and 0x7E (tilde); for dimension 2 it must
                additionally not exceed 0x5F.
     DIRECTION  the symbol l2r, the symbol r2l, or nil.
   Any violation signals an error via signal_simple_error.

   DIMENSION and CHARS select one of the CHARSET_TYPE_94, _96, _94X94 or
   _96X96 types, and the charset is looked up with CHARSET_BY_ATTRIBUTES
   using that type, FINAL and a direction.  The visible return statement
   yields XCHARSET_NAME of the charset found, i.e. its name symbol.  */
1493 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1494 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1495 If DIRECTION is omitted, both directions will be checked (left-to-right
1496 will be returned if character sets exist for both directions).
1498 (dimension, chars, final, direction))
1500 int dm, ch, fi, di = -1;
1502 Lisp_Object obj = Qnil;
1504 CHECK_INT (dimension);
1505 dm = XINT (dimension);
1506 if (dm < 1 || dm > 2)
1507 signal_simple_error ("Invalid value for DIMENSION", dimension);
1511 if (ch != 94 && ch != 96)
1512 signal_simple_error ("Invalid value for CHARS", chars);
1514 CHECK_CHAR_COERCE_INT (final);
1516 if (fi < '0' || fi > '~')
1517 signal_simple_error ("Invalid value for FINAL", final);
1519 if (EQ (direction, Ql2r))
1520 di = CHARSET_LEFT_TO_RIGHT;
1521 else if (EQ (direction, Qr2l))
1522 di = CHARSET_RIGHT_TO_LEFT;
1523 else if (!NILP (direction))
1524 signal_simple_error ("Invalid value for DIRECTION", direction);
1526 if (dm == 2 && fi > 0x5F)
1528 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1531 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1533 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1537 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1539 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1542 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1545 return XCHARSET_NAME (obj);
/* Fcharset_short_name -- resolves CHARSET with Fget_charset and returns
   its XCHARSET_SHORT_NAME slot.  */
1549 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1550 Return short name of CHARSET.
1554 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Fcharset_long_name -- resolves CHARSET with Fget_charset and returns
   its XCHARSET_LONG_NAME slot.  */
1557 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1558 Return long name of CHARSET.
1562 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Fcharset_description -- resolves CHARSET with Fget_charset and
   returns its XCHARSET_DOC_STRING slot; the description exposed to
   Lisp is the charset doc string.  */
1565 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1566 Return description of CHARSET.
1570 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Fcharset_dimension -- resolves CHARSET with Fget_charset and returns
   XCHARSET_DIMENSION for it, boxed as a Lisp integer by make_int.  */
1573 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1574 Return dimension of CHARSET.
1578 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Fcharset_property -- generic property accessor for charsets.

   CHARSET is resolved with Fget_charset and PROP must be a symbol.
   PROP is compared with EQ against the known property symbols:
     name, short-name, long-name, doc-string, registry, ccl-program
         -> the stored Lisp object is returned unchanged;
     dimension, columns, graphic, chars
         -> the C integer is boxed with make_int;
     final
         -> the final byte is returned as a character (make_char);
     direction
         -> the symbol l2r when the charset is left-to-right, else r2l;
     reverse-direction-charset
         -> the visible return yields XCHARSET_NAME of the stored
            reverse-direction charset (the handling of an unset slot is
            not visible in this excerpt).
   Any other PROP signals an error through signal_simple_error; the
   trailing return only exists to keep the compiler quiet.  */
1581 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1582 Return property PROP of CHARSET.
1583 Recognized properties are those listed in `make-charset', as well as
1584 'name and 'doc-string.
1588 struct Lisp_Charset *cs;
1590 charset = Fget_charset (charset);
1591 cs = XCHARSET (charset);
1593 CHECK_SYMBOL (prop);
1594 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1595 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1596 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1597 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1598 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1599 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1600 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1601 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1602 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1603 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1604 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1605 if (EQ (prop, Qdirection))
1606 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1607 if (EQ (prop, Qreverse_direction_charset))
1609 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1613 return XCHARSET_NAME (obj);
1615 signal_simple_error ("Unrecognized charset property name", prop);
1616 return Qnil; /* not reached */
/* Fcharset_id -- resolves CHARSET with Fget_charset and returns
   XCHARSET_LEADING_BYTE for it as a Lisp integer; the identification
   number exposed to Lisp is therefore the leading byte value.  */
1619 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1620 Return charset identification number of CHARSET.
1624 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1627 /* #### We need to figure out which properties we really want to
/* Fset_charset_ccl_program -- setter for the ccl-program slot.

   CHARSET is resolved with Fget_charset; CCL-PROGRAM must be a vector
   (CHECK_VECTOR) and is stored in XCHARSET_CCL_PROGRAM as is.  No
   further validation of the vector contents is visible here.  */
1630 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1631 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1633 (charset, ccl_program))
1635 charset = Fget_charset (charset);
1636 CHECK_VECTOR (ccl_program);
1637 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* invalidate_charset_font_caches -- empty the per-device font cache
   kept for CHARSET.

   Iterates over all devices with DEVICE_LOOP_NO_BREAK.  For each device
   the entry stored under CHARSET in the charset_font_cache member is
   fetched, and if one exists that hash table is cleared with Fclrhash.
   The entry itself stays in place; only its contents are dropped.  */
1642 invalidate_charset_font_caches (Lisp_Object charset)
1644 /* Invalidate font cache entries for charset on all devices. */
1645 Lisp_Object devcons, concons, hash_table;
1646 DEVICE_LOOP_NO_BREAK (devcons, concons)
1648 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is used as the default so that a missing entry can be told
   apart from a stored value.  */
1649 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1650 if (!UNBOUNDP (hash_table))
1651 Fclrhash (hash_table);
/* Fset_charset_registry -- setter for the registry slot.

   CHARSET is resolved with Fget_charset; REGISTRY must be a string and
   is stored in XCHARSET_REGISTRY.  Because the registry takes part in
   font selection, the cached fonts for the charset are then dropped
   with invalidate_charset_font_caches, and
   face_property_was_changed is called for the font property of
   Vdefault_face with the global locale.  */
1655 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1656 Set the 'registry property of CHARSET to REGISTRY.
1658 (charset, registry))
1660 charset = Fget_charset (charset);
1661 CHECK_STRING (registry);
1662 XCHARSET_REGISTRY (charset) = registry;
1663 invalidate_charset_font_caches (charset);
1664 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Fcharset_mapping_table -- resolves CHARSET with Fget_charset and
   returns its XCHARSET_DECODING_TABLE slot; the mapping table seen from
   Lisp is the decoding table.  */
1669 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1670 Return mapping-table of CHARSET.
1674 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Fset_charset_mapping_table -- install a decoding table for CHARSET
   and rebuild the matching encoding table.

   TABLE handling visible here:
     nil     -> the decoding table is set to nil and the encoding table
                is cleared to Qnil.
     vector  -> its length must not exceed CHARSET_CHARS, otherwise
                args_out_of_range is signalled; it then becomes the
                decoding table.
     other   -> a wrong-type-argument error naming vector-or-nil-p.

   The encoding table is then recreated with make_char_code_table and
   filled according to CHARSET_DIMENSION.  For a flat table, the
   character at index i is entered with code i + CHARSET_BYTE_OFFSET.
   For a table whose elements are themselves vectors, each row must not
   be longer than CHARSET_CHARS, and the character at row i, column j
   is entered with code
   ((i + CHARSET_BYTE_OFFSET) << 8) | (j + CHARSET_BYTE_OFFSET).

   NOTE(review): old_table is loaded from CHARSET_ENCODING_TABLE, but on
   the row-too-long error path it is written back into
   CHARSET_DECODING_TABLE.  That looks like it was meant to save and
   restore the previous decoding table -- confirm and fix in the
   complete source.  */
1677 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1678 Set mapping-table of CHARSET to TABLE.
1682 struct Lisp_Charset *cs;
1683 Lisp_Object old_table;
1686 charset = Fget_charset (charset);
1687 cs = XCHARSET (charset);
1689 if (EQ (table, Qnil))
1691 CHARSET_DECODING_TABLE(cs) = table;
1692 CHARSET_ENCODING_TABLE(cs) = Qnil;
1695 else if (VECTORP (table))
1697 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1698 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1699 old_table = CHARSET_ENCODING_TABLE(cs);
1700 CHARSET_DECODING_TABLE(cs) = table;
1703 signal_error (Qwrong_type_argument,
1704 list2 (build_translated_string ("vector-or-nil-p"),
1706 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1708 switch (CHARSET_DIMENSION (cs))
1711 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1712 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1714 Lisp_Object c = XVECTOR_DATA(table)[i];
1717 put_char_code_table (XCHAR (c),
1718 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1719 CHARSET_ENCODING_TABLE(cs));
1723 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1724 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1726 Lisp_Object v = XVECTOR_DATA(table)[i];
1732 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1734 CHARSET_DECODING_TABLE(cs) = old_table;
1735 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1737 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1739 Lisp_Object c = XVECTOR_DATA(v)[j];
1744 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1745 | (j + CHARSET_BYTE_OFFSET (cs))),
1746 CHARSET_ENCODING_TABLE(cs));
1750 put_char_code_table (XCHAR (v),
1751 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1752 CHARSET_ENCODING_TABLE(cs));
1761 /************************************************************************/
1762 /* Lisp primitives for working with characters */
1763 /************************************************************************/
/* Fmake_char -- Lisp primitive make-char.

   CHARSET is resolved with Fget_charset.  The permitted octet range
   depends on the charset:
     ascii                      0 .. 127
     control-1                  0 .. 31
     charsets with 256 chars    0 .. 255
     charsets with 94 chars    33 .. 126
     all others (96 chars)     32 .. 127
   An octet outside that range signals args_out_of_range_3 with the
   offending argument and both limits.

   For a charset of dimension one, ARG2 must be nil and the character
   is built by MAKE_CHAR from ARG1 alone.  Otherwise ARG2 is masked
   with 0x7f, range checked in the same way, and MAKE_CHAR is called
   with both octets.  */
1765 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1766 Make a character from CHARSET and octets ARG1 and ARG2.
1767 ARG2 is required only for characters from two-dimensional charsets.
1768 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1769 character s with caron.
1771 (charset, arg1, arg2))
1773 struct Lisp_Charset *cs;
1775 int lowlim, highlim;
1777 charset = Fget_charset (charset);
1778 cs = XCHARSET (charset);
1780 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1781 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1783 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1785 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1786 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1789 /* It is useful (and safe, according to Olivier Galibert) to strip
1790 the 8th bit off ARG1 and ARG2 because it allows programmers to
1791 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1792 Latin 2 code of the character. */
1800 if (a1 < lowlim || a1 > highlim)
1801 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1803 if (CHARSET_DIMENSION (cs) == 1)
1807 ("Charset is of dimension one; second octet must be nil", arg2);
1808 return make_char (MAKE_CHAR (charset, a1, 0));
1817 a2 = XINT (arg2) & 0x7f;
1819 if (a2 < lowlim || a2 > highlim)
1820 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1822 return make_char (MAKE_CHAR (charset, a1, a2));
/* Fchar_charset -- Lisp primitive char-charset.

   CH must be a character (an integer is coerced by
   CHECK_CHAR_COERCE_INT).  The charset is determined with CHAR_CHARSET
   and its XCHARSET_NAME is returned, so the caller receives the name
   symbol rather than the charset object.  */
1825 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1826 Return the character set of char CH.
1830 CHECK_CHAR_COERCE_INT (ch);
1832 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Fsplit_char -- Lisp primitive split-char.

   CHARACTER must be a character (an integer is coerced).  BREAKUP_CHAR
   decomposes it into its charset and the position codes c1 and c2.
   When the charset has dimension 2 the result is a three-element list
   of charset name, c1 and c2; the other visible path builds a
   two-element list of charset name and c1.  The locals charset and rc
   are protected with GCPRO2 because list construction can trigger
   garbage collection.  */
1835 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1836 Return list of charset and one or two position-codes of CHAR.
1840 /* This function can GC */
1841 struct gcpro gcpro1, gcpro2;
1842 Lisp_Object charset = Qnil;
1843 Lisp_Object rc = Qnil;
1846 GCPRO2 (charset, rc);
1847 CHECK_CHAR_COERCE_INT (character);
1849 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1851 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1853 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1857 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1865 #ifdef ENABLE_COMPOSITE_CHARS
1866 /************************************************************************/
1867 /* composite character functions */
1868 /************************************************************************/
/* lookup_composite_char -- map the LEN bytes at STR to a composite
   character, allocating a new one if needed.

   The bytes are turned into a Lisp string which is looked up in
   Vcomposite_char_string2char_hash_table.  When a new character has to
   be allocated it is built with MAKE_CHAR in Vcharset_composite from
   the composite_char_row_next / composite_char_col_next counters, and
   the mapping is recorded in both hash tables.  Once the row counter
   has reached 128 an error is signalled instead.

   NOTE(review): the test that decides between reusing an existing entry
   and allocating a new one, and the return statement, are not visible
   in this excerpt.  */
1871 lookup_composite_char (Bufbyte *str, int len)
1873 Lisp_Object lispstr = make_string (str, len);
1874 Lisp_Object ch = Fgethash (lispstr,
1875 Vcomposite_char_string2char_hash_table,
/* All rows are used up; numbers are never handed out twice.  */
1881 if (composite_char_row_next >= 128)
1882 signal_simple_error ("No more composite chars available", lispstr);
1883 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1884 composite_char_col_next);
/* Record the new mapping in both directions.  */
1885 Fputhash (make_char (emch), lispstr,
1886 Vcomposite_char_char2string_hash_table);
1887 Fputhash (lispstr, make_char (emch),
1888 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell: the column wraps from 128 back to 32
   and the row moves on by one.  */
1889 composite_char_col_next++;
1890 if (composite_char_col_next >= 128)
1892 composite_char_col_next = 32;
1893 composite_char_row_next++;
/* composite_char_string -- look up the string that composite character
   CH was created from.

   CH is boxed with make_char and looked up in
   Vcomposite_char_char2string_hash_table.  The assert documents that
   every composite character is expected to have an entry there.  The
   return statement is not visible in this excerpt; presumably the
   string found is returned -- confirm.  */
1902 composite_char_string (Emchar ch)
1904 Lisp_Object str = Fgethash (make_char (ch),
1905 Vcomposite_char_char2string_hash_table,
1907 assert (!UNBOUNDP (str));
/* Fmake_composite_char -- Lisp entry point for creating a composite
   character.

   STRING must be a Lisp string.  Its data and length are handed to
   lookup_composite_char and the resulting Emchar is returned as a Lisp
   character.  */
1911 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1912 Convert a string into a single composite character.
1913 The character is the result of overstriking all the characters in
1918 CHECK_STRING (string);
1919 return make_char (lookup_composite_char (XSTRING_DATA (string),
1920 XSTRING_LENGTH (string)));
/* Fcomposite_char_string -- Lisp entry point returning the component
   string of a composite character.

   An error is signalled unless the leading byte of the character is
   LEADING_BYTE_COMPOSITE; otherwise the result of
   composite_char_string is returned.  The statements that check CH and
   compute emch are not visible in this excerpt.  */
1923 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1924 Return a string of the characters comprising a composite character.
1932 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1933 signal_simple_error ("Must be composite char", ch);
1934 return composite_char_string (emch);
1936 #endif /* ENABLE_COMPOSITE_CHARS */
1939 /************************************************************************/
1940 /* initialization */
1941 /************************************************************************/
/* syms_of_mule_charset -- symbol-level initialization for this file.

   Registers the Lisp primitives defined here with DEFSUBR and interns
   the symbols used as charset property names, direction values and
   predefined charset names with defsymbol.  Nothing else is done in
   the visible part of the function.  */
1944 syms_of_mule_charset (void)
/* Lisp primitives.  */
1946 DEFSUBR (Fcharsetp);
1947 DEFSUBR (Ffind_charset);
1948 DEFSUBR (Fget_charset);
1949 DEFSUBR (Fcharset_list);
1950 DEFSUBR (Fcharset_name);
1951 DEFSUBR (Fmake_charset);
1952 DEFSUBR (Fmake_reverse_direction_charset);
1953 /* DEFSUBR (Freverse_direction_charset); */
1954 DEFSUBR (Fdefine_charset_alias);
1955 DEFSUBR (Fcharset_from_attributes);
1956 DEFSUBR (Fcharset_short_name);
1957 DEFSUBR (Fcharset_long_name);
1958 DEFSUBR (Fcharset_description);
1959 DEFSUBR (Fcharset_dimension);
1960 DEFSUBR (Fcharset_property);
1961 DEFSUBR (Fcharset_id);
1962 DEFSUBR (Fset_charset_ccl_program);
1963 DEFSUBR (Fset_charset_registry);
1965 DEFSUBR (Fcharset_mapping_table);
1966 DEFSUBR (Fset_charset_mapping_table);
1969 DEFSUBR (Fmake_char);
1970 DEFSUBR (Fchar_charset);
1971 DEFSUBR (Fsplit_char);
1973 #ifdef ENABLE_COMPOSITE_CHARS
1974 DEFSUBR (Fmake_composite_char);
1975 DEFSUBR (Fcomposite_char_string);
/* Property name symbols.  */
1978 defsymbol (&Qcharsetp, "charsetp");
1979 defsymbol (&Qregistry, "registry");
1980 defsymbol (&Qfinal, "final");
1981 defsymbol (&Qgraphic, "graphic");
1982 defsymbol (&Qdirection, "direction");
1983 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1984 defsymbol (&Qshort_name, "short-name");
1985 defsymbol (&Qlong_name, "long-name");
/* Values of the direction property.  */
1987 defsymbol (&Ql2r, "l2r");
1988 defsymbol (&Qr2l, "r2l");
1990 /* Charsets, compatible with FSF 20.3
1991 Naming convention is Script-Charset[-Edition] */
1992 defsymbol (&Qascii, "ascii");
1993 defsymbol (&Qcontrol_1, "control-1");
1994 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1995 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1996 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1997 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1998 defsymbol (&Qthai_tis620, "thai-tis620");
1999 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2000 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2001 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2002 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2003 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2004 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2005 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2006 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2007 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2008 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2009 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2010 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2011 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2012 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2014 defsymbol (&Qucs_bmp, "ucs-bmp");
2015 defsymbol (&Qlatin_viscii, "latin-viscii");
2016 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2017 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2018 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2019 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2020 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2021 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2023 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2024 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2026 defsymbol (&Qcomposite, "composite");
/* vars_of_mule_charset -- variable-level initialization for this file.

   Resets every slot of charset_by_leading_byte and
   charset_by_attributes to Qnil, seeds the counters from which
   leading bytes for new charsets are handed out, and sets up the
   Lisp-visible variables leading-code-private-11, utf-2000-version and
   default-coded-charset-priority-list.

   NOTE(review): charset_by_attributes is cleared by a two-level loop
   and again by a three-level loop, and both
   next_allocated_2_byte_leading_byte and leading_code_private_11 are
   assigned twice.  These look like the alternative branches of
   preprocessor conditionals that are not visible in this excerpt --
   confirm against the complete source.  */
2030 vars_of_mule_charset (void)
2037 /* Table of charsets indexed by leading byte. */
2038 for (i = 0; i < countof (charset_by_leading_byte); i++)
2039 charset_by_leading_byte[i] = Qnil;
2042 /* Table of charsets indexed by type/final-byte. */
2043 for (i = 0; i < countof (charset_by_attributes); i++)
2044 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2045 charset_by_attributes[i][j] = Qnil;
2047 /* Table of charsets indexed by type/final-byte/direction. */
2048 for (i = 0; i < countof (charset_by_attributes); i++)
2049 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2050 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2051 charset_by_attributes[i][j][k] = Qnil;
/* Starting points for the leading bytes given to newly created
   charsets.  */
2054 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2056 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2058 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2062 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2063 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2064 Leading-code of private TYPE9N charset of column-width 1.
2066 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2070 Vutf_2000_version = build_string("0.8 (Kami)");
2071 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2072 Version number of UTF-2000.
2075 Vdefault_coded_charset_priority_list = Qnil;
2076 DEFVAR_LISP ("default-coded-charset-priority-list",
2077 &Vdefault_coded_charset_priority_list /*
2078 Default order of preferred coded-character-set.
2084 complex_vars_of_mule_charset (void)
2086 staticpro (&Vcharset_hash_table);
2087 Vcharset_hash_table =
2088 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2090 /* Predefined character sets. We store them into variables for
2095 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2096 CHARSET_TYPE_256X256, 1, 2, 0,
2097 CHARSET_LEFT_TO_RIGHT,
2098 build_string ("BMP"),
2099 build_string ("BMP"),
2100 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2101 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2102 Qnil, 0, 0xFFFF, 0, 0);
2104 # define MIN_CHAR_THAI 0
2105 # define MAX_CHAR_THAI 0
2106 # define MIN_CHAR_GREEK 0
2107 # define MAX_CHAR_GREEK 0
2108 # define MIN_CHAR_HEBREW 0
2109 # define MAX_CHAR_HEBREW 0
2110 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2111 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2112 # define MIN_CHAR_CYRILLIC 0
2113 # define MAX_CHAR_CYRILLIC 0
2116 make_charset (LEADING_BYTE_ASCII, Qascii,
2117 CHARSET_TYPE_94, 1, 0, 'B',
2118 CHARSET_LEFT_TO_RIGHT,
2119 build_string ("ASCII"),
2120 build_string ("ASCII)"),
2121 build_string ("ASCII (ISO646 IRV)"),
2122 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2123 Qnil, 0, 0x7F, 0, 0);
2124 Vcharset_control_1 =
2125 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2126 CHARSET_TYPE_94, 1, 1, 0,
2127 CHARSET_LEFT_TO_RIGHT,
2128 build_string ("C1"),
2129 build_string ("Control characters"),
2130 build_string ("Control characters 128-191"),
2132 Qnil, 0x80, 0x9F, 0, 0);
2133 Vcharset_latin_iso8859_1 =
2134 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2135 CHARSET_TYPE_96, 1, 1, 'A',
2136 CHARSET_LEFT_TO_RIGHT,
2137 build_string ("Latin-1"),
2138 build_string ("ISO8859-1 (Latin-1)"),
2139 build_string ("ISO8859-1 (Latin-1)"),
2140 build_string ("iso8859-1"),
2141 Qnil, 0xA0, 0xFF, 0, 32);
2142 Vcharset_latin_iso8859_2 =
2143 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2144 CHARSET_TYPE_96, 1, 1, 'B',
2145 CHARSET_LEFT_TO_RIGHT,
2146 build_string ("Latin-2"),
2147 build_string ("ISO8859-2 (Latin-2)"),
2148 build_string ("ISO8859-2 (Latin-2)"),
2149 build_string ("iso8859-2"),
2151 Vcharset_latin_iso8859_3 =
2152 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2153 CHARSET_TYPE_96, 1, 1, 'C',
2154 CHARSET_LEFT_TO_RIGHT,
2155 build_string ("Latin-3"),
2156 build_string ("ISO8859-3 (Latin-3)"),
2157 build_string ("ISO8859-3 (Latin-3)"),
2158 build_string ("iso8859-3"),
2160 Vcharset_latin_iso8859_4 =
2161 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2162 CHARSET_TYPE_96, 1, 1, 'D',
2163 CHARSET_LEFT_TO_RIGHT,
2164 build_string ("Latin-4"),
2165 build_string ("ISO8859-4 (Latin-4)"),
2166 build_string ("ISO8859-4 (Latin-4)"),
2167 build_string ("iso8859-4"),
2169 Vcharset_thai_tis620 =
2170 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2171 CHARSET_TYPE_96, 1, 1, 'T',
2172 CHARSET_LEFT_TO_RIGHT,
2173 build_string ("TIS620"),
2174 build_string ("TIS620 (Thai)"),
2175 build_string ("TIS620.2529 (Thai)"),
2176 build_string ("tis620"),
2177 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2178 Vcharset_greek_iso8859_7 =
2179 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2180 CHARSET_TYPE_96, 1, 1, 'F',
2181 CHARSET_LEFT_TO_RIGHT,
2182 build_string ("ISO8859-7"),
2183 build_string ("ISO8859-7 (Greek)"),
2184 build_string ("ISO8859-7 (Greek)"),
2185 build_string ("iso8859-7"),
2186 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2187 Vcharset_arabic_iso8859_6 =
2188 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2189 CHARSET_TYPE_96, 1, 1, 'G',
2190 CHARSET_RIGHT_TO_LEFT,
2191 build_string ("ISO8859-6"),
2192 build_string ("ISO8859-6 (Arabic)"),
2193 build_string ("ISO8859-6 (Arabic)"),
2194 build_string ("iso8859-6"),
2196 Vcharset_hebrew_iso8859_8 =
2197 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2198 CHARSET_TYPE_96, 1, 1, 'H',
2199 CHARSET_RIGHT_TO_LEFT,
2200 build_string ("ISO8859-8"),
2201 build_string ("ISO8859-8 (Hebrew)"),
2202 build_string ("ISO8859-8 (Hebrew)"),
2203 build_string ("iso8859-8"),
2204 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2205 Vcharset_katakana_jisx0201 =
2206 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2207 CHARSET_TYPE_94, 1, 1, 'I',
2208 CHARSET_LEFT_TO_RIGHT,
2209 build_string ("JISX0201 Kana"),
2210 build_string ("JISX0201.1976 (Japanese Kana)"),
2211 build_string ("JISX0201.1976 Japanese Kana"),
2212 build_string ("jisx0201\\.1976"),
2214 MIN_CHAR_HALFWIDTH_KATAKANA,
2215 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2216 Vcharset_latin_jisx0201 =
2217 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2218 CHARSET_TYPE_94, 1, 0, 'J',
2219 CHARSET_LEFT_TO_RIGHT,
2220 build_string ("JISX0201 Roman"),
2221 build_string ("JISX0201.1976 (Japanese Roman)"),
2222 build_string ("JISX0201.1976 Japanese Roman"),
2223 build_string ("jisx0201\\.1976"),
2225 Vcharset_cyrillic_iso8859_5 =
2226 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2227 CHARSET_TYPE_96, 1, 1, 'L',
2228 CHARSET_LEFT_TO_RIGHT,
2229 build_string ("ISO8859-5"),
2230 build_string ("ISO8859-5 (Cyrillic)"),
2231 build_string ("ISO8859-5 (Cyrillic)"),
2232 build_string ("iso8859-5"),
2233 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2234 Vcharset_latin_iso8859_9 =
2235 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2236 CHARSET_TYPE_96, 1, 1, 'M',
2237 CHARSET_LEFT_TO_RIGHT,
2238 build_string ("Latin-5"),
2239 build_string ("ISO8859-9 (Latin-5)"),
2240 build_string ("ISO8859-9 (Latin-5)"),
2241 build_string ("iso8859-9"),
2243 Vcharset_japanese_jisx0208_1978 =
2244 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2245 CHARSET_TYPE_94X94, 2, 0, '@',
2246 CHARSET_LEFT_TO_RIGHT,
2247 build_string ("JIS X0208:1978"),
2248 build_string ("JIS X0208:1978 (Japanese)"),
2250 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2251 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2253 Vcharset_chinese_gb2312 =
2254 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2255 CHARSET_TYPE_94X94, 2, 0, 'A',
2256 CHARSET_LEFT_TO_RIGHT,
2257 build_string ("GB2312"),
2258 build_string ("GB2312)"),
2259 build_string ("GB2312 Chinese simplified"),
2260 build_string ("gb2312"),
2262 Vcharset_japanese_jisx0208 =
2263 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2264 CHARSET_TYPE_94X94, 2, 0, 'B',
2265 CHARSET_LEFT_TO_RIGHT,
2266 build_string ("JISX0208"),
2267 build_string ("JIS X0208:1983 (Japanese)"),
2268 build_string ("JIS X0208:1983 Japanese Kanji"),
2269 build_string ("jisx0208\\.1983"),
2271 Vcharset_korean_ksc5601 =
2272 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2273 CHARSET_TYPE_94X94, 2, 0, 'C',
2274 CHARSET_LEFT_TO_RIGHT,
2275 build_string ("KSC5601"),
2276 build_string ("KSC5601 (Korean"),
2277 build_string ("KSC5601 Korean Hangul and Hanja"),
2278 build_string ("ksc5601"),
2280 Vcharset_japanese_jisx0212 =
2281 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2282 CHARSET_TYPE_94X94, 2, 0, 'D',
2283 CHARSET_LEFT_TO_RIGHT,
2284 build_string ("JISX0212"),
2285 build_string ("JISX0212 (Japanese)"),
2286 build_string ("JISX0212 Japanese Supplement"),
2287 build_string ("jisx0212"),
2290 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2291 Vcharset_chinese_cns11643_1 =
2292 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2293 CHARSET_TYPE_94X94, 2, 0, 'G',
2294 CHARSET_LEFT_TO_RIGHT,
2295 build_string ("CNS11643-1"),
2296 build_string ("CNS11643-1 (Chinese traditional)"),
2298 ("CNS 11643 Plane 1 Chinese traditional"),
2299 build_string (CHINESE_CNS_PLANE_RE("1")),
2301 Vcharset_chinese_cns11643_2 =
2302 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2303 CHARSET_TYPE_94X94, 2, 0, 'H',
2304 CHARSET_LEFT_TO_RIGHT,
2305 build_string ("CNS11643-2"),
2306 build_string ("CNS11643-2 (Chinese traditional)"),
2308 ("CNS 11643 Plane 2 Chinese traditional"),
2309 build_string (CHINESE_CNS_PLANE_RE("2")),
2312 Vcharset_latin_viscii_lower =
2313 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2314 CHARSET_TYPE_96, 1, 1, '1',
2315 CHARSET_LEFT_TO_RIGHT,
2316 build_string ("VISCII lower"),
2317 build_string ("VISCII lower (Vietnamese)"),
2318 build_string ("VISCII lower (Vietnamese)"),
2319 build_string ("MULEVISCII-LOWER"),
2321 Vcharset_latin_viscii_upper =
2322 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2323 CHARSET_TYPE_96, 1, 1, '2',
2324 CHARSET_LEFT_TO_RIGHT,
2325 build_string ("VISCII upper"),
2326 build_string ("VISCII upper (Vietnamese)"),
2327 build_string ("VISCII upper (Vietnamese)"),
2328 build_string ("MULEVISCII-UPPER"),
2330 Vcharset_latin_viscii =
2331 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2332 CHARSET_TYPE_256, 1, 2, 0,
2333 CHARSET_LEFT_TO_RIGHT,
2334 build_string ("VISCII"),
2335 build_string ("VISCII 1.1 (Vietnamese)"),
2336 build_string ("VISCII 1.1 (Vietnamese)"),
2337 build_string ("VISCII1\\.1"),
2339 Vcharset_hiragana_jisx0208 =
2340 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2341 CHARSET_TYPE_94X94, 2, 0, 'B',
2342 CHARSET_LEFT_TO_RIGHT,
2343 build_string ("Hiragana"),
2344 build_string ("Hiragana of JIS X0208"),
2345 build_string ("Japanese Hiragana of JIS X0208"),
2346 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2347 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2348 (0x24 - 33) * 94 + (0x21 - 33), 33);
2349 Vcharset_katakana_jisx0208 =
2350 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2351 CHARSET_TYPE_94X94, 2, 0, 'B',
2352 CHARSET_LEFT_TO_RIGHT,
2353 build_string ("Katakana"),
2354 build_string ("Katakana of JIS X0208"),
2355 build_string ("Japanese Katakana of JIS X0208"),
2356 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2357 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2358 (0x25 - 33) * 94 + (0x21 - 33), 33);
2360 Vcharset_chinese_big5_1 =
2361 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2362 CHARSET_TYPE_94X94, 2, 0, '0',
2363 CHARSET_LEFT_TO_RIGHT,
2364 build_string ("Big5"),
2365 build_string ("Big5 (Level-1)"),
2367 ("Big5 Level-1 Chinese traditional"),
2368 build_string ("big5"),
2370 Vcharset_chinese_big5_2 =
2371 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2372 CHARSET_TYPE_94X94, 2, 0, '1',
2373 CHARSET_LEFT_TO_RIGHT,
2374 build_string ("Big5"),
2375 build_string ("Big5 (Level-2)"),
2377 ("Big5 Level-2 Chinese traditional"),
2378 build_string ("big5"),
2381 #ifdef ENABLE_COMPOSITE_CHARS
2382 /* #### For simplicity, we put composite chars into a 96x96 charset.
2383 This is going to lead to problems because you can run out of
2384 room, esp. as we don't yet recycle numbers. */
2385 Vcharset_composite =
2386 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2387 CHARSET_TYPE_96X96, 2, 0, 0,
2388 CHARSET_LEFT_TO_RIGHT,
2389 build_string ("Composite"),
2390 build_string ("Composite characters"),
2391 build_string ("Composite characters"),
2394 composite_char_row_next = 32;
2395 composite_char_col_next = 32;
2397 Vcomposite_char_string2char_hash_table =
2398 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2399 Vcomposite_char_char2string_hash_table =
2400 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2401 staticpro (&Vcomposite_char_string2char_hash_table);
2402 staticpro (&Vcomposite_char_char2string_hash_table);
2403 #endif /* ENABLE_COMPOSITE_CHARS */