1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
227 #define make_char_code_table(initval) make_char_byte_table(initval)
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vutf_2000_version;
315 int leading_code_private_11;
318 Lisp_Object Qcharsetp;
320 /* Qdoc_string, Qdimension, Qchars defined in general.c */
321 Lisp_Object Qregistry, Qfinal, Qgraphic;
322 Lisp_Object Qdirection;
323 Lisp_Object Qreverse_direction_charset;
324 Lisp_Object Qleading_byte;
325 Lisp_Object Qshort_name, Qlong_name;
341 Qjapanese_jisx0208_1978,
353 Qvietnamese_viscii_lower,
354 Qvietnamese_viscii_upper,
362 Lisp_Object Ql2r, Qr2l;
364 Lisp_Object Vcharset_hash_table;
367 static Charset_ID next_allocated_leading_byte;
369 static Charset_ID next_allocated_1_byte_leading_byte;
370 static Charset_ID next_allocated_2_byte_leading_byte;
373 /* Composite characters are characters constructed by overstriking two
374 or more regular characters.
376 1) The old Mule implementation involves storing composite characters
377 in a buffer as a tag followed by all of the actual characters
378 used to make up the composite character. I think this is a bad
379 idea; it greatly complicates code that wants to handle strings
380 one character at a time because it has to deal with the possibility
381 of great big ungainly characters. It's much more reasonable to
382 simply store an index into a table of composite characters.
384 2) The current implementation only allows for 16,384 separate
385 composite characters over the lifetime of the XEmacs process.
386 This could become a potential problem if the user
387 edited lots of different files that use composite characters.
388 Due to FSF bogosity, increasing the number of allowable
389 composite characters under Mule would decrease the number
390 of possible faces that can exist. Mule already has shrunk
391 this to 2048, and further shrinkage would become uncomfortable.
392 No such problems exist in XEmacs.
394 Composite characters could be represented as 0x80 C1 C2 C3,
395 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
396 for slightly under 2^20 (one million) composite characters
397 over the XEmacs process lifetime, and you only need to
398 increase the size of a Mule character from 19 to 21 bits.
399 Or you could use 0x80 C1 C2 C3 C4, allowing for about
400 85 million (slightly over 2^26) composite characters. */
403 /************************************************************************/
404 /* Basic Emchar functions */
405 /************************************************************************/
407 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
408 string in STR. Returns the number of bytes stored.
409 Do not call this directly. Use the macro set_charptr_emchar() instead.
413 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
428 else if ( c <= 0x7ff )
430 *p++ = (c >> 6) | 0xc0;
431 *p++ = (c & 0x3f) | 0x80;
433 else if ( c <= 0xffff )
435 *p++ = (c >> 12) | 0xe0;
436 *p++ = ((c >> 6) & 0x3f) | 0x80;
437 *p++ = (c & 0x3f) | 0x80;
439 else if ( c <= 0x1fffff )
441 *p++ = (c >> 18) | 0xf0;
442 *p++ = ((c >> 12) & 0x3f) | 0x80;
443 *p++ = ((c >> 6) & 0x3f) | 0x80;
444 *p++ = (c & 0x3f) | 0x80;
446 else if ( c <= 0x3ffffff )
448 *p++ = (c >> 24) | 0xf8;
449 *p++ = ((c >> 18) & 0x3f) | 0x80;
450 *p++ = ((c >> 12) & 0x3f) | 0x80;
451 *p++ = ((c >> 6) & 0x3f) | 0x80;
452 *p++ = (c & 0x3f) | 0x80;
456 *p++ = (c >> 30) | 0xfc;
457 *p++ = ((c >> 24) & 0x3f) | 0x80;
458 *p++ = ((c >> 18) & 0x3f) | 0x80;
459 *p++ = ((c >> 12) & 0x3f) | 0x80;
460 *p++ = ((c >> 6) & 0x3f) | 0x80;
461 *p++ = (c & 0x3f) | 0x80;
464 BREAKUP_CHAR (c, charset, c1, c2);
465 lb = CHAR_LEADING_BYTE (c);
466 if (LEADING_BYTE_PRIVATE_P (lb))
467 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
469 if (EQ (charset, Vcharset_control_1))
478 /* Return the first character from a Mule-encoded string in STR,
479 assuming it's non-ASCII. Do not call this directly.
480 Use the macro charptr_emchar() instead. */
483 non_ascii_charptr_emchar (CONST Bufbyte *str)
496 else if ( b >= 0xf8 )
501 else if ( b >= 0xf0 )
506 else if ( b >= 0xe0 )
511 else if ( b >= 0xc0 )
521 for( ; len > 0; len-- )
524 ch = ( ch << 6 ) | ( b & 0x3f );
528 Bufbyte i0 = *str, i1, i2 = 0;
531 if (i0 == LEADING_BYTE_CONTROL_1)
532 return (Emchar) (*++str - 0x20);
534 if (LEADING_BYTE_PREFIX_P (i0))
539 charset = CHARSET_BY_LEADING_BYTE (i0);
540 if (XCHARSET_DIMENSION (charset) == 2)
543 return MAKE_CHAR (charset, i1, i2);
547 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
548 Do not call this directly. Use the macro valid_char_p() instead. */
552 non_ascii_valid_char_p (Emchar ch)
556 /* Must have only lowest 19 bits set */
560 f1 = CHAR_FIELD1 (ch);
561 f2 = CHAR_FIELD2 (ch);
562 f3 = CHAR_FIELD3 (ch);
568 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
569 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
570 f2 > MAX_CHAR_FIELD2_PRIVATE)
575 if (f3 != 0x20 && f3 != 0x7F)
579 NOTE: This takes advantage of the fact that
580 FIELD2_TO_OFFICIAL_LEADING_BYTE and
581 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
583 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
584 return (XCHARSET_CHARS (charset) == 96);
590 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
591 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
592 f1 > MAX_CHAR_FIELD1_PRIVATE)
594 if (f2 < 0x20 || f3 < 0x20)
597 #ifdef ENABLE_COMPOSITE_CHARS
598 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
600 if (UNBOUNDP (Fgethash (make_int (ch),
601 Vcomposite_char_char2string_hash_table,
606 #endif /* ENABLE_COMPOSITE_CHARS */
608 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
611 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
613 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
616 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
618 return (XCHARSET_CHARS (charset) == 96);
624 /************************************************************************/
625 /* Basic string functions */
626 /************************************************************************/
628 /* Copy the character pointed to by PTR into STR, assuming it's
629 non-ASCII. Do not call this directly. Use the macro
630 charptr_copy_char() instead. */
633 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
635 Bufbyte *strptr = str;
637 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
639 /* Notice fallthrough. */
641 case 6: *++strptr = *ptr++;
642 case 5: *++strptr = *ptr++;
644 case 4: *++strptr = *ptr++;
645 case 3: *++strptr = *ptr++;
646 case 2: *++strptr = *ptr;
651 return strptr + 1 - str;
655 /************************************************************************/
656 /* streams of Emchars */
657 /************************************************************************/
659 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
660 The functions below are not meant to be called directly; use
661 the macros in insdel.h. */
664 Lstream_get_emchar_1 (Lstream *stream, int ch)
666 Bufbyte str[MAX_EMCHAR_LEN];
667 Bufbyte *strptr = str;
669 str[0] = (Bufbyte) ch;
670 switch (REP_BYTES_BY_FIRST_BYTE (ch))
672 /* Notice fallthrough. */
675 ch = Lstream_getc (stream);
677 *++strptr = (Bufbyte) ch;
679 ch = Lstream_getc (stream);
681 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
688 ch = Lstream_getc (stream);
690 *++strptr = (Bufbyte) ch;
692 ch = Lstream_getc (stream);
694 *++strptr = (Bufbyte) ch;
699 return charptr_emchar (str);
703 Lstream_fput_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 return Lstream_write (stream, str, len);
711 Lstream_funget_emchar (Lstream *stream, Emchar ch)
713 Bufbyte str[MAX_EMCHAR_LEN];
714 Bytecount len = set_charptr_emchar (str, ch);
715 Lstream_unread (stream, str, len);
719 /************************************************************************/
721 /************************************************************************/
724 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
726 struct Lisp_Charset *cs = XCHARSET (obj);
728 markobj (cs->short_name);
729 markobj (cs->long_name);
730 markobj (cs->doc_string);
731 markobj (cs->registry);
732 markobj (cs->ccl_program);
734 markobj (cs->decoding_table);
735 markobj (cs->encoding_table);
741 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
743 struct Lisp_Charset *cs = XCHARSET (obj);
747 error ("printing unreadable object #<charset %s 0x%x>",
748 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
751 write_c_string ("#<charset ", printcharfun);
752 print_internal (CHARSET_NAME (cs), printcharfun, 0);
753 write_c_string (" ", printcharfun);
754 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
755 write_c_string (" ", printcharfun);
756 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
757 write_c_string (" ", printcharfun);
758 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
759 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
760 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
761 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
762 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
764 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
765 CHARSET_COLUMNS (cs),
766 CHARSET_GRAPHIC (cs),
768 write_c_string (buf, printcharfun);
769 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
770 sprintf (buf, " 0x%x>", cs->header.uid);
771 write_c_string (buf, printcharfun);
774 static const struct lrecord_description charset_description[] = {
775 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
777 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
782 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
783 mark_charset, print_charset, 0, 0, 0,
785 struct Lisp_Charset);
787 /* Make a new charset. */
790 make_charset (Charset_ID id, Lisp_Object name,
791 unsigned char type, unsigned char columns, unsigned char graphic,
792 Bufbyte final, unsigned char direction, Lisp_Object short_name,
793 Lisp_Object long_name, Lisp_Object doc,
795 Lisp_Object decoding_table,
796 Emchar ucs_min, Emchar ucs_max,
797 Emchar code_offset, unsigned char byte_offset)
800 struct Lisp_Charset *cs =
801 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
802 XSETCHARSET (obj, cs);
804 CHARSET_ID (cs) = id;
805 CHARSET_NAME (cs) = name;
806 CHARSET_SHORT_NAME (cs) = short_name;
807 CHARSET_LONG_NAME (cs) = long_name;
808 CHARSET_DIRECTION (cs) = direction;
809 CHARSET_TYPE (cs) = type;
810 CHARSET_COLUMNS (cs) = columns;
811 CHARSET_GRAPHIC (cs) = graphic;
812 CHARSET_FINAL (cs) = final;
813 CHARSET_DOC_STRING (cs) = doc;
814 CHARSET_REGISTRY (cs) = reg;
815 CHARSET_CCL_PROGRAM (cs) = Qnil;
816 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
818 CHARSET_DECODING_TABLE(cs) = Qnil;
819 CHARSET_ENCODING_TABLE(cs) = Qnil;
820 CHARSET_UCS_MIN(cs) = ucs_min;
821 CHARSET_UCS_MAX(cs) = ucs_max;
822 CHARSET_CODE_OFFSET(cs) = code_offset;
823 CHARSET_BYTE_OFFSET(cs) = byte_offset;
826 switch (CHARSET_TYPE (cs))
828 case CHARSET_TYPE_94:
829 CHARSET_DIMENSION (cs) = 1;
830 CHARSET_CHARS (cs) = 94;
832 case CHARSET_TYPE_96:
833 CHARSET_DIMENSION (cs) = 1;
834 CHARSET_CHARS (cs) = 96;
836 case CHARSET_TYPE_94X94:
837 CHARSET_DIMENSION (cs) = 2;
838 CHARSET_CHARS (cs) = 94;
840 case CHARSET_TYPE_96X96:
841 CHARSET_DIMENSION (cs) = 2;
842 CHARSET_CHARS (cs) = 96;
845 case CHARSET_TYPE_128:
846 CHARSET_DIMENSION (cs) = 1;
847 CHARSET_CHARS (cs) = 128;
849 case CHARSET_TYPE_128X128:
850 CHARSET_DIMENSION (cs) = 2;
851 CHARSET_CHARS (cs) = 128;
853 case CHARSET_TYPE_256:
854 CHARSET_DIMENSION (cs) = 1;
855 CHARSET_CHARS (cs) = 256;
857 case CHARSET_TYPE_256X256:
858 CHARSET_DIMENSION (cs) = 2;
859 CHARSET_CHARS (cs) = 256;
865 if (id == LEADING_BYTE_ASCII)
866 CHARSET_REP_BYTES (cs) = 1;
868 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
870 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
875 /* some charsets do not have final characters. This includes
876 ASCII, Control-1, Composite, and the two faux private
879 if (code_offset == 0)
881 assert (NILP (charset_by_attributes[type][final]));
882 charset_by_attributes[type][final] = obj;
885 assert (NILP (charset_by_attributes[type][final][direction]));
886 charset_by_attributes[type][final][direction] = obj;
890 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
891 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
894 /* official leading byte */
895 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
898 /* Some charsets are "faux" and don't have names or really exist at
899 all except in the leading-byte table. */
901 Fputhash (name, obj, Vcharset_hash_table);
906 get_unallocated_leading_byte (int dimension)
911 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
914 lb = next_allocated_leading_byte++;
918 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
921 lb = next_allocated_1_byte_leading_byte++;
925 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
928 lb = next_allocated_2_byte_leading_byte++;
934 ("No more character sets free for this dimension",
935 make_int (dimension));
942 charset_get_byte1 (Lisp_Object charset, Emchar ch)
947 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
949 Lisp_Object value = get_char_code_table (ch, table);
953 Emchar code = XINT (value);
957 else if (code < (1 << 16))
959 else if (code < (1 << 24))
965 if ((XCHARSET_UCS_MIN (charset) <= ch)
966 && (ch <= XCHARSET_UCS_MAX (charset)))
967 return (ch - XCHARSET_UCS_MIN (charset)
968 + XCHARSET_CODE_OFFSET (charset))
969 / (XCHARSET_DIMENSION (charset) == 1 ?
972 XCHARSET_DIMENSION (charset) == 2 ?
973 XCHARSET_CHARS (charset)
975 XCHARSET_DIMENSION (charset) == 3 ?
976 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
978 XCHARSET_CHARS (charset)
979 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
980 + XCHARSET_BYTE_OFFSET (charset);
981 else if (XCHARSET_CODE_OFFSET (charset) == 0)
983 if (XCHARSET_DIMENSION (charset) == 1)
985 if (XCHARSET_CHARS (charset) == 94)
987 if (((d = ch - (MIN_CHAR_94
988 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
992 else if (XCHARSET_CHARS (charset) == 96)
994 if (((d = ch - (MIN_CHAR_96
995 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1002 else if (XCHARSET_DIMENSION (charset) == 2)
1004 if (XCHARSET_CHARS (charset) == 94)
1006 if (((d = ch - (MIN_CHAR_94x94
1007 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1010 return (d / 94) + 33;
1012 else if (XCHARSET_CHARS (charset) == 96)
1014 if (((d = ch - (MIN_CHAR_96x96
1015 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1018 return (d / 96) + 32;
1026 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1028 if (XCHARSET_DIMENSION (charset) == 1)
1034 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1036 Lisp_Object value = get_char_code_table (ch, table);
1040 Emchar code = XINT (value);
1042 if (code < (1 << 16))
1043 return (unsigned char)code;
1044 else if (code < (1 << 24))
1045 return (unsigned char)(code >> 16);
1047 return (unsigned char)(code >> 24);
1050 if ((XCHARSET_UCS_MIN (charset) <= ch)
1051 && (ch <= XCHARSET_UCS_MAX (charset)))
1052 return ((ch - XCHARSET_UCS_MIN (charset)
1053 + XCHARSET_CODE_OFFSET (charset))
1054 / (XCHARSET_DIMENSION (charset) == 2 ?
1057 XCHARSET_DIMENSION (charset) == 3 ?
1058 XCHARSET_CHARS (charset)
1060 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1061 % XCHARSET_CHARS (charset)
1062 + XCHARSET_BYTE_OFFSET (charset);
1063 else if (XCHARSET_CHARS (charset) == 94)
1064 return (MIN_CHAR_94x94
1065 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1066 && (ch < MIN_CHAR_94x94
1067 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1068 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1069 else /* if (XCHARSET_CHARS (charset) == 96) */
1070 return (MIN_CHAR_96x96
1071 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1072 && (ch < MIN_CHAR_96x96
1073 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1074 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1078 Lisp_Object Vdefault_coded_charset_priority_list;
1082 /************************************************************************/
1083 /* Basic charset Lisp functions */
1084 /************************************************************************/
1086 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1087 Return non-nil if OBJECT is a charset.
1091 return CHARSETP (object) ? Qt : Qnil;
1094 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1095 Retrieve the charset of the given name.
1096 If CHARSET-OR-NAME is a charset object, it is simply returned.
1097 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1098 nil is returned. Otherwise the associated charset object is returned.
1102 if (CHARSETP (charset_or_name))
1103 return charset_or_name;
1105 CHECK_SYMBOL (charset_or_name);
1106 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1109 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1110 Retrieve the charset of the given name.
1111 Same as `find-charset' except an error is signalled if there is no such
1112 charset instead of returning nil.
1116 Lisp_Object charset = Ffind_charset (name);
1119 signal_simple_error ("No such charset", name);
1123 /* We store the charsets in hash tables with the names as the key and the
1124 actual charset object as the value. Occasionally we need to use them
1125 in a list format. These routines provide us with that. */
1126 struct charset_list_closure
1128 Lisp_Object *charset_list;
1132 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1133 void *charset_list_closure)
1135 /* This function can GC */
1136 struct charset_list_closure *chcl =
1137 (struct charset_list_closure*) charset_list_closure;
1138 Lisp_Object *charset_list = chcl->charset_list;
1140 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1144 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1145 Return a list of the names of all defined charsets.
1149 Lisp_Object charset_list = Qnil;
1150 struct gcpro gcpro1;
1151 struct charset_list_closure charset_list_closure;
1153 GCPRO1 (charset_list);
1154 charset_list_closure.charset_list = &charset_list;
1155 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1156 &charset_list_closure);
1159 return charset_list;
1162 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1163 Return the name of the given charset.
1167 return XCHARSET_NAME (Fget_charset (charset));
1170 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1171 Define a new character set.
1172 This function is for use with Mule support.
1173 NAME is a symbol, the name by which the character set is normally referred.
1174 DOC-STRING is a string describing the character set.
1175 PROPS is a property list, describing the specific nature of the
1176 character set. Recognized properties are:
1178 'short-name Short version of the charset name (ex: Latin-1)
1179 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1180 'registry A regular expression matching the font registry field for
1182 'dimension Number of octets used to index a character in this charset.
1183 Either 1 or 2. Defaults to 1.
1184 'columns Number of columns used to display a character in this charset.
1185 Only used in TTY mode. (Under X, the actual width of a
1186 character can be derived from the font used to display the
1187 characters.) If unspecified, defaults to the dimension
1188 (this is almost always the correct value).
1189 'chars Number of characters in each dimension (94 or 96).
1190 Defaults to 94. Note that if the dimension is 2, the
1191 character set thus described is 94x94 or 96x96.
1192 'final Final byte of ISO 2022 escape sequence. Must be
1193 supplied. Each combination of (DIMENSION, CHARS) defines a
1194 separate namespace for final bytes. Note that ISO
1195 2022 restricts the final byte to the range
1196 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1197 dimension == 2. Note also that final bytes in the range
1198 0x30 - 0x3F are reserved for user-defined (not official)
1200 'graphic 0 (use left half of font on output) or 1 (use right half
1201 of font on output). Defaults to 0. For example, for
1202 a font whose registry is ISO8859-1, the left half
1203 (octets 0x20 - 0x7F) is the `ascii' character set, while
1204 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1205 character set. With 'graphic set to 0, the octets
1206 will have their high bit cleared; with it set to 1,
1207 the octets will have their high bit set.
1208 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1210 'ccl-program A compiled CCL program used to convert a character in
1211 this charset into an index into the font. This is in
1212 addition to the 'graphic property. The CCL program
1213 is passed the octets of the character, with the high
1214 bit cleared and set depending upon whether the value
1215 of the 'graphic property is 0 or 1.
1217 (name, doc_string, props))
1219 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1220 int direction = CHARSET_LEFT_TO_RIGHT;
1222 Lisp_Object registry = Qnil;
1223 Lisp_Object charset;
1224 Lisp_Object rest, keyword, value;
1225 Lisp_Object ccl_program = Qnil;
1226 Lisp_Object short_name = Qnil, long_name = Qnil;
1228 Emchar code_offset = 0;
1229 unsigned char byte_offset = 0;
1232 CHECK_SYMBOL (name);
1233 if (!NILP (doc_string))
1234 CHECK_STRING (doc_string);
1236 charset = Ffind_charset (name);
1237 if (!NILP (charset))
1238 signal_simple_error ("Cannot redefine existing charset", name);
1240 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1242 if (EQ (keyword, Qshort_name))
1244 CHECK_STRING (value);
1248 if (EQ (keyword, Qlong_name))
1250 CHECK_STRING (value);
1254 else if (EQ (keyword, Qdimension))
1257 dimension = XINT (value);
1258 if (dimension < 1 || dimension > 2)
1259 signal_simple_error ("Invalid value for 'dimension", value);
1262 else if (EQ (keyword, Qchars))
1265 chars = XINT (value);
1266 if (chars != 94 && chars != 96)
1267 signal_simple_error ("Invalid value for 'chars", value);
1270 else if (EQ (keyword, Qcolumns))
1273 columns = XINT (value);
1274 if (columns != 1 && columns != 2)
1275 signal_simple_error ("Invalid value for 'columns", value);
1278 else if (EQ (keyword, Qgraphic))
1281 graphic = XINT (value);
1283 if (graphic < 0 || graphic > 2)
1285 if (graphic < 0 || graphic > 1)
1287 signal_simple_error ("Invalid value for 'graphic", value);
1290 else if (EQ (keyword, Qregistry))
1292 CHECK_STRING (value);
1296 else if (EQ (keyword, Qdirection))
1298 if (EQ (value, Ql2r))
1299 direction = CHARSET_LEFT_TO_RIGHT;
1300 else if (EQ (value, Qr2l))
1301 direction = CHARSET_RIGHT_TO_LEFT;
1303 signal_simple_error ("Invalid value for 'direction", value);
1306 else if (EQ (keyword, Qfinal))
1308 CHECK_CHAR_COERCE_INT (value);
1309 final = XCHAR (value);
1310 if (final < '0' || final > '~')
1311 signal_simple_error ("Invalid value for 'final", value);
1314 else if (EQ (keyword, Qccl_program))
1316 CHECK_VECTOR (value);
1317 ccl_program = value;
1321 signal_simple_error ("Unrecognized property", keyword);
1325 error ("'final must be specified");
1326 if (dimension == 2 && final > 0x5F)
1328 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1332 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1334 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1336 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1337 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1339 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1346 if (code_offset == 0)
1347 id = CHARSET_ID_OFFSET_94 + final;
1349 id = get_unallocated_leading_byte (dimension);
1351 else if (chars == 96)
1353 if (code_offset == 0)
1354 id = CHARSET_ID_OFFSET_96 + final;
1356 id = get_unallocated_leading_byte (dimension);
1363 else if (dimension == 2)
1367 if (code_offset == 0)
1368 id = CHARSET_ID_OFFSET_94x94 + final;
1370 id = get_unallocated_leading_byte (dimension);
1372 else if (chars == 96)
1374 id = get_unallocated_leading_byte (dimension);
1389 else if (chars == 96)
1393 id = get_unallocated_leading_byte (dimension);
1396 if (NILP (doc_string))
1397 doc_string = build_string ("");
1399 if (NILP (registry))
1400 registry = build_string ("");
1402 if (NILP (short_name))
1403 XSETSTRING (short_name, XSYMBOL (name)->name);
1405 if (NILP (long_name))
1406 long_name = doc_string;
1409 columns = dimension;
1410 charset = make_charset (id, name, type, columns, graphic,
1411 final, direction, short_name, long_name,
1412 doc_string, registry,
1413 Qnil, 0, 0, 0, byte_offset);
1414 if (!NILP (ccl_program))
1415 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1419 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1421 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1422 NEW-NAME is the name of the new charset. Return the new charset.
1424 (charset, new_name))
1426 Lisp_Object new_charset = Qnil;
1427 int id, dimension, columns, graphic, final;
1428 int direction, type;
1429 Lisp_Object registry, doc_string, short_name, long_name;
1430 struct Lisp_Charset *cs;
1432 charset = Fget_charset (charset);
1433 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1434 signal_simple_error ("Charset already has reverse-direction charset",
1437 CHECK_SYMBOL (new_name);
1438 if (!NILP (Ffind_charset (new_name)))
1439 signal_simple_error ("Cannot redefine existing charset", new_name);
1441 cs = XCHARSET (charset);
1443 type = CHARSET_TYPE (cs);
1444 columns = CHARSET_COLUMNS (cs);
1445 dimension = CHARSET_DIMENSION (cs);
1446 id = get_unallocated_leading_byte (dimension);
1448 graphic = CHARSET_GRAPHIC (cs);
1449 final = CHARSET_FINAL (cs);
1450 direction = CHARSET_RIGHT_TO_LEFT;
1451 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1452 direction = CHARSET_LEFT_TO_RIGHT;
1453 doc_string = CHARSET_DOC_STRING (cs);
1454 short_name = CHARSET_SHORT_NAME (cs);
1455 long_name = CHARSET_LONG_NAME (cs);
1456 registry = CHARSET_REGISTRY (cs);
1458 new_charset = make_charset (id, new_name, type, columns,
1459 graphic, final, direction, short_name, long_name,
1460 doc_string, registry,
1462 CHARSET_DECODING_TABLE(cs),
1463 CHARSET_UCS_MIN(cs),
1464 CHARSET_UCS_MAX(cs),
1465 CHARSET_CODE_OFFSET(cs),
1466 CHARSET_BYTE_OFFSET(cs)
1472 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1473 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1478 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1479 Define symbol ALIAS as an alias for CHARSET.
1483 CHECK_SYMBOL (alias);
1484 charset = Fget_charset (charset);
1485 return Fputhash (alias, charset, Vcharset_hash_table);
1488 /* #### Reverse direction charsets not yet implemented. */
1490 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1492 Return the reverse-direction charset parallel to CHARSET, if any.
1493 This is the charset with the same properties (in particular, the same
1494 dimension, number of characters per dimension, and final byte) as
1495 CHARSET but whose characters are displayed in the opposite direction.
1499 charset = Fget_charset (charset);
1500 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1504 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1505 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1506 If DIRECTION is omitted, both directions will be checked (left-to-right
1507 will be returned if character sets exist for both directions).
1509 (dimension, chars, final, direction))
1511 int dm, ch, fi, di = -1;
1513 Lisp_Object obj = Qnil;
1515 CHECK_INT (dimension);
1516 dm = XINT (dimension);
1517 if (dm < 1 || dm > 2)
1518 signal_simple_error ("Invalid value for DIMENSION", dimension);
1522 if (ch != 94 && ch != 96)
1523 signal_simple_error ("Invalid value for CHARS", chars);
1525 CHECK_CHAR_COERCE_INT (final);
1527 if (fi < '0' || fi > '~')
1528 signal_simple_error ("Invalid value for FINAL", final);
1530 if (EQ (direction, Ql2r))
1531 di = CHARSET_LEFT_TO_RIGHT;
1532 else if (EQ (direction, Qr2l))
1533 di = CHARSET_RIGHT_TO_LEFT;
1534 else if (!NILP (direction))
1535 signal_simple_error ("Invalid value for DIRECTION", direction);
1537 if (dm == 2 && fi > 0x5F)
1539 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1542 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1544 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1548 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1550 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1553 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1556 return XCHARSET_NAME (obj);
1560 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1561 Return short name of CHARSET.
1565 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1568 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1569 Return long name of CHARSET.
1573 return XCHARSET_LONG_NAME (Fget_charset (charset));
1576 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1577 Return description of CHARSET.
1581 return XCHARSET_DOC_STRING (Fget_charset (charset));
1584 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1585 Return dimension of CHARSET.
1589 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1592 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1593 Return property PROP of CHARSET.
1594 Recognized properties are those listed in `make-charset', as well as
1595 'name and 'doc-string.
1599 struct Lisp_Charset *cs;
1601 charset = Fget_charset (charset);
1602 cs = XCHARSET (charset);
1604 CHECK_SYMBOL (prop);
1605 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1606 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1607 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1608 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1609 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1610 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1611 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1612 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1613 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1614 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1615 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1616 if (EQ (prop, Qdirection))
1617 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1618 if (EQ (prop, Qreverse_direction_charset))
1620 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1624 return XCHARSET_NAME (obj);
1626 signal_simple_error ("Unrecognized charset property name", prop);
1627 return Qnil; /* not reached */
1630 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1631 Return charset identification number of CHARSET.
1635 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1638 /* #### We need to figure out which properties we really want to
1641 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1642 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1644 (charset, ccl_program))
1646 charset = Fget_charset (charset);
1647 CHECK_VECTOR (ccl_program);
1648 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1653 invalidate_charset_font_caches (Lisp_Object charset)
1655 /* Invalidate font cache entries for charset on all devices. */
1656 Lisp_Object devcons, concons, hash_table;
1657 DEVICE_LOOP_NO_BREAK (devcons, concons)
1659 struct device *d = XDEVICE (XCAR (devcons));
1660 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1661 if (!UNBOUNDP (hash_table))
1662 Fclrhash (hash_table);
1666 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1667 Set the 'registry property of CHARSET to REGISTRY.
1669 (charset, registry))
1671 charset = Fget_charset (charset);
1672 CHECK_STRING (registry);
1673 XCHARSET_REGISTRY (charset) = registry;
1674 invalidate_charset_font_caches (charset);
1675 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1680 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1681 Return mapping-table of CHARSET.
1685 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1688 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1689 Set mapping-table of CHARSET to TABLE.
1693 struct Lisp_Charset *cs;
1694 Lisp_Object old_table;
1697 charset = Fget_charset (charset);
1698 cs = XCHARSET (charset);
1700 if (EQ (table, Qnil))
1702 CHARSET_DECODING_TABLE(cs) = table;
1703 CHARSET_ENCODING_TABLE(cs) = Qnil;
1706 else if (VECTORP (table))
1708 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1709 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1710 old_table = CHARSET_ENCODING_TABLE(cs);
1711 CHARSET_DECODING_TABLE(cs) = table;
1714 signal_error (Qwrong_type_argument,
1715 list2 (build_translated_string ("vector-or-nil-p"),
1717 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1719 switch (CHARSET_DIMENSION (cs))
1722 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1723 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1725 Lisp_Object c = XVECTOR_DATA(table)[i];
1728 put_char_code_table (XCHAR (c),
1729 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1730 CHARSET_ENCODING_TABLE(cs));
1734 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1735 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1737 Lisp_Object v = XVECTOR_DATA(table)[i];
1743 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1745 CHARSET_DECODING_TABLE(cs) = old_table;
1746 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1748 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1750 Lisp_Object c = XVECTOR_DATA(v)[j];
1755 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1756 | (j + CHARSET_BYTE_OFFSET (cs))),
1757 CHARSET_ENCODING_TABLE(cs));
1761 put_char_code_table (XCHAR (v),
1762 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1763 CHARSET_ENCODING_TABLE(cs));
1772 /************************************************************************/
1773 /* Lisp primitives for working with characters */
1774 /************************************************************************/
1776 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1777 Make a character from CHARSET and octets ARG1 and ARG2.
1778 ARG2 is required only for characters from two-dimensional charsets.
1779 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1780 character s with caron.
1782 (charset, arg1, arg2))
1784 struct Lisp_Charset *cs;
1786 int lowlim, highlim;
1788 charset = Fget_charset (charset);
1789 cs = XCHARSET (charset);
1791 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1792 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1794 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1796 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1797 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1800 /* It is useful (and safe, according to Olivier Galibert) to strip
1801 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1802 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1803 Latin 2 code of the character. */
1811 if (a1 < lowlim || a1 > highlim)
1812 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1814 if (CHARSET_DIMENSION (cs) == 1)
1818 ("Charset is of dimension one; second octet must be nil", arg2);
1819 return make_char (MAKE_CHAR (charset, a1, 0));
1828 a2 = XINT (arg2) & 0x7f;
1830 if (a2 < lowlim || a2 > highlim)
1831 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1833 return make_char (MAKE_CHAR (charset, a1, a2));
1836 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1837 Return the character set of char CH.
1841 CHECK_CHAR_COERCE_INT (ch);
1843 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1846 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1847 Return list of charset and one or two position-codes of CHAR.
1851 /* This function can GC */
1852 struct gcpro gcpro1, gcpro2;
1853 Lisp_Object charset = Qnil;
1854 Lisp_Object rc = Qnil;
1857 GCPRO2 (charset, rc);
1858 CHECK_CHAR_COERCE_INT (character);
1860 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1862 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1864 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1868 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1876 #ifdef ENABLE_COMPOSITE_CHARS
1877 /************************************************************************/
1878 /* composite character functions */
1879 /************************************************************************/
1882 lookup_composite_char (Bufbyte *str, int len)
1884 Lisp_Object lispstr = make_string (str, len);
1885 Lisp_Object ch = Fgethash (lispstr,
1886 Vcomposite_char_string2char_hash_table,
1892 if (composite_char_row_next >= 128)
1893 signal_simple_error ("No more composite chars available", lispstr);
1894 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1895 composite_char_col_next);
1896 Fputhash (make_char (emch), lispstr,
1897 Vcomposite_char_char2string_hash_table);
1898 Fputhash (lispstr, make_char (emch),
1899 Vcomposite_char_string2char_hash_table);
1900 composite_char_col_next++;
1901 if (composite_char_col_next >= 128)
1903 composite_char_col_next = 32;
1904 composite_char_row_next++;
1913 composite_char_string (Emchar ch)
1915 Lisp_Object str = Fgethash (make_char (ch),
1916 Vcomposite_char_char2string_hash_table,
1918 assert (!UNBOUNDP (str));
1922 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1923 Convert a string into a single composite character.
1924 The character is the result of overstriking all the characters in
1929 CHECK_STRING (string);
1930 return make_char (lookup_composite_char (XSTRING_DATA (string),
1931 XSTRING_LENGTH (string)));
1934 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1935 Return a string of the characters comprising a composite character.
1943 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1944 signal_simple_error ("Must be composite char", ch);
1945 return composite_char_string (emch);
1947 #endif /* ENABLE_COMPOSITE_CHARS */
1950 /************************************************************************/
1951 /* initialization */
1952 /************************************************************************/
1955 syms_of_mule_charset (void)
1957 DEFSUBR (Fcharsetp);
1958 DEFSUBR (Ffind_charset);
1959 DEFSUBR (Fget_charset);
1960 DEFSUBR (Fcharset_list);
1961 DEFSUBR (Fcharset_name);
1962 DEFSUBR (Fmake_charset);
1963 DEFSUBR (Fmake_reverse_direction_charset);
1964 /* DEFSUBR (Freverse_direction_charset); */
1965 DEFSUBR (Fdefine_charset_alias);
1966 DEFSUBR (Fcharset_from_attributes);
1967 DEFSUBR (Fcharset_short_name);
1968 DEFSUBR (Fcharset_long_name);
1969 DEFSUBR (Fcharset_description);
1970 DEFSUBR (Fcharset_dimension);
1971 DEFSUBR (Fcharset_property);
1972 DEFSUBR (Fcharset_id);
1973 DEFSUBR (Fset_charset_ccl_program);
1974 DEFSUBR (Fset_charset_registry);
1976 DEFSUBR (Fcharset_mapping_table);
1977 DEFSUBR (Fset_charset_mapping_table);
1980 DEFSUBR (Fmake_char);
1981 DEFSUBR (Fchar_charset);
1982 DEFSUBR (Fsplit_char);
1984 #ifdef ENABLE_COMPOSITE_CHARS
1985 DEFSUBR (Fmake_composite_char);
1986 DEFSUBR (Fcomposite_char_string);
1989 defsymbol (&Qcharsetp, "charsetp");
1990 defsymbol (&Qregistry, "registry");
1991 defsymbol (&Qfinal, "final");
1992 defsymbol (&Qgraphic, "graphic");
1993 defsymbol (&Qdirection, "direction");
1994 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1995 defsymbol (&Qshort_name, "short-name");
1996 defsymbol (&Qlong_name, "long-name");
1998 defsymbol (&Ql2r, "l2r");
1999 defsymbol (&Qr2l, "r2l");
2001 /* Charsets, compatible with FSF 20.3
2002 Naming convention is Script-Charset[-Edition] */
2003 defsymbol (&Qascii, "ascii");
2004 defsymbol (&Qcontrol_1, "control-1");
2005 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2006 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2007 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2008 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2009 defsymbol (&Qthai_tis620, "thai-tis620");
2010 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2011 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2012 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2013 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2014 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2015 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2016 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2017 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2018 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2019 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2020 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2021 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2022 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2023 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2025 defsymbol (&Qucs_bmp, "ucs-bmp");
2026 defsymbol (&Qlatin_viscii, "latin-viscii");
2027 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2028 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2029 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2030 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2031 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2032 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2034 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2035 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2037 defsymbol (&Qcomposite, "composite");
2041 vars_of_mule_charset (void)
2048 /* Table of charsets indexed by leading byte. */
2049 for (i = 0; i < countof (charset_by_leading_byte); i++)
2050 charset_by_leading_byte[i] = Qnil;
2053 /* Table of charsets indexed by type/final-byte. */
2054 for (i = 0; i < countof (charset_by_attributes); i++)
2055 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2056 charset_by_attributes[i][j] = Qnil;
2058 /* Table of charsets indexed by type/final-byte/direction. */
2059 for (i = 0; i < countof (charset_by_attributes); i++)
2060 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2061 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2062 charset_by_attributes[i][j][k] = Qnil;
2066 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2068 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2069 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2073 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2074 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2075 Leading-code of private TYPE9N charset of column-width 1.
2077 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2081 Vutf_2000_version = build_string("0.8 (Kami)");
2082 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2083 Version number of UTF-2000.
2086 Vdefault_coded_charset_priority_list = Qnil;
2087 DEFVAR_LISP ("default-coded-charset-priority-list",
2088 &Vdefault_coded_charset_priority_list /*
2089 Default order of preferred coded-character-set.
2095 complex_vars_of_mule_charset (void)
2097 staticpro (&Vcharset_hash_table);
2098 Vcharset_hash_table =
2099 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2101 /* Predefined character sets. We store them into variables for
2106 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2107 CHARSET_TYPE_256X256, 1, 2, 0,
2108 CHARSET_LEFT_TO_RIGHT,
2109 build_string ("BMP"),
2110 build_string ("BMP"),
2111 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2112 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2113 Qnil, 0, 0xFFFF, 0, 0);
2115 # define MIN_CHAR_THAI 0
2116 # define MAX_CHAR_THAI 0
2117 # define MIN_CHAR_GREEK 0
2118 # define MAX_CHAR_GREEK 0
2119 # define MIN_CHAR_HEBREW 0
2120 # define MAX_CHAR_HEBREW 0
2121 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2122 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2123 # define MIN_CHAR_CYRILLIC 0
2124 # define MAX_CHAR_CYRILLIC 0
2127 make_charset (LEADING_BYTE_ASCII, Qascii,
2128 CHARSET_TYPE_94, 1, 0, 'B',
2129 CHARSET_LEFT_TO_RIGHT,
2130 build_string ("ASCII"),
2131 build_string ("ASCII)"),
2132 build_string ("ASCII (ISO646 IRV)"),
2133 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2134 Qnil, 0, 0x7F, 0, 0);
2135 Vcharset_control_1 =
2136 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2137 CHARSET_TYPE_94, 1, 1, 0,
2138 CHARSET_LEFT_TO_RIGHT,
2139 build_string ("C1"),
2140 build_string ("Control characters"),
2141 build_string ("Control characters 128-191"),
2143 Qnil, 0x80, 0x9F, 0, 0);
2144 Vcharset_latin_iso8859_1 =
2145 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2146 CHARSET_TYPE_96, 1, 1, 'A',
2147 CHARSET_LEFT_TO_RIGHT,
2148 build_string ("Latin-1"),
2149 build_string ("ISO8859-1 (Latin-1)"),
2150 build_string ("ISO8859-1 (Latin-1)"),
2151 build_string ("iso8859-1"),
2152 Qnil, 0xA0, 0xFF, 0, 32);
2153 Vcharset_latin_iso8859_2 =
2154 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2155 CHARSET_TYPE_96, 1, 1, 'B',
2156 CHARSET_LEFT_TO_RIGHT,
2157 build_string ("Latin-2"),
2158 build_string ("ISO8859-2 (Latin-2)"),
2159 build_string ("ISO8859-2 (Latin-2)"),
2160 build_string ("iso8859-2"),
2162 Vcharset_latin_iso8859_3 =
2163 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2164 CHARSET_TYPE_96, 1, 1, 'C',
2165 CHARSET_LEFT_TO_RIGHT,
2166 build_string ("Latin-3"),
2167 build_string ("ISO8859-3 (Latin-3)"),
2168 build_string ("ISO8859-3 (Latin-3)"),
2169 build_string ("iso8859-3"),
2171 Vcharset_latin_iso8859_4 =
2172 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2173 CHARSET_TYPE_96, 1, 1, 'D',
2174 CHARSET_LEFT_TO_RIGHT,
2175 build_string ("Latin-4"),
2176 build_string ("ISO8859-4 (Latin-4)"),
2177 build_string ("ISO8859-4 (Latin-4)"),
2178 build_string ("iso8859-4"),
2180 Vcharset_thai_tis620 =
2181 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2182 CHARSET_TYPE_96, 1, 1, 'T',
2183 CHARSET_LEFT_TO_RIGHT,
2184 build_string ("TIS620"),
2185 build_string ("TIS620 (Thai)"),
2186 build_string ("TIS620.2529 (Thai)"),
2187 build_string ("tis620"),
2188 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2189 Vcharset_greek_iso8859_7 =
2190 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2191 CHARSET_TYPE_96, 1, 1, 'F',
2192 CHARSET_LEFT_TO_RIGHT,
2193 build_string ("ISO8859-7"),
2194 build_string ("ISO8859-7 (Greek)"),
2195 build_string ("ISO8859-7 (Greek)"),
2196 build_string ("iso8859-7"),
2197 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2198 Vcharset_arabic_iso8859_6 =
2199 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2200 CHARSET_TYPE_96, 1, 1, 'G',
2201 CHARSET_RIGHT_TO_LEFT,
2202 build_string ("ISO8859-6"),
2203 build_string ("ISO8859-6 (Arabic)"),
2204 build_string ("ISO8859-6 (Arabic)"),
2205 build_string ("iso8859-6"),
2207 Vcharset_hebrew_iso8859_8 =
2208 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2209 CHARSET_TYPE_96, 1, 1, 'H',
2210 CHARSET_RIGHT_TO_LEFT,
2211 build_string ("ISO8859-8"),
2212 build_string ("ISO8859-8 (Hebrew)"),
2213 build_string ("ISO8859-8 (Hebrew)"),
2214 build_string ("iso8859-8"),
2215 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2216 Vcharset_katakana_jisx0201 =
2217 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2218 CHARSET_TYPE_94, 1, 1, 'I',
2219 CHARSET_LEFT_TO_RIGHT,
2220 build_string ("JISX0201 Kana"),
2221 build_string ("JISX0201.1976 (Japanese Kana)"),
2222 build_string ("JISX0201.1976 Japanese Kana"),
2223 build_string ("jisx0201\\.1976"),
2225 MIN_CHAR_HALFWIDTH_KATAKANA,
2226 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2227 Vcharset_latin_jisx0201 =
2228 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2229 CHARSET_TYPE_94, 1, 0, 'J',
2230 CHARSET_LEFT_TO_RIGHT,
2231 build_string ("JISX0201 Roman"),
2232 build_string ("JISX0201.1976 (Japanese Roman)"),
2233 build_string ("JISX0201.1976 Japanese Roman"),
2234 build_string ("jisx0201\\.1976"),
2236 Vcharset_cyrillic_iso8859_5 =
2237 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2238 CHARSET_TYPE_96, 1, 1, 'L',
2239 CHARSET_LEFT_TO_RIGHT,
2240 build_string ("ISO8859-5"),
2241 build_string ("ISO8859-5 (Cyrillic)"),
2242 build_string ("ISO8859-5 (Cyrillic)"),
2243 build_string ("iso8859-5"),
2244 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2245 Vcharset_latin_iso8859_9 =
2246 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2247 CHARSET_TYPE_96, 1, 1, 'M',
2248 CHARSET_LEFT_TO_RIGHT,
2249 build_string ("Latin-5"),
2250 build_string ("ISO8859-9 (Latin-5)"),
2251 build_string ("ISO8859-9 (Latin-5)"),
2252 build_string ("iso8859-9"),
2254 Vcharset_japanese_jisx0208_1978 =
2255 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2256 CHARSET_TYPE_94X94, 2, 0, '@',
2257 CHARSET_LEFT_TO_RIGHT,
2258 build_string ("JIS X0208:1978"),
2259 build_string ("JIS X0208:1978 (Japanese)"),
2261 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2262 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2264 Vcharset_chinese_gb2312 =
2265 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2266 CHARSET_TYPE_94X94, 2, 0, 'A',
2267 CHARSET_LEFT_TO_RIGHT,
2268 build_string ("GB2312"),
2269 build_string ("GB2312)"),
2270 build_string ("GB2312 Chinese simplified"),
2271 build_string ("gb2312"),
2273 Vcharset_japanese_jisx0208 =
2274 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2275 CHARSET_TYPE_94X94, 2, 0, 'B',
2276 CHARSET_LEFT_TO_RIGHT,
2277 build_string ("JISX0208"),
2278 build_string ("JIS X0208:1983 (Japanese)"),
2279 build_string ("JIS X0208:1983 Japanese Kanji"),
2280 build_string ("jisx0208\\.1983"),
2282 Vcharset_korean_ksc5601 =
2283 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2284 CHARSET_TYPE_94X94, 2, 0, 'C',
2285 CHARSET_LEFT_TO_RIGHT,
2286 build_string ("KSC5601"),
2287 build_string ("KSC5601 (Korean"),
2288 build_string ("KSC5601 Korean Hangul and Hanja"),
2289 build_string ("ksc5601"),
2291 Vcharset_japanese_jisx0212 =
2292 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2293 CHARSET_TYPE_94X94, 2, 0, 'D',
2294 CHARSET_LEFT_TO_RIGHT,
2295 build_string ("JISX0212"),
2296 build_string ("JISX0212 (Japanese)"),
2297 build_string ("JISX0212 Japanese Supplement"),
2298 build_string ("jisx0212"),
2301 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2302 Vcharset_chinese_cns11643_1 =
2303 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2304 CHARSET_TYPE_94X94, 2, 0, 'G',
2305 CHARSET_LEFT_TO_RIGHT,
2306 build_string ("CNS11643-1"),
2307 build_string ("CNS11643-1 (Chinese traditional)"),
2309 ("CNS 11643 Plane 1 Chinese traditional"),
2310 build_string (CHINESE_CNS_PLANE_RE("1")),
2312 Vcharset_chinese_cns11643_2 =
2313 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2314 CHARSET_TYPE_94X94, 2, 0, 'H',
2315 CHARSET_LEFT_TO_RIGHT,
2316 build_string ("CNS11643-2"),
2317 build_string ("CNS11643-2 (Chinese traditional)"),
2319 ("CNS 11643 Plane 2 Chinese traditional"),
2320 build_string (CHINESE_CNS_PLANE_RE("2")),
2323 Vcharset_latin_viscii_lower =
2324 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2325 CHARSET_TYPE_96, 1, 1, '1',
2326 CHARSET_LEFT_TO_RIGHT,
2327 build_string ("VISCII lower"),
2328 build_string ("VISCII lower (Vietnamese)"),
2329 build_string ("VISCII lower (Vietnamese)"),
2330 build_string ("MULEVISCII-LOWER"),
2332 Vcharset_latin_viscii_upper =
2333 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2334 CHARSET_TYPE_96, 1, 1, '2',
2335 CHARSET_LEFT_TO_RIGHT,
2336 build_string ("VISCII upper"),
2337 build_string ("VISCII upper (Vietnamese)"),
2338 build_string ("VISCII upper (Vietnamese)"),
2339 build_string ("MULEVISCII-UPPER"),
2341 Vcharset_latin_viscii =
2342 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2343 CHARSET_TYPE_256, 1, 2, 0,
2344 CHARSET_LEFT_TO_RIGHT,
2345 build_string ("VISCII"),
2346 build_string ("VISCII 1.1 (Vietnamese)"),
2347 build_string ("VISCII 1.1 (Vietnamese)"),
2348 build_string ("VISCII1\\.1"),
2350 Vcharset_hiragana_jisx0208 =
2351 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2352 CHARSET_TYPE_94X94, 2, 0, 'B',
2353 CHARSET_LEFT_TO_RIGHT,
2354 build_string ("Hiragana"),
2355 build_string ("Hiragana of JIS X0208"),
2356 build_string ("Japanese Hiragana of JIS X0208"),
2357 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2358 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2359 (0x24 - 33) * 94 + (0x21 - 33), 33);
2360 Vcharset_katakana_jisx0208 =
2361 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2362 CHARSET_TYPE_94X94, 2, 0, 'B',
2363 CHARSET_LEFT_TO_RIGHT,
2364 build_string ("Katakana"),
2365 build_string ("Katakana of JIS X0208"),
2366 build_string ("Japanese Katakana of JIS X0208"),
2367 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2368 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2369 (0x25 - 33) * 94 + (0x21 - 33), 33);
2371 Vcharset_chinese_big5_1 =
2372 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2373 CHARSET_TYPE_94X94, 2, 0, '0',
2374 CHARSET_LEFT_TO_RIGHT,
2375 build_string ("Big5"),
2376 build_string ("Big5 (Level-1)"),
2378 ("Big5 Level-1 Chinese traditional"),
2379 build_string ("big5"),
2381 Vcharset_chinese_big5_2 =
2382 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2383 CHARSET_TYPE_94X94, 2, 0, '1',
2384 CHARSET_LEFT_TO_RIGHT,
2385 build_string ("Big5"),
2386 build_string ("Big5 (Level-2)"),
2388 ("Big5 Level-2 Chinese traditional"),
2389 build_string ("big5"),
2392 #ifdef ENABLE_COMPOSITE_CHARS
2393 /* #### For simplicity, we put composite chars into a 96x96 charset.
2394 This is going to lead to problems because you can run out of
2395 room, esp. as we don't yet recycle numbers. */
2396 Vcharset_composite =
2397 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2398 CHARSET_TYPE_96X96, 2, 0, 0,
2399 CHARSET_LEFT_TO_RIGHT,
2400 build_string ("Composite"),
2401 build_string ("Composite characters"),
2402 build_string ("Composite characters"),
2405 composite_char_row_next = 32;
2406 composite_char_col_next = 32;
2408 Vcomposite_char_string2char_hash_table =
2409 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2410 Vcomposite_char_char2string_hash_table =
2411 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2412 staticpro (&Vcomposite_char_string2char_hash_table);
2413 staticpro (&Vcomposite_char_char2string_hash_table);
2414 #endif /* ENABLE_COMPOSITE_CHARS */