1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): charset_by_attributes is declared twice below, once
   without and once with a trailing [2] (direction) index, and make_charset
   uses both indexing shapes.  The listing's line numbers skip around the
   two declarations, so presumably they live in alternative preprocessor
   branches -- confirm.  */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* Not const: make_charset overwrites rep_bytes_by_first_byte[id] with
   CHARSET_REP_BYTES when a charset is registered.  The declared size 0xA0
   (160) is exactly 128 ASCII entries plus two 16-entry rows.  */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the next two rows differ only in their last entry (3 vs 2)
   and only one of them fits in the declared size.  The listing's line
   numbers skip around them, so presumably they are alternative
   preprocessor branches -- confirm.  */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method for char-byte-table objects (it is the mark function handed
   to DEFINE_LRECORD_IMPLEMENTATION for this type).  Applies MARKOBJ to each
   of the 256 entries of the table's property array.  The return statement
   is not part of this excerpt.  */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method for char-byte-tables: walks the 256 slots of OBJ1 and
   OBJ2 in parallel.  A slot that holds a nested char-byte-table in OBJ1 is
   compared recursively when OBJ2's slot is a table too; any other slot is
   compared with internal_equal.  Both recursions pass DEPTH + 1.  The
   return statements (and the handling of a table/non-table mismatch) are
   on lines missing from this excerpt.  */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-tables: hashes the 256-entry property array
   with internal_array_hash, passing DEPTH through unchanged.  */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* Memory layout description for struct Lisp_Char_Byte_Table: one
   XD_LISP_OBJECT entry at the offset of `property'; the 256 presumably is
   the number of consecutive Lisp_Object slots -- confirm against the
   lrecord_description definition.  The terminating entry is not part of
   this excerpt.  */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Registers the char_byte_table record type with its mark, print, equal
   and hash methods and its layout description.  Note that the name string
   given here is "char-code-table" although the C identifier is
   char_byte_table.  */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a fresh char-byte-table whose 256 slots all hold INITVAL and
   wrap it in the Lisp_Object OBJ.  The declaration of OBJ and the return
   statement are on lines missing from this excerpt.  */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a newly allocated table.  Slots that
   hold nested char-byte-tables are copied recursively; every other slot
   value is stored in the copy as-is, so those values are shared between
   original and copy.  The return statement is not part of this excerpt.  */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
/* `new' is an ordinary identifier in C, but it makes this file
   uncompilable as C++.  */
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table is created as a char-byte-table: this is a plain alias
   for make_char_byte_table.  */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in the nested char-byte TABLE.
   CH is consumed one byte at a time, most significant byte first; whenever
   the entry found is itself a char-byte-table the search descends into it,
   and the last level returns the entry indexed by the low byte of CH.
   What happens when an intermediate entry is not a table is on lines
   missing from this excerpt (presumably that entry is returned -- confirm).

   NOTE(review): the first index, ch >> 24, is not narrowed with
   (unsigned char) the way the other three are.  If Emchar is a signed type
   a negative CH would index before the start of the array -- confirm.  */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in the nested char-byte TABLE.  The four
   bytes of CH, most significant first, select one slot per level.  Where a
   level already holds a sub-table the walk descends into it.  Where it
   holds a plain value RET that is not EQ to VALUE, the missing lower levels
   are created with make_char_byte_table (ret), so every other slot under
   that prefix keeps reading as RET, and only CH's own slot receives VALUE.
   No branch visible here does anything when RET is already EQ to VALUE.

   NOTE(review): the top-level read uses ch >> 24 without the
   (unsigned char) cast that the matching store at the end of the function
   applies; if Emchar is signed and CH is negative the two would address
   different slots -- confirm.  */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
/* Levels 1-3 exist; only the leaf table has to be created.  */
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* Levels 1-2 exist; create the third level and the leaf.  */
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
/* The right-hand side of the next assignment is on a line missing from
   this excerpt.  */
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* Only the top level exists; create all three lower levels.  */
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vutf_2000_version;
315 int leading_code_private_11;
318 Lisp_Object Qcharsetp;
320 /* Qdoc_string, Qdimension, Qchars defined in general.c */
321 Lisp_Object Qregistry, Qfinal, Qgraphic;
322 Lisp_Object Qdirection;
323 Lisp_Object Qreverse_direction_charset;
324 Lisp_Object Qleading_byte;
325 Lisp_Object Qshort_name, Qlong_name;
341 Qjapanese_jisx0208_1978,
353 Qvietnamese_viscii_lower,
354 Qvietnamese_viscii_upper,
362 Lisp_Object Ql2r, Qr2l;
364 Lisp_Object Vcharset_hash_table;
366 static Charset_ID next_allocated_1_byte_leading_byte;
367 static Charset_ID next_allocated_2_byte_leading_byte;
369 /* Composite characters are characters constructed by overstriking two
370 or more regular characters.
372 1) The old Mule implementation involves storing composite characters
373 in a buffer as a tag followed by all of the actual characters
374 used to make up the composite character. I think this is a bad
375 idea; it greatly complicates code that wants to handle strings
376 one character at a time because it has to deal with the possibility
377 of great big ungainly characters. It's much more reasonable to
378 simply store an index into a table of composite characters.
380 2) The current implementation only allows for 16,384 separate
381 composite characters over the lifetime of the XEmacs process.
382 This could become a potential problem if the user
383 edited lots of different files that use composite characters.
384 Due to FSF bogosity, increasing the number of allowable
385 composite characters under Mule would decrease the number
386 of possible faces that can exist. Mule already has shrunk
387 this to 2048, and further shrinkage would become uncomfortable.
388 No such problems exist in XEmacs.
390 Composite characters could be represented as 0x80 C1 C2 C3,
391 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
392 for slightly under 2^20 (one million) composite characters
393 over the XEmacs process lifetime, and you only need to
394 increase the size of a Mule character from 19 to 21 bits.
395 Or you could use 0x80 C1 C2 C3 C4, allowing for about
396 85 million (slightly over 2^26) composite characters. */
399 /************************************************************************/
400 /* Basic Emchar functions */
401 /************************************************************************/
403 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
404 string in STR. Returns the number of bytes stored.
405 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Encode C into STR.  Two encodings are visible in this excerpt and the
   listing's line numbers skip between them, so presumably they are
   alternative preprocessor branches -- confirm.

   (1) A variable-length form chosen by magnitude: c <= 0x7ff gives 2
       bytes, <= 0xffff 3, <= 0x1fffff 4, <= 0x3ffffff 5, and the last
       group writes 6.  The lead byte is tagged 0xc0 / 0xe0 / 0xf0 / 0xf8 /
       0xfc and every following byte is 0x80 | six payload bits.
   (2) A leading-byte form: BREAKUP_CHAR splits C into charset, c1 and c2;
       a private leading byte is preceded by
       PRIVATE_LEADING_BYTE_PREFIX (lb); Vcharset_control_1 is
       special-cased (its handling is not visible here).

   The declaration of P and the return statement are on missing lines.  */
409 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
424 else if ( c <= 0x7ff )
426 *p++ = (c >> 6) | 0xc0;
427 *p++ = (c & 0x3f) | 0x80;
429 else if ( c <= 0xffff )
431 *p++ = (c >> 12) | 0xe0;
432 *p++ = ((c >> 6) & 0x3f) | 0x80;
433 *p++ = (c & 0x3f) | 0x80;
435 else if ( c <= 0x1fffff )
437 *p++ = (c >> 18) | 0xf0;
438 *p++ = ((c >> 12) & 0x3f) | 0x80;
439 *p++ = ((c >> 6) & 0x3f) | 0x80;
440 *p++ = (c & 0x3f) | 0x80;
442 else if ( c <= 0x3ffffff )
444 *p++ = (c >> 24) | 0xf8;
445 *p++ = ((c >> 18) & 0x3f) | 0x80;
446 *p++ = ((c >> 12) & 0x3f) | 0x80;
447 *p++ = ((c >> 6) & 0x3f) | 0x80;
448 *p++ = (c & 0x3f) | 0x80;
452 *p++ = (c >> 30) | 0xfc;
453 *p++ = ((c >> 24) & 0x3f) | 0x80;
454 *p++ = ((c >> 18) & 0x3f) | 0x80;
455 *p++ = ((c >> 12) & 0x3f) | 0x80;
456 *p++ = ((c >> 6) & 0x3f) | 0x80;
457 *p++ = (c & 0x3f) | 0x80;
460 BREAKUP_CHAR (c, charset, c1, c2);
461 lb = CHAR_LEADING_BYTE (c);
462 if (LEADING_BYTE_PRIVATE_P (lb))
463 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
465 if (EQ (charset, Vcharset_control_1))
474 /* Return the first character from a Mule-encoded string in STR,
475 assuming it's non-ASCII. Do not call this directly.
476 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.  As with the encoder, two
   decoders are visible and presumably belong to alternative preprocessor
   branches -- confirm.

   (1) The lead byte B is classified against 0xf8, 0xf0, 0xe0 and 0xc0,
       highest first (the assignments those tests guard are not visible);
       a loop then runs LEN times, shifting CH left by six bits and OR-ing
       in the low six bits of B each time.
   (2) Leading-byte form: LEADING_BYTE_CONTROL_1 yields the next byte minus
       0x20; otherwise the charset is found from the leading byte I0 (after
       prefix handling that is not visible), dimension-2 charsets get a
       second position byte, and MAKE_CHAR combines charset, I1 and I2.  */
479 non_ascii_charptr_emchar (CONST Bufbyte *str)
492 else if ( b >= 0xf8 )
497 else if ( b >= 0xf0 )
502 else if ( b >= 0xe0 )
507 else if ( b >= 0xc0 )
517 for( ; len > 0; len-- )
520 ch = ( ch << 6 ) | ( b & 0x3f );
524 Bufbyte i0 = *str, i1, i2 = 0;
527 if (i0 == LEADING_BYTE_CONTROL_1)
528 return (Emchar) (*++str - 0x20);
530 if (LEADING_BYTE_PREFIX_P (i0))
535 charset = CHARSET_BY_LEADING_BYTE (i0);
536 if (XCHARSET_DIMENSION (charset) == 2)
539 return MAKE_CHAR (charset, i1, i2);
543 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
544 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validate a non-ASCII Emchar CH by splitting it into CHAR_FIELD1..3.
   The visible tests range-check f2 against the official/private FIELD2
   bounds and f1 against the official/private FIELD1 bounds, and test f2
   and f3 against 0x20.  Where a position field is 0x20 or 0x7F the result
   is whether the charset has 96 characters per dimension
   (XCHARSET_CHARS (charset) == 96).  With ENABLE_COMPOSITE_CHARS, a
   character whose leading byte is LEADING_BYTE_COMPOSITE is additionally
   looked up in Vcomposite_char_char2string_hash_table.  The statements
   guarded by most of these tests (the early returns) and the condition
   separating the one-field-pair and two-field-pair cases are on lines
   missing from this excerpt.  */
548 non_ascii_valid_char_p (Emchar ch)
552 /* Must have only lowest 19 bits set */
556 f1 = CHAR_FIELD1 (ch);
557 f2 = CHAR_FIELD2 (ch);
558 f3 = CHAR_FIELD3 (ch);
564 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
565 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
566 f2 > MAX_CHAR_FIELD2_PRIVATE)
571 if (f3 != 0x20 && f3 != 0x7F)
575 NOTE: This takes advantage of the fact that
576 FIELD2_TO_OFFICIAL_LEADING_BYTE and
577 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
579 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
580 return (XCHARSET_CHARS (charset) == 96);
586 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
587 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
588 f1 > MAX_CHAR_FIELD1_PRIVATE)
590 if (f2 < 0x20 || f3 < 0x20)
593 #ifdef ENABLE_COMPOSITE_CHARS
594 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
596 if (UNBOUNDP (Fgethash (make_int (ch),
597 Vcomposite_char_char2string_hash_table,
602 #endif /* ENABLE_COMPOSITE_CHARS */
604 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
607 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
609 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
612 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
614 return (XCHARSET_CHARS (charset) == 96);
620 /************************************************************************/
621 /* Basic string functions */
622 /************************************************************************/
624 /* Copy the character pointed to by PTR into STR, assuming it's
625 non-ASCII. Do not call this directly. Use the macro
626 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR.  The switch dispatches on
   REP_BYTES_BY_FIRST_BYTE of the byte at STRPTR and deliberately falls
   through, so an N-byte character executes the last N-1 copy statements.
   The result, strptr + 1 - str, is the number of bytes now in STR.

   NOTE(review): the listing skips a line between the initialisation of
   STRPTR and the switch; presumably the first byte is copied from PTR
   there, since the switch reads *strptr -- confirm.  */
629 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
631 Bufbyte *strptr = str;
633 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
635 /* Notice fallthrough. */
637 case 6: *++strptr = *ptr++;
638 case 5: *++strptr = *ptr++;
640 case 4: *++strptr = *ptr++;
641 case 3: *++strptr = *ptr++;
642 case 2: *++strptr = *ptr;
647 return strptr + 1 - str;
651 /************************************************************************/
652 /* streams of Emchars */
653 /************************************************************************/
655 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
656 The functions below are not meant to be called directly; use
657 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM whose first byte CH
   has already been read.  CH seeds a local buffer, and
   REP_BYTES_BY_FIRST_BYTE (ch) selects how many further bytes are fetched
   with Lstream_getc and appended.  The case labels and any checks applied
   to those reads are on lines missing from this excerpt.  The completed
   buffer is decoded with charptr_emchar.  */
660 Lstream_get_emchar_1 (Lstream *stream, int ch)
662 Bufbyte str[MAX_EMCHAR_LEN];
663 Bufbyte *strptr = str;
665 str[0] = (Bufbyte) ch;
666 switch (REP_BYTES_BY_FIRST_BYTE (ch))
668 /* Notice fallthrough. */
671 ch = Lstream_getc (stream);
673 *++strptr = (Bufbyte) ch;
675 ch = Lstream_getc (stream);
677 *++strptr = (Bufbyte) ch;
680 ch = Lstream_getc (stream);
682 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
688 ch = Lstream_getc (stream);
690 *++strptr = (Bufbyte) ch;
695 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local MAX_EMCHAR_LEN
   buffer with set_charptr_emchar, then pass the encoded bytes to
   Lstream_write and return its result.  */
699 Lstream_fput_emchar (Lstream *stream, Emchar ch)
701 Bufbyte str[MAX_EMCHAR_LEN];
702 Bytecount len = set_charptr_emchar (str, ch);
703 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local
   MAX_EMCHAR_LEN buffer with set_charptr_emchar and hand the encoded bytes
   to Lstream_unread.  */
707 Lstream_funget_emchar (Lstream *stream, Emchar ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bytecount len = set_charptr_emchar (str, ch);
711 Lstream_unread (stream, str, len);
715 /************************************************************************/
717 /************************************************************************/
/* Mark method for charset objects: applies MARKOBJ to the short name, long
   name, doc string, registry, CCL program, decoding table and encoding
   table of OBJ.  The listing skips a line before the two table fields, so
   presumably they are conditionally compiled -- confirm.  The return
   statement is not part of this excerpt.  */
720 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
722 struct Lisp_Charset *cs = XCHARSET (obj);
724 markobj (cs->short_name);
725 markobj (cs->long_name);
726 markobj (cs->doc_string);
727 markobj (cs->registry);
728 markobj (cs->ccl_program);
730 markobj (cs->decoding_table);
731 markobj (cs->encoding_table);
/* Print method for charset objects.  The visible code signals
   "printing unreadable object" (the guarding condition is on a missing
   line) and otherwise writes, to PRINTCHARFUN, the charset's name, short
   name, long name and doc string, then its type (94, 96, 94x94 or the
   fallback on a missing line), direction (l2r / r2l), columns, graphic,
   final byte and registry, and finally the object's uid in hex.

   NOTE(review): both sprintf calls format into BUF with no bound.  BUF's
   declaration is not in this excerpt; confirm it is large enough or use a
   bounded formatter.  */
737 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
739 struct Lisp_Charset *cs = XCHARSET (obj);
743 error ("printing unreadable object #<charset %s 0x%x>",
744 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
747 write_c_string ("#<charset ", printcharfun);
748 print_internal (CHARSET_NAME (cs), printcharfun, 0);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
751 write_c_string (" ", printcharfun);
752 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
753 write_c_string (" ", printcharfun);
754 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
755 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
756 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
757 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
758 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
760 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
761 CHARSET_COLUMNS (cs),
762 CHARSET_GRAPHIC (cs),
764 write_c_string (buf, printcharfun);
765 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
766 sprintf (buf, " 0x%x>", cs->header.uid);
767 write_c_string (buf, printcharfun);
/* Memory layout description for struct Lisp_Charset: an XD_LISP_OBJECT
   entry starting at `name' and another starting at `decoding_table'.  The
   trailing 7 and 2 presumably count consecutive Lisp_Object slots --
   confirm against the lrecord_description definition.  The listing skips a
   line between the two entries and omits the terminator.  */
770 static const struct lrecord_description charset_description[] = {
771 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
773 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Registers the charset record type with mark_charset and print_charset;
   the three 0 arguments leave the remaining method slots empty.  One
   argument line is missing from this excerpt.  */
778 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
779 mark_charset, print_charset, 0, 0, 0,
781 struct Lisp_Charset);
783 /* Make a new charset. */
/* Allocate a struct Lisp_Charset, fill it in from the arguments and
   register it.  Steps visible in this excerpt:
     - the scalar and name fields are stored as given; the CCL program and
       the reverse-direction charset start out as Qnil;
     - CHARSET_DIMENSION and CHARSET_CHARS are derived from TYPE;
     - CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII, otherwise dimension
       + 1 or dimension + 2 (the condition choosing between the two is on a
       missing line);
     - the object is entered into charset_by_attributes,
       charset_by_leading_byte, rep_bytes_by_first_byte and, under NAME,
       Vcharset_hash_table.  The asserts require the table slots to be
       empty beforehand.
   Several conditions, one parameter line (REG) and the return statement
   are on lines missing from this excerpt.

   NOTE(review): the DECODING_TABLE parameter is not stored by any visible
   line; CHARSET_DECODING_TABLE is set to Qnil instead -- confirm whether
   that is intended.  */
786 make_charset (Charset_ID id, Lisp_Object name,
787 unsigned char type, unsigned char columns, unsigned char graphic,
788 Bufbyte final, unsigned char direction, Lisp_Object short_name,
789 Lisp_Object long_name, Lisp_Object doc,
791 Lisp_Object decoding_table,
792 Emchar ucs_min, Emchar ucs_max,
793 Emchar code_offset, unsigned char byte_offset)
796 struct Lisp_Charset *cs =
797 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
798 XSETCHARSET (obj, cs);
800 CHARSET_ID (cs) = id;
801 CHARSET_NAME (cs) = name;
802 CHARSET_SHORT_NAME (cs) = short_name;
803 CHARSET_LONG_NAME (cs) = long_name;
804 CHARSET_DIRECTION (cs) = direction;
805 CHARSET_TYPE (cs) = type;
806 CHARSET_COLUMNS (cs) = columns;
807 CHARSET_GRAPHIC (cs) = graphic;
808 CHARSET_FINAL (cs) = final;
809 CHARSET_DOC_STRING (cs) = doc;
810 CHARSET_REGISTRY (cs) = reg;
811 CHARSET_CCL_PROGRAM (cs) = Qnil;
812 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
814 CHARSET_DECODING_TABLE(cs) = Qnil;
815 CHARSET_ENCODING_TABLE(cs) = Qnil;
816 CHARSET_UCS_MIN(cs) = ucs_min;
817 CHARSET_UCS_MAX(cs) = ucs_max;
818 CHARSET_CODE_OFFSET(cs) = code_offset;
819 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension and characters-per-dimension from the charset type.  */
822 switch (CHARSET_TYPE (cs))
824 case CHARSET_TYPE_94:
825 CHARSET_DIMENSION (cs) = 1;
826 CHARSET_CHARS (cs) = 94;
828 case CHARSET_TYPE_96:
829 CHARSET_DIMENSION (cs) = 1;
830 CHARSET_CHARS (cs) = 96;
832 case CHARSET_TYPE_94X94:
833 CHARSET_DIMENSION (cs) = 2;
834 CHARSET_CHARS (cs) = 94;
836 case CHARSET_TYPE_96X96:
837 CHARSET_DIMENSION (cs) = 2;
838 CHARSET_CHARS (cs) = 96;
841 case CHARSET_TYPE_128:
842 CHARSET_DIMENSION (cs) = 1;
843 CHARSET_CHARS (cs) = 128;
845 case CHARSET_TYPE_128X128:
846 CHARSET_DIMENSION (cs) = 2;
847 CHARSET_CHARS (cs) = 128;
849 case CHARSET_TYPE_256:
850 CHARSET_DIMENSION (cs) = 1;
851 CHARSET_CHARS (cs) = 256;
853 case CHARSET_TYPE_256X256:
854 CHARSET_DIMENSION (cs) = 2;
855 CHARSET_CHARS (cs) = 256;
861 if (id == LEADING_BYTE_ASCII)
862 CHARSET_REP_BYTES (cs) = 1;
864 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
866 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
871 /* some charsets do not have final characters. This includes
872 ASCII, Control-1, Composite, and the two faux private
875 if (code_offset == 0)
877 assert (NILP (charset_by_attributes[type][final]));
878 charset_by_attributes[type][final] = obj;
881 assert (NILP (charset_by_attributes[type][final][direction]));
882 charset_by_attributes[type][final][direction] = obj;
886 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
887 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
890 /* official leading byte */
891 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
894 /* Some charsets are "faux" and don't have names or really exist at
895 all except in the leading-byte table. */
897 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  A separate counter is kept for one-byte and two-byte
   charsets; each is compared against its MAX_LEADING_BYTE_PRIVATE_n limit
   and otherwise post-incremented into LB.  When none is left the error
   "No more character sets free for this dimension" is signalled with
   DIMENSION as its datum.  The tests on DIMENSION, the exhaustion handling
   and the return statement are on lines missing from this excerpt.  */
902 get_unallocated_leading_byte (int dimension)
908 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
911 lb = next_allocated_1_byte_leading_byte++;
915 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
918 lb = next_allocated_2_byte_leading_byte++;
923 ("No more character sets free for this dimension",
924 make_int (dimension));
/* Compute the first position byte of character CH within CHARSET.  Three
   strategies are visible, tried in this order:
     1. if the charset has an encoding table, look CH up with
        get_char_code_table and pick a byte of the resulting integer code
        according to its magnitude (the returns are on missing lines);
     2. if CH lies in [UCS_MIN, UCS_MAX], take the offset
        ch - UCS_MIN + CODE_OFFSET, divide by a power of CHARSET_CHARS
        that depends on the dimension, and add BYTE_OFFSET;
     3. if CODE_OFFSET is 0, derive the range from the final byte:
        D = CH minus the range start MIN_CHAR_xx + (final - '0') * size.
        For 94x94 the result is d / 94 + 33, for 96x96 d / 96 + 32; the
        dimension-1 results and the upper-bound tests are on missing lines.
   What is returned when nothing matches is not visible here.  */
931 charset_get_byte1 (Lisp_Object charset, Emchar ch)
936 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
938 Lisp_Object value = get_char_code_table (ch, table);
942 Emchar code = XINT (value);
946 else if (code < (1 << 16))
948 else if (code < (1 << 24))
954 if ((XCHARSET_UCS_MIN (charset) <= ch)
955 && (ch <= XCHARSET_UCS_MAX (charset)))
956 return (ch - XCHARSET_UCS_MIN (charset)
957 + XCHARSET_CODE_OFFSET (charset))
958 / (XCHARSET_DIMENSION (charset) == 1 ?
961 XCHARSET_DIMENSION (charset) == 2 ?
962 XCHARSET_CHARS (charset)
964 XCHARSET_DIMENSION (charset) == 3 ?
965 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
967 XCHARSET_CHARS (charset)
968 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
969 + XCHARSET_BYTE_OFFSET (charset);
970 else if (XCHARSET_CODE_OFFSET (charset) == 0)
972 if (XCHARSET_DIMENSION (charset) == 1)
974 if (XCHARSET_CHARS (charset) == 94)
976 if (((d = ch - (MIN_CHAR_94
977 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
981 else if (XCHARSET_CHARS (charset) == 96)
983 if (((d = ch - (MIN_CHAR_96
984 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
991 else if (XCHARSET_DIMENSION (charset) == 2)
993 if (XCHARSET_CHARS (charset) == 94)
995 if (((d = ch - (MIN_CHAR_94x94
996 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
999 return (d / 94) + 33;
1001 else if (XCHARSET_CHARS (charset) == 96)
1003 if (((d = ch - (MIN_CHAR_96x96
1004 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1007 return (d / 96) + 32;
/* Compute the second position byte of character CH within CHARSET.
   Dimension-1 charsets are handled first (their result is on a missing
   line).  Otherwise the same three strategies as for the first byte are
   tried: the encoding table, the [UCS_MIN, UCS_MAX] range (offset divided
   by a dimension-dependent power of CHARSET_CHARS, taken modulo
   CHARSET_CHARS, plus BYTE_OFFSET), and finally the final-byte-derived
   94x94 or 96x96 range, which yields (ch - MIN) % 94 + 33 or
   (ch - MIN) % 96 + 32 when CH is inside the charset's block, else 0.

   NOTE(review): in the encoding-table branch a code in [1 << 16, 1 << 24)
   returns code >> 16 and a larger code returns code >> 24, i.e. the most
   significant byte of the code rather than the byte below it -- confirm
   that this is what a "second byte" should be for 3- and 4-byte codes.  */
1015 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1017 if (XCHARSET_DIMENSION (charset) == 1)
1023 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1025 Lisp_Object value = get_char_code_table (ch, table);
1029 Emchar code = XINT (value);
1031 if (code < (1 << 16))
1032 return (unsigned char)code;
1033 else if (code < (1 << 24))
1034 return (unsigned char)(code >> 16);
1036 return (unsigned char)(code >> 24);
1039 if ((XCHARSET_UCS_MIN (charset) <= ch)
1040 && (ch <= XCHARSET_UCS_MAX (charset)))
1041 return ((ch - XCHARSET_UCS_MIN (charset)
1042 + XCHARSET_CODE_OFFSET (charset))
1043 / (XCHARSET_DIMENSION (charset) == 2 ?
1046 XCHARSET_DIMENSION (charset) == 3 ?
1047 XCHARSET_CHARS (charset)
1049 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1050 % XCHARSET_CHARS (charset)
1051 + XCHARSET_BYTE_OFFSET (charset);
1052 else if (XCHARSET_CHARS (charset) == 94)
1053 return (MIN_CHAR_94x94
1054 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1055 && (ch < MIN_CHAR_94x94
1056 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1057 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1058 else /* if (XCHARSET_CHARS (charset) == 96) */
1059 return (MIN_CHAR_96x96
1060 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1061 && (ch < MIN_CHAR_96x96
1062 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1063 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1067 Lisp_Object Vdefault_coded_charset_priority_list;
1071 /************************************************************************/
1072 /* Basic charset Lisp functions */
1073 /************************************************************************/
/* C implementation of the Lisp predicate `charsetp': maps the C-level
   CHARSETP test on OBJECT to Qt or Qnil.  */
1075 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1076 Return non-nil if OBJECT is a charset.
1080 return CHARSETP (object) ? Qt : Qnil;
/* C implementation of `find-charset'.  A charset object is returned
   unchanged; anything else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the not-found result.  */
1083 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1084 Retrieve the charset of the given name.
1085 If CHARSET-OR-NAME is a charset object, it is simply returned.
1086 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1087 nil is returned. Otherwise the associated charset object is returned.
1091 if (CHARSETP (charset_or_name))
1092 return charset_or_name;
1094 CHECK_SYMBOL (charset_or_name);
1095 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* C implementation of `get-charset': delegates to Ffind_charset and
   signals "No such charset" with NAME as its datum.  The test guarding the
   error and the return statement are on lines missing from this
   excerpt.  */
1098 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1099 Retrieve the charset of the given name.
1100 Same as `find-charset' except an error is signalled if there is no such
1101 charset instead of returning nil.
1105 Lisp_Object charset = Ffind_charset (name);
1108 signal_simple_error ("No such charset", name);
1112 /* We store the charsets in hash tables with the names as the key and the
1113 actual charset object as the value. Occasionally we need to use them
1114 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash to add_charset_to_list_mapper.  */
1115 struct charset_list_closure
/* Address of the list being accumulated; the mapper prepends to it.  */
1117 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.  VALUE is a charset;
   its name is consed onto the front of the list that the closure points
   at.  KEY is not used by the visible code.  The return statement is on a
   line missing from this excerpt.  */
1121 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1122 void *charset_list_closure)
1124 /* This function can GC */
1125 struct charset_list_closure *chcl =
1126 (struct charset_list_closure*) charset_list_closure;
1127 Lisp_Object *charset_list = chcl->charset_list;
1129 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* C implementation of `charset-list'.  Starts from an empty list, protects
   it with GCPRO1 (the mapper conses), and lets elisp_maphash run
   add_charset_to_list_mapper over Vcharset_hash_table to collect every
   charset name.  The matching UNGCPRO is presumably on one of the lines
   missing before the return -- confirm.  */
1133 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1134 Return a list of the names of all defined charsets.
1138 Lisp_Object charset_list = Qnil;
1139 struct gcpro gcpro1;
1140 struct charset_list_closure charset_list_closure;
1142 GCPRO1 (charset_list);
1143 charset_list_closure.charset_list = &charset_list;
1144 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1145 &charset_list_closure);
1148 return charset_list;
/* C implementation of `charset-name': resolves CHARSET through
   Fget_charset and returns the name stored in the charset object.  */
1151 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1152 Return the name of the given charset.
1156 return XCHARSET_NAME (Fget_charset (charset));
/* C implementation of `make-charset'.  Visible steps: check NAME and
   DOC-STRING, refuse to redefine an existing charset, validate each
   property in PROPS, choose a charset type from dimension and chars,
   refuse a DIMENSION, CHARS and FINAL combination that is already taken in
   either direction, obtain an id (the visible paths call
   get_unallocated_leading_byte), fill in defaults (empty doc string and
   registry, the symbol's name as short name, the doc string as long name,
   the dimension as columns), call make_charset, and attach the CCL program
   if one was given.  Many guarding conditions, assignments and the return
   statement are on lines missing from this excerpt.

   NOTE(review): in the property loop the test for Qlong_name is a plain
   `if', not `else if', so it starts a new if/else chain.  Unless the
   missing lines leave the loop iteration early, a 'short-name property
   falls through that second chain to its final "Unrecognized property"
   error.  Confirm, and chain the test with `else if'.

   NOTE(review): two different upper bounds for `graphic' are visible
   (> 2 and > 1); presumably they sit in alternative preprocessor
   branches -- confirm.  */
1159 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1160 Define a new character set.
1161 This function is for use with Mule support.
1162 NAME is a symbol, the name by which the character set is normally referred.
1163 DOC-STRING is a string describing the character set.
1164 PROPS is a property list, describing the specific nature of the
1165 character set. Recognized properties are:
1167 'short-name Short version of the charset name (ex: Latin-1)
1168 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1169 'registry A regular expression matching the font registry field for
1171 'dimension Number of octets used to index a character in this charset.
1172 Either 1 or 2. Defaults to 1.
1173 'columns Number of columns used to display a character in this charset.
1174 Only used in TTY mode. (Under X, the actual width of a
1175 character can be derived from the font used to display the
1176 characters.) If unspecified, defaults to the dimension
1177 (this is almost always the correct value).
1178 'chars Number of characters in each dimension (94 or 96).
1179 Defaults to 94. Note that if the dimension is 2, the
1180 character set thus described is 94x94 or 96x96.
1181 'final Final byte of ISO 2022 escape sequence. Must be
1182 supplied. Each combination of (DIMENSION, CHARS) defines a
1183 separate namespace for final bytes. Note that ISO
1184 2022 restricts the final byte to the range
1185 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1186 dimension == 2. Note also that final bytes in the range
1187 0x30 - 0x3F are reserved for user-defined (not official)
1189 'graphic 0 (use left half of font on output) or 1 (use right half
1190 of font on output). Defaults to 0. For example, for
1191 a font whose registry is ISO8859-1, the left half
1192 (octets 0x20 - 0x7F) is the `ascii' character set, while
1193 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1194 character set. With 'graphic set to 0, the octets
1195 will have their high bit cleared; with it set to 1,
1196 the octets will have their high bit set.
1197 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1199 'ccl-program A compiled CCL program used to convert a character in
1200 this charset into an index into the font. This is in
1201 addition to the 'graphic property. The CCL program
1202 is passed the octets of the character, with the high
1203 bit cleared and set depending upon whether the value
1204 of the 'graphic property is 0 or 1.
1206 (name, doc_string, props))
1208 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1209 int direction = CHARSET_LEFT_TO_RIGHT;
1211 Lisp_Object registry = Qnil;
1212 Lisp_Object charset;
1213 Lisp_Object rest, keyword, value;
1214 Lisp_Object ccl_program = Qnil;
1215 Lisp_Object short_name = Qnil, long_name = Qnil;
1217 unsigned char byte_offset = 0;
1220 CHECK_SYMBOL (name);
1221 if (!NILP (doc_string))
1222 CHECK_STRING (doc_string);
1224 charset = Ffind_charset (name);
1225 if (!NILP (charset))
1226 signal_simple_error ("Cannot redefine existing charset", name);
1228 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1230 if (EQ (keyword, Qshort_name))
1232 CHECK_STRING (value);
1236 if (EQ (keyword, Qlong_name))
1238 CHECK_STRING (value);
1242 else if (EQ (keyword, Qdimension))
1245 dimension = XINT (value);
1246 if (dimension < 1 || dimension > 2)
1247 signal_simple_error ("Invalid value for 'dimension", value);
1250 else if (EQ (keyword, Qchars))
1253 chars = XINT (value);
1254 if (chars != 94 && chars != 96)
1255 signal_simple_error ("Invalid value for 'chars", value);
1258 else if (EQ (keyword, Qcolumns))
1261 columns = XINT (value);
1262 if (columns != 1 && columns != 2)
1263 signal_simple_error ("Invalid value for 'columns", value);
1266 else if (EQ (keyword, Qgraphic))
1269 graphic = XINT (value);
1271 if (graphic < 0 || graphic > 2)
1273 if (graphic < 0 || graphic > 1)
1275 signal_simple_error ("Invalid value for 'graphic", value);
1278 else if (EQ (keyword, Qregistry))
1280 CHECK_STRING (value);
1284 else if (EQ (keyword, Qdirection))
1286 if (EQ (value, Ql2r))
1287 direction = CHARSET_LEFT_TO_RIGHT;
1288 else if (EQ (value, Qr2l))
1289 direction = CHARSET_RIGHT_TO_LEFT;
1291 signal_simple_error ("Invalid value for 'direction", value);
1294 else if (EQ (keyword, Qfinal))
1296 CHECK_CHAR_COERCE_INT (value);
1297 final = XCHAR (value);
1298 if (final < '0' || final > '~')
1299 signal_simple_error ("Invalid value for 'final", value);
1302 else if (EQ (keyword, Qccl_program))
1304 CHECK_VECTOR (value);
1305 ccl_program = value;
1309 signal_simple_error ("Unrecognized property", keyword);
1313 error ("'final must be specified");
1314 if (dimension == 2 && final > 0x5F)
1316 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1320 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1322 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1324 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1325 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1327 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1334 /* id = CHARSET_ID_OFFSET_94 + final; */
1335 id = get_unallocated_leading_byte (dimension);
1337 else if (chars == 96)
1339 id = get_unallocated_leading_byte (dimension);
1346 else if (dimension == 2)
1350 id = get_unallocated_leading_byte (dimension);
1352 else if (chars == 96)
1354 id = get_unallocated_leading_byte (dimension);
1369 else if (chars == 96)
1373 id = get_unallocated_leading_byte (dimension);
1376 if (NILP (doc_string))
1377 doc_string = build_string ("");
1379 if (NILP (registry))
1380 registry = build_string ("");
1382 if (NILP (short_name))
1383 XSETSTRING (short_name, XSYMBOL (name)->name);
1385 if (NILP (long_name))
1386 long_name = doc_string;
1389 columns = dimension;
1390 charset = make_charset (id, name, type, columns, graphic,
1391 final, direction, short_name, long_name,
1392 doc_string, registry,
1393 Qnil, 0, 0, 0, byte_offset);
1394 if (!NILP (ccl_program))
1395 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive: build NEW-NAME from CHARSET's attributes with the
   direction flipped, and link the two charsets to each other.  */
1399 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1401 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1402 NEW-NAME is the name of the new charset. Return the new charset.
1404 (charset, new_name))
1406 Lisp_Object new_charset = Qnil;
1407 int id, dimension, columns, graphic, final;
1408 int direction, type;
1409 Lisp_Object registry, doc_string, short_name, long_name;
1410 struct Lisp_Charset *cs;
/* CHARSET must not already be paired with a reverse-direction charset.  */
1412 charset = Fget_charset (charset);
1413 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1414 signal_simple_error ("Charset already has reverse-direction charset",
/* NEW-NAME must be a symbol that does not yet name a charset.  */
1417 CHECK_SYMBOL (new_name);
1418 if (!NILP (Ffind_charset (new_name)))
1419 signal_simple_error ("Cannot redefine existing charset", new_name);
1421 cs = XCHARSET (charset);
1423 type = CHARSET_TYPE (cs);
1424 columns = CHARSET_COLUMNS (cs);
1425 dimension = CHARSET_DIMENSION (cs);
/* The new charset gets a leading byte of its own.  */
1426 id = get_unallocated_leading_byte (dimension);
1428 graphic = CHARSET_GRAPHIC (cs);
1429 final = CHARSET_FINAL (cs);
/* Flip the direction: start from right-to-left and switch to
   left-to-right when the source charset is itself right-to-left.  */
1430 direction = CHARSET_RIGHT_TO_LEFT;
1431 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1432 direction = CHARSET_LEFT_TO_RIGHT;
1433 doc_string = CHARSET_DOC_STRING (cs);
1434 short_name = CHARSET_SHORT_NAME (cs);
1435 long_name = CHARSET_LONG_NAME (cs);
1436 registry = CHARSET_REGISTRY (cs);
1438 new_charset = make_charset (id, new_name, type, columns,
1439 graphic, final, direction, short_name, long_name,
1440 doc_string, registry,
1442 CHARSET_DECODING_TABLE(cs),
1443 CHARSET_UCS_MIN(cs),
1444 CHARSET_UCS_MAX(cs),
1445 CHARSET_CODE_OFFSET(cs),
1446 CHARSET_BYTE_OFFSET(cs)
/* Record the pairing on both charsets.  */
1452 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1453 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1458 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1459 Define symbol ALIAS as an alias for CHARSET.
1463 CHECK_SYMBOL (alias);
1464 charset = Fget_charset (charset);
1465 return Fputhash (alias, charset, Vcharset_hash_table);
1468 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive: resolve CHARSET with Fget_charset and return the value
   stored as its reverse-direction charset.  */
1470 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1472 Return the reverse-direction charset parallel to CHARSET, if any.
1473 This is the charset with the same properties (in particular, the same
1474 dimension, number of characters per dimension, and final byte) as
1475 CHARSET but whose characters are displayed in the opposite direction.
1479 charset = Fget_charset (charset);
1480 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1484 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1485 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1486 If DIRECTION is omitted, both directions will be checked (left-to-right
1487 will be returned if character sets exist for both directions).
DIMENSION must be 1 or 2, CHARS must be 94 or 96, and FINAL must be a
character in the range `0' to `~' (no greater than 0x5F when DIMENSION
is 2).  DIRECTION, when non-nil, must be `l2r' or `r2l'.  Any other
argument value signals an error.
1489 (dimension, chars, final, direction))
1491 int dm, ch, fi, di = -1;
1493 Lisp_Object obj = Qnil;
1495 CHECK_INT (dimension);
1496 dm = XINT (dimension);
1497 if (dm < 1 || dm > 2)
1498 signal_simple_error ("Invalid value for DIMENSION", dimension);
1502 if (ch != 94 && ch != 96)
1503 signal_simple_error ("Invalid value for CHARS", chars);
1505 CHECK_CHAR_COERCE_INT (final);
1507 if (fi < '0' || fi > '~')
1508 signal_simple_error ("Invalid value for FINAL", final);
1510 if (EQ (direction, Ql2r))
1511 di = CHARSET_LEFT_TO_RIGHT;
1512 else if (EQ (direction, Qr2l))
1513 di = CHARSET_RIGHT_TO_LEFT;
1514 else if (!NILP (direction))
1515 signal_simple_error ("Invalid value for DIRECTION", direction);
1517 if (dm == 2 && fi > 0x5F)
1519 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1522 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1524 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1528 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1530 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1533 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1536 return XCHARSET_NAME (obj);
1540 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1541 Return short name of CHARSET.
1545 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1548 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1549 Return long name of CHARSET.
1553 return XCHARSET_LONG_NAME (Fget_charset (charset));
1556 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1557 Return description of CHARSET.
1561 return XCHARSET_DOC_STRING (Fget_charset (charset));
1564 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1565 Return dimension of CHARSET.
1569 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1572 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1573 Return property PROP of CHARSET.
1574 Recognized properties are those listed in `make-charset', as well as
1575 'name and 'doc-string.
The property 'reverse-direction-charset is recognized as well.
PROP must be a symbol; an unrecognized property name signals an error.
1579 struct Lisp_Charset *cs;
1581 charset = Fget_charset (charset);
1582 cs = XCHARSET (charset);
1584 CHECK_SYMBOL (prop);
1585 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1586 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1587 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1588 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1589 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1590 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1591 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1592 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1593 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1594 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1595 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1596 if (EQ (prop, Qdirection))
1597 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1598 if (EQ (prop, Qreverse_direction_charset))
1600 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1604 return XCHARSET_NAME (obj);
1606 signal_simple_error ("Unrecognized charset property name", prop);
1607 return Qnil; /* not reached */
1610 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1611 Return charset identification number of CHARSET.
1615 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1618 /* #### We need to figure out which properties we really want to
1621 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1622 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
CCL-PROGRAM must be a vector.
1624 (charset, ccl_program))
1626 charset = Fget_charset (charset);
1627 CHECK_VECTOR (ccl_program);
1628 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1633 invalidate_charset_font_caches (Lisp_Object charset)
1635 /* Invalidate font cache entries for charset on all devices. */
1636 Lisp_Object devcons, concons, hash_table;
1637 DEVICE_LOOP_NO_BREAK (devcons, concons)
1639 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so a device that has no cache table
   for this charset is left untouched.  */
1640 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1641 if (!UNBOUNDP (hash_table))
1642 Fclrhash (hash_table);
1646 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1647 Set the 'registry property of CHARSET to REGISTRY.
REGISTRY must be a string.  The font cache entries for CHARSET are
invalidated and the font property of the default face is marked as
changed.
1649 (charset, registry))
1651 charset = Fget_charset (charset);
1652 CHECK_STRING (registry);
1653 XCHARSET_REGISTRY (charset) = registry;
1654 invalidate_charset_font_caches (charset);
1655 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1660 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1661 Return mapping-table of CHARSET.
1665 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1668 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1669 Set mapping-table of CHARSET to TABLE.
1673 struct Lisp_Charset *cs;
1674 Lisp_Object old_table;
1677 charset = Fget_charset (charset);
1678 cs = XCHARSET (charset);
1680 if (EQ (table, Qnil))
1682 CHARSET_DECODING_TABLE(cs) = table;
1683 CHARSET_ENCODING_TABLE(cs) = Qnil;
1686 else if (VECTORP (table))
1688 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1689 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1690 old_table = CHARSET_ENCODING_TABLE(cs);
1691 CHARSET_DECODING_TABLE(cs) = table;
1694 signal_error (Qwrong_type_argument,
1695 list2 (build_translated_string ("vector-or-nil-p"),
1697 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1699 switch (CHARSET_DIMENSION (cs))
1702 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1703 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1705 Lisp_Object c = XVECTOR_DATA(table)[i];
1708 put_char_code_table (XCHAR (c),
1709 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1710 CHARSET_ENCODING_TABLE(cs));
1714 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1715 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1717 Lisp_Object v = XVECTOR_DATA(table)[i];
1723 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1725 CHARSET_DECODING_TABLE(cs) = old_table;
1726 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1728 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1730 Lisp_Object c = XVECTOR_DATA(v)[j];
1735 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1736 | (j + CHARSET_BYTE_OFFSET (cs))),
1737 CHARSET_ENCODING_TABLE(cs));
1741 put_char_code_table (XCHAR (v),
1742 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1743 CHARSET_ENCODING_TABLE(cs));
1752 /************************************************************************/
1753 /* Lisp primitives for working with characters */
1754 /************************************************************************/
1756 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1757 Make a character from CHARSET and octets ARG1 and ARG2.
1758 ARG2 is required only for characters from two-dimensional charsets.
1759 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1760 character s with caron.
1762 (charset, arg1, arg2))
1764 struct Lisp_Charset *cs;
1766 int lowlim, highlim;
1768 charset = Fget_charset (charset);
1769 cs = XCHARSET (charset);
1771 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1772 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1774 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1776 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1777 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1780 /* It is useful (and safe, according to Olivier Galibert) to strip
1781 the 8th bit off ARG1 and ARG2 because it allows programmers to
1782 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1783 Latin 2 code of the character. */
/* The first octet must lie within the limits selected above.  */
1791 if (a1 < lowlim || a1 > highlim)
1792 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1794 if (CHARSET_DIMENSION (cs) == 1)
1798 ("Charset is of dimension one; second octet must be nil", arg2);
/* A one-dimensional character is built with 0 as second position code.  */
1799 return make_char (MAKE_CHAR (charset, a1, 0));
/* Keep only the low seven bits of ARG2, then range-check it against the
   same limits as the first octet.  */
1808 a2 = XINT (arg2) & 0x7f;
1810 if (a2 < lowlim || a2 > highlim)
1811 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1813 return make_char (MAKE_CHAR (charset, a1, a2));
1816 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1817 Return the character set of char CH.
1821 CHECK_CHAR_COERCE_INT (ch);
1823 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1826 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1827 Return list of charset and one or two position-codes of CHAR.
1831 /* This function can GC */
1832 struct gcpro gcpro1, gcpro2;
1833 Lisp_Object charset = Qnil;
1834 Lisp_Object rc = Qnil;
/* GC-protect charset and rc.  */
1837 GCPRO2 (charset, rc);
1838 CHECK_CHAR_COERCE_INT (character);
/* Split the character into its charset and the position codes c1, c2.  */
1840 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
/* A charset of dimension 2 contributes both position codes to the list.  */
1842 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1844 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
/* List form carrying only the first position code.  */
1848 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1856 #ifdef ENABLE_COMPOSITE_CHARS
1857 /************************************************************************/
1858 /* composite character functions */
1859 /************************************************************************/
/* Map the LEN-byte string STR to a composite character, using
   Vcomposite_char_string2char_hash_table for the lookup.  */
1862 lookup_composite_char (Bufbyte *str, int len)
1864 Lisp_Object lispstr = make_string (str, len);
1865 Lisp_Object ch = Fgethash (lispstr,
1866 Vcomposite_char_string2char_hash_table,
/* Allocation of a new composite character: fail once all rows are used,
   otherwise take the next free row/column cell of Vcharset_composite.  */
1872 if (composite_char_row_next >= 128)
1873 signal_simple_error ("No more composite chars available", lispstr);
1874 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1875 composite_char_col_next);
/* Record the new character in both directions.  */
1876 Fputhash (make_char (emch), lispstr,
1877 Vcomposite_char_char2string_hash_table);
1878 Fputhash (lispstr, make_char (emch),
1879 Vcomposite_char_string2char_hash_table);
/* Advance to the next cell; past column 127 continue at column 32 of
   the following row.  */
1880 composite_char_col_next++;
1881 if (composite_char_col_next >= 128)
1883 composite_char_col_next = 32;
1884 composite_char_row_next++;
/* Look up the string registered for composite character CH in
   Vcomposite_char_char2string_hash_table.  CH is asserted to be present
   in the table.  */
1893 composite_char_string (Emchar ch)
1895 Lisp_Object str = Fgethash (make_char (ch),
1896 Vcomposite_char_char2string_hash_table,
1898 assert (!UNBOUNDP (str));
1902 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1903 Convert a string into a single composite character.
1904 The character is the result of overstriking all the characters in
1909 CHECK_STRING (string);
1910 return make_char (lookup_composite_char (XSTRING_DATA (string),
1911 XSTRING_LENGTH (string)));
1914 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1915 Return a string of the characters comprising a composite character.
An error is signalled if the leading byte of the character is not that
of the composite charset.
1923 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1924 signal_simple_error ("Must be composite char", ch);
1925 return composite_char_string (emch);
1927 #endif /* ENABLE_COMPOSITE_CHARS */
1930 /************************************************************************/
1931 /* initialization */
1932 /************************************************************************/
/* Register the Lisp primitives of this file with DEFSUBR and define the
   symbols it uses with defsymbol.  */
1935 syms_of_mule_charset (void)
/* Charset primitives.  */
1937 DEFSUBR (Fcharsetp);
1938 DEFSUBR (Ffind_charset);
1939 DEFSUBR (Fget_charset);
1940 DEFSUBR (Fcharset_list);
1941 DEFSUBR (Fcharset_name);
1942 DEFSUBR (Fmake_charset);
1943 DEFSUBR (Fmake_reverse_direction_charset);
1944 /* DEFSUBR (Freverse_direction_charset); */
1945 DEFSUBR (Fdefine_charset_alias);
1946 DEFSUBR (Fcharset_from_attributes);
1947 DEFSUBR (Fcharset_short_name);
1948 DEFSUBR (Fcharset_long_name);
1949 DEFSUBR (Fcharset_description);
1950 DEFSUBR (Fcharset_dimension);
1951 DEFSUBR (Fcharset_property);
1952 DEFSUBR (Fcharset_id);
1953 DEFSUBR (Fset_charset_ccl_program);
1954 DEFSUBR (Fset_charset_registry);
1956 DEFSUBR (Fcharset_mapping_table);
1957 DEFSUBR (Fset_charset_mapping_table);
/* Character primitives.  */
1960 DEFSUBR (Fmake_char);
1961 DEFSUBR (Fchar_charset);
1962 DEFSUBR (Fsplit_char);
1964 #ifdef ENABLE_COMPOSITE_CHARS
1965 DEFSUBR (Fmake_composite_char);
1966 DEFSUBR (Fcomposite_char_string);
/* Predicate and property-name symbols.  */
1969 defsymbol (&Qcharsetp, "charsetp");
1970 defsymbol (&Qregistry, "registry");
1971 defsymbol (&Qfinal, "final");
1972 defsymbol (&Qgraphic, "graphic");
1973 defsymbol (&Qdirection, "direction");
1974 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1975 defsymbol (&Qshort_name, "short-name");
1976 defsymbol (&Qlong_name, "long-name");
/* Values of the direction property.  */
1978 defsymbol (&Ql2r, "l2r");
1979 defsymbol (&Qr2l, "r2l");
1981 /* Charsets, compatible with FSF 20.3
1982 Naming convention is Script-Charset[-Edition] */
1983 defsymbol (&Qascii, "ascii");
1984 defsymbol (&Qcontrol_1, "control-1");
1985 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1986 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1987 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1988 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1989 defsymbol (&Qthai_tis620, "thai-tis620");
1990 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1991 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1992 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1993 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1994 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1995 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1996 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1997 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1998 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1999 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2000 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2001 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2002 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2003 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2005 defsymbol (&Qucs_bmp, "ucs-bmp");
2006 defsymbol (&Qlatin_viscii, "latin-viscii");
2007 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2008 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2009 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2010 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2011 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2012 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2014 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2015 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2017 defsymbol (&Qcomposite, "composite");
/* Reset the charset lookup tables and the leading-byte counters to their
   initial values, and declare the Lisp variables of this file.  */
2021 vars_of_mule_charset (void)
2028 /* Table of charsets indexed by leading byte. */
2029 for (i = 0; i < countof (charset_by_leading_byte); i++)
2030 charset_by_leading_byte[i] = Qnil;
/* NOTE(review): the next two loop nests initialise charset_by_attributes
   once as a two-dimensional and once as a three-dimensional array; they
   look like alternatives for different build configurations rather than
   code meant to run together -- confirm.  */
2033 /* Table of charsets indexed by type/final-byte. */
2034 for (i = 0; i < countof (charset_by_attributes); i++)
2035 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2036 charset_by_attributes[i][j] = Qnil;
2038 /* Table of charsets indexed by type/final-byte/direction. */
2039 for (i = 0; i < countof (charset_by_attributes); i++)
2040 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2041 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2042 charset_by_attributes[i][j][k] = Qnil;
/* Initial values of the next-allocated leading-byte counters.
   NOTE(review): the 2-byte counter is assigned two different values
   below; presumably only one of the assignments is compiled in --
   confirm.  */
2045 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2047 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2049 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2053 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2054 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2055 Leading-code of private TYPE9N charset of column-width 1.
2057 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2061 Vutf_2000_version = build_string("0.8 (Kami)");
2062 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2063 Version number of UTF-2000.
2066 Vdefault_coded_charset_priority_list = Qnil;
2067 DEFVAR_LISP ("default-coded-charset-priority-list",
2068 &Vdefault_coded_charset_priority_list /*
2069 Default order of preferred coded-character-set.
2075 complex_vars_of_mule_charset (void)
2077 staticpro (&Vcharset_hash_table);
2078 Vcharset_hash_table =
2079 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2081 /* Predefined character sets. We store them into variables for
2086 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2087 CHARSET_TYPE_256X256, 1, 2, 0,
2088 CHARSET_LEFT_TO_RIGHT,
2089 build_string ("BMP"),
2090 build_string ("BMP"),
2091 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2092 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2093 Qnil, 0, 0xFFFF, 0, 0);
2095 # define MIN_CHAR_THAI 0
2096 # define MAX_CHAR_THAI 0
2097 # define MIN_CHAR_GREEK 0
2098 # define MAX_CHAR_GREEK 0
2099 # define MIN_CHAR_HEBREW 0
2100 # define MAX_CHAR_HEBREW 0
2101 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2102 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2103 # define MIN_CHAR_CYRILLIC 0
2104 # define MAX_CHAR_CYRILLIC 0
2107 make_charset (LEADING_BYTE_ASCII, Qascii,
2108 CHARSET_TYPE_94, 1, 0, 'B',
2109 CHARSET_LEFT_TO_RIGHT,
2110 build_string ("ASCII"),
2111 build_string ("ASCII)"),
2112 build_string ("ASCII (ISO646 IRV)"),
2113 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2114 Qnil, 0, 0x7F, 0, 0);
2115 Vcharset_control_1 =
2116 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2117 CHARSET_TYPE_94, 1, 1, 0,
2118 CHARSET_LEFT_TO_RIGHT,
2119 build_string ("C1"),
2120 build_string ("Control characters"),
2121 build_string ("Control characters 128-191"),
2123 Qnil, 0x80, 0x9F, 0, 0);
2124 Vcharset_latin_iso8859_1 =
2125 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2126 CHARSET_TYPE_96, 1, 1, 'A',
2127 CHARSET_LEFT_TO_RIGHT,
2128 build_string ("Latin-1"),
2129 build_string ("ISO8859-1 (Latin-1)"),
2130 build_string ("ISO8859-1 (Latin-1)"),
2131 build_string ("iso8859-1"),
2132 Qnil, 0xA0, 0xFF, 0, 32);
2133 Vcharset_latin_iso8859_2 =
2134 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2135 CHARSET_TYPE_96, 1, 1, 'B',
2136 CHARSET_LEFT_TO_RIGHT,
2137 build_string ("Latin-2"),
2138 build_string ("ISO8859-2 (Latin-2)"),
2139 build_string ("ISO8859-2 (Latin-2)"),
2140 build_string ("iso8859-2"),
2142 Vcharset_latin_iso8859_3 =
2143 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2144 CHARSET_TYPE_96, 1, 1, 'C',
2145 CHARSET_LEFT_TO_RIGHT,
2146 build_string ("Latin-3"),
2147 build_string ("ISO8859-3 (Latin-3)"),
2148 build_string ("ISO8859-3 (Latin-3)"),
2149 build_string ("iso8859-3"),
2151 Vcharset_latin_iso8859_4 =
2152 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2153 CHARSET_TYPE_96, 1, 1, 'D',
2154 CHARSET_LEFT_TO_RIGHT,
2155 build_string ("Latin-4"),
2156 build_string ("ISO8859-4 (Latin-4)"),
2157 build_string ("ISO8859-4 (Latin-4)"),
2158 build_string ("iso8859-4"),
2160 Vcharset_thai_tis620 =
2161 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2162 CHARSET_TYPE_96, 1, 1, 'T',
2163 CHARSET_LEFT_TO_RIGHT,
2164 build_string ("TIS620"),
2165 build_string ("TIS620 (Thai)"),
2166 build_string ("TIS620.2529 (Thai)"),
2167 build_string ("tis620"),
2168 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2169 Vcharset_greek_iso8859_7 =
2170 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2171 CHARSET_TYPE_96, 1, 1, 'F',
2172 CHARSET_LEFT_TO_RIGHT,
2173 build_string ("ISO8859-7"),
2174 build_string ("ISO8859-7 (Greek)"),
2175 build_string ("ISO8859-7 (Greek)"),
2176 build_string ("iso8859-7"),
2177 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2178 Vcharset_arabic_iso8859_6 =
2179 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2180 CHARSET_TYPE_96, 1, 1, 'G',
2181 CHARSET_RIGHT_TO_LEFT,
2182 build_string ("ISO8859-6"),
2183 build_string ("ISO8859-6 (Arabic)"),
2184 build_string ("ISO8859-6 (Arabic)"),
2185 build_string ("iso8859-6"),
2187 Vcharset_hebrew_iso8859_8 =
2188 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2189 CHARSET_TYPE_96, 1, 1, 'H',
2190 CHARSET_RIGHT_TO_LEFT,
2191 build_string ("ISO8859-8"),
2192 build_string ("ISO8859-8 (Hebrew)"),
2193 build_string ("ISO8859-8 (Hebrew)"),
2194 build_string ("iso8859-8"),
2195 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2196 Vcharset_katakana_jisx0201 =
2197 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2198 CHARSET_TYPE_94, 1, 1, 'I',
2199 CHARSET_LEFT_TO_RIGHT,
2200 build_string ("JISX0201 Kana"),
2201 build_string ("JISX0201.1976 (Japanese Kana)"),
2202 build_string ("JISX0201.1976 Japanese Kana"),
2203 build_string ("jisx0201\\.1976"),
2205 MIN_CHAR_HALFWIDTH_KATAKANA,
2206 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2207 Vcharset_latin_jisx0201 =
2208 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2209 CHARSET_TYPE_94, 1, 0, 'J',
2210 CHARSET_LEFT_TO_RIGHT,
2211 build_string ("JISX0201 Roman"),
2212 build_string ("JISX0201.1976 (Japanese Roman)"),
2213 build_string ("JISX0201.1976 Japanese Roman"),
2214 build_string ("jisx0201\\.1976"),
2216 Vcharset_cyrillic_iso8859_5 =
2217 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2218 CHARSET_TYPE_96, 1, 1, 'L',
2219 CHARSET_LEFT_TO_RIGHT,
2220 build_string ("ISO8859-5"),
2221 build_string ("ISO8859-5 (Cyrillic)"),
2222 build_string ("ISO8859-5 (Cyrillic)"),
2223 build_string ("iso8859-5"),
2224 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2225 Vcharset_latin_iso8859_9 =
2226 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2227 CHARSET_TYPE_96, 1, 1, 'M',
2228 CHARSET_LEFT_TO_RIGHT,
2229 build_string ("Latin-5"),
2230 build_string ("ISO8859-9 (Latin-5)"),
2231 build_string ("ISO8859-9 (Latin-5)"),
2232 build_string ("iso8859-9"),
2234 Vcharset_japanese_jisx0208_1978 =
2235 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2236 CHARSET_TYPE_94X94, 2, 0, '@',
2237 CHARSET_LEFT_TO_RIGHT,
2238 build_string ("JIS X0208:1978"),
2239 build_string ("JIS X0208:1978 (Japanese)"),
2241 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2242 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2244 Vcharset_chinese_gb2312 =
2245 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2246 CHARSET_TYPE_94X94, 2, 0, 'A',
2247 CHARSET_LEFT_TO_RIGHT,
2248 build_string ("GB2312"),
2249 build_string ("GB2312)"),
2250 build_string ("GB2312 Chinese simplified"),
2251 build_string ("gb2312"),
2253 Vcharset_japanese_jisx0208 =
2254 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2255 CHARSET_TYPE_94X94, 2, 0, 'B',
2256 CHARSET_LEFT_TO_RIGHT,
2257 build_string ("JISX0208"),
2258 build_string ("JIS X0208:1983 (Japanese)"),
2259 build_string ("JIS X0208:1983 Japanese Kanji"),
2260 build_string ("jisx0208\\.1983"),
2262 Vcharset_korean_ksc5601 =
2263 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2264 CHARSET_TYPE_94X94, 2, 0, 'C',
2265 CHARSET_LEFT_TO_RIGHT,
2266 build_string ("KSC5601"),
2267 build_string ("KSC5601 (Korean"),
2268 build_string ("KSC5601 Korean Hangul and Hanja"),
2269 build_string ("ksc5601"),
2271 Vcharset_japanese_jisx0212 =
2272 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2273 CHARSET_TYPE_94X94, 2, 0, 'D',
2274 CHARSET_LEFT_TO_RIGHT,
2275 build_string ("JISX0212"),
2276 build_string ("JISX0212 (Japanese)"),
2277 build_string ("JISX0212 Japanese Supplement"),
2278 build_string ("jisx0212"),
2281 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2282 Vcharset_chinese_cns11643_1 =
2283 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2284 CHARSET_TYPE_94X94, 2, 0, 'G',
2285 CHARSET_LEFT_TO_RIGHT,
2286 build_string ("CNS11643-1"),
2287 build_string ("CNS11643-1 (Chinese traditional)"),
2289 ("CNS 11643 Plane 1 Chinese traditional"),
2290 build_string (CHINESE_CNS_PLANE_RE("1")),
2292 Vcharset_chinese_cns11643_2 =
2293 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2294 CHARSET_TYPE_94X94, 2, 0, 'H',
2295 CHARSET_LEFT_TO_RIGHT,
2296 build_string ("CNS11643-2"),
2297 build_string ("CNS11643-2 (Chinese traditional)"),
2299 ("CNS 11643 Plane 2 Chinese traditional"),
2300 build_string (CHINESE_CNS_PLANE_RE("2")),
2303 Vcharset_latin_viscii_lower =
2304 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2305 CHARSET_TYPE_96, 1, 1, '1',
2306 CHARSET_LEFT_TO_RIGHT,
2307 build_string ("VISCII lower"),
2308 build_string ("VISCII lower (Vietnamese)"),
2309 build_string ("VISCII lower (Vietnamese)"),
2310 build_string ("MULEVISCII-LOWER"),
2312 Vcharset_latin_viscii_upper =
2313 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2314 CHARSET_TYPE_96, 1, 1, '2',
2315 CHARSET_LEFT_TO_RIGHT,
2316 build_string ("VISCII upper"),
2317 build_string ("VISCII upper (Vietnamese)"),
2318 build_string ("VISCII upper (Vietnamese)"),
2319 build_string ("MULEVISCII-UPPER"),
2321 Vcharset_latin_viscii =
2322 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2323 CHARSET_TYPE_256, 1, 2, 0,
2324 CHARSET_LEFT_TO_RIGHT,
2325 build_string ("VISCII"),
2326 build_string ("VISCII 1.1 (Vietnamese)"),
2327 build_string ("VISCII 1.1 (Vietnamese)"),
2328 build_string ("VISCII1\\.1"),
2330 Vcharset_hiragana_jisx0208 =
2331 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2332 CHARSET_TYPE_94X94, 2, 0, 'B',
2333 CHARSET_LEFT_TO_RIGHT,
2334 build_string ("Hiragana"),
2335 build_string ("Hiragana of JIS X0208"),
2336 build_string ("Japanese Hiragana of JIS X0208"),
2337 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2338 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2339 (0x24 - 33) * 94 + (0x21 - 33), 33);
2340 Vcharset_katakana_jisx0208 =
2341 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2342 CHARSET_TYPE_94X94, 2, 0, 'B',
2343 CHARSET_LEFT_TO_RIGHT,
2344 build_string ("Katakana"),
2345 build_string ("Katakana of JIS X0208"),
2346 build_string ("Japanese Katakana of JIS X0208"),
2347 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2348 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2349 (0x25 - 33) * 94 + (0x21 - 33), 33);
2351 Vcharset_chinese_big5_1 =
2352 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2353 CHARSET_TYPE_94X94, 2, 0, '0',
2354 CHARSET_LEFT_TO_RIGHT,
2355 build_string ("Big5"),
2356 build_string ("Big5 (Level-1)"),
2358 ("Big5 Level-1 Chinese traditional"),
2359 build_string ("big5"),
2361 Vcharset_chinese_big5_2 =
2362 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2363 CHARSET_TYPE_94X94, 2, 0, '1',
2364 CHARSET_LEFT_TO_RIGHT,
2365 build_string ("Big5"),
2366 build_string ("Big5 (Level-2)"),
2368 ("Big5 Level-2 Chinese traditional"),
2369 build_string ("big5"),
2372 #ifdef ENABLE_COMPOSITE_CHARS
2373 /* #### For simplicity, we put composite chars into a 96x96 charset.
2374 This is going to lead to problems because you can run out of
2375 room, esp. as we don't yet recycle numbers. */
2376 Vcharset_composite =
2377 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2378 CHARSET_TYPE_96X96, 2, 0, 0,
2379 CHARSET_LEFT_TO_RIGHT,
2380 build_string ("Composite"),
2381 build_string ("Composite characters"),
2382 build_string ("Composite characters"),
2385 composite_char_row_next = 32;
2386 composite_char_col_next = 32;
2388 Vcomposite_char_string2char_hash_table =
2389 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2390 Vcomposite_char_char2string_hash_table =
2391 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2392 staticpro (&Vcomposite_char_string2char_hash_table);
2393 staticpro (&Vcomposite_char_char2string_hash_table);
2394 #endif /* ENABLE_COMPOSITE_CHARS */