1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* One Lisp_Object global per built-in charset.
   NOTE(review): presumably these are filled in by charset
   initialization code that is not visible in this part of the file --
   confirm. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
/* Everything down to the matching #endif exists only when
   ENABLE_COMPOSITE_CHARS is defined. */
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two declarations of charset_by_attributes follow, one
   without and one with a trailing direction dimension.  make_charset
   likewise contains both a two-index and a three-index store into this
   table, so presumably a preprocessor conditional that is not visible
   here selects one variant -- confirm. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* 0xA0 entries, one per possible first byte 0x00 - 0x9f.  The entry for
   a leading byte is also overwritten at run time by make_charset. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two candidate rows for 0x80 - 0x8f follow; they differ
   only in the last entry (3 versus 2).  Presumably a preprocessor
   conditional that is not visible here selects one of them -- confirm. */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method for char-byte-tables (registered with
   DEFINE_LRECORD_IMPLEMENTATION further down): calls MARKOBJ on each of
   the 256 entries of OBJ's `property' array. */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method for char-byte-tables.  Walks the 256 `property' slots
   of OBJ1 and OBJ2 in parallel; DEPTH is passed on, incremented by one,
   to the nested comparisons. */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
/* A slot holding a nested char-byte-table in both objects is compared
   by recursing into this function. */
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
150 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
151 if (!char_byte_table_equal (cte1->property[i],
152 cte2->property[i], depth + 1))
/* Presumably the branch for slots of OBJ1 that are not nested tables:
   those are compared with internal_equal. */
157 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-tables: hashes the 256-element `property'
   array of OBJ with internal_array_hash, passing DEPTH through. */
163 char_byte_table_hash (Lisp_Object obj, int depth)
165 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
167 return internal_array_hash (cte->property, 256, depth);
/* Layout description for struct Lisp_Char_Byte_Table: 256 Lisp objects
   starting at the offset of the `property' member. */
170 static const struct lrecord_description char_byte_table_description[] = {
171 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Registers the char_byte_table lrecord type under the name
   "char-code-table", wiring in the mark, equal and hash methods defined
   above and the generic internal_object_printer. */
175 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
176 mark_char_byte_table,
177 internal_object_printer,
178 0, char_byte_table_equal,
179 char_byte_table_hash,
180 char_byte_table_description,
181 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table and set all 256 `property' slots to
   INITVAL.  The new record is wrapped into the Lisp object `obj' with
   XSETCHAR_BYTE_TABLE (presumably the value returned; the return
   statement is not visible here). */
185 make_char_byte_table (Lisp_Object initval)
189 struct Lisp_Char_Byte_Table *cte =
190 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
191 &lrecord_char_byte_table);
193 for (i = 0; i < 256; i++)
194 cte->property[i] = initval;
196 XSETCHAR_BYTE_TABLE (obj, cte);
/* Make a copy of the char-byte-table ENTRY.  A fresh record is
   allocated and filled slot by slot from ENTRY. */
201 copy_char_byte_table (Lisp_Object entry)
203 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
206 struct Lisp_Char_Byte_Table *ctenew =
207 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
208 &lrecord_char_byte_table);
210 for (i = 0; i < 256; i++)
212 Lisp_Object new = cte->property[i];
/* Nested char-byte-tables are copied recursively; any other value is
   stored in the copy as-is. */
213 if (CHAR_BYTE_TABLE_P (new))
214 ctenew->property[i] = copy_char_byte_table (new);
216 ctenew->property[i] = new;
219 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code table is created exactly like a char-byte-table: this
   macro simply forwards to make_char_byte_table. */
223 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Return the value stored for character CH in the char-code table TABLE.

   TABLE is a char-byte-table indexed by the most significant byte of
   CH.  The value found there is either a nested char-byte-table, which
   is then indexed by the next lower byte of CH (up to four levels in
   total), or a plain value.

   The top-level index is reduced to a byte with the same
   (unsigned char) cast that the lower levels already use (and that
   put_char_code_table uses when it stores into the top level), so a CH
   whose value is negative can no longer index outside the 256-entry
   `property' array.  Non-negative CH values index exactly as before. */
226 get_char_code_table (Emchar ch, Lisp_Object table)
228 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
229 Lisp_Object ret = cpt->property [(unsigned char) (ch >> 24)];
/* Descend one level for each lookup result that is itself a table. */
231 if (CHAR_BYTE_TABLE_P (ret))
232 cpt = XCHAR_BYTE_TABLE (ret);
236 ret = cpt->property [(unsigned char) (ch >> 16)];
237 if (CHAR_BYTE_TABLE_P (ret))
238 cpt = XCHAR_BYTE_TABLE (ret);
242 ret = cpt->property [(unsigned char) (ch >> 8)];
243 if (CHAR_BYTE_TABLE_P (ret))
244 cpt = XCHAR_BYTE_TABLE (ret);
/* Fourth level: indexed by the lowest byte of CH. */
248 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in the char-code table TABLE.

   TABLE is a char-byte-table indexed by the most significant byte of
   CH; nested char-byte-tables are indexed by the following bytes, four
   levels in total.  Where a level holds a plain value instead of a
   nested table, and that value is not EQ to VALUE, the missing lower
   levels are created with make_char_byte_table, every slot initialised
   to the old plain value, and only the slot for CH is set to VALUE.

   The initial read of the top level now uses the same
   (unsigned char)(ch >> 24) index as the store at the end of this
   function, so the slot that is examined is always the slot that is
   later written, and a negative CH cannot index outside the 256-entry
   `property' array.  Non-negative CH values behave exactly as before. */
252 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
254 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
255 Lisp_Object ret = cpt1->property[(unsigned char)(ch >> 24)];
257 if (CHAR_BYTE_TABLE_P (ret))
259 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
261 ret = cpt2->property[(unsigned char)(ch >> 16)];
262 if (CHAR_BYTE_TABLE_P (ret))
264 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
266 ret = cpt3->property[(unsigned char)(ch >> 8)];
267 if (CHAR_BYTE_TABLE_P (ret))
269 struct Lisp_Char_Byte_Table* cpt4
270 = XCHAR_BYTE_TABLE (ret);
/* All four levels already exist: overwrite the leaf slot. */
272 cpt4->property[(unsigned char)ch] = value;
/* Third level holds a plain value: create the fourth level. */
274 else if (!EQ (ret, value))
276 Lisp_Object cpt4 = make_char_byte_table (ret);
278 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
279 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* Second level holds a plain value: create levels three and four. */
282 else if (!EQ (ret, value))
284 Lisp_Object cpt3 = make_char_byte_table (ret);
285 Lisp_Object cpt4 = make_char_byte_table (ret);
287 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
288 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
290 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* Top level holds a plain value: create levels two, three and four. */
293 else if (!EQ (ret, value))
295 Lisp_Object cpt2 = make_char_byte_table (ret);
296 Lisp_Object cpt3 = make_char_byte_table (ret);
297 Lisp_Object cpt4 = make_char_byte_table (ret);
299 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
300 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
301 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
302 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
307 Lisp_Object Vutf_2000_version;
311 int leading_code_private_11;
314 Lisp_Object Qcharsetp;
316 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Qregistry, Qfinal, Qgraphic, Qdirection, Qshort_name and Qlong_name
   are compared against the property keywords in Fmake_charset. */
317 Lisp_Object Qregistry, Qfinal, Qgraphic;
318 Lisp_Object Qdirection;
319 Lisp_Object Qreverse_direction_charset;
320 Lisp_Object Qleading_byte;
321 Lisp_Object Qshort_name, Qlong_name;
337 Qjapanese_jisx0208_1978,
349 Qvietnamese_viscii_lower,
350 Qvietnamese_viscii_upper,
/* The two values Fmake_charset accepts for the 'direction property. */
358 Lisp_Object Ql2r, Qr2l;
/* Maps charset names to charset objects: make_charset stores into it
   with Fputhash and Ffind_charset reads it with Fgethash. */
360 Lisp_Object Vcharset_hash_table;
/* Next leading byte to hand out from get_unallocated_leading_byte,
   which post-increments these counters. */
362 static Charset_ID next_allocated_1_byte_leading_byte;
363 static Charset_ID next_allocated_2_byte_leading_byte;
365 /* Composite characters are characters constructed by overstriking two
366 or more regular characters.
368 1) The old Mule implementation involves storing composite characters
369 in a buffer as a tag followed by all of the actual characters
370 used to make up the composite character. I think this is a bad
371 idea; it greatly complicates code that wants to handle strings
372 one character at a time because it has to deal with the possibility
373 of great big ungainly characters. It's much more reasonable to
374 simply store an index into a table of composite characters.
376 2) The current implementation only allows for 16,384 separate
377 composite characters over the lifetime of the XEmacs process.
378 This could become a potential problem if the user
379 edited lots of different files that use composite characters.
380 Due to FSF bogosity, increasing the number of allowable
381 composite characters under Mule would decrease the number
382 of possible faces that can exist. Mule already has shrunk
383 this to 2048, and further shrinkage would become uncomfortable.
384 No such problems exist in XEmacs.
386 Composite characters could be represented as 0x80 C1 C2 C3,
387 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
388 for slightly under 2^20 (one million) composite characters
389 over the XEmacs process lifetime, and you only need to
390 increase the size of a Mule character from 19 to 21 bits.
391 Or you could use 0x80 C1 C2 C3 C4, allowing for about
392 85 million (slightly over 2^26) composite characters. */
395 /************************************************************************/
396 /* Basic Emchar functions */
397 /************************************************************************/
399 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
400 string in STR. Returns the number of bytes stored.
401 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Writes the encoded form of C through the moving pointer `p'.
   NOTE(review): two encodings are visible below -- a UTF-8-style
   variable-length form and a leading-byte form built with BREAKUP_CHAR.
   Presumably a preprocessor conditional that is not visible here
   selects between them -- confirm. */
405 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 11 bits: lead byte 0xc0 plus one 6-bit continuation byte. */
420 else if ( c <= 0x7ff )
422 *p++ = (c >> 6) | 0xc0;
423 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: lead byte 0xe0 plus two continuation bytes. */
425 else if ( c <= 0xffff )
427 *p++ = (c >> 12) | 0xe0;
428 *p++ = ((c >> 6) & 0x3f) | 0x80;
429 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: lead byte 0xf0 plus three continuation bytes. */
431 else if ( c <= 0x1fffff )
433 *p++ = (c >> 18) | 0xf0;
434 *p++ = ((c >> 12) & 0x3f) | 0x80;
435 *p++ = ((c >> 6) & 0x3f) | 0x80;
436 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: lead byte 0xf8 plus four continuation bytes. */
438 else if ( c <= 0x3ffffff )
440 *p++ = (c >> 24) | 0xf8;
441 *p++ = ((c >> 18) & 0x3f) | 0x80;
442 *p++ = ((c >> 12) & 0x3f) | 0x80;
443 *p++ = ((c >> 6) & 0x3f) | 0x80;
444 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: lead byte 0xfc plus five continuation bytes. */
448 *p++ = (c >> 30) | 0xfc;
449 *p++ = ((c >> 24) & 0x3f) | 0x80;
450 *p++ = ((c >> 18) & 0x3f) | 0x80;
451 *p++ = ((c >> 12) & 0x3f) | 0x80;
452 *p++ = ((c >> 6) & 0x3f) | 0x80;
453 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte form: split C into charset and position codes; a
   private leading byte is preceded by its prefix byte. */
456 BREAKUP_CHAR (c, charset, c1, c2);
457 lb = CHAR_LEADING_BYTE (c);
458 if (LEADING_BYTE_PRIVATE_P (lb))
459 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
461 if (EQ (charset, Vcharset_control_1))
470 /* Return the first character from a Mule-encoded string in STR,
471 assuming it's non-ASCII. Do not call this directly.
472 Use the macro charptr_emchar() instead. */
/* NOTE(review): as in non_ascii_set_charptr_emchar, two decoders are
   visible below (UTF-8-style and leading-byte); presumably a
   preprocessor conditional that is not visible here selects one --
   confirm. */
475 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* The first byte `b' is classified by its high bits, from the longest
   form downwards. */
488 else if ( b >= 0xf8 )
493 else if ( b >= 0xf0 )
498 else if ( b >= 0xe0 )
503 else if ( b >= 0xc0 )
/* Each pass of the loop shifts six more payload bits into `ch'. */
513 for( ; len > 0; len-- )
516 ch = ( ch << 6 ) | ( b & 0x3f );
520 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the byte after the leading byte, minus 0x20. */
523 if (i0 == LEADING_BYTE_CONTROL_1)
524 return (Emchar) (*++str - 0x20);
526 if (LEADING_BYTE_PREFIX_P (i0))
531 charset = CHARSET_BY_LEADING_BYTE (i0);
532 if (XCHARSET_DIMENSION (charset) == 2)
535 return MAKE_CHAR (charset, i1, i2);
539 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
540 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check on the three fields of CH (f1, f2, f3).  The
   conditions that choose between the f2-based and the f1-based checks
   below are not visible here. */
544 non_ascii_valid_char_p (Emchar ch)
548 /* Must have only lowest 19 bits set */
552 f1 = CHAR_FIELD1 (ch);
553 f2 = CHAR_FIELD2 (ch);
554 f3 = CHAR_FIELD3 (ch);
/* f2 must lie in the official or the private range, not between or
   outside them. */
560 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
561 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
562 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position codes 0x20 and 0x7F are special-cased: for them the result
   depends on whether the charset has 96 characters per dimension. */
567 if (f3 != 0x20 && f3 != 0x7F)
571 NOTE: This takes advantage of the fact that
572 FIELD2_TO_OFFICIAL_LEADING_BYTE and
573 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
575 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
576 return (XCHARSET_CHARS (charset) == 96);
/* Same range check, applied to f1. */
582 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
583 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
584 f1 > MAX_CHAR_FIELD1_PRIVATE)
586 if (f2 < 0x20 || f3 < 0x20)
589 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash
   table. */
590 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
592 if (UNBOUNDP (Fgethash (make_int (ch),
593 Vcomposite_char_char2string_hash_table,
598 #endif /* ENABLE_COMPOSITE_CHARS */
600 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
/* Official and private f1 values map to leading bytes through
   different offsets. */
603 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
605 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
608 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
610 return (XCHARSET_CHARS (charset) == 96);
616 /************************************************************************/
617 /* Basic string functions */
618 /************************************************************************/
620 /* Copy the character pointed to by PTR into STR, assuming it's
621 non-ASCII. Do not call this directly. Use the macro
622 charptr_copy_char() instead. */
/* Returns the number of bytes written, computed as the distance from
   STR to one past the last byte stored. */
625 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
627 Bufbyte *strptr = str;
/* NOTE(review): the switch reads *strptr, i.e. the destination.
   Presumably the first byte has been copied from PTR into STR on a line
   that is not visible here -- confirm. */
629 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
631 /* Notice fallthrough. */
/* Each case copies one further byte and falls into the next, so a
   character of N bytes enters at `case N'. */
633 case 6: *++strptr = *ptr++;
634 case 5: *++strptr = *ptr++;
636 case 4: *++strptr = *ptr++;
637 case 3: *++strptr = *ptr++;
638 case 2: *++strptr = *ptr;
643 return strptr + 1 - str;
647 /************************************************************************/
648 /* streams of Emchars */
649 /************************************************************************/
651 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
652 The functions below are not meant to be called directly; use
653 the macros in insdel.h. */
/* Assemble one character from STREAM given its already-read first byte
   CH: the remaining bytes are pulled with Lstream_getc into a local
   buffer, which is then decoded with charptr_emchar. */
656 Lstream_get_emchar_1 (Lstream *stream, int ch)
658 Bufbyte str[MAX_EMCHAR_LEN];
659 Bufbyte *strptr = str;
661 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes are read. */
662 switch (REP_BYTES_BY_FIRST_BYTE (ch))
664 /* Notice fallthrough. */
667 ch = Lstream_getc (stream);
669 *++strptr = (Bufbyte) ch;
671 ch = Lstream_getc (stream);
673 *++strptr = (Bufbyte) ch;
676 ch = Lstream_getc (stream);
678 *++strptr = (Bufbyte) ch;
680 ch = Lstream_getc (stream);
682 *++strptr = (Bufbyte) ch;
684 ch = Lstream_getc (stream);
686 *++strptr = (Bufbyte) ch;
691 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer with
   set_charptr_emchar and the resulting bytes are passed to
   Lstream_write, whose result is returned. */
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local buffer
   with set_charptr_emchar and the resulting bytes are handed to
   Lstream_unread. */
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
713 /************************************************************************/
/* Mark method for charset objects: calls MARKOBJ on the Lisp_Object
   members of the charset listed below. */
716 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
718 struct Lisp_Charset *cs = XCHARSET (obj);
720 markobj (cs->short_name);
721 markobj (cs->long_name);
722 markobj (cs->doc_string);
723 markobj (cs->registry);
724 markobj (cs->ccl_program);
726 markobj (cs->decoding_table);
727 markobj (cs->encoding_table);
/* Print method for charset objects.  Writes a "#<charset ...>"
   description of OBJ to PRINTCHARFUN: name, short name, long name, doc
   string, type, direction, columns, graphic, final byte, registry and
   the record's uid. */
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 struct Lisp_Charset *cs = XCHARSET (obj);
/* NOTE(review): the condition guarding this error is not visible here;
   presumably it fires when readable printing is requested -- confirm. */
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): `buf' is declared on a line that is not visible here;
   confirm that it is large enough for this unbounded sprintf. */
751 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
752 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
753 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
754 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
756 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
757 CHARSET_COLUMNS (cs),
758 CHARSET_GRAPHIC (cs),
760 write_c_string (buf, printcharfun);
761 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
762 sprintf (buf, " 0x%x>", cs->header.uid);
763 write_c_string (buf, printcharfun);
/* Layout description for struct Lisp_Charset: 7 Lisp objects starting
   at the `name' member and 2 starting at `decoding_table'. */
766 static const struct lrecord_description charset_description[] = {
767 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
769 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Registers the charset lrecord type with its mark and print methods;
   the three 0 arguments that follow them leave those slots unset. */
774 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
775 mark_charset, print_charset, 0, 0, 0,
777 struct Lisp_Charset);
779 /* Make a new charset. */
/* Allocate a charset record, fill it in from the arguments and enter it
   into the lookup tables (charset_by_attributes,
   charset_by_leading_byte, rep_bytes_by_first_byte and
   Vcharset_hash_table).
   NOTE(review): the DECODING_TABLE argument is not used on any visible
   line; the record's decoding table is set to Qnil below -- confirm
   that this is intended. */
782 make_charset (Charset_ID id, Lisp_Object name,
783 unsigned char type, unsigned char columns, unsigned char graphic,
784 Bufbyte final, unsigned char direction, Lisp_Object short_name,
785 Lisp_Object long_name, Lisp_Object doc,
787 Lisp_Object decoding_table,
788 Emchar ucs_min, Emchar ucs_max,
789 Emchar code_offset, unsigned char byte_offset)
792 struct Lisp_Charset *cs =
793 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
794 XSETCHARSET (obj, cs);
796 CHARSET_ID (cs) = id;
797 CHARSET_NAME (cs) = name;
798 CHARSET_SHORT_NAME (cs) = short_name;
799 CHARSET_LONG_NAME (cs) = long_name;
800 CHARSET_DIRECTION (cs) = direction;
801 CHARSET_TYPE (cs) = type;
802 CHARSET_COLUMNS (cs) = columns;
803 CHARSET_GRAPHIC (cs) = graphic;
804 CHARSET_FINAL (cs) = final;
805 CHARSET_DOC_STRING (cs) = doc;
806 CHARSET_REGISTRY (cs) = reg;
807 CHARSET_CCL_PROGRAM (cs) = Qnil;
808 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
810 CHARSET_DECODING_TABLE(cs) = Qnil;
811 CHARSET_ENCODING_TABLE(cs) = Qnil;
812 CHARSET_UCS_MIN(cs) = ucs_min;
813 CHARSET_UCS_MAX(cs) = ucs_max;
814 CHARSET_CODE_OFFSET(cs) = code_offset;
815 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Dimension and characters-per-dimension follow from the type. */
818 switch (CHARSET_TYPE (cs))
820 case CHARSET_TYPE_94:
821 CHARSET_DIMENSION (cs) = 1;
822 CHARSET_CHARS (cs) = 94;
824 case CHARSET_TYPE_96:
825 CHARSET_DIMENSION (cs) = 1;
826 CHARSET_CHARS (cs) = 96;
828 case CHARSET_TYPE_94X94:
829 CHARSET_DIMENSION (cs) = 2;
830 CHARSET_CHARS (cs) = 94;
832 case CHARSET_TYPE_96X96:
833 CHARSET_DIMENSION (cs) = 2;
834 CHARSET_CHARS (cs) = 96;
837 case CHARSET_TYPE_128:
838 CHARSET_DIMENSION (cs) = 1;
839 CHARSET_CHARS (cs) = 128;
841 case CHARSET_TYPE_128X128:
842 CHARSET_DIMENSION (cs) = 2;
843 CHARSET_CHARS (cs) = 128;
845 case CHARSET_TYPE_256:
846 CHARSET_DIMENSION (cs) = 1;
847 CHARSET_CHARS (cs) = 256;
849 case CHARSET_TYPE_256X256:
850 CHARSET_DIMENSION (cs) = 2;
851 CHARSET_CHARS (cs) = 256;
/* Bytes per character in the string representation: 1 for ASCII,
   otherwise the dimension plus one or two (the condition separating
   the last two cases is not visible here). */
857 if (id == LEADING_BYTE_ASCII)
858 CHARSET_REP_BYTES (cs) = 1;
860 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
862 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
867 /* some charsets do not have final characters. This includes
868 ASCII, Control-1, Composite, and the two faux private
871 if (code_offset == 0)
873 assert (NILP (charset_by_attributes[type][final]));
874 charset_by_attributes[type][final] = obj;
877 assert (NILP (charset_by_attributes[type][final][direction]));
878 charset_by_attributes[type][final][direction] = obj;
882 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
883 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
886 /* official leading byte */
887 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
890 /* Some charsets are "faux" and don't have names or really exist at
891 all except in the leading-byte table. */
893 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION.  Each of the two counters is compared against its
   MAX_LEADING_BYTE_PRIVATE_* limit before being post-incremented; the
   error below reports that no leading byte is left. */
898 get_unallocated_leading_byte (int dimension)
904 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
907 lb = next_allocated_1_byte_leading_byte++;
911 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
914 lb = next_allocated_2_byte_leading_byte++;
919 ("No more character sets free for this dimension",
920 make_int (dimension));
/* Compute the first position code of character CH within CHARSET.
   Three sources are tried in the order shown: the charset's encoding
   table, the charset's UCS range, and the ranges derived from the
   final byte. */
927 charset_get_byte1 (Lisp_Object charset, Emchar ch)
/* 1. Explicit encoding table, if the charset has one. */
932 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
934 Lisp_Object value = get_char_code_table (ch, table);
938 Emchar code = XINT (value);
942 else if (code < (1 << 16))
944 else if (code < (1 << 24))
/* 2. CH inside [UCS_MIN, UCS_MAX]: the offset into that range, plus
   CODE_OFFSET, is divided by a power of CHARS chosen by the dimension,
   then BYTE_OFFSET is added. */
950 if ((XCHARSET_UCS_MIN (charset) <= ch)
951 && (ch <= XCHARSET_UCS_MAX (charset)))
952 return (ch - XCHARSET_UCS_MIN (charset)
953 + XCHARSET_CODE_OFFSET (charset))
954 / (XCHARSET_DIMENSION (charset) == 1 ?
957 XCHARSET_DIMENSION (charset) == 2 ?
958 XCHARSET_CHARS (charset)
960 XCHARSET_DIMENSION (charset) == 3 ?
961 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
963 XCHARSET_CHARS (charset)
964 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
965 + XCHARSET_BYTE_OFFSET (charset);
/* 3. Charsets with CODE_OFFSET 0: `d' is CH's offset from the start of
   the block that the final byte selects. */
966 else if (XCHARSET_CODE_OFFSET (charset) == 0)
968 if (XCHARSET_DIMENSION (charset) == 1)
970 if (XCHARSET_CHARS (charset) == 94)
972 if (((d = ch - (MIN_CHAR_94
973 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
977 else if (XCHARSET_CHARS (charset) == 96)
979 if (((d = ch - (MIN_CHAR_96
980 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
987 else if (XCHARSET_DIMENSION (charset) == 2)
989 if (XCHARSET_CHARS (charset) == 94)
991 if (((d = ch - (MIN_CHAR_94x94
992 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
995 return (d / 94) + 33;
997 else if (XCHARSET_CHARS (charset) == 96)
999 if (((d = ch - (MIN_CHAR_96x96
1000 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1003 return (d / 96) + 32;
/* Compute the second position code of character CH within CHARSET,
   using the same three sources as charset_get_byte1.  Dimension-1
   charsets are handled first, by a branch whose body is not visible
   here. */
1011 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1013 if (XCHARSET_DIMENSION (charset) == 1)
1019 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1021 Lisp_Object value = get_char_code_table (ch, table);
1025 Emchar code = XINT (value);
/* NOTE(review): for codes below 1 << 16 the low byte is returned.  For
   wider codes the shifts are 16 and 24, which select the top byte of a
   3-byte or 4-byte code rather than its second byte.  Confirm the
   intended code layout. */
1027 if (code < (1 << 16))
1028 return (unsigned char)code;
1029 else if (code < (1 << 24))
1030 return (unsigned char)(code >> 16);
1032 return (unsigned char)(code >> 24);
/* CH inside [UCS_MIN, UCS_MAX]: the range offset is divided by a power
   of CHARS chosen by the dimension, reduced modulo CHARS, and
   BYTE_OFFSET is added. */
1035 if ((XCHARSET_UCS_MIN (charset) <= ch)
1036 && (ch <= XCHARSET_UCS_MAX (charset)))
1037 return ((ch - XCHARSET_UCS_MIN (charset)
1038 + XCHARSET_CODE_OFFSET (charset))
1039 / (XCHARSET_DIMENSION (charset) == 2 ?
1042 XCHARSET_DIMENSION (charset) == 3 ?
1043 XCHARSET_CHARS (charset)
1045 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1046 % XCHARSET_CHARS (charset)
1047 + XCHARSET_BYTE_OFFSET (charset);
/* Otherwise CH must fall inside the 94x94 (or 96x96) block that the
   final byte selects; if it does not, the result is 0. */
1048 else if (XCHARSET_CHARS (charset) == 94)
1049 return (MIN_CHAR_94x94
1050 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1051 && (ch < MIN_CHAR_94x94
1052 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1053 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1054 else /* if (XCHARSET_CHARS (charset) == 96) */
1055 return (MIN_CHAR_96x96
1056 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1057 && (ch < MIN_CHAR_96x96
1058 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1059 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1063 Lisp_Object Vdefault_coded_charset_priority_list;
1067 /************************************************************************/
1068 /* Basic charset Lisp functions */
1069 /************************************************************************/
/* Lisp primitive `charsetp': yields Qt when CHARSETP holds for OBJECT
   and Qnil otherwise. */
1071 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1072 Return non-nil if OBJECT is a charset.
1076 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned
   unchanged; anything else must pass CHECK_SYMBOL and is looked up in
   Vcharset_hash_table with Fgethash, Qnil being the default for a
   missing entry. */
1079 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1080 Retrieve the charset of the given name.
1081 If CHARSET-OR-NAME is a charset object, it is simply returned.
1082 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1083 nil is returned. Otherwise the associated charset object is returned.
1087 if (CHARSETP (charset_or_name))
1088 return charset_or_name;
1090 CHECK_SYMBOL (charset_or_name);
1091 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': a wrapper around Ffind_charset that
   reports failure through signal_simple_error ("No such charset"). */
1094 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1095 Retrieve the charset of the given name.
1096 Same as `find-charset' except an error is signalled if there is no such
1097 charset instead of returning nil.
1101 Lisp_Object charset = Ffind_charset (name);
1104 signal_simple_error ("No such charset", name);
1108 /* We store the charsets in hash tables with the names as the key and the
1109 actual charset object as the value. Occasionally we need to use them
1110 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash by Fcharset_list: carries a
   pointer to the list being accumulated. */
1111 struct charset_list_closure
1113 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  VALUE is a charset
   object; its name is consed onto the front of the list that the
   closure points to.  KEY is not used on the visible lines. */
1117 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1118 void *charset_list_closure)
1120 /* This function can GC */
1121 struct charset_list_closure *chcl =
1122 (struct charset_list_closure*) charset_list_closure;
1123 Lisp_Object *charset_list = chcl->charset_list;
1125 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty, GC-protected
   list and lets add_charset_to_list_mapper add one name for every entry
   of Vcharset_hash_table. */
1129 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1130 Return a list of the names of all defined charsets.
1134 Lisp_Object charset_list = Qnil;
1135 struct gcpro gcpro1;
1136 struct charset_list_closure charset_list_closure;
1138 GCPRO1 (charset_list);
1139 charset_list_closure.charset_list = &charset_list;
1140 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1141 &charset_list_closure);
1144 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET through Fget_charset
   and returns the name stored in the resulting charset object. */
1147 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1148 Return the name of the given charset.
1152 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.

   NAME must be a symbol that does not yet name a charset; DOC-STRING
   must be a string or nil.  PROPS is walked as a property list and each
   recognised keyword is validated and stored in a local; any other
   keyword signals "Unrecognized property".  After the loop the final
   byte is checked, the charset type is derived from 'dimension and
   'chars, the DIMENSION/CHARS/FINAL combination is checked against
   CHARSET_BY_ATTRIBUTES for both directions, a leading byte is
   allocated, missing strings are defaulted (doc string and registry to
   "", short name to the symbol's name, long name to the doc string) and
   the charset is built with make_charset.  A 'ccl-program, if given, is
   stored in the new charset afterwards.

   Fix: the test for 'long-name is now an `else if' continuing the test
   for 'short-name.  It used to start a separate `if'/`else if' chain,
   so a 'short-name property was accepted by the first `if' and then
   also ran through the second chain to its final `else', which signals
   "Unrecognized property" -- making the documented 'short-name property
   unusable. */
1155 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1156 Define a new character set.
1157 This function is for use with Mule support.
1158 NAME is a symbol, the name by which the character set is normally referred.
1159 DOC-STRING is a string describing the character set.
1160 PROPS is a property list, describing the specific nature of the
1161 character set. Recognized properties are:
1163 'short-name Short version of the charset name (ex: Latin-1)
1164 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1165 'registry A regular expression matching the font registry field for
1167 'dimension Number of octets used to index a character in this charset.
1168 Either 1 or 2. Defaults to 1.
1169 'columns Number of columns used to display a character in this charset.
1170 Only used in TTY mode. (Under X, the actual width of a
1171 character can be derived from the font used to display the
1172 characters.) If unspecified, defaults to the dimension
1173 (this is almost always the correct value).
1174 'chars Number of characters in each dimension (94 or 96).
1175 Defaults to 94. Note that if the dimension is 2, the
1176 character set thus described is 94x94 or 96x96.
1177 'final Final byte of ISO 2022 escape sequence. Must be
1178 supplied. Each combination of (DIMENSION, CHARS) defines a
1179 separate namespace for final bytes. Note that ISO
1180 2022 restricts the final byte to the range
1181 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1182 dimension == 2. Note also that final bytes in the range
1183 0x30 - 0x3F are reserved for user-defined (not official)
1185 'graphic 0 (use left half of font on output) or 1 (use right half
1186 of font on output). Defaults to 0. For example, for
1187 a font whose registry is ISO8859-1, the left half
1188 (octets 0x20 - 0x7F) is the `ascii' character set, while
1189 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1190 character set. With 'graphic set to 0, the octets
1191 will have their high bit cleared; with it set to 1,
1192 the octets will have their high bit set.
1193 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1195 'ccl-program A compiled CCL program used to convert a character in
1196 this charset into an index into the font. This is in
1197 addition to the 'graphic property. The CCL program
1198 is passed the octets of the character, with the high
1199 bit cleared and set depending upon whether the value
1200 of the 'graphic property is 0 or 1.
1202 (name, doc_string, props))
1204 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1205 int direction = CHARSET_LEFT_TO_RIGHT;
1207 Lisp_Object registry = Qnil;
1208 Lisp_Object charset;
1209 Lisp_Object rest, keyword, value;
1210 Lisp_Object ccl_program = Qnil;
1211 Lisp_Object short_name = Qnil, long_name = Qnil;
1213 unsigned char byte_offset = 0;
1216 CHECK_SYMBOL (name);
1217 if (!NILP (doc_string))
1218 CHECK_STRING (doc_string);
1220 charset = Ffind_charset (name);
1221 if (!NILP (charset))
1222 signal_simple_error ("Cannot redefine existing charset", name);
1224 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1226 if (EQ (keyword, Qshort_name))
1228 CHECK_STRING (value);
1232 else if (EQ (keyword, Qlong_name))
1234 CHECK_STRING (value);
1238 else if (EQ (keyword, Qdimension))
1241 dimension = XINT (value);
1242 if (dimension < 1 || dimension > 2)
1243 signal_simple_error ("Invalid value for 'dimension", value);
1246 else if (EQ (keyword, Qchars))
1249 chars = XINT (value);
1250 if (chars != 94 && chars != 96)
1251 signal_simple_error ("Invalid value for 'chars", value);
1254 else if (EQ (keyword, Qcolumns))
1257 columns = XINT (value);
1258 if (columns != 1 && columns != 2)
1259 signal_simple_error ("Invalid value for 'columns", value);
1262 else if (EQ (keyword, Qgraphic))
1265 graphic = XINT (value);
1267 if (graphic < 0 || graphic > 2)
1269 if (graphic < 0 || graphic > 1)
1271 signal_simple_error ("Invalid value for 'graphic", value);
1274 else if (EQ (keyword, Qregistry))
1276 CHECK_STRING (value);
1280 else if (EQ (keyword, Qdirection))
1282 if (EQ (value, Ql2r))
1283 direction = CHARSET_LEFT_TO_RIGHT;
1284 else if (EQ (value, Qr2l))
1285 direction = CHARSET_RIGHT_TO_LEFT;
1287 signal_simple_error ("Invalid value for 'direction", value);
1290 else if (EQ (keyword, Qfinal))
1292 CHECK_CHAR_COERCE_INT (value);
1293 final = XCHAR (value);
1294 if (final < '0' || final > '~')
1295 signal_simple_error ("Invalid value for 'final", value);
1298 else if (EQ (keyword, Qccl_program))
1300 CHECK_VECTOR (value);
1301 ccl_program = value;
1305 signal_simple_error ("Unrecognized property", keyword);
1309 error ("'final must be specified");
1310 if (dimension == 2 && final > 0x5F)
1312 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1316 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1318 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1320 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1321 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1323 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1330 /* id = CHARSET_ID_OFFSET_94 + final; */
1331 id = get_unallocated_leading_byte (dimension);
1333 else if (chars == 96)
1335 id = get_unallocated_leading_byte (dimension);
1342 else if (dimension == 2)
1346 id = get_unallocated_leading_byte (dimension);
1348 else if (chars == 96)
1350 id = get_unallocated_leading_byte (dimension);
1364 else if (chars == 96)
1367 id = get_unallocated_leading_byte (dimension);
1370 if (NILP (doc_string))
1371 doc_string = build_string ("");
1373 if (NILP (registry))
1374 registry = build_string ("");
1376 if (NILP (short_name))
1377 XSETSTRING (short_name, XSYMBOL (name)->name);
1379 if (NILP (long_name))
1380 long_name = doc_string;
1383 columns = dimension;
1384 charset = make_charset (id, name, type, columns, graphic,
1385 final, direction, short_name, long_name,
1386 doc_string, registry,
1387 Qnil, 0, 0, 0, byte_offset);
1388 if (!NILP (ccl_program))
1389 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   CHARSET is resolved with Fget_charset; an error is signalled if it
   already has a reverse-direction charset, or if NEW-NAME (which must be
   a symbol) is already known to Ffind_charset.  The new charset copies
   CHARSET's type, columns, graphic, final byte, names, doc string and
   registry, gets its id from get_unallocated_leading_byte, and is given
   the opposite direction.  Finally the two charsets are linked to each
   other through their reverse-direction-charset slots.  */
1393 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1395 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1396 NEW-NAME is the name of the new charset. Return the new charset.
1398 (charset, new_name))
1400 Lisp_Object new_charset = Qnil;
1401 int id, dimension, columns, graphic, final;
1402 int direction, type;
1403 Lisp_Object registry, doc_string, short_name, long_name;
1404 struct Lisp_Charset *cs;
1406 charset = Fget_charset (charset);
1407 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1408 signal_simple_error ("Charset already has reverse-direction charset",
1411 CHECK_SYMBOL (new_name);
1412 if (!NILP (Ffind_charset (new_name)))
1413 signal_simple_error ("Cannot redefine existing charset", new_name);
1415 cs = XCHARSET (charset);
1417 type = CHARSET_TYPE (cs);
1418 columns = CHARSET_COLUMNS (cs);
1419 dimension = CHARSET_DIMENSION (cs);
1420 id = get_unallocated_leading_byte (dimension);
1422 graphic = CHARSET_GRAPHIC (cs);
1423 final = CHARSET_FINAL (cs);
/* Take the opposite of CS's direction: start from right-to-left and
   switch to left-to-right when CS itself is right-to-left.  */
1424 direction = CHARSET_RIGHT_TO_LEFT;
1425 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1426 direction = CHARSET_LEFT_TO_RIGHT;
1427 doc_string = CHARSET_DOC_STRING (cs);
1428 short_name = CHARSET_SHORT_NAME (cs);
1429 long_name = CHARSET_LONG_NAME (cs);
1430 registry = CHARSET_REGISTRY (cs);
1432 new_charset = make_charset (id, new_name, type, columns,
1433 graphic, final, direction, short_name, long_name,
1434 doc_string, registry,
/* The decoding table, UCS range and code/byte offsets are taken over
   from CS unchanged.  */
1436 CHARSET_DECODING_TABLE(cs),
1437 CHARSET_UCS_MIN(cs),
1438 CHARSET_UCS_MAX(cs),
1439 CHARSET_CODE_OFFSET(cs),
1440 CHARSET_BYTE_OFFSET(cs)
/* Cross-link the original and the new charset.  */
1446 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1447 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.
   ALIAS must be a symbol (CHECK_SYMBOL).  CHARSET is resolved with
   Fget_charset and stored under ALIAS in Vcharset_hash_table; the value
   returned is whatever Fputhash returns.  */
1452 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1453 Define symbol ALIAS as an alias for CHARSET.
1457 CHECK_SYMBOL (alias);
1458 charset = Fget_charset (charset);
1459 return Fputhash (alias, charset, Vcharset_hash_table);
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and returns the contents of its
   reverse-direction-charset slot as is.
   NOTE(review): the comment below marks the feature as unimplemented;
   whether this definition is compiled at all cannot be told from the
   lines shown here -- confirm.  */
1462 /* #### Reverse direction charsets not yet implemented. */
1464 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1466 Return the reverse-direction charset parallel to CHARSET, if any.
1467 This is the charset with the same properties (in particular, the same
1468 dimension, number of characters per dimension, and final byte) as
1469 CHARSET but whose characters are displayed in the opposite direction.
1473 charset = Fget_charset (charset);
1474 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Argument checks visible below: DIMENSION must be the integer 1 or 2,
   CHARS must be 94 or 96, FINAL must be a character between '0' and '~'
   (and no greater than 0x5F when DIMENSION is 2), and DIRECTION must be
   `l2r', `r2l' or nil.  Each violation signals an error naming the
   offending argument.  The charset type is derived from DIMENSION and
   CHARS, and the lookup goes through CHARSET_BY_ATTRIBUTES: with an
   explicit direction only that direction is consulted, otherwise
   left-to-right and right-to-left are both tried.  The result handed
   back is the charset's name (XCHARSET_NAME).  */
1478 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1479 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1480 If DIRECTION is omitted, both directions will be checked (left-to-right
1481 will be returned if character sets exist for both directions).
1483 (dimension, chars, final, direction))
1485 int dm, ch, fi, di = -1;
1487 Lisp_Object obj = Qnil;
1489 CHECK_INT (dimension);
1490 dm = XINT (dimension);
1491 if (dm < 1 || dm > 2)
1492 signal_simple_error ("Invalid value for DIMENSION", dimension);
1496 if (ch != 94 && ch != 96)
1497 signal_simple_error ("Invalid value for CHARS", chars);
1499 CHECK_CHAR_COERCE_INT (final);
1501 if (fi < '0' || fi > '~')
1502 signal_simple_error ("Invalid value for FINAL", final);
1504 if (EQ (direction, Ql2r))
1505 di = CHARSET_LEFT_TO_RIGHT;
1506 else if (EQ (direction, Qr2l))
1507 di = CHARSET_RIGHT_TO_LEFT;
1508 else if (!NILP (direction))
1509 signal_simple_error ("Invalid value for DIRECTION", direction);
1511 if (dm == 2 && fi > 0x5F)
1513 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1516 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1518 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1522 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1524 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1527 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1530 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its short-name slot.  */
1534 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1535 Return short name of CHARSET.
1539 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its long-name slot.  */
1542 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1543 Return long name of CHARSET.
1547 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its doc-string slot.  */
1550 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1551 Return description of CHARSET.
1555 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension as a Lisp integer.  */
1558 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1559 Return dimension of CHARSET.
1563 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   CHARSET is resolved with Fget_charset and PROP must be a symbol.  PROP
   is compared with EQ against each known property symbol in turn and the
   matching slot is returned: integer-valued slots (dimension, columns,
   graphic, chars) are wrapped with make_int, the final byte with
   make_char, and the direction is reported as the symbol `l2r' or `r2l'.
   For `reverse-direction-charset' the name of the linked charset is
   returned (the lines guarding that case are not shown here).  Any other
   symbol signals "Unrecognized charset property name".  */
1566 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1567 Return property PROP of CHARSET.
1568 Recognized properties are those listed in `make-charset', as well as
1569 'name and 'doc-string.
1573 struct Lisp_Charset *cs;
1575 charset = Fget_charset (charset);
1576 cs = XCHARSET (charset);
1578 CHECK_SYMBOL (prop);
1579 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1580 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1581 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1582 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1583 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1584 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1585 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1586 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1587 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1588 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1589 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1590 if (EQ (prop, Qdirection))
1591 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1592 if (EQ (prop, Qreverse_direction_charset))
1594 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1598 return XCHARSET_NAME (obj);
1600 signal_simple_error ("Unrecognized charset property name", prop);
1601 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its leading byte (XCHARSET_LEADING_BYTE) as a Lisp integer.  */
1604 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1605 Return charset identification number of CHARSET.
1609 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1612 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   CHARSET is resolved with Fget_charset, CCL-PROGRAM must be a vector
   (CHECK_VECTOR), and the vector is stored in the charset's ccl-program
   slot.  */
1615 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1616 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1618 (charset, ccl_program))
1620 charset = Fget_charset (charset);
1621 CHECK_VECTOR (ccl_program);
1622 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the cached font information kept for CHARSET.
   For every device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked up
   in that device's charset_font_cache; when an entry exists it is
   cleared with Fclrhash.  */
1627 invalidate_charset_font_caches (Lisp_Object charset)
1629 /* Invalidate font cache entries for charset on all devices. */
1630 Lisp_Object devcons, concons, hash_table;
1631 DEVICE_LOOP_NO_BREAK (devcons, concons)
1633 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is passed as the default so that a missing entry can be told
   apart from any stored value.  */
1634 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1635 if (!UNBOUNDP (hash_table))
1636 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.
   CHARSET is resolved with Fget_charset and REGISTRY must be a string
   (CHECK_STRING).  After the registry slot is updated, the per-device
   font caches for the charset are invalidated and
   face_property_was_changed is called for the `font' property of the
   default face.  */
1640 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1641 Set the 'registry property of CHARSET to REGISTRY.
1643 (charset, registry))
1645 charset = Fget_charset (charset);
1646 CHECK_STRING (registry);
1647 XCHARSET_REGISTRY (charset) = registry;
1648 invalidate_charset_font_caches (charset);
1649 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its decoding table
   (XCHARSET_DECODING_TABLE).  */
1654 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1655 Return mapping-table of CHARSET.
1659 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   CHARSET is resolved with Fget_charset.  TABLE is either nil, which
   clears both the decoding and the encoding table, or a vector with at
   most CHARSET_CHARS entries, which becomes the new decoding table.  Any
   other type signals a wrong-type-argument error, and an over-long
   vector signals args-out-of-range.

   The encoding table is then rebuilt from TABLE with put_char_code_table,
   according to the charset's dimension.  Each character found at index I
   is mapped to I + CHARSET_BYTE_OFFSET.  In the second variant an
   element of TABLE may itself be a vector; a character at index J of
   that row is mapped to the two offset indices combined as
   (I + offset) << 8 | (J + offset).  A row longer than CHARSET_CHARS
   puts the previous decoding table back before args-out-of-range is
   signalled.

   OLD_TABLE must therefore hold the previous *decoding* table.  It used
   to be read from the encoding slot, so a rejected TABLE left a
   char-code table in the decoding slot.  */
1662 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1663 Set mapping-table of CHARSET to TABLE.
1667 struct Lisp_Charset *cs;
1668 Lisp_Object old_table;
1671 charset = Fget_charset (charset);
1672 cs = XCHARSET (charset);
1674 if (EQ (table, Qnil))
1676 CHARSET_DECODING_TABLE(cs) = table;
1677 CHARSET_ENCODING_TABLE(cs) = Qnil;
1680 else if (VECTORP (table))
1682 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1683 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1684 old_table = CHARSET_DECODING_TABLE(cs);
1685 CHARSET_DECODING_TABLE(cs) = table;
1688 signal_error (Qwrong_type_argument,
1689 list2 (build_translated_string ("vector-or-nil-p"),
1691 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1693 switch (CHARSET_DIMENSION (cs))
1696 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1697 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1699 Lisp_Object c = XVECTOR_DATA(table)[i];
1702 put_char_code_table (XCHAR (c),
1703 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1704 CHARSET_ENCODING_TABLE(cs));
1708 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1709 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1711 Lisp_Object v = XVECTOR_DATA(table)[i];
1717 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1719 CHARSET_DECODING_TABLE(cs) = old_table;
1720 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1722 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1724 Lisp_Object c = XVECTOR_DATA(v)[j];
1729 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1730 | (j + CHARSET_BYTE_OFFSET (cs))),
1731 CHARSET_ENCODING_TABLE(cs));
1735 put_char_code_table (XCHAR (v),
1736 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1737 CHARSET_ENCODING_TABLE(cs));
1746 /************************************************************************/
1747 /* Lisp primitives for working with characters */
1748 /************************************************************************/
/* Lisp primitive `make-char'.
   CHARSET is resolved with Fget_charset.  The permitted octet range
   depends on the charset: 0-127 for ascii, 0-31 for control-1, 0-255 when
   the charset has 256 characters, 33-126 when it has 94, and 32-127
   otherwise.  An octet outside that range signals args-out-of-range with
   the limits attached.  For a charset of dimension one the character is
   built from the first octet alone, and the error text visible below
   indicates that a non-nil ARG2 is rejected in that case.  Otherwise the
   second octet is masked with 0x7f and range-checked before the
   character is built from both octets.  */
1750 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1751 Make a character from CHARSET and octets ARG1 and ARG2.
1752 ARG2 is required only for characters from two-dimensional charsets.
1753 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1754 character s with caron.
1756 (charset, arg1, arg2))
1758 struct Lisp_Charset *cs;
1760 int lowlim, highlim;
1762 charset = Fget_charset (charset);
1763 cs = XCHARSET (charset);
1765 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1766 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1768 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1770 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1771 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1774 /* It is useful (and safe, according to Olivier Galibert) to strip
1775 the 8th bit off ARG1 and ARG2 because it allows programmers to
1776 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1777 Latin 2 code of the character. */
1785 if (a1 < lowlim || a1 > highlim)
1786 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1788 if (CHARSET_DIMENSION (cs) == 1)
1792 ("Charset is of dimension one; second octet must be nil", arg2);
1793 return make_char (MAKE_CHAR (charset, a1, 0));
1802 a2 = XINT (arg2) & 0x7f;
1804 if (a2 < lowlim || a2 > highlim)
1805 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1807 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.
   CH is checked with CHECK_CHAR_COERCE_INT; the result is the name
   (XCHARSET_NAME) of the charset that CHAR_CHARSET reports for the
   character.  */
1810 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1811 Return the character set of char CH.
1815 CHECK_CHAR_COERCE_INT (ch);
1817 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char'.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT and taken apart with
   BREAKUP_CHAR into its charset and two position codes.  The result is a
   list that starts with the charset's name; it carries both position
   codes when the charset has dimension 2 and only the first one
   otherwise.  CHARSET and RC are registered with GCPRO2 while the list
   is being built.  */
1820 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1821 Return list of charset and one or two position-codes of CHAR.
1825 /* This function can GC */
1826 struct gcpro gcpro1, gcpro2;
1827 Lisp_Object charset = Qnil;
1828 Lisp_Object rc = Qnil;
1831 GCPRO2 (charset, rc);
1832 CHECK_CHAR_COERCE_INT (character);
1834 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1836 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1838 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1842 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1850 #ifdef ENABLE_COMPOSITE_CHARS
1851 /************************************************************************/
1852 /* composite character functions */
1853 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string, which is looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below takes the next free (row, column) position in Vcharset_composite,
   records the pairing in both hash tables (character -> string and
   string -> character), and advances the position: the column is
   incremented, and when it reaches 128 it restarts at 32 on the next
   row.  An error is signalled once the row counter has reached 128.
   NOTE(review): presumably allocation happens only when the lookup
   misses; the guarding line is not shown here -- confirm.  */
1856 lookup_composite_char (Bufbyte *str, int len)
1858 Lisp_Object lispstr = make_string (str, len);
1859 Lisp_Object ch = Fgethash (lispstr,
1860 Vcomposite_char_string2char_hash_table,
1866 if (composite_char_row_next >= 128)
1867 signal_simple_error ("No more composite chars available", lispstr);
1868 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1869 composite_char_col_next);
1870 Fputhash (make_char (emch), lispstr,
1871 Vcomposite_char_char2string_hash_table);
1872 Fputhash (lispstr, make_char (emch),
1873 Vcomposite_char_string2char_hash_table);
1874 composite_char_col_next++;
1875 if (composite_char_col_next >= 128)
1877 composite_char_col_next = 32;
1878 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that an
   entry exists for CH.  */
1887 composite_char_string (Emchar ch)
1889 Lisp_Object str = Fgethash (make_char (ch),
1890 Vcomposite_char_char2string_hash_table,
1892 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char.
   STRING must be a Lisp string (CHECK_STRING); its data and length are
   handed to lookup_composite_char and the resulting character is
   returned as a Lisp character.  */
1896 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1897 Convert a string into a single composite character.
1898 The character is the result of overstriking all the characters in
1903 CHECK_STRING (string);
1904 return make_char (lookup_composite_char (XSTRING_DATA (string),
1905 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string.
   Signals "Must be composite char" unless the character's leading byte
   is LEADING_BYTE_COMPOSITE; otherwise returns the string recorded for
   the character.  */
1908 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1909 Return a string of the characters comprising a composite character.
1917 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1918 signal_simple_error ("Must be composite char", ch);
1919 return composite_char_string (emch);
1921 #endif /* ENABLE_COMPOSITE_CHARS */
1924 /************************************************************************/
1925 /* initialization */
1926 /************************************************************************/
/* Symbol and subr setup for this file.
   Registers the Lisp primitives defined above with DEFSUBR and creates,
   with defsymbol, the symbols used as charset property keywords, the
   direction values `l2r' and `r2l', and the names of the predefined
   charsets.  */
1929 syms_of_mule_charset (void)
1931 DEFSUBR (Fcharsetp);
1932 DEFSUBR (Ffind_charset);
1933 DEFSUBR (Fget_charset);
1934 DEFSUBR (Fcharset_list);
1935 DEFSUBR (Fcharset_name);
1936 DEFSUBR (Fmake_charset);
1937 DEFSUBR (Fmake_reverse_direction_charset);
1938 /* DEFSUBR (Freverse_direction_charset); */
1939 DEFSUBR (Fdefine_charset_alias);
1940 DEFSUBR (Fcharset_from_attributes);
1941 DEFSUBR (Fcharset_short_name);
1942 DEFSUBR (Fcharset_long_name);
1943 DEFSUBR (Fcharset_description);
1944 DEFSUBR (Fcharset_dimension);
1945 DEFSUBR (Fcharset_property);
1946 DEFSUBR (Fcharset_id);
1947 DEFSUBR (Fset_charset_ccl_program);
1948 DEFSUBR (Fset_charset_registry);
1950 DEFSUBR (Fcharset_mapping_table);
1951 DEFSUBR (Fset_charset_mapping_table);
1954 DEFSUBR (Fmake_char);
1955 DEFSUBR (Fchar_charset);
1956 DEFSUBR (Fsplit_char);
1958 #ifdef ENABLE_COMPOSITE_CHARS
1959 DEFSUBR (Fmake_composite_char);
1960 DEFSUBR (Fcomposite_char_string);
/* Predicate name and property keywords.  */
1963 defsymbol (&Qcharsetp, "charsetp");
1964 defsymbol (&Qregistry, "registry");
1965 defsymbol (&Qfinal, "final");
1966 defsymbol (&Qgraphic, "graphic");
1967 defsymbol (&Qdirection, "direction");
1968 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1969 defsymbol (&Qshort_name, "short-name");
1970 defsymbol (&Qlong_name, "long-name");
/* Values of the `direction' property.  */
1972 defsymbol (&Ql2r, "l2r");
1973 defsymbol (&Qr2l, "r2l");
1975 /* Charsets, compatible with FSF 20.3
1976 Naming convention is Script-Charset[-Edition] */
1977 defsymbol (&Qascii, "ascii");
1978 defsymbol (&Qcontrol_1, "control-1");
1979 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1980 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1981 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1982 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1983 defsymbol (&Qthai_tis620, "thai-tis620");
1984 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1985 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1986 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1987 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1988 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1989 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1990 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1991 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1992 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1993 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1994 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1995 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1996 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1997 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1999 defsymbol (&Qucs_bmp, "ucs-bmp");
2000 defsymbol (&Qlatin_viscii, "latin-viscii");
2001 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2002 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2003 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2004 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2005 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2006 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2008 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2009 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2011 defsymbol (&Qcomposite, "composite");
/* Variable setup for this file.
   Fills charset_by_leading_byte and charset_by_attributes with Qnil,
   sets the starting points used when allocating leading bytes for new
   one- and two-byte charsets, and defines the Lisp variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list' together with their initial
   values.
   NOTE(review): charset_by_attributes is cleared below both as a
   two-level and as a three-level table, and
   next_allocated_2_byte_leading_byte is assigned twice.  These look like
   alternatives chosen by a preprocessor conditional whose lines are not
   shown here -- confirm.  */
2015 vars_of_mule_charset (void)
2022 /* Table of charsets indexed by leading byte. */
2023 for (i = 0; i < countof (charset_by_leading_byte); i++)
2024 charset_by_leading_byte[i] = Qnil;
2027 /* Table of charsets indexed by type/final-byte. */
2028 for (i = 0; i < countof (charset_by_attributes); i++)
2029 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2030 charset_by_attributes[i][j] = Qnil;
2032 /* Table of charsets indexed by type/final-byte/direction. */
2033 for (i = 0; i < countof (charset_by_attributes); i++)
2034 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2035 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2036 charset_by_attributes[i][j][k] = Qnil;
2039 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2041 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2043 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2047 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2048 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2049 Leading-code of private TYPE9N charset of column-width 1.
2051 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2055 Vutf_2000_version = build_string("0.8 (Kami)");
2056 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2057 Version number of UTF-2000.
2060 Vdefault_coded_charset_priority_list = Qnil;
2061 DEFVAR_LISP ("default-coded-charset-priority-list",
2062 &Vdefault_coded_charset_priority_list /*
2063 Default order of preferred coded-character-set.
2069 complex_vars_of_mule_charset (void)
2071 staticpro (&Vcharset_hash_table);
2072 Vcharset_hash_table =
2073 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2075 /* Predefined character sets. We store them into variables for
2080 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2081 CHARSET_TYPE_256X256, 1, 2, 0,
2082 CHARSET_LEFT_TO_RIGHT,
2083 build_string ("BMP"),
2084 build_string ("BMP"),
2085 build_string ("BMP"),
2086 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?\\)"),
2087 Qnil, 0, 0xFFFF, 0, 0);
2089 # define MIN_CHAR_THAI 0
2090 # define MAX_CHAR_THAI 0
2091 # define MIN_CHAR_GREEK 0
2092 # define MAX_CHAR_GREEK 0
2093 # define MIN_CHAR_HEBREW 0
2094 # define MAX_CHAR_HEBREW 0
2095 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2096 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2097 # define MIN_CHAR_CYRILLIC 0
2098 # define MAX_CHAR_CYRILLIC 0
2101 make_charset (LEADING_BYTE_ASCII, Qascii,
2102 CHARSET_TYPE_94, 1, 0, 'B',
2103 CHARSET_LEFT_TO_RIGHT,
2104 build_string ("ASCII"),
2105 build_string ("ASCII)"),
2106 build_string ("ASCII (ISO646 IRV)"),
2107 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2108 Qnil, 0, 0x7F, 0, 0);
2109 Vcharset_control_1 =
2110 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2111 CHARSET_TYPE_94, 1, 1, 0,
2112 CHARSET_LEFT_TO_RIGHT,
2113 build_string ("C1"),
2114 build_string ("Control characters"),
2115 build_string ("Control characters 128-191"),
2117 Qnil, 0x80, 0x9F, 0, 0);
2118 Vcharset_latin_iso8859_1 =
2119 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2120 CHARSET_TYPE_96, 1, 1, 'A',
2121 CHARSET_LEFT_TO_RIGHT,
2122 build_string ("Latin-1"),
2123 build_string ("ISO8859-1 (Latin-1)"),
2124 build_string ("ISO8859-1 (Latin-1)"),
2125 build_string ("iso8859-1"),
2126 Qnil, 0xA0, 0xFF, 0, 32);
2127 Vcharset_latin_iso8859_2 =
2128 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2129 CHARSET_TYPE_96, 1, 1, 'B',
2130 CHARSET_LEFT_TO_RIGHT,
2131 build_string ("Latin-2"),
2132 build_string ("ISO8859-2 (Latin-2)"),
2133 build_string ("ISO8859-2 (Latin-2)"),
2134 build_string ("iso8859-2"),
2136 Vcharset_latin_iso8859_3 =
2137 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2138 CHARSET_TYPE_96, 1, 1, 'C',
2139 CHARSET_LEFT_TO_RIGHT,
2140 build_string ("Latin-3"),
2141 build_string ("ISO8859-3 (Latin-3)"),
2142 build_string ("ISO8859-3 (Latin-3)"),
2143 build_string ("iso8859-3"),
2145 Vcharset_latin_iso8859_4 =
2146 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2147 CHARSET_TYPE_96, 1, 1, 'D',
2148 CHARSET_LEFT_TO_RIGHT,
2149 build_string ("Latin-4"),
2150 build_string ("ISO8859-4 (Latin-4)"),
2151 build_string ("ISO8859-4 (Latin-4)"),
2152 build_string ("iso8859-4"),
2154 Vcharset_thai_tis620 =
2155 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2156 CHARSET_TYPE_96, 1, 1, 'T',
2157 CHARSET_LEFT_TO_RIGHT,
2158 build_string ("TIS620"),
2159 build_string ("TIS620 (Thai)"),
2160 build_string ("TIS620.2529 (Thai)"),
2161 build_string ("tis620"),
2162 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2163 Vcharset_greek_iso8859_7 =
2164 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2165 CHARSET_TYPE_96, 1, 1, 'F',
2166 CHARSET_LEFT_TO_RIGHT,
2167 build_string ("ISO8859-7"),
2168 build_string ("ISO8859-7 (Greek)"),
2169 build_string ("ISO8859-7 (Greek)"),
2170 build_string ("iso8859-7"),
2171 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2172 Vcharset_arabic_iso8859_6 =
2173 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2174 CHARSET_TYPE_96, 1, 1, 'G',
2175 CHARSET_RIGHT_TO_LEFT,
2176 build_string ("ISO8859-6"),
2177 build_string ("ISO8859-6 (Arabic)"),
2178 build_string ("ISO8859-6 (Arabic)"),
2179 build_string ("iso8859-6"),
2181 Vcharset_hebrew_iso8859_8 =
2182 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2183 CHARSET_TYPE_96, 1, 1, 'H',
2184 CHARSET_RIGHT_TO_LEFT,
2185 build_string ("ISO8859-8"),
2186 build_string ("ISO8859-8 (Hebrew)"),
2187 build_string ("ISO8859-8 (Hebrew)"),
2188 build_string ("iso8859-8"),
2189 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2190 Vcharset_katakana_jisx0201 =
2191 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2192 CHARSET_TYPE_94, 1, 1, 'I',
2193 CHARSET_LEFT_TO_RIGHT,
2194 build_string ("JISX0201 Kana"),
2195 build_string ("JISX0201.1976 (Japanese Kana)"),
2196 build_string ("JISX0201.1976 Japanese Kana"),
2197 build_string ("jisx0201\\.1976"),
2199 MIN_CHAR_HALFWIDTH_KATAKANA,
2200 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2201 Vcharset_latin_jisx0201 =
2202 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2203 CHARSET_TYPE_94, 1, 0, 'J',
2204 CHARSET_LEFT_TO_RIGHT,
2205 build_string ("JISX0201 Roman"),
2206 build_string ("JISX0201.1976 (Japanese Roman)"),
2207 build_string ("JISX0201.1976 Japanese Roman"),
2208 build_string ("jisx0201\\.1976"),
2210 Vcharset_cyrillic_iso8859_5 =
2211 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2212 CHARSET_TYPE_96, 1, 1, 'L',
2213 CHARSET_LEFT_TO_RIGHT,
2214 build_string ("ISO8859-5"),
2215 build_string ("ISO8859-5 (Cyrillic)"),
2216 build_string ("ISO8859-5 (Cyrillic)"),
2217 build_string ("iso8859-5"),
2218 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2219 Vcharset_latin_iso8859_9 =
2220 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2221 CHARSET_TYPE_96, 1, 1, 'M',
2222 CHARSET_LEFT_TO_RIGHT,
2223 build_string ("Latin-5"),
2224 build_string ("ISO8859-9 (Latin-5)"),
2225 build_string ("ISO8859-9 (Latin-5)"),
2226 build_string ("iso8859-9"),
2228 Vcharset_japanese_jisx0208_1978 =
2229 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2230 CHARSET_TYPE_94X94, 2, 0, '@',
2231 CHARSET_LEFT_TO_RIGHT,
2232 build_string ("JIS X0208:1978"),
2233 build_string ("JIS X0208:1978 (Japanese)"),
2235 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2236 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2238 Vcharset_chinese_gb2312 =
2239 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2240 CHARSET_TYPE_94X94, 2, 0, 'A',
2241 CHARSET_LEFT_TO_RIGHT,
2242 build_string ("GB2312"),
2243 build_string ("GB2312)"),
2244 build_string ("GB2312 Chinese simplified"),
2245 build_string ("gb2312"),
2247 Vcharset_japanese_jisx0208 =
2248 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2249 CHARSET_TYPE_94X94, 2, 0, 'B',
2250 CHARSET_LEFT_TO_RIGHT,
2251 build_string ("JISX0208"),
2252 build_string ("JIS X0208:1983 (Japanese)"),
2253 build_string ("JIS X0208:1983 Japanese Kanji"),
2254 build_string ("jisx0208\\.1983"),
2256 Vcharset_korean_ksc5601 =
2257 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2258 CHARSET_TYPE_94X94, 2, 0, 'C',
2259 CHARSET_LEFT_TO_RIGHT,
2260 build_string ("KSC5601"),
2261 build_string ("KSC5601 (Korean"),
2262 build_string ("KSC5601 Korean Hangul and Hanja"),
2263 build_string ("ksc5601"),
2265 Vcharset_japanese_jisx0212 =
2266 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2267 CHARSET_TYPE_94X94, 2, 0, 'D',
2268 CHARSET_LEFT_TO_RIGHT,
2269 build_string ("JISX0212"),
2270 build_string ("JISX0212 (Japanese)"),
2271 build_string ("JISX0212 Japanese Supplement"),
2272 build_string ("jisx0212"),
2275 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2276 Vcharset_chinese_cns11643_1 =
2277 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2278 CHARSET_TYPE_94X94, 2, 0, 'G',
2279 CHARSET_LEFT_TO_RIGHT,
2280 build_string ("CNS11643-1"),
2281 build_string ("CNS11643-1 (Chinese traditional)"),
2283 ("CNS 11643 Plane 1 Chinese traditional"),
2284 build_string (CHINESE_CNS_PLANE_RE("1")),
2286 Vcharset_chinese_cns11643_2 =
2287 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2288 CHARSET_TYPE_94X94, 2, 0, 'H',
2289 CHARSET_LEFT_TO_RIGHT,
2290 build_string ("CNS11643-2"),
2291 build_string ("CNS11643-2 (Chinese traditional)"),
2293 ("CNS 11643 Plane 2 Chinese traditional"),
2294 build_string (CHINESE_CNS_PLANE_RE("2")),
2297 Vcharset_latin_viscii_lower =
2298 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2299 CHARSET_TYPE_96, 1, 1, '1',
2300 CHARSET_LEFT_TO_RIGHT,
2301 build_string ("VISCII lower"),
2302 build_string ("VISCII lower (Vietnamese)"),
2303 build_string ("VISCII lower (Vietnamese)"),
2304 build_string ("MULEVISCII-LOWER"),
2306 Vcharset_latin_viscii_upper =
2307 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2308 CHARSET_TYPE_96, 1, 1, '2',
2309 CHARSET_LEFT_TO_RIGHT,
2310 build_string ("VISCII upper"),
2311 build_string ("VISCII upper (Vietnamese)"),
2312 build_string ("VISCII upper (Vietnamese)"),
2313 build_string ("MULEVISCII-UPPER"),
2315 Vcharset_latin_viscii =
2316 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2317 CHARSET_TYPE_256, 1, 2, 0,
2318 CHARSET_LEFT_TO_RIGHT,
2319 build_string ("VISCII"),
2320 build_string ("VISCII 1.1 (Vietnamese)"),
2321 build_string ("VISCII 1.1 (Vietnamese)"),
2322 build_string ("VISCII1\\.1"),
2324 Vcharset_hiragana_jisx0208 =
2325 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2326 CHARSET_TYPE_94X94, 2, 0, 'B',
2327 CHARSET_LEFT_TO_RIGHT,
2328 build_string ("Hiragana"),
2329 build_string ("Hiragana of JIS X0208"),
2330 build_string ("Japanese Hiragana of JIS X0208"),
2331 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2332 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2333 (0x24 - 33) * 94 + (0x21 - 33), 33);
2334 Vcharset_katakana_jisx0208 =
2335 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2336 CHARSET_TYPE_94X94, 2, 0, 'B',
2337 CHARSET_LEFT_TO_RIGHT,
2338 build_string ("Katakana"),
2339 build_string ("Katakana of JIS X0208"),
2340 build_string ("Japanese Katakana of JIS X0208"),
2341 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2342 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2343 (0x25 - 33) * 94 + (0x21 - 33), 33);
2345 Vcharset_chinese_big5_1 =
2346 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2347 CHARSET_TYPE_94X94, 2, 0, '0',
2348 CHARSET_LEFT_TO_RIGHT,
2349 build_string ("Big5"),
2350 build_string ("Big5 (Level-1)"),
2352 ("Big5 Level-1 Chinese traditional"),
2353 build_string ("big5"),
2355 Vcharset_chinese_big5_2 =
2356 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2357 CHARSET_TYPE_94X94, 2, 0, '1',
2358 CHARSET_LEFT_TO_RIGHT,
2359 build_string ("Big5"),
2360 build_string ("Big5 (Level-2)"),
2362 ("Big5 Level-2 Chinese traditional"),
2363 build_string ("big5"),
2366 #ifdef ENABLE_COMPOSITE_CHARS
2367 /* #### For simplicity, we put composite chars into a 96x96 charset.
2368 This is going to lead to problems because you can run out of
2369 room, esp. as we don't yet recycle numbers. */
2370 Vcharset_composite =
2371 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2372 CHARSET_TYPE_96X96, 2, 0, 0,
2373 CHARSET_LEFT_TO_RIGHT,
2374 build_string ("Composite"),
2375 build_string ("Composite characters"),
2376 build_string ("Composite characters"),
2379 composite_char_row_next = 32;
2380 composite_char_col_next = 32;
2382 Vcomposite_char_string2char_hash_table =
2383 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2384 Vcomposite_char_char2string_hash_table =
2385 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2386 staticpro (&Vcomposite_char_string2char_hash_table);
2387 staticpro (&Vcomposite_char_char2string_hash_table);
2388 #endif /* ENABLE_COMPOSITE_CHARS */