1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
/* Global Lisp_Object slots, one per built-in charset.  Nothing in the
   lines visible here assigns them; presumably they are filled in during
   charset initialization elsewhere in the file -- TODO confirm.
   NOTE(review): the embedded listing numbers skip 60 and 67, so some
   lines (likely preprocessor conditionals) may be missing between these
   declarations.  */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
/* State that exists only when ENABLE_COMPOSITE_CHARS is defined: the
   composite charset object, two hash tables relating composite chars
   and the strings of characters they are composed from, and two static
   counters named for the "next" row and column.
   NOTE(review): nothing in the lines visible here reads or writes the
   two counters; presumably they track the next free composite code
   position -- confirm against their users.  */
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
/* Lookup tables from a leading byte, or from type/final-byte/direction,
   to the charset object.  make_charset fills them in and asserts that
   the slot was empty first.
   NOTE(review): charset_by_attributes is declared twice below with
   different shapes ([4][128] and [4][128][2]), and make_charset indexes
   it both as [type][final] and as [type][final][direction].  The two
   declarations are presumably alternatives selected by a preprocessor
   conditional whose directives are not visible here -- confirm.  */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
/* Table giving the encoded length, in bytes, of a character from the
   first byte of its encoding.  The array has 0xA0 entries, so only
   first bytes below 0xA0 may be used as an index.  The table is not
   constant: make_charset also stores CHARSET_REP_BYTES into
   rep_bytes_by_first_byte[id] when it registers a charset.
   NOTE(review): two different rows follow the "0x80 - 0x8f" comment
   (one ending in 3, one all 2s).  They are presumably alternatives
   under a conditional that is not visible here; with both present the
   initializer would hold more than 0xA0 values -- confirm before
   editing.  */
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* GC mark method for a char-byte-table: hands each of the 256
   `property' slots of OBJ to MARKOBJ.  It is the mark hook named in the
   DEFINE_LRECORD_IMPLEMENTATION for "char-code-table".
   NOTE(review): the return type and return value are not visible
   here.  */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method for two char-byte-tables.  Walks all 256 slots.  When
   the slot in both tables holds a nested char-byte-table it recurses
   with DEPTH + 1; slot values are also compared through internal_equal
   with DEPTH + 1.
   NOTE(review): the else arms and return statements are not visible
   here.  Presumably any mismatch, including a nested table on one side
   only, makes the tables unequal -- confirm.  */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for a char-byte-table: hashes the 256-element `property'
   array with internal_array_hash, passing DEPTH through unchanged.  */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* lrecord description for struct Lisp_Char_Byte_Table: one
   XD_LISP_OBJECT entry at the offset of `property' with a count of 256,
   matching the 256 slots the other methods iterate over.
   NOTE(review): the terminating entry and the closing brace are not
   visible here.  */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Declares the lrecord type whose Lisp-visible name is "char-code-table"
   and whose C name is char_byte_table, wiring in the mark, equal, hash
   and description hooks defined in this file and the generic
   internal_object_printer.
   NOTE(review): the 0 after the printer is presumably "no finalizer" --
   confirm against the macro definition.  */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a fresh char-byte-table whose 256 slots all hold INITVAL and
   wrap it in a Lisp_Object with XSETCHAR_BYTE_TABLE.  Callers such as
   put_char_code_table assign the result to a Lisp_Object.
   NOTE(review): the return statement itself is not visible here.  */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
/* Every slot starts out with the same value.  */
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Deep-copy the char-byte-table ENTRY.  Slots that themselves hold a
   char-byte-table are copied recursively; every other slot value is
   shared with the original.  The result is stored into a slot at the
   recursive call site, so it is a Lisp_Object.
   NOTE(review): the `else' between the two assignments in the loop and
   the return statement are not visible here.  */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
/* A nested table must be duplicated too, otherwise the copy and the
   original would share the sub-table.  */
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table starts life as a single char-byte-table; this alias
   simply forwards to make_char_byte_table.  */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in TABLE.  TABLE is a tree
   of up to four char-byte-table levels keyed by the bytes of CH from
   most to least significant (ch >> 24, ch >> 16, ch >> 8, then the low
   byte).  At each level a slot that is itself a char-byte-table is
   descended into.
   NOTE(review): what happens when a slot is NOT a table is not visible
   here; presumably that value is returned at once, so a single plain
   value can stand for a whole sub-range -- confirm.
   NOTE(review): the first index, ch >> 24, is not cast to unsigned char
   like the other three; a CH whose top byte falls outside 0..255 (for
   example a negative Emchar) would index out of bounds.  */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
/* Second level: bits 16..23 of CH.  */
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
/* Third level: bits 8..15 of CH.  */
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
/* Leaf level: the low byte selects the final slot.  */
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in TABLE.  Descends the same four byte
   levels as get_char_code_table.  Where a level holds a nested table it
   is followed.  Where it holds a plain value that is not EQ to VALUE,
   the missing lower levels are created with make_char_byte_table, each
   pre-filled with that old plain value so every other character in the
   range keeps it; the leaf slot for CH is then set to VALUE and the new
   chain is linked into the parent.  A plain value already EQ to VALUE
   is left untouched, so no tables are allocated needlessly.
   NOTE(review): the first read uses ch >> 24 without the unsigned char
   cast that the matching store at the end of the function applies.
   NOTE(review): the right-hand side of the cpt3 slot assignment in the
   two-level case is not visible here; presumably `= cpt4;'.  */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
/* All four levels already exist: just overwrite the leaf slot.  */
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
/* Third level holds a plain value: split it into a new leaf table.  */
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* Second level holds a plain value: create the two lower levels.  */
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* Top level holds a plain value: create all three lower levels.  */
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
/* Vcharacter_attribute_table is the char-code-table that holds, for
   each character, its attribute alist.  The `char-attribute-alist'
   primitive returns that alist by looking XCHAR (character) up with
   get_char_code_table.
   NOTE(review): no type check on CHARACTER is visible before XCHAR;
   part of the definition is not visible here -- confirm one exists.  */
311 Lisp_Object Vcharacter_attribute_table;
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* `get-char-attribute' primitive: fetches CHARACTER's entry from
   Vcharacter_attribute_table and returns the cdr of the pair that Fassq
   finds for ATTRIBUTE in it.
   NOTE(review): the lines between the lookup and the return are not
   visible here; presumably they deal with an empty entry -- confirm.  */
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
332 return Fcdr (Fassq (attribute, ret));
/* `put-char-attribute' primitive: reads CHARACTER's current entry from
   Vcharacter_attribute_table, searches it for ATTRIBUTE with Fassq, and
   either conses a new (ATTRIBUTE . VALUE) pair onto the front of the
   entry or overwrites the existing pair's cdr with Fsetcdr.
   put_char_code_table then stores the entry back for the character.
   NOTE(review): the conditions that select between those statements,
   and the return value, are not visible here -- confirm.  */
335 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
336 Store CHARACTER's ATTRIBUTE with VALUE.
338 (character, attribute, value))
340 Emchar char_code = XCHAR (character);
342 = get_char_code_table (char_code, Vcharacter_attribute_table);
343 Lisp_Object cell = Fassq (attribute, ret);
346 ret = Fcons (Fcons (attribute, value), ret);
348 Fsetcdr (cell, value);
349 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* File-level Lisp variables and symbols used by the charset code,
   followed by the counters that get_unallocated_leading_byte tests and
   post-increments when it hands out a leading byte.
   NOTE(review): both a single counter (next_allocated_leading_byte) and
   a 1-byte/2-byte pair are declared; presumably only one set is
   compiled in a given configuration -- confirm.
   NOTE(review): the bare Q... names ending in commas are fragments of a
   longer declaration list that is not fully visible here.  */
354 Lisp_Object Vutf_2000_version;
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
384 Qjapanese_jisx0208_1978,
396 Qvietnamese_viscii_lower,
397 Qvietnamese_viscii_upper,
405 Lisp_Object Ql2r, Qr2l;
407 Lisp_Object Vcharset_hash_table;
410 static Charset_ID next_allocated_leading_byte;
412 static Charset_ID next_allocated_1_byte_leading_byte;
413 static Charset_ID next_allocated_2_byte_leading_byte;
416 /* Composite characters are characters constructed by overstriking two
417 or more regular characters.
419 1) The old Mule implementation involves storing composite characters
420 in a buffer as a tag followed by all of the actual characters
421 used to make up the composite character. I think this is a bad
422 idea; it greatly complicates code that wants to handle strings
423 one character at a time because it has to deal with the possibility
424 of great big ungainly characters. It's much more reasonable to
425 simply store an index into a table of composite characters.
427 2) The current implementation only allows for 16,384 separate
428 composite characters over the lifetime of the XEmacs process.
429 This could become a potential problem if the user
430 edited lots of different files that use composite characters.
431 Due to FSF bogosity, increasing the number of allowable
432 composite characters under Mule would decrease the number
433 of possible faces that can exist. Mule already has shrunk
434 this to 2048, and further shrinkage would become uncomfortable.
435 No such problems exist in XEmacs.
437 Composite characters could be represented as 0x80 C1 C2 C3,
438 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
439 for slightly under 2^20 (one million) composite characters
440 over the XEmacs process lifetime, and you only need to
441 increase the size of a Mule character from 19 to 21 bits.
442 Or you could use 0x80 C1 C2 C3 C4, allowing for about
443 85 million (slightly over 2^26) composite characters. */
446 /************************************************************************/
447 /* Basic Emchar functions */
448 /************************************************************************/
/* Overview of the visible logic in non_ascii_set_charptr_emchar:
   one chain of range tests on C emits a lead byte whose high bits are
   0xc0, 0xe0, 0xf0, 0xf8 or 0xfc, followed by continuation bytes that
   each carry six bits of C under an 0x80 marker.  That is the UTF-8
   style multi-byte layout, extended to six bytes.  A second path splits
   C with BREAKUP_CHAR, takes its leading byte with CHAR_LEADING_BYTE,
   and first writes PRIVATE_LEADING_BYTE_PREFIX when that leading byte
   is private.
   NOTE(review): the two paths are presumably alternatives under a build
   conditional that is not visible here; the declarations of P, LB, C1,
   C2 and CHARSET, the shortest-range case and the return are not
   visible either -- confirm before changing anything.  */
450 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
451 string in STR. Returns the number of bytes stored.
452 Do not call this directly. Use the macro set_charptr_emchar() instead.
456 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
471 else if ( c <= 0x7ff )
473 *p++ = (c >> 6) | 0xc0;
474 *p++ = (c & 0x3f) | 0x80;
476 else if ( c <= 0xffff )
478 *p++ = (c >> 12) | 0xe0;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
482 else if ( c <= 0x1fffff )
484 *p++ = (c >> 18) | 0xf0;
485 *p++ = ((c >> 12) & 0x3f) | 0x80;
486 *p++ = ((c >> 6) & 0x3f) | 0x80;
487 *p++ = (c & 0x3f) | 0x80;
489 else if ( c <= 0x3ffffff )
491 *p++ = (c >> 24) | 0xf8;
492 *p++ = ((c >> 18) & 0x3f) | 0x80;
493 *p++ = ((c >> 12) & 0x3f) | 0x80;
494 *p++ = ((c >> 6) & 0x3f) | 0x80;
495 *p++ = (c & 0x3f) | 0x80;
499 *p++ = (c >> 30) | 0xfc;
500 *p++ = ((c >> 24) & 0x3f) | 0x80;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
507 BREAKUP_CHAR (c, charset, c1, c2);
508 lb = CHAR_LEADING_BYTE (c);
509 if (LEADING_BYTE_PRIVATE_P (lb))
510 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
512 if (EQ (charset, Vcharset_control_1))
/* Overview of the visible logic in non_ascii_charptr_emchar, the
   decoding counterpart of non_ascii_set_charptr_emchar:
   the first byte B is classified by thresholds (>= 0xf8, 0xf0, 0xe0,
   0xc0); a loop then runs LEN times, each time shifting CH left by six
   bits and OR-ing in the low six bits of B.  A second path returns the
   following byte minus 0x20 for LEADING_BYTE_CONTROL_1, handles prefix
   bytes, looks the charset up by leading byte, and builds the result
   with MAKE_CHAR from I1 and I2 (I2 stays 0 unless the charset's
   dimension is 2).
   NOTE(review): as in the encoder these look like two alternative
   implementations; the selecting conditional, the values assigned in
   each threshold arm, and the reads that advance STR are not visible
   here.  */
521 /* Return the first character from a Mule-encoded string in STR,
522 assuming it's non-ASCII. Do not call this directly.
523 Use the macro charptr_emchar() instead. */
526 non_ascii_charptr_emchar (CONST Bufbyte *str)
539 else if ( b >= 0xf8 )
544 else if ( b >= 0xf0 )
549 else if ( b >= 0xe0 )
554 else if ( b >= 0xc0 )
564 for( ; len > 0; len-- )
567 ch = ( ch << 6 ) | ( b & 0x3f );
571 Bufbyte i0 = *str, i1, i2 = 0;
574 if (i0 == LEADING_BYTE_CONTROL_1)
575 return (Emchar) (*++str - 0x20);
577 if (LEADING_BYTE_PREFIX_P (i0))
582 charset = CHARSET_BY_LEADING_BYTE (i0);
583 if (XCHARSET_DIMENSION (charset) == 2)
586 return MAKE_CHAR (charset, i1, i2);
/* Overview of the visible logic in non_ascii_valid_char_p:
   CH is split into three fields with CHAR_FIELD1..3.
   - In one branch field 2 must lie in the official or the private
     range, and a field 3 of 0x20 or 0x7F is only accepted when the
     charset found from field 2 has 96 characters
     (XCHARSET_CHARS == 96).
   - In the other branch field 1 is range-checked the same way, fields 2
     and 3 below 0x20 are tested, and 0x20/0x7F in either field again
     depends on the charset having 96 characters.
   - With ENABLE_COMPOSITE_CHARS, a composite character is looked up in
     Vcomposite_char_char2string_hash_table.
   NOTE(review): the condition choosing between the two branches
   (presumably whether field 1 is zero) and most of the return
   statements are not visible here -- confirm.  */
590 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
591 Do not call this directly. Use the macro valid_char_p() instead. */
595 non_ascii_valid_char_p (Emchar ch)
599 /* Must have only lowest 19 bits set */
603 f1 = CHAR_FIELD1 (ch);
604 f2 = CHAR_FIELD2 (ch);
605 f3 = CHAR_FIELD3 (ch);
611 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
612 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
613 f2 > MAX_CHAR_FIELD2_PRIVATE)
618 if (f3 != 0x20 && f3 != 0x7F)
622 NOTE: This takes advantage of the fact that
623 FIELD2_TO_OFFICIAL_LEADING_BYTE and
624 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
626 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
627 return (XCHARSET_CHARS (charset) == 96);
633 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
634 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
635 f1 > MAX_CHAR_FIELD1_PRIVATE)
637 if (f2 < 0x20 || f3 < 0x20)
640 #ifdef ENABLE_COMPOSITE_CHARS
641 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
643 if (UNBOUNDP (Fgethash (make_int (ch),
644 Vcomposite_char_char2string_hash_table,
649 #endif /* ENABLE_COMPOSITE_CHARS */
651 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
654 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
656 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
659 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
661 return (XCHARSET_CHARS (charset) == 96);
667 /************************************************************************/
668 /* Basic string functions */
669 /************************************************************************/
/* Implementation notes for non_ascii_charptr_copy_char:
   the byte count comes from REP_BYTES_BY_FIRST_BYTE and the switch
   falls through on purpose, so entering at case N copies the N - 1
   bytes that follow the first one.  The value returned is the distance
   from STR to one past the last byte written, i.e. the number of bytes
   in the copied character.
   NOTE(review): the switch reads *strptr, which points at STR, so the
   first byte must already have been stored into STR before the switch.
   That store is not visible here -- confirm it exists.  The handling of
   any other byte count is not visible either.  */
671 /* Copy the character pointed to by PTR into STR, assuming it's
672 non-ASCII. Do not call this directly. Use the macro
673 charptr_copy_char() instead. */
676 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
678 Bufbyte *strptr = str;
680 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
682 /* Notice fallthrough. */
684 case 6: *++strptr = *ptr++;
685 case 5: *++strptr = *ptr++;
687 case 4: *++strptr = *ptr++;
688 case 3: *++strptr = *ptr++;
689 case 2: *++strptr = *ptr;
694 return strptr + 1 - str;
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
/* Lstream_get_emchar_1: CH is the first byte of a character, already
   read from STREAM.  It is stored at the start of a local buffer of
   MAX_EMCHAR_LEN bytes, further bytes are pulled one at a time with
   Lstream_getc and appended, and the assembled bytes are decoded with
   charptr_emchar.  How many bytes are read is selected by
   REP_BYTES_BY_FIRST_BYTE (ch) through a fall-through switch.
   NOTE(review): the case labels and whatever is done between each
   Lstream_getc and the store (for example an end-of-stream check) are
   not visible here -- confirm.  */
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
712 str[0] = (Bufbyte) ch;
713 switch (REP_BYTES_BY_FIRST_BYTE (ch))
715 /* Notice fallthrough. */
718 ch = Lstream_getc (stream);
720 *++strptr = (Bufbyte) ch;
722 ch = Lstream_getc (stream);
724 *++strptr = (Bufbyte) ch;
727 ch = Lstream_getc (stream);
729 *++strptr = (Bufbyte) ch;
731 ch = Lstream_getc (stream);
733 *++strptr = (Bufbyte) ch;
735 ch = Lstream_getc (stream);
737 *++strptr = (Bufbyte) ch;
742 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write those
   bytes to STREAM.  Returns whatever Lstream_write returns.  */
746 Lstream_fput_emchar (Lstream *stream, Emchar ch)
748 Bufbyte str[MAX_EMCHAR_LEN];
749 Bytecount len = set_charptr_emchar (str, ch);
750 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and push the
   resulting bytes back onto STREAM with Lstream_unread.  */
754 Lstream_funget_emchar (Lstream *stream, Emchar ch)
756 Bufbyte str[MAX_EMCHAR_LEN];
757 Bytecount len = set_charptr_emchar (str, ch);
758 Lstream_unread (stream, str, len);
762 /************************************************************************/
764 /************************************************************************/
/* GC mark method for a charset object: marks the short name, long name,
   doc string, registry, CCL program, and the decoding and encoding
   tables through MARKOBJ.
   NOTE(review): the `name' slot is not marked in the visible lines and
   the return statement is not visible; presumably the name is returned
   for the caller to mark -- confirm.  */
767 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
769 struct Lisp_Charset *cs = XCHARSET (obj);
771 markobj (cs->short_name);
772 markobj (cs->long_name);
773 markobj (cs->doc_string);
774 markobj (cs->registry);
775 markobj (cs->ccl_program);
777 markobj (cs->decoding_table);
778 markobj (cs->encoding_table);
/* Print method for charset objects.  Writes to PRINTCHARFUN, in order:
   the literal "#<charset ", the name, short name, long name and doc
   string separated by spaces, then a formatted run giving the type
   ("94", "96", "94x94", ...), the direction ("l2r" or "r2l"), the
   column count, the graphic value and the final byte, then the registry
   and finally the object's uid in hex followed by ">".
   NOTE(review): the call to error with "printing unreadable object" is
   presumably guarded by a readable-printing check that is not visible
   here -- confirm.  The declaration and size of BUF, which both sprintf
   calls write into, are not visible either.  */
784 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
786 struct Lisp_Charset *cs = XCHARSET (obj);
790 error ("printing unreadable object #<charset %s 0x%x>",
791 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
794 write_c_string ("#<charset ", printcharfun);
795 print_internal (CHARSET_NAME (cs), printcharfun, 0);
796 write_c_string (" ", printcharfun);
797 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
798 write_c_string (" ", printcharfun);
799 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
800 write_c_string (" ", printcharfun);
801 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
802 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
803 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
804 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
805 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
807 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
808 CHARSET_COLUMNS (cs),
809 CHARSET_GRAPHIC (cs),
811 write_c_string (buf, printcharfun);
812 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
813 sprintf (buf, " 0x%x>", cs->header.uid);
814 write_c_string (buf, printcharfun);
/* lrecord description for struct Lisp_Charset: an XD_LISP_OBJECT entry
   with count 7 starting at `name' and one with count 2 starting at
   `decoding_table'.
   NOTE(review): the terminating entry and the closing brace are not
   visible here.  */
817 static const struct lrecord_description charset_description[] = {
818 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
820 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Declares the "charset" lrecord type with mark_charset and
   print_charset as its mark and print hooks; the next three hook
   arguments are 0.
   NOTE(review): the description argument is not visible here;
   presumably charset_description -- confirm.  */
825 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
826 mark_charset, print_charset, 0, 0, 0,
828 struct Lisp_Charset);
/* make_charset fills a freshly allocated struct Lisp_Charset from its
   arguments, derives DIMENSION and CHARS from TYPE, and sets REP_BYTES
   (1 for ASCII, otherwise dimension + 1 or dimension + 2).  It then
   registers the object in charset_by_attributes,
   charset_by_leading_byte, rep_bytes_by_first_byte and
   Vcharset_hash_table.  The asserts require the attribute and
   leading-byte slots to be empty beforehand, so a clash aborts rather
   than silently replacing an existing charset.
   NOTE(review): DECODING_TABLE is a parameter, yet the visible code
   sets CHARSET_DECODING_TABLE to Qnil -- confirm that is intended.
   NOTE(review): `reg' is used but its parameter line is not visible
   here, and neither are the conditions guarding the registration
   steps or the return of OBJ.  */
830 /* Make a new charset. */
833 make_charset (Charset_ID id, Lisp_Object name,
834 unsigned char type, unsigned char columns, unsigned char graphic,
835 Bufbyte final, unsigned char direction, Lisp_Object short_name,
836 Lisp_Object long_name, Lisp_Object doc,
838 Lisp_Object decoding_table,
839 Emchar ucs_min, Emchar ucs_max,
840 Emchar code_offset, unsigned char byte_offset)
843 struct Lisp_Charset *cs =
844 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
845 XSETCHARSET (obj, cs);
847 CHARSET_ID (cs) = id;
848 CHARSET_NAME (cs) = name;
849 CHARSET_SHORT_NAME (cs) = short_name;
850 CHARSET_LONG_NAME (cs) = long_name;
851 CHARSET_DIRECTION (cs) = direction;
852 CHARSET_TYPE (cs) = type;
853 CHARSET_COLUMNS (cs) = columns;
854 CHARSET_GRAPHIC (cs) = graphic;
855 CHARSET_FINAL (cs) = final;
856 CHARSET_DOC_STRING (cs) = doc;
857 CHARSET_REGISTRY (cs) = reg;
858 CHARSET_CCL_PROGRAM (cs) = Qnil;
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
861 CHARSET_DECODING_TABLE(cs) = Qnil;
862 CHARSET_ENCODING_TABLE(cs) = Qnil;
863 CHARSET_UCS_MIN(cs) = ucs_min;
864 CHARSET_UCS_MAX(cs) = ucs_max;
865 CHARSET_CODE_OFFSET(cs) = code_offset;
866 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Dimension and characters-per-dimension follow directly from TYPE.  */
869 switch (CHARSET_TYPE (cs))
871 case CHARSET_TYPE_94:
872 CHARSET_DIMENSION (cs) = 1;
873 CHARSET_CHARS (cs) = 94;
875 case CHARSET_TYPE_96:
876 CHARSET_DIMENSION (cs) = 1;
877 CHARSET_CHARS (cs) = 96;
879 case CHARSET_TYPE_94X94:
880 CHARSET_DIMENSION (cs) = 2;
881 CHARSET_CHARS (cs) = 94;
883 case CHARSET_TYPE_96X96:
884 CHARSET_DIMENSION (cs) = 2;
885 CHARSET_CHARS (cs) = 96;
888 case CHARSET_TYPE_128:
889 CHARSET_DIMENSION (cs) = 1;
890 CHARSET_CHARS (cs) = 128;
892 case CHARSET_TYPE_128X128:
893 CHARSET_DIMENSION (cs) = 2;
894 CHARSET_CHARS (cs) = 128;
896 case CHARSET_TYPE_256:
897 CHARSET_DIMENSION (cs) = 1;
898 CHARSET_CHARS (cs) = 256;
900 case CHARSET_TYPE_256X256:
901 CHARSET_DIMENSION (cs) = 2;
902 CHARSET_CHARS (cs) = 256;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
922 if (code_offset == 0)
924 assert (NILP (charset_by_attributes[type][final]));
925 charset_by_attributes[type][final] = obj;
928 assert (NILP (charset_by_attributes[type][final][direction]));
929 charset_by_attributes[type][final][direction] = obj;
933 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
934 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
937 /* official leading byte */
938 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
941 /* Some charsets are "faux" and don't have names or really exist at
942 all except in the leading-byte table. */
944 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of DIMENSION.
   Each visible branch compares one of the next_allocated_* counters
   against its maximum and otherwise post-increments the counter into
   LB.  The error "No more character sets free for this dimension" is
   signalled with DIMENSION as its datum.
   NOTE(review): what is done when a counter exceeds its maximum is not
   visible here; presumably LB is cleared so that the error is
   signalled.  The conditions selecting among the three branches and the
   return are not visible either -- confirm.  */
949 get_unallocated_leading_byte (int dimension)
954 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
957 lb = next_allocated_leading_byte++;
961 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
964 lb = next_allocated_1_byte_leading_byte++;
968 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
971 lb = next_allocated_2_byte_leading_byte++;
977 ("No more character sets free for this dimension",
978 make_int (dimension));
/* Return the first position byte of CH within CHARSET.
   Order of attempts in the visible code:
   1. if the charset has an encoding table, look CH up there and derive
      the byte from the stored integer code, whose width is tested
      against 1 << 16 and 1 << 24;
   2. if CH lies in [UCS_MIN, UCS_MAX], compute the byte arithmetically:
      the offset into that range plus CODE_OFFSET, divided by a power of
      CHARS chosen by the dimension, plus BYTE_OFFSET;
   3. if CODE_OFFSET is 0, fall back to fixed blocks that start at
      MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or MIN_CHAR_96x96 and are
      selected by (FINAL - '0'); for the two-dimensional blocks the byte
      is d / 94 + 33 or d / 96 + 32.
   NOTE(review): the values returned in step 1, the upper-bound half of
   each range test in step 3, the one-dimensional results and the
   not-found result are not visible here.  charset_get_byte2 returns 0
   for not-found; presumably this function does too -- confirm.  */
985 charset_get_byte1 (Lisp_Object charset, Emchar ch)
990 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
992 Lisp_Object value = get_char_code_table (ch, table);
996 Emchar code = XINT (value);
1000 else if (code < (1 << 16))
1002 else if (code < (1 << 24))
1008 if ((XCHARSET_UCS_MIN (charset) <= ch)
1009 && (ch <= XCHARSET_UCS_MAX (charset)))
1010 return (ch - XCHARSET_UCS_MIN (charset)
1011 + XCHARSET_CODE_OFFSET (charset))
1012 / (XCHARSET_DIMENSION (charset) == 1 ?
1015 XCHARSET_DIMENSION (charset) == 2 ?
1016 XCHARSET_CHARS (charset)
1018 XCHARSET_DIMENSION (charset) == 3 ?
1019 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
1021 XCHARSET_CHARS (charset)
1022 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1023 + XCHARSET_BYTE_OFFSET (charset);
1024 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1026 if (XCHARSET_DIMENSION (charset) == 1)
1028 if (XCHARSET_CHARS (charset) == 94)
1030 if (((d = ch - (MIN_CHAR_94
1031 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1035 else if (XCHARSET_CHARS (charset) == 96)
1037 if (((d = ch - (MIN_CHAR_96
1038 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1045 else if (XCHARSET_DIMENSION (charset) == 2)
1047 if (XCHARSET_CHARS (charset) == 94)
1049 if (((d = ch - (MIN_CHAR_94x94
1050 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1053 return (d / 94) + 33;
1055 else if (XCHARSET_CHARS (charset) == 96)
1057 if (((d = ch - (MIN_CHAR_96x96
1058 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1061 return (d / 96) + 32;
/* Return the second position byte of CH within CHARSET.
   A one-dimensional charset takes a separate early path (its result is
   not visible here; presumably 0).  Otherwise, in order:
   1. an encoding-table hit yields one byte picked out of the stored
      integer code;
   2. a CH inside [UCS_MIN, UCS_MAX] is computed as
      ((offset + CODE_OFFSET) / divisor) % CHARS + BYTE_OFFSET, where
      the divisor depends on the dimension;
   3. failing that, the fixed 94x94 or 96x96 block selected by
      (FINAL - '0') gives (ch - base) % 94 + 33 or (ch - base) % 96 + 32,
      and 0 when CH lies outside that block.
   NOTE(review): in step 1 a code below 1 << 16 yields its low byte,
   but the wider cases shift by 16 and by 24, which selects the most
   significant byte rather than the second one.  Check these shifts
   against charset_get_byte1 and the intended code layout.  */
1069 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1071 if (XCHARSET_DIMENSION (charset) == 1)
1077 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1079 Lisp_Object value = get_char_code_table (ch, table);
1083 Emchar code = XINT (value);
1085 if (code < (1 << 16))
1086 return (unsigned char)code;
1087 else if (code < (1 << 24))
1088 return (unsigned char)(code >> 16);
1090 return (unsigned char)(code >> 24);
1093 if ((XCHARSET_UCS_MIN (charset) <= ch)
1094 && (ch <= XCHARSET_UCS_MAX (charset)))
1095 return ((ch - XCHARSET_UCS_MIN (charset)
1096 + XCHARSET_CODE_OFFSET (charset))
1097 / (XCHARSET_DIMENSION (charset) == 2 ?
1100 XCHARSET_DIMENSION (charset) == 3 ?
1101 XCHARSET_CHARS (charset)
1103 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1104 % XCHARSET_CHARS (charset)
1105 + XCHARSET_BYTE_OFFSET (charset);
1106 else if (XCHARSET_CHARS (charset) == 94)
1107 return (MIN_CHAR_94x94
1108 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1109 && (ch < MIN_CHAR_94x94
1110 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1111 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1112 else /* if (XCHARSET_CHARS (charset) == 96) */
1113 return (MIN_CHAR_96x96
1114 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1115 && (ch < MIN_CHAR_96x96
1116 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1117 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
/* NOTE(review): not referenced in the lines visible here.  By its name
   this is presumably an ordered list of charsets consulted when a
   character has to be expressed in a coded charset -- confirm against
   its users.  */
1121 Lisp_Object Vdefault_coded_charset_priority_list;
1125 /************************************************************************/
1126 /* Basic charset Lisp functions */
1127 /************************************************************************/
/* `charsetp' primitive: Qt when OBJECT satisfies CHARSETP, else Qnil.  */
1129 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1130 Return non-nil if OBJECT is a charset.
1134 return CHARSETP (object) ? Qt : Qnil;
/* `find-charset' primitive: a charset object is returned as is.
   Anything else must be a symbol (enforced by CHECK_SYMBOL) and is
   looked up in Vcharset_hash_table, with Qnil as the result when the
   name is not present.  */
1137 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1138 Retrieve the charset of the given name.
1139 If CHARSET-OR-NAME is a charset object, it is simply returned.
1140 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1141 nil is returned. Otherwise the associated charset object is returned.
1145 if (CHARSETP (charset_or_name))
1146 return charset_or_name;
1148 CHECK_SYMBOL (charset_or_name);
1149 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* `get-charset' primitive: Ffind_charset plus a "No such charset" error
   that carries NAME as its datum.
   NOTE(review): the test guarding the error and the return statement
   are not visible here; per the doc string the error is signalled only
   when no charset is found -- confirm.  */
1152 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1153 Retrieve the charset of the given name.
1154 Same as `find-charset' except an error is signalled if there is no such
1155 charset instead of returning nil.
1159 Lisp_Object charset = Ffind_charset (name);
1162 signal_simple_error ("No such charset", name);
/* Closure handed to the hash-table mapper below: it carries a pointer
   to the list head being built, so the mapper can prepend to the
   caller's own variable.  */
1166 /* We store the charsets in hash tables with the names as the key and the
1167 actual charset object as the value. Occasionally we need to use them
1168 in a list format. These routines provide us with that. */
1169 struct charset_list_closure
1171 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list: conses the name of the
   charset VALUE onto the front of the list that the closure points at.
   KEY is not used in the visible lines.
   NOTE(review): the return type and return value are not visible
   here.  */
1175 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1176 void *charset_list_closure)
1178 /* This function can GC */
1179 struct charset_list_closure *chcl =
1180 (struct charset_list_closure*) charset_list_closure;
1181 Lisp_Object *charset_list = chcl->charset_list;
1183 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* `charset-list' primitive: walks Vcharset_hash_table with
   elisp_maphash and add_charset_to_list_mapper, collecting the charset
   names into a list.  The list head is protected with GCPRO1 while it
   is built because the mapper conses.
   NOTE(review): the matching UNGCPRO is not visible here -- confirm it
   precedes the return.  */
1187 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1188 Return a list of the names of all defined charsets.
1192 Lisp_Object charset_list = Qnil;
1193 struct gcpro gcpro1;
1194 struct charset_list_closure charset_list_closure;
1196 GCPRO1 (charset_list);
1197 charset_list_closure.charset_list = &charset_list;
1198 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1199 &charset_list_closure);
1202 return charset_list;
/* `charset-name' primitive: resolves CHARSET with Fget_charset, so an
   unknown charset is an error per that function's doc string, and
   returns the resolved charset's name.  */
1205 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1206 Return the name of the given charset.
1210 return XCHARSET_NAME (Fget_charset (charset));
/* `make-charset' primitive.  Flow of the visible code:
   - validate NAME (a symbol) and DOC-STRING (a string or nil) and
     refuse to redefine an existing charset;
   - walk PROPS with EXTERNAL_PROPERTY_LIST_LOOP, validating each known
     property and signalling "Unrecognized property" otherwise;
   - require 'final, restrict it to at most 0x5F for dimension 2, derive
     TYPE from the dimension and CHARS, and reject a TYPE/FINAL pair
     that is already taken in either direction;
   - allocate a leading byte, default the doc string and registry to the
     empty string, the short name to the symbol's name and the long name
     to the doc string, and call make_charset; a supplied CCL program is
     attached afterwards.

   NOTE(review): the test for Qlong_name is a plain `if', not
   `else if'.  If the Qshort_name arm is a separate statement, as it
   appears, a 'short-name property is handled there and then also runs
   through the second chain, reaching its final "Unrecognized property"
   arm.  Confirm and, if so, change that test to `else if'.
   NOTE(review): two different range checks on GRAPHIC are visible
   (upper bound 2 and upper bound 1); they are presumably alternatives
   under a conditional that is not visible here.  The declaration of
   TYPE, the assignments inside several property arms and the return of
   the new charset are not visible either.  */
1213 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1214 Define a new character set.
1215 This function is for use with Mule support.
1216 NAME is a symbol, the name by which the character set is normally referred.
1217 DOC-STRING is a string describing the character set.
1218 PROPS is a property list, describing the specific nature of the
1219 character set. Recognized properties are:
1221 'short-name Short version of the charset name (ex: Latin-1)
1222 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1223 'registry A regular expression matching the font registry field for
1225 'dimension Number of octets used to index a character in this charset.
1226 Either 1 or 2. Defaults to 1.
1227 'columns Number of columns used to display a character in this charset.
1228 Only used in TTY mode. (Under X, the actual width of a
1229 character can be derived from the font used to display the
1230 characters.) If unspecified, defaults to the dimension
1231 (this is almost always the correct value).
1232 'chars Number of characters in each dimension (94 or 96).
1233 Defaults to 94. Note that if the dimension is 2, the
1234 character set thus described is 94x94 or 96x96.
1235 'final Final byte of ISO 2022 escape sequence. Must be
1236 supplied. Each combination of (DIMENSION, CHARS) defines a
1237 separate namespace for final bytes. Note that ISO
1238 2022 restricts the final byte to the range
1239 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1240 dimension == 2. Note also that final bytes in the range
1241 0x30 - 0x3F are reserved for user-defined (not official)
1243 'graphic 0 (use left half of font on output) or 1 (use right half
1244 of font on output). Defaults to 0. For example, for
1245 a font whose registry is ISO8859-1, the left half
1246 (octets 0x20 - 0x7F) is the `ascii' character set, while
1247 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1248 character set. With 'graphic set to 0, the octets
1249 will have their high bit cleared; with it set to 1,
1250 the octets will have their high bit set.
1251 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1253 'ccl-program A compiled CCL program used to convert a character in
1254 this charset into an index into the font. This is in
1255 addition to the 'graphic property. The CCL program
1256 is passed the octets of the character, with the high
1257 bit cleared and set depending upon whether the value
1258 of the 'graphic property is 0 or 1.
1260 (name, doc_string, props))
1262 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1263 int direction = CHARSET_LEFT_TO_RIGHT;
1265 Lisp_Object registry = Qnil;
1266 Lisp_Object charset;
1267 Lisp_Object rest, keyword, value;
1268 Lisp_Object ccl_program = Qnil;
1269 Lisp_Object short_name = Qnil, long_name = Qnil;
1271 unsigned char byte_offset = 0;
1274 CHECK_SYMBOL (name);
1275 if (!NILP (doc_string))
1276 CHECK_STRING (doc_string);
1278 charset = Ffind_charset (name);
1279 if (!NILP (charset))
1280 signal_simple_error ("Cannot redefine existing charset", name);
1282 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1284 if (EQ (keyword, Qshort_name))
1286 CHECK_STRING (value);
1290 if (EQ (keyword, Qlong_name))
1292 CHECK_STRING (value);
1296 else if (EQ (keyword, Qdimension))
1299 dimension = XINT (value);
1300 if (dimension < 1 || dimension > 2)
1301 signal_simple_error ("Invalid value for 'dimension", value);
1304 else if (EQ (keyword, Qchars))
1307 chars = XINT (value);
1308 if (chars != 94 && chars != 96)
1309 signal_simple_error ("Invalid value for 'chars", value);
1312 else if (EQ (keyword, Qcolumns))
1315 columns = XINT (value);
1316 if (columns != 1 && columns != 2)
1317 signal_simple_error ("Invalid value for 'columns", value);
1320 else if (EQ (keyword, Qgraphic))
1323 graphic = XINT (value);
1325 if (graphic < 0 || graphic > 2)
1327 if (graphic < 0 || graphic > 1)
1329 signal_simple_error ("Invalid value for 'graphic", value);
1332 else if (EQ (keyword, Qregistry))
1334 CHECK_STRING (value);
1338 else if (EQ (keyword, Qdirection))
1340 if (EQ (value, Ql2r))
1341 direction = CHARSET_LEFT_TO_RIGHT;
1342 else if (EQ (value, Qr2l))
1343 direction = CHARSET_RIGHT_TO_LEFT;
1345 signal_simple_error ("Invalid value for 'direction", value);
1348 else if (EQ (keyword, Qfinal))
1350 CHECK_CHAR_COERCE_INT (value);
1351 final = XCHAR (value);
1352 if (final < '0' || final > '~')
1353 signal_simple_error ("Invalid value for 'final", value);
1356 else if (EQ (keyword, Qccl_program))
1358 CHECK_VECTOR (value);
1359 ccl_program = value;
1363 signal_simple_error ("Unrecognized property", keyword);
1367 error ("'final must be specified");
1368 if (dimension == 2 && final > 0x5F)
1370 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1374 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1376 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1378 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1379 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1381 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1383 id = get_unallocated_leading_byte (dimension);
1385 if (NILP (doc_string))
1386 doc_string = build_string ("");
1388 if (NILP (registry))
1389 registry = build_string ("");
1391 if (NILP (short_name))
1392 XSETSTRING (short_name, XSYMBOL (name)->name);
1394 if (NILP (long_name))
1395 long_name = doc_string;
1398 columns = dimension;
1399 charset = make_charset (id, name, type, columns, graphic,
1400 final, direction, short_name, long_name,
1401 doc_string, registry,
1402 Qnil, 0, 0, 0, byte_offset);
1403 if (!NILP (ccl_program))
1404 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Create a new charset that mirrors CHARSET but has the opposite direction.
   The argument is resolved with Fget_charset.  An error is signalled when
   it already has a reverse-direction charset, or when NEW-NAME already
   names a charset.  The type, columns, graphic, final byte, names, doc
   string, registry, decoding table, UCS range, code offset and byte offset
   handed to make_charset are all taken from the source charset; only the
   leading byte, the name and the direction differ.  Finally the two
   charsets are linked to each other through their reverse-direction
   slots. */
1408 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1410 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1411 NEW-NAME is the name of the new charset. Return the new charset.
1413 (charset, new_name))
1415 Lisp_Object new_charset = Qnil;
1416 int id, dimension, columns, graphic, final;
1417 int direction, type;
1418 Lisp_Object registry, doc_string, short_name, long_name;
1419 struct Lisp_Charset *cs;
/* A charset may have at most one reverse-direction partner. */
1421 charset = Fget_charset (charset);
1422 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1423 signal_simple_error ("Charset already has reverse-direction charset",
1426 CHECK_SYMBOL (new_name);
1427 if (!NILP (Ffind_charset (new_name)))
1428 signal_simple_error ("Cannot redefine existing charset", new_name);
1430 cs = XCHARSET (charset);
/* Copy the source attributes; the leading byte is freshly allocated for
   the same dimension. */
1432 type = CHARSET_TYPE (cs);
1433 columns = CHARSET_COLUMNS (cs);
1434 dimension = CHARSET_DIMENSION (cs);
1435 id = get_unallocated_leading_byte (dimension);
1437 graphic = CHARSET_GRAPHIC (cs);
1438 final = CHARSET_FINAL (cs);
/* Flip the direction: start from right-to-left and switch to
   left-to-right when the source itself is right-to-left. */
1439 direction = CHARSET_RIGHT_TO_LEFT;
1440 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1441 direction = CHARSET_LEFT_TO_RIGHT;
1442 doc_string = CHARSET_DOC_STRING (cs);
1443 short_name = CHARSET_SHORT_NAME (cs);
1444 long_name = CHARSET_LONG_NAME (cs);
1445 registry = CHARSET_REGISTRY (cs);
1447 new_charset = make_charset (id, new_name, type, columns,
1448 graphic, final, direction, short_name, long_name,
1449 doc_string, registry,
1451 CHARSET_DECODING_TABLE(cs),
1452 CHARSET_UCS_MIN(cs),
1453 CHARSET_UCS_MAX(cs),
1454 CHARSET_CODE_OFFSET(cs),
1455 CHARSET_BYTE_OFFSET(cs)
1461 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1462 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Register ALIAS as an additional name for CHARSET.  ALIAS must be a
   symbol; CHARSET is resolved with Fget_charset, and the pair is stored in
   Vcharset_hash_table with Fputhash, whose result is returned. */
1467 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1468 Define symbol ALIAS as an alias for CHARSET.
1472 CHECK_SYMBOL (alias);
1473 charset = Fget_charset (charset);
1474 return Fputhash (alias, charset, Vcharset_hash_table);
1477 /* #### Reverse direction charsets not yet implemented. */
/* Resolve CHARSET with Fget_charset and return the contents of its
   reverse-direction-charset slot.
   NOTE(review): syms_of_mule_charset shows no DEFSUBR for this function,
   only a commented-out DEFSUBR (Freverse_direction_charset) -- confirm
   whether this primitive is meant to be exported to Lisp. */
1479 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1481 Return the reverse-direction charset parallel to CHARSET, if any.
1482 This is the charset with the same properties (in particular, the same
1483 dimension, number of characters per dimension, and final byte) as
1484 CHARSET but whose characters are displayed in the opposite direction.
1488 charset = Fget_charset (charset);
1489 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Look up a charset by its attributes.  DIMENSION must be 1 or 2, CHARS
   must be 94 or 96, FINAL must be a character between '0' and '~' (and no
   greater than 0x5F when DIMENSION is 2), and DIRECTION must be the symbol
   l2r, the symbol r2l, or nil; any other value signals an error.  The
   charset type is derived from CHARS, and the lookup goes through
   CHARSET_BY_ATTRIBUTES for left-to-right, right-to-left, or the
   requested direction.
   NOTE(review): the visible return yields XCHARSET_NAME of the match,
   i.e. the charset's name rather than the charset object -- confirm that
   callers expect this. */
1493 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1494 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1495 If DIRECTION is omitted, both directions will be checked (left-to-right
1496 will be returned if character sets exist for both directions).
1498 (dimension, chars, final, direction))
1500 int dm, ch, fi, di = -1;
1502 Lisp_Object obj = Qnil;
1504 CHECK_INT (dimension);
1505 dm = XINT (dimension);
1506 if (dm < 1 || dm > 2)
1507 signal_simple_error ("Invalid value for DIMENSION", dimension);
1511 if (ch != 94 && ch != 96)
1512 signal_simple_error ("Invalid value for CHARS", chars);
1514 CHECK_CHAR_COERCE_INT (final);
1516 if (fi < '0' || fi > '~')
1517 signal_simple_error ("Invalid value for FINAL", final);
1519 if (EQ (direction, Ql2r))
1520 di = CHARSET_LEFT_TO_RIGHT;
1521 else if (EQ (direction, Qr2l))
1522 di = CHARSET_RIGHT_TO_LEFT;
1523 else if (!NILP (direction))
1524 signal_simple_error ("Invalid value for DIRECTION", direction);
1526 if (dm == 2 && fi > 0x5F)
1528 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1531 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1533 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1537 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1539 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1542 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1545 return XCHARSET_NAME (obj);
/* Return the short-name slot of CHARSET; the argument is first resolved
   with Fget_charset. */
1549 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1550 Return short name of CHARSET.
1554 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Return the long-name slot of CHARSET; the argument is first resolved
   with Fget_charset. */
1557 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1558 Return long name of CHARSET.
1562 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Return the doc-string slot of CHARSET, which serves as its description;
   the argument is first resolved with Fget_charset. */
1565 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1566 Return description of CHARSET.
1570 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Return the dimension of CHARSET as a Lisp integer; the argument is
   first resolved with Fget_charset. */
1573 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1574 Return dimension of CHARSET.
1578 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Return one property of CHARSET, selected by the symbol PROP.  Name,
   short name, long name, doc string, registry and CCL program are
   returned as stored; dimension, columns, graphic and chars are returned
   as Lisp integers, the final byte as a character, and the direction as
   the symbol l2r or r2l.  An unrecognized PROP signals an error, so the
   trailing return is never reached.
   NOTE(review): for the reverse-direction-charset property the visible
   return is XCHARSET_NAME of the slot, not the slot itself -- presumably
   guarded so that an empty slot is handled; confirm against the full
   source. */
1581 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1582 Return property PROP of CHARSET.
1583 Recognized properties are those listed in `make-charset', as well as
1584 'name and 'doc-string.
1588 struct Lisp_Charset *cs;
1590 charset = Fget_charset (charset);
1591 cs = XCHARSET (charset);
1593 CHECK_SYMBOL (prop);
1594 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1595 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1596 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1597 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1598 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1599 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1600 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1601 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1602 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1603 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1604 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1605 if (EQ (prop, Qdirection))
1606 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1607 if (EQ (prop, Qreverse_direction_charset))
1609 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1613 return XCHARSET_NAME (obj);
1615 signal_simple_error ("Unrecognized charset property name", prop);
1616 return Qnil; /* not reached */
/* Return the leading byte of CHARSET as a Lisp integer; the argument is
   first resolved with Fget_charset. */
1619 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1620 Return charset identification number of CHARSET.
1624 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1627 /* #### We need to figure out which properties we really want to
/* Store CCL-PROGRAM in the ccl-program slot of CHARSET.  CHARSET is
   resolved with Fget_charset and CCL-PROGRAM must pass CHECK_VECTOR. */
1630 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1631 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1633 (charset, ccl_program))
1635 charset = Fget_charset (charset);
1636 CHECK_VECTOR (ccl_program);
1637 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, look up CHARSET in
   that device's charset_font_cache and, when an entry exists, empty the
   hash table stored there with Fclrhash. */
1642 invalidate_charset_font_caches (Lisp_Object charset)
1644 /* Invalidate font cache entries for charset on all devices. */
1645 Lisp_Object devcons, concons, hash_table;
1646 DEVICE_LOOP_NO_BREAK (devcons, concons)
1648 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so it marks a device that has no cache
   entry for this charset. */
1649 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1650 if (!UNBOUNDP (hash_table))
1651 Fclrhash (hash_table);
/* Store REGISTRY, which must be a string, in the registry slot of CHARSET
   (resolved with Fget_charset).  Afterwards the per-device font caches
   for the charset are flushed with invalidate_charset_font_caches, and
   face_property_was_changed is called for the Qfont property of
   Vdefault_face with the Qglobal locale. */
1655 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1656 Set the 'registry property of CHARSET to REGISTRY.
1658 (charset, registry))
1660 charset = Fget_charset (charset);
1661 CHECK_STRING (registry);
1662 XCHARSET_REGISTRY (charset) = registry;
1663 invalidate_charset_font_caches (charset);
1664 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Return the decoding-table slot of CHARSET; the argument is first
   resolved with Fget_charset. */
1669 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1670 Return mapping-table of CHARSET.
1674 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Install TABLE as the decoding table of CHARSET and rebuild the encoding
   table from it.
   - A nil TABLE clears both the decoding and the encoding table.
   - A vector may hold at most CHARSET_CHARS entries; a longer one signals
     args_out_of_range.
   - Any other value signals Qwrong_type_argument.
   The rebuild is selected by the charset's dimension.  The code stored
   for an entry is its index plus CHARSET_BYTE_OFFSET; for entries of a
   nested vector the row and column codes are combined as
   (row << 8) | column.  Each mapping is written both to the encoding
   table (put_char_code_table) and to the character's attributes
   (Fput_char_attribute).
   NOTE(review): old_table is read from CHARSET_ENCODING_TABLE but is
   written back to CHARSET_DECODING_TABLE when a nested vector is too
   long, and by then the encoding table has already been replaced with a
   fresh one.  This looks like it was meant to save and restore the
   decoding table -- confirm. */
1677 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1678 Set mapping-table of CHARSET to TABLE.
1682 struct Lisp_Charset *cs;
1683 Lisp_Object old_table;
1686 charset = Fget_charset (charset);
1687 cs = XCHARSET (charset);
1689 if (EQ (table, Qnil))
1691 CHARSET_DECODING_TABLE(cs) = table;
1692 CHARSET_ENCODING_TABLE(cs) = Qnil;
1695 else if (VECTORP (table))
1697 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1698 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1699 old_table = CHARSET_ENCODING_TABLE(cs);
1700 CHARSET_DECODING_TABLE(cs) = table;
1703 signal_error (Qwrong_type_argument,
1704 list2 (build_translated_string ("vector-or-nil-p"),
1706 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1708 switch (CHARSET_DIMENSION (cs))
1711 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1712 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1714 Lisp_Object c = XVECTOR_DATA(table)[i];
1718 put_char_code_table (XCHAR (c),
1719 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1720 CHARSET_ENCODING_TABLE(cs));
1721 Fput_char_attribute (c, charset,
1723 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1728 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1729 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1731 Lisp_Object v = XVECTOR_DATA(table)[i];
1737 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1739 CHARSET_DECODING_TABLE(cs) = old_table;
1740 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1742 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1744 Lisp_Object c = XVECTOR_DATA(v)[j];
1750 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1751 | (j + CHARSET_BYTE_OFFSET (cs))),
1752 CHARSET_ENCODING_TABLE(cs));
1753 Fput_char_attribute (c, charset,
1756 (i + CHARSET_BYTE_OFFSET (cs)),
1758 (j + CHARSET_BYTE_OFFSET (cs))));
1764 put_char_code_table (XCHAR (v),
1765 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1766 CHARSET_ENCODING_TABLE(cs));
1767 Fput_char_attribute (v, charset,
1769 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1779 /************************************************************************/
1780 /* Lisp primitives for working with characters */
1781 /************************************************************************/
/* Build a character from CHARSET and one or two position codes.  The
   permitted range of each code depends on the charset: 0-127 for ASCII,
   0-31 for control-1, 0-255 for 256-character sets, 33-126 for
   94-character sets, and 32-127 otherwise.  A code outside the range
   signals args_out_of_range_3.  For a one-dimensional charset the
   character is made from the first code alone (an error message for a
   non-nil ARG2 is visible); otherwise ARG2 is masked with 0x7f,
   range-checked, and combined with the first code. */
1783 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1784 Make a character from CHARSET and octets ARG1 and ARG2.
1785 ARG2 is required only for characters from two-dimensional charsets.
1786 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1787 character s with caron.
1789 (charset, arg1, arg2))
1791 struct Lisp_Charset *cs;
1793 int lowlim, highlim;
1795 charset = Fget_charset (charset);
1796 cs = XCHARSET (charset);
1798 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1799 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1801 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1803 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1804 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1807 /* It is useful (and safe, according to Olivier Galibert) to strip
1808 the 8th bit off ARG1 and ARG2 because it allows programmers to
1809 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1810 Latin 2 code of the character. */
1818 if (a1 < lowlim || a1 > highlim)
1819 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1821 if (CHARSET_DIMENSION (cs) == 1)
1825 ("Charset is of dimension one; second octet must be nil", arg2);
1826 return make_char (MAKE_CHAR (charset, a1, 0));
1835 a2 = XINT (arg2) & 0x7f;
1837 if (a2 < lowlim || a2 > highlim)
1838 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1840 return make_char (MAKE_CHAR (charset, a1, a2));
/* Return the name of the charset that CH belongs to.  CH is validated
   with CHECK_CHAR_COERCE_INT; the charset is found with CHAR_CHARSET and
   its name slot, not the charset object, is returned. */
1843 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1844 Return the character set of char CH.
1848 CHECK_CHAR_COERCE_INT (ch);
1850 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Break CHARACTER into its charset and position codes with BREAKUP_CHAR
   and build a list from them: (NAME C1 C2) when the charset has dimension
   2, and (NAME C1) otherwise, where NAME is the charset's name slot.  The
   locals charset and rc are protected from garbage collection with
   GCPRO2 before the list is built. */
1853 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1854 Return list of charset and one or two position-codes of CHAR.
1858 /* This function can GC */
1859 struct gcpro gcpro1, gcpro2;
1860 Lisp_Object charset = Qnil;
1861 Lisp_Object rc = Qnil;
1864 GCPRO2 (charset, rc);
1865 CHECK_CHAR_COERCE_INT (character);
1867 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1869 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1871 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1875 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1883 #ifdef ENABLE_COMPOSITE_CHARS
1884 /************************************************************************/
1885 /* composite character functions */
1886 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible code allocates a
   new character in Vcharset_composite from the next free row/column pair,
   signalling an error once the row counter reaches 128.
   NOTE(review): the condition guarding the allocation and the return
   statement are not visible here -- presumably allocation happens only
   when the lookup misses; confirm. */
1889 lookup_composite_char (Bufbyte *str, int len)
1891 Lisp_Object lispstr = make_string (str, len);
1892 Lisp_Object ch = Fgethash (lispstr,
1893 Vcomposite_char_string2char_hash_table,
1899 if (composite_char_row_next >= 128)
1900 signal_simple_error ("No more composite chars available", lispstr);
1901 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1902 composite_char_col_next);
/* Record the new mapping in both directions. */
1903 Fputhash (make_char (emch), lispstr,
1904 Vcomposite_char_char2string_hash_table);
1905 Fputhash (lispstr, make_char (emch),
1906 Vcomposite_char_string2char_hash_table);
/* Advance to the next column; at 128 wrap back to column 32 and move on
   to the next row. */
1907 composite_char_col_next++;
1908 if (composite_char_col_next >= 128)
1910 composite_char_col_next = 32;
1911 composite_char_row_next++;
/* Look up composite character CH in
   Vcomposite_char_char2string_hash_table.  The assertion requires that
   an entry exists, so CH must have been registered beforehand. */
1920 composite_char_string (Emchar ch)
1922 Lisp_Object str = Fgethash (make_char (ch),
1923 Vcomposite_char_char2string_hash_table,
1925 assert (!UNBOUNDP (str));
/* Return the composite character for STRING.  STRING must pass
   CHECK_STRING; its data and length are handed to lookup_composite_char
   and the result is wrapped with make_char. */
1929 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1930 Convert a string into a single composite character.
1931 The character is the result of overstriking all the characters in
1936 CHECK_STRING (string);
1937 return make_char (lookup_composite_char (XSTRING_DATA (string),
1938 XSTRING_LENGTH (string)));
/* Return the string recorded for a composite character by calling
   composite_char_string.  An error is signalled unless the character's
   leading byte is LEADING_BYTE_COMPOSITE. */
1941 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1942 Return a string of the characters comprising a composite character.
1950 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1951 signal_simple_error ("Must be composite char", ch);
1952 return composite_char_string (emch);
1954 #endif /* ENABLE_COMPOSITE_CHARS */
1957 /************************************************************************/
1958 /* initialization */
1959 /************************************************************************/
/* Register this module's Lisp primitives with DEFSUBR and intern the
   symbols it uses with defsymbol: the charsetp predicate symbol, the
   charset property keywords, the direction symbols l2r and r2l, and the
   names of the predefined charsets.  The composite-character primitives
   are registered only under ENABLE_COMPOSITE_CHARS. */
1962 syms_of_mule_charset (void)
1964 DEFSUBR (Fcharsetp);
1965 DEFSUBR (Ffind_charset);
1966 DEFSUBR (Fget_charset);
1967 DEFSUBR (Fcharset_list);
1968 DEFSUBR (Fcharset_name);
1969 DEFSUBR (Fmake_charset);
1970 DEFSUBR (Fmake_reverse_direction_charset);
1971 /* DEFSUBR (Freverse_direction_charset); */
1972 DEFSUBR (Fdefine_charset_alias);
1973 DEFSUBR (Fcharset_from_attributes);
1974 DEFSUBR (Fcharset_short_name);
1975 DEFSUBR (Fcharset_long_name);
1976 DEFSUBR (Fcharset_description);
1977 DEFSUBR (Fcharset_dimension);
1978 DEFSUBR (Fcharset_property);
1979 DEFSUBR (Fcharset_id);
1980 DEFSUBR (Fset_charset_ccl_program);
1981 DEFSUBR (Fset_charset_registry);
1983 DEFSUBR (Fchar_attribute_alist);
1984 DEFSUBR (Fget_char_attribute);
1985 DEFSUBR (Fput_char_attribute);
1986 DEFSUBR (Fcharset_mapping_table);
1987 DEFSUBR (Fset_charset_mapping_table);
1990 DEFSUBR (Fmake_char);
1991 DEFSUBR (Fchar_charset);
1992 DEFSUBR (Fsplit_char);
1994 #ifdef ENABLE_COMPOSITE_CHARS
1995 DEFSUBR (Fmake_composite_char);
1996 DEFSUBR (Fcomposite_char_string);
1999 defsymbol (&Qcharsetp, "charsetp");
2000 defsymbol (&Qregistry, "registry");
2001 defsymbol (&Qfinal, "final");
2002 defsymbol (&Qgraphic, "graphic");
2003 defsymbol (&Qdirection, "direction");
2004 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2005 defsymbol (&Qshort_name, "short-name");
2006 defsymbol (&Qlong_name, "long-name");
2008 defsymbol (&Ql2r, "l2r");
2009 defsymbol (&Qr2l, "r2l");
2011 /* Charsets, compatible with FSF 20.3
2012 Naming convention is Script-Charset[-Edition] */
2013 defsymbol (&Qascii, "ascii");
2014 defsymbol (&Qcontrol_1, "control-1");
2015 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2016 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2017 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2018 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2019 defsymbol (&Qthai_tis620, "thai-tis620");
2020 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2021 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2022 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2023 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2024 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2025 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2026 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2027 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2028 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2029 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2030 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2031 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2032 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2033 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2035 defsymbol (&Qucs_bmp, "ucs-bmp");
2036 defsymbol (&Qlatin_viscii, "latin-viscii");
2037 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2038 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2039 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2040 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2041 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2042 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2044 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2045 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2047 defsymbol (&Qcomposite, "composite");
/* Initialize the charset lookup tables and the leading-byte allocation
   counters, and define this module's Lisp variables.
   Every slot of charset_by_leading_byte and charset_by_attributes is set
   to Qnil.  charset_by_attributes is cleared once by a two-level loop and
   once by a three-level loop; NOTE(review): presumably these belong to
   alternative preprocessor branches whose directives are not visible
   here -- confirm.
   leading_code_private_11 is assigned PRE_LEADING_BYTE_PRIVATE_1 both
   before and after its DEFVAR_INT.  Vcharacter_attribute_table is
   staticpro'd and set to a fresh char-code table, and
   Vdefault_coded_charset_priority_list starts out as Qnil. */
2051 vars_of_mule_charset (void)
2058 /* Table of charsets indexed by leading byte. */
2059 for (i = 0; i < countof (charset_by_leading_byte); i++)
2060 charset_by_leading_byte[i] = Qnil;
2063 /* Table of charsets indexed by type/final-byte. */
2064 for (i = 0; i < countof (charset_by_attributes); i++)
2065 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2066 charset_by_attributes[i][j] = Qnil;
2068 /* Table of charsets indexed by type/final-byte/direction. */
2069 for (i = 0; i < countof (charset_by_attributes); i++)
2070 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2071 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2072 charset_by_attributes[i][j][k] = Qnil;
2076 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2078 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2079 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2083 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2084 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2085 Leading-code of private TYPE9N charset of column-width 1.
2087 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2091 Vutf_2000_version = build_string("0.10 (Yao)");
2092 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2093 Version number of UTF-2000.
2096 staticpro (&Vcharacter_attribute_table);
2097 Vcharacter_attribute_table = make_char_code_table (Qnil);
2099 Vdefault_coded_charset_priority_list = Qnil;
2100 DEFVAR_LISP ("default-coded-charset-priority-list",
2101 &Vdefault_coded_charset_priority_list /*
2102 Default order of preferred coded-character-sets.
2108 complex_vars_of_mule_charset (void)
2110 staticpro (&Vcharset_hash_table);
2111 Vcharset_hash_table =
2112 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2114 /* Predefined character sets. We store them into variables for
2119 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2120 CHARSET_TYPE_256X256, 1, 2, 0,
2121 CHARSET_LEFT_TO_RIGHT,
2122 build_string ("BMP"),
2123 build_string ("BMP"),
2124 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2125 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2126 Qnil, 0, 0xFFFF, 0, 0);
2128 # define MIN_CHAR_THAI 0
2129 # define MAX_CHAR_THAI 0
2130 # define MIN_CHAR_GREEK 0
2131 # define MAX_CHAR_GREEK 0
2132 # define MIN_CHAR_HEBREW 0
2133 # define MAX_CHAR_HEBREW 0
2134 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2135 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2136 # define MIN_CHAR_CYRILLIC 0
2137 # define MAX_CHAR_CYRILLIC 0
2140 make_charset (LEADING_BYTE_ASCII, Qascii,
2141 CHARSET_TYPE_94, 1, 0, 'B',
2142 CHARSET_LEFT_TO_RIGHT,
2143 build_string ("ASCII"),
2144 build_string ("ASCII)"),
2145 build_string ("ASCII (ISO646 IRV)"),
2146 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2147 Qnil, 0, 0x7F, 0, 0);
2148 Vcharset_control_1 =
2149 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2150 CHARSET_TYPE_94, 1, 1, 0,
2151 CHARSET_LEFT_TO_RIGHT,
2152 build_string ("C1"),
2153 build_string ("Control characters"),
2154 build_string ("Control characters 128-191"),
2156 Qnil, 0x80, 0x9F, 0, 0);
2157 Vcharset_latin_iso8859_1 =
2158 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2159 CHARSET_TYPE_96, 1, 1, 'A',
2160 CHARSET_LEFT_TO_RIGHT,
2161 build_string ("Latin-1"),
2162 build_string ("ISO8859-1 (Latin-1)"),
2163 build_string ("ISO8859-1 (Latin-1)"),
2164 build_string ("iso8859-1"),
2165 Qnil, 0xA0, 0xFF, 0, 32);
2166 Vcharset_latin_iso8859_2 =
2167 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2168 CHARSET_TYPE_96, 1, 1, 'B',
2169 CHARSET_LEFT_TO_RIGHT,
2170 build_string ("Latin-2"),
2171 build_string ("ISO8859-2 (Latin-2)"),
2172 build_string ("ISO8859-2 (Latin-2)"),
2173 build_string ("iso8859-2"),
2175 Vcharset_latin_iso8859_3 =
2176 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2177 CHARSET_TYPE_96, 1, 1, 'C',
2178 CHARSET_LEFT_TO_RIGHT,
2179 build_string ("Latin-3"),
2180 build_string ("ISO8859-3 (Latin-3)"),
2181 build_string ("ISO8859-3 (Latin-3)"),
2182 build_string ("iso8859-3"),
2184 Vcharset_latin_iso8859_4 =
2185 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2186 CHARSET_TYPE_96, 1, 1, 'D',
2187 CHARSET_LEFT_TO_RIGHT,
2188 build_string ("Latin-4"),
2189 build_string ("ISO8859-4 (Latin-4)"),
2190 build_string ("ISO8859-4 (Latin-4)"),
2191 build_string ("iso8859-4"),
2193 Vcharset_thai_tis620 =
2194 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2195 CHARSET_TYPE_96, 1, 1, 'T',
2196 CHARSET_LEFT_TO_RIGHT,
2197 build_string ("TIS620"),
2198 build_string ("TIS620 (Thai)"),
2199 build_string ("TIS620.2529 (Thai)"),
2200 build_string ("tis620"),
2201 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2202 Vcharset_greek_iso8859_7 =
2203 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2204 CHARSET_TYPE_96, 1, 1, 'F',
2205 CHARSET_LEFT_TO_RIGHT,
2206 build_string ("ISO8859-7"),
2207 build_string ("ISO8859-7 (Greek)"),
2208 build_string ("ISO8859-7 (Greek)"),
2209 build_string ("iso8859-7"),
2210 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2211 Vcharset_arabic_iso8859_6 =
2212 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2213 CHARSET_TYPE_96, 1, 1, 'G',
2214 CHARSET_RIGHT_TO_LEFT,
2215 build_string ("ISO8859-6"),
2216 build_string ("ISO8859-6 (Arabic)"),
2217 build_string ("ISO8859-6 (Arabic)"),
2218 build_string ("iso8859-6"),
2220 Vcharset_hebrew_iso8859_8 =
2221 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2222 CHARSET_TYPE_96, 1, 1, 'H',
2223 CHARSET_RIGHT_TO_LEFT,
2224 build_string ("ISO8859-8"),
2225 build_string ("ISO8859-8 (Hebrew)"),
2226 build_string ("ISO8859-8 (Hebrew)"),
2227 build_string ("iso8859-8"),
2228 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2229 Vcharset_katakana_jisx0201 =
2230 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2231 CHARSET_TYPE_94, 1, 1, 'I',
2232 CHARSET_LEFT_TO_RIGHT,
2233 build_string ("JISX0201 Kana"),
2234 build_string ("JISX0201.1976 (Japanese Kana)"),
2235 build_string ("JISX0201.1976 Japanese Kana"),
2236 build_string ("jisx0201\\.1976"),
2238 MIN_CHAR_HALFWIDTH_KATAKANA,
2239 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2240 Vcharset_latin_jisx0201 =
2241 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2242 CHARSET_TYPE_94, 1, 0, 'J',
2243 CHARSET_LEFT_TO_RIGHT,
2244 build_string ("JISX0201 Roman"),
2245 build_string ("JISX0201.1976 (Japanese Roman)"),
2246 build_string ("JISX0201.1976 Japanese Roman"),
2247 build_string ("jisx0201\\.1976"),
2249 Vcharset_cyrillic_iso8859_5 =
2250 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2251 CHARSET_TYPE_96, 1, 1, 'L',
2252 CHARSET_LEFT_TO_RIGHT,
2253 build_string ("ISO8859-5"),
2254 build_string ("ISO8859-5 (Cyrillic)"),
2255 build_string ("ISO8859-5 (Cyrillic)"),
2256 build_string ("iso8859-5"),
2257 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2258 Vcharset_latin_iso8859_9 =
2259 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2260 CHARSET_TYPE_96, 1, 1, 'M',
2261 CHARSET_LEFT_TO_RIGHT,
2262 build_string ("Latin-5"),
2263 build_string ("ISO8859-9 (Latin-5)"),
2264 build_string ("ISO8859-9 (Latin-5)"),
2265 build_string ("iso8859-9"),
2267 Vcharset_japanese_jisx0208_1978 =
2268 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2269 CHARSET_TYPE_94X94, 2, 0, '@',
2270 CHARSET_LEFT_TO_RIGHT,
2271 build_string ("JIS X0208:1978"),
2272 build_string ("JIS X0208:1978 (Japanese)"),
2274 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2275 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2277 Vcharset_chinese_gb2312 =
2278 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2279 CHARSET_TYPE_94X94, 2, 0, 'A',
2280 CHARSET_LEFT_TO_RIGHT,
2281 build_string ("GB2312"),
2282 build_string ("GB2312)"),
2283 build_string ("GB2312 Chinese simplified"),
2284 build_string ("gb2312"),
2286 Vcharset_japanese_jisx0208 =
2287 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2288 CHARSET_TYPE_94X94, 2, 0, 'B',
2289 CHARSET_LEFT_TO_RIGHT,
2290 build_string ("JISX0208"),
2291 build_string ("JIS X0208:1983 (Japanese)"),
2292 build_string ("JIS X0208:1983 Japanese Kanji"),
2293 build_string ("jisx0208\\.1983"),
2295 Vcharset_korean_ksc5601 =
2296 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2297 CHARSET_TYPE_94X94, 2, 0, 'C',
2298 CHARSET_LEFT_TO_RIGHT,
2299 build_string ("KSC5601"),
2300 build_string ("KSC5601 (Korean"),
2301 build_string ("KSC5601 Korean Hangul and Hanja"),
2302 build_string ("ksc5601"),
2304 Vcharset_japanese_jisx0212 =
2305 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2306 CHARSET_TYPE_94X94, 2, 0, 'D',
2307 CHARSET_LEFT_TO_RIGHT,
2308 build_string ("JISX0212"),
2309 build_string ("JISX0212 (Japanese)"),
2310 build_string ("JISX0212 Japanese Supplement"),
2311 build_string ("jisx0212"),
2314 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2315 Vcharset_chinese_cns11643_1 =
2316 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2317 CHARSET_TYPE_94X94, 2, 0, 'G',
2318 CHARSET_LEFT_TO_RIGHT,
2319 build_string ("CNS11643-1"),
2320 build_string ("CNS11643-1 (Chinese traditional)"),
2322 ("CNS 11643 Plane 1 Chinese traditional"),
2323 build_string (CHINESE_CNS_PLANE_RE("1")),
2325 Vcharset_chinese_cns11643_2 =
2326 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2327 CHARSET_TYPE_94X94, 2, 0, 'H',
2328 CHARSET_LEFT_TO_RIGHT,
2329 build_string ("CNS11643-2"),
2330 build_string ("CNS11643-2 (Chinese traditional)"),
2332 ("CNS 11643 Plane 2 Chinese traditional"),
2333 build_string (CHINESE_CNS_PLANE_RE("2")),
2336 Vcharset_latin_viscii_lower =
2337 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2338 CHARSET_TYPE_96, 1, 1, '1',
2339 CHARSET_LEFT_TO_RIGHT,
2340 build_string ("VISCII lower"),
2341 build_string ("VISCII lower (Vietnamese)"),
2342 build_string ("VISCII lower (Vietnamese)"),
2343 build_string ("MULEVISCII-LOWER"),
2345 Vcharset_latin_viscii_upper =
2346 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2347 CHARSET_TYPE_96, 1, 1, '2',
2348 CHARSET_LEFT_TO_RIGHT,
2349 build_string ("VISCII upper"),
2350 build_string ("VISCII upper (Vietnamese)"),
2351 build_string ("VISCII upper (Vietnamese)"),
2352 build_string ("MULEVISCII-UPPER"),
2354 Vcharset_latin_viscii =
2355 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2356 CHARSET_TYPE_256, 1, 2, 0,
2357 CHARSET_LEFT_TO_RIGHT,
2358 build_string ("VISCII"),
2359 build_string ("VISCII 1.1 (Vietnamese)"),
2360 build_string ("VISCII 1.1 (Vietnamese)"),
2361 build_string ("VISCII1\\.1"),
2363 Vcharset_hiragana_jisx0208 =
2364 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2365 CHARSET_TYPE_94X94, 2, 0, 'B',
2366 CHARSET_LEFT_TO_RIGHT,
2367 build_string ("Hiragana"),
2368 build_string ("Hiragana of JIS X0208"),
2369 build_string ("Japanese Hiragana of JIS X0208"),
2370 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2371 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2372 (0x24 - 33) * 94 + (0x21 - 33), 33);
2373 Vcharset_katakana_jisx0208 =
2374 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2375 CHARSET_TYPE_94X94, 2, 0, 'B',
2376 CHARSET_LEFT_TO_RIGHT,
2377 build_string ("Katakana"),
2378 build_string ("Katakana of JIS X0208"),
2379 build_string ("Japanese Katakana of JIS X0208"),
2380 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2381 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2382 (0x25 - 33) * 94 + (0x21 - 33), 33);
2384 Vcharset_chinese_big5_1 =
2385 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2386 CHARSET_TYPE_94X94, 2, 0, '0',
2387 CHARSET_LEFT_TO_RIGHT,
2388 build_string ("Big5"),
2389 build_string ("Big5 (Level-1)"),
2391 ("Big5 Level-1 Chinese traditional"),
2392 build_string ("big5"),
2394 Vcharset_chinese_big5_2 =
2395 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2396 CHARSET_TYPE_94X94, 2, 0, '1',
2397 CHARSET_LEFT_TO_RIGHT,
2398 build_string ("Big5"),
2399 build_string ("Big5 (Level-2)"),
2401 ("Big5 Level-2 Chinese traditional"),
2402 build_string ("big5"),
2405 #ifdef ENABLE_COMPOSITE_CHARS
2406 /* #### For simplicity, we put composite chars into a 96x96 charset.
2407 This is going to lead to problems because you can run out of
2408 room, esp. as we don't yet recycle numbers. */
2409 Vcharset_composite =
2410 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2411 CHARSET_TYPE_96X96, 2, 0, 0,
2412 CHARSET_LEFT_TO_RIGHT,
2413 build_string ("Composite"),
2414 build_string ("Composite characters"),
2415 build_string ("Composite characters"),
2418 composite_char_row_next = 32;
2419 composite_char_col_next = 32;
2421 Vcomposite_char_string2char_hash_table =
2422 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2423 Vcomposite_char_char2string_hash_table =
2424 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2425 staticpro (&Vcomposite_char_string2char_hash_table);
2426 staticpro (&Vcomposite_char_char2string_hash_table);
2427 #endif /* ENABLE_COMPOSITE_CHARS */