1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
227 #define make_char_code_table(initval) make_char_byte_table(initval)
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vcharacter_attribute_table;
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
332 return Fcdr (Fassq (attribute, ret));
335 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
336 Store CHARACTER's ATTRIBUTE with VALUE.
338 (character, attribute, value))
340 Emchar char_code = XCHAR (character);
342 = get_char_code_table (char_code, Vcharacter_attribute_table);
343 Lisp_Object cell = Fassq (attribute, ret);
346 ret = Fcons (Fcons (attribute, value), ret);
348 Fsetcdr (cell, value);
349 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
354 Lisp_Object Vutf_2000_version;
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
384 Qjapanese_jisx0208_1978,
396 Qvietnamese_viscii_lower,
397 Qvietnamese_viscii_upper,
405 Lisp_Object Ql2r, Qr2l;
407 Lisp_Object Vcharset_hash_table;
410 static Charset_ID next_allocated_leading_byte;
412 static Charset_ID next_allocated_1_byte_leading_byte;
413 static Charset_ID next_allocated_2_byte_leading_byte;
416 /* Composite characters are characters constructed by overstriking two
417 or more regular characters.
419 1) The old Mule implementation involves storing composite characters
420 in a buffer as a tag followed by all of the actual characters
421 used to make up the composite character. I think this is a bad
422 idea; it greatly complicates code that wants to handle strings
423 one character at a time because it has to deal with the possibility
424 of great big ungainly characters. It's much more reasonable to
425 simply store an index into a table of composite characters.
427 2) The current implementation only allows for 16,384 separate
428 composite characters over the lifetime of the XEmacs process.
429 This could become a potential problem if the user
430 edited lots of different files that use composite characters.
431 Due to FSF bogosity, increasing the number of allowable
432 composite characters under Mule would decrease the number
433 of possible faces that can exist. Mule already has shrunk
434 this to 2048, and further shrinkage would become uncomfortable.
435 No such problems exist in XEmacs.
437 Composite characters could be represented as 0x80 C1 C2 C3,
438 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
439 for slightly under 2^20 (one million) composite characters
440 over the XEmacs process lifetime, and you only need to
441 increase the size of a Mule character from 19 to 21 bits.
442 Or you could use 0x80 C1 C2 C3 C4, allowing for about
443 85 million (slightly over 2^26) composite characters. */
446 /************************************************************************/
447 /* Basic Emchar functions */
448 /************************************************************************/
450 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
451 string in STR. Returns the number of bytes stored.
452 Do not call this directly. Use the macro set_charptr_emchar() instead.
456 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
471 else if ( c <= 0x7ff )
473 *p++ = (c >> 6) | 0xc0;
474 *p++ = (c & 0x3f) | 0x80;
476 else if ( c <= 0xffff )
478 *p++ = (c >> 12) | 0xe0;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
482 else if ( c <= 0x1fffff )
484 *p++ = (c >> 18) | 0xf0;
485 *p++ = ((c >> 12) & 0x3f) | 0x80;
486 *p++ = ((c >> 6) & 0x3f) | 0x80;
487 *p++ = (c & 0x3f) | 0x80;
489 else if ( c <= 0x3ffffff )
491 *p++ = (c >> 24) | 0xf8;
492 *p++ = ((c >> 18) & 0x3f) | 0x80;
493 *p++ = ((c >> 12) & 0x3f) | 0x80;
494 *p++ = ((c >> 6) & 0x3f) | 0x80;
495 *p++ = (c & 0x3f) | 0x80;
499 *p++ = (c >> 30) | 0xfc;
500 *p++ = ((c >> 24) & 0x3f) | 0x80;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
507 BREAKUP_CHAR (c, charset, c1, c2);
508 lb = CHAR_LEADING_BYTE (c);
509 if (LEADING_BYTE_PRIVATE_P (lb))
510 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
512 if (EQ (charset, Vcharset_control_1))
521 /* Return the first character from a Mule-encoded string in STR,
522 assuming it's non-ASCII. Do not call this directly.
523 Use the macro charptr_emchar() instead. */
526 non_ascii_charptr_emchar (CONST Bufbyte *str)
539 else if ( b >= 0xf8 )
544 else if ( b >= 0xf0 )
549 else if ( b >= 0xe0 )
554 else if ( b >= 0xc0 )
564 for( ; len > 0; len-- )
567 ch = ( ch << 6 ) | ( b & 0x3f );
571 Bufbyte i0 = *str, i1, i2 = 0;
574 if (i0 == LEADING_BYTE_CONTROL_1)
575 return (Emchar) (*++str - 0x20);
577 if (LEADING_BYTE_PREFIX_P (i0))
582 charset = CHARSET_BY_LEADING_BYTE (i0);
583 if (XCHARSET_DIMENSION (charset) == 2)
586 return MAKE_CHAR (charset, i1, i2);
590 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
591 Do not call this directly. Use the macro valid_char_p() instead. */
595 non_ascii_valid_char_p (Emchar ch)
599 /* Must have only lowest 19 bits set */
603 f1 = CHAR_FIELD1 (ch);
604 f2 = CHAR_FIELD2 (ch);
605 f3 = CHAR_FIELD3 (ch);
611 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
612 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
613 f2 > MAX_CHAR_FIELD2_PRIVATE)
618 if (f3 != 0x20 && f3 != 0x7F)
622 NOTE: This takes advantage of the fact that
623 FIELD2_TO_OFFICIAL_LEADING_BYTE and
624 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
626 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
627 return (XCHARSET_CHARS (charset) == 96);
633 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
634 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
635 f1 > MAX_CHAR_FIELD1_PRIVATE)
637 if (f2 < 0x20 || f3 < 0x20)
640 #ifdef ENABLE_COMPOSITE_CHARS
641 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
643 if (UNBOUNDP (Fgethash (make_int (ch),
644 Vcomposite_char_char2string_hash_table,
649 #endif /* ENABLE_COMPOSITE_CHARS */
651 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
654 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
656 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
659 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
661 return (XCHARSET_CHARS (charset) == 96);
667 /************************************************************************/
668 /* Basic string functions */
669 /************************************************************************/
671 /* Copy the character pointed to by PTR into STR, assuming it's
672 non-ASCII. Do not call this directly. Use the macro
673 charptr_copy_char() instead. */
676 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
678 Bufbyte *strptr = str;
680 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
682 /* Notice fallthrough. */
684 case 6: *++strptr = *ptr++;
685 case 5: *++strptr = *ptr++;
687 case 4: *++strptr = *ptr++;
688 case 3: *++strptr = *ptr++;
689 case 2: *++strptr = *ptr;
694 return strptr + 1 - str;
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
712 str[0] = (Bufbyte) ch;
713 switch (REP_BYTES_BY_FIRST_BYTE (ch))
715 /* Notice fallthrough. */
718 ch = Lstream_getc (stream);
720 *++strptr = (Bufbyte) ch;
722 ch = Lstream_getc (stream);
724 *++strptr = (Bufbyte) ch;
727 ch = Lstream_getc (stream);
729 *++strptr = (Bufbyte) ch;
731 ch = Lstream_getc (stream);
733 *++strptr = (Bufbyte) ch;
735 ch = Lstream_getc (stream);
737 *++strptr = (Bufbyte) ch;
742 return charptr_emchar (str);
746 Lstream_fput_emchar (Lstream *stream, Emchar ch)
748 Bufbyte str[MAX_EMCHAR_LEN];
749 Bytecount len = set_charptr_emchar (str, ch);
750 return Lstream_write (stream, str, len);
754 Lstream_funget_emchar (Lstream *stream, Emchar ch)
756 Bufbyte str[MAX_EMCHAR_LEN];
757 Bytecount len = set_charptr_emchar (str, ch);
758 Lstream_unread (stream, str, len);
762 /************************************************************************/
764 /************************************************************************/
767 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
769 struct Lisp_Charset *cs = XCHARSET (obj);
771 markobj (cs->short_name);
772 markobj (cs->long_name);
773 markobj (cs->doc_string);
774 markobj (cs->registry);
775 markobj (cs->ccl_program);
777 markobj (cs->decoding_table);
783 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
785 struct Lisp_Charset *cs = XCHARSET (obj);
789 error ("printing unreadable object #<charset %s 0x%x>",
790 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
793 write_c_string ("#<charset ", printcharfun);
794 print_internal (CHARSET_NAME (cs), printcharfun, 0);
795 write_c_string (" ", printcharfun);
796 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
797 write_c_string (" ", printcharfun);
798 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
799 write_c_string (" ", printcharfun);
800 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
801 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
802 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
803 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
804 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
806 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
807 CHARSET_COLUMNS (cs),
808 CHARSET_GRAPHIC (cs),
810 write_c_string (buf, printcharfun);
811 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
812 sprintf (buf, " 0x%x>", cs->header.uid);
813 write_c_string (buf, printcharfun);
816 static const struct lrecord_description charset_description[] = {
817 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
819 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
824 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
825 mark_charset, print_charset, 0, 0, 0,
827 struct Lisp_Charset);
829 /* Make a new charset. */
832 make_charset (Charset_ID id, Lisp_Object name,
833 unsigned char type, unsigned char columns, unsigned char graphic,
834 Bufbyte final, unsigned char direction, Lisp_Object short_name,
835 Lisp_Object long_name, Lisp_Object doc,
837 Lisp_Object decoding_table,
838 Emchar ucs_min, Emchar ucs_max,
839 Emchar code_offset, unsigned char byte_offset)
842 struct Lisp_Charset *cs =
843 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
844 XSETCHARSET (obj, cs);
846 CHARSET_ID (cs) = id;
847 CHARSET_NAME (cs) = name;
848 CHARSET_SHORT_NAME (cs) = short_name;
849 CHARSET_LONG_NAME (cs) = long_name;
850 CHARSET_DIRECTION (cs) = direction;
851 CHARSET_TYPE (cs) = type;
852 CHARSET_COLUMNS (cs) = columns;
853 CHARSET_GRAPHIC (cs) = graphic;
854 CHARSET_FINAL (cs) = final;
855 CHARSET_DOC_STRING (cs) = doc;
856 CHARSET_REGISTRY (cs) = reg;
857 CHARSET_CCL_PROGRAM (cs) = Qnil;
858 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
860 CHARSET_DECODING_TABLE(cs) = Qnil;
861 CHARSET_UCS_MIN(cs) = ucs_min;
862 CHARSET_UCS_MAX(cs) = ucs_max;
863 CHARSET_CODE_OFFSET(cs) = code_offset;
864 CHARSET_BYTE_OFFSET(cs) = byte_offset;
867 switch (CHARSET_TYPE (cs))
869 case CHARSET_TYPE_94:
870 CHARSET_DIMENSION (cs) = 1;
871 CHARSET_CHARS (cs) = 94;
873 case CHARSET_TYPE_96:
874 CHARSET_DIMENSION (cs) = 1;
875 CHARSET_CHARS (cs) = 96;
877 case CHARSET_TYPE_94X94:
878 CHARSET_DIMENSION (cs) = 2;
879 CHARSET_CHARS (cs) = 94;
881 case CHARSET_TYPE_96X96:
882 CHARSET_DIMENSION (cs) = 2;
883 CHARSET_CHARS (cs) = 96;
886 case CHARSET_TYPE_128:
887 CHARSET_DIMENSION (cs) = 1;
888 CHARSET_CHARS (cs) = 128;
890 case CHARSET_TYPE_128X128:
891 CHARSET_DIMENSION (cs) = 2;
892 CHARSET_CHARS (cs) = 128;
894 case CHARSET_TYPE_256:
895 CHARSET_DIMENSION (cs) = 1;
896 CHARSET_CHARS (cs) = 256;
898 case CHARSET_TYPE_256X256:
899 CHARSET_DIMENSION (cs) = 2;
900 CHARSET_CHARS (cs) = 256;
906 if (id == LEADING_BYTE_ASCII)
907 CHARSET_REP_BYTES (cs) = 1;
909 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
916 /* some charsets do not have final characters. This includes
917 ASCII, Control-1, Composite, and the two faux private
920 if (code_offset == 0)
922 assert (NILP (charset_by_attributes[type][final]));
923 charset_by_attributes[type][final] = obj;
926 assert (NILP (charset_by_attributes[type][final][direction]));
927 charset_by_attributes[type][final][direction] = obj;
931 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
932 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
935 /* official leading byte */
936 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
947 get_unallocated_leading_byte (int dimension)
952 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = next_allocated_leading_byte++;
959 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = next_allocated_1_byte_leading_byte++;
966 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
983 range_charset_code_point (Lisp_Object charset, Emchar ch)
987 if ((XCHARSET_UCS_MIN (charset) <= ch)
988 && (ch <= XCHARSET_UCS_MAX (charset)))
990 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
992 if (XCHARSET_DIMENSION (charset) == 1)
993 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
994 else if (XCHARSET_DIMENSION (charset) == 2)
995 return list2 (make_int (d / XCHARSET_CHARS (charset)
996 + XCHARSET_BYTE_OFFSET (charset)),
997 make_int (d % XCHARSET_CHARS (charset)
998 + XCHARSET_BYTE_OFFSET (charset)));
999 else if (XCHARSET_DIMENSION (charset) == 3)
1000 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1001 * XCHARSET_CHARS (charset))
1002 + XCHARSET_BYTE_OFFSET (charset)),
1003 make_int (d / XCHARSET_CHARS (charset)
1004 % XCHARSET_CHARS (charset)
1005 + XCHARSET_BYTE_OFFSET (charset)),
1006 make_int (d % XCHARSET_CHARS (charset)
1007 + XCHARSET_BYTE_OFFSET (charset)));
1008 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1009 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1010 * XCHARSET_CHARS (charset)
1011 * XCHARSET_CHARS (charset))
1012 + XCHARSET_BYTE_OFFSET (charset)),
1013 make_int (d / (XCHARSET_CHARS (charset)
1014 * XCHARSET_CHARS (charset))
1015 % XCHARSET_CHARS (charset)
1016 + XCHARSET_BYTE_OFFSET (charset)),
1017 make_int (d / XCHARSET_CHARS (charset)
1018 % XCHARSET_CHARS (charset)
1019 + XCHARSET_BYTE_OFFSET (charset)),
1020 make_int (d % XCHARSET_CHARS (charset)
1021 + XCHARSET_BYTE_OFFSET (charset)));
1023 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1025 if (XCHARSET_DIMENSION (charset) == 1)
1027 if (XCHARSET_CHARS (charset) == 94)
1029 if (((d = ch - (MIN_CHAR_94
1030 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1032 return list1 (make_int (d + 33));
1034 else if (XCHARSET_CHARS (charset) == 96)
1036 if (((d = ch - (MIN_CHAR_96
1037 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1039 return list1 (make_int (d + 32));
1044 else if (XCHARSET_DIMENSION (charset) == 2)
1046 if (XCHARSET_CHARS (charset) == 94)
1048 if (((d = ch - (MIN_CHAR_94x94
1049 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1052 return list2 ((d / 94) + 33, d % 94 + 33);
1054 else if (XCHARSET_CHARS (charset) == 96)
1056 if (((d = ch - (MIN_CHAR_96x96
1057 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1060 return list2 ((d / 96) + 32, d % 96 + 32);
1068 charset_code_point (Lisp_Object charset, Emchar ch)
1070 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1072 if (!EQ (cdef, Qnil))
1074 Lisp_Object field = Fassq (charset, cdef);
1076 if (!EQ (field, Qnil))
1077 return Fcdr (field);
1079 return range_charset_code_point (charset, ch);
1082 Lisp_Object Vdefault_coded_charset_priority_list;
1086 /************************************************************************/
1087 /* Basic charset Lisp functions */
1088 /************************************************************************/
1090 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1091 Return non-nil if OBJECT is a charset.
1095 return CHARSETP (object) ? Qt : Qnil;
1098 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1099 Retrieve the charset of the given name.
1100 If CHARSET-OR-NAME is a charset object, it is simply returned.
1101 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1102 nil is returned. Otherwise the associated charset object is returned.
1106 if (CHARSETP (charset_or_name))
1107 return charset_or_name;
1109 CHECK_SYMBOL (charset_or_name);
1110 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1113 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1114 Retrieve the charset of the given name.
1115 Same as `find-charset' except an error is signalled if there is no such
1116 charset instead of returning nil.
1120 Lisp_Object charset = Ffind_charset (name);
1123 signal_simple_error ("No such charset", name);
1127 /* We store the charsets in hash tables with the names as the key and the
1128 actual charset object as the value. Occasionally we need to use them
1129 in a list format. These routines provide us with that. */
1130 struct charset_list_closure
1132 Lisp_Object *charset_list;
1136 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1137 void *charset_list_closure)
1139 /* This function can GC */
1140 struct charset_list_closure *chcl =
1141 (struct charset_list_closure*) charset_list_closure;
1142 Lisp_Object *charset_list = chcl->charset_list;
1144 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1148 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1149 Return a list of the names of all defined charsets.
1153 Lisp_Object charset_list = Qnil;
1154 struct gcpro gcpro1;
1155 struct charset_list_closure charset_list_closure;
1157 GCPRO1 (charset_list);
1158 charset_list_closure.charset_list = &charset_list;
1159 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1160 &charset_list_closure);
1163 return charset_list;
1166 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1167 Return the name of the given charset.
1171 return XCHARSET_NAME (Fget_charset (charset));
1174 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1175 Define a new character set.
1176 This function is for use with Mule support.
1177 NAME is a symbol, the name by which the character set is normally referred.
1178 DOC-STRING is a string describing the character set.
1179 PROPS is a property list, describing the specific nature of the
1180 character set. Recognized properties are:
1182 'short-name Short version of the charset name (ex: Latin-1)
1183 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1184 'registry A regular expression matching the font registry field for
1186 'dimension Number of octets used to index a character in this charset.
1187 Either 1 or 2. Defaults to 1.
1188 'columns Number of columns used to display a character in this charset.
1189 Only used in TTY mode. (Under X, the actual width of a
1190 character can be derived from the font used to display the
1191 characters.) If unspecified, defaults to the dimension
1192 (this is almost always the correct value).
1193 'chars Number of characters in each dimension (94 or 96).
1194 Defaults to 94. Note that if the dimension is 2, the
1195 character set thus described is 94x94 or 96x96.
1196 'final Final byte of ISO 2022 escape sequence. Must be
1197 supplied. Each combination of (DIMENSION, CHARS) defines a
1198 separate namespace for final bytes. Note that ISO
1199 2022 restricts the final byte to the range
1200 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1201 dimension == 2. Note also that final bytes in the range
1202 0x30 - 0x3F are reserved for user-defined (not official)
1204 'graphic 0 (use left half of font on output) or 1 (use right half
1205 of font on output). Defaults to 0. For example, for
1206 a font whose registry is ISO8859-1, the left half
1207 (octets 0x20 - 0x7F) is the `ascii' character set, while
1208 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1209 character set. With 'graphic set to 0, the octets
1210 will have their high bit cleared; with it set to 1,
1211 the octets will have their high bit set.
1212 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1214 'ccl-program A compiled CCL program used to convert a character in
1215 this charset into an index into the font. This is in
1216 addition to the 'graphic property. The CCL program
1217 is passed the octets of the character, with the high
1218 bit cleared and set depending upon whether the value
1219 of the 'graphic property is 0 or 1.
1221 (name, doc_string, props))
1223 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1224 int direction = CHARSET_LEFT_TO_RIGHT;
1226 Lisp_Object registry = Qnil;
1227 Lisp_Object charset;
1228 Lisp_Object rest, keyword, value;
1229 Lisp_Object ccl_program = Qnil;
1230 Lisp_Object short_name = Qnil, long_name = Qnil;
1232 unsigned char byte_offset = 0;
1235 CHECK_SYMBOL (name);
1236 if (!NILP (doc_string))
1237 CHECK_STRING (doc_string);
1239 charset = Ffind_charset (name);
1240 if (!NILP (charset))
1241 signal_simple_error ("Cannot redefine existing charset", name);
1243 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1245 if (EQ (keyword, Qshort_name))
1247 CHECK_STRING (value);
1251 if (EQ (keyword, Qlong_name))
1253 CHECK_STRING (value);
1257 else if (EQ (keyword, Qdimension))
1260 dimension = XINT (value);
1261 if (dimension < 1 || dimension > 2)
1262 signal_simple_error ("Invalid value for 'dimension", value);
1265 else if (EQ (keyword, Qchars))
1268 chars = XINT (value);
1269 if (chars != 94 && chars != 96)
1270 signal_simple_error ("Invalid value for 'chars", value);
1273 else if (EQ (keyword, Qcolumns))
1276 columns = XINT (value);
1277 if (columns != 1 && columns != 2)
1278 signal_simple_error ("Invalid value for 'columns", value);
1281 else if (EQ (keyword, Qgraphic))
1284 graphic = XINT (value);
1286 if (graphic < 0 || graphic > 2)
1288 if (graphic < 0 || graphic > 1)
1290 signal_simple_error ("Invalid value for 'graphic", value);
1293 else if (EQ (keyword, Qregistry))
1295 CHECK_STRING (value);
1299 else if (EQ (keyword, Qdirection))
1301 if (EQ (value, Ql2r))
1302 direction = CHARSET_LEFT_TO_RIGHT;
1303 else if (EQ (value, Qr2l))
1304 direction = CHARSET_RIGHT_TO_LEFT;
1306 signal_simple_error ("Invalid value for 'direction", value);
1309 else if (EQ (keyword, Qfinal))
1311 CHECK_CHAR_COERCE_INT (value);
1312 final = XCHAR (value);
1313 if (final < '0' || final > '~')
1314 signal_simple_error ("Invalid value for 'final", value);
1317 else if (EQ (keyword, Qccl_program))
1319 CHECK_VECTOR (value);
1320 ccl_program = value;
1324 signal_simple_error ("Unrecognized property", keyword);
1328 error ("'final must be specified");
1329 if (dimension == 2 && final > 0x5F)
1331 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1335 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1337 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1339 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1340 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1342 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1344 id = get_unallocated_leading_byte (dimension);
1346 if (NILP (doc_string))
1347 doc_string = build_string ("");
1349 if (NILP (registry))
1350 registry = build_string ("");
1352 if (NILP (short_name))
1353 XSETSTRING (short_name, XSYMBOL (name)->name);
1355 if (NILP (long_name))
1356 long_name = doc_string;
1359 columns = dimension;
1360 charset = make_charset (id, name, type, columns, graphic,
1361 final, direction, short_name, long_name,
1362 doc_string, registry,
1363 Qnil, 0, 0, 0, byte_offset);
1364 if (!NILP (ccl_program))
1365 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1369 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1371 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1372 NEW-NAME is the name of the new charset. Return the new charset.
1374 (charset, new_name))
1376 Lisp_Object new_charset = Qnil;
1377 int id, dimension, columns, graphic, final;
1378 int direction, type;
1379 Lisp_Object registry, doc_string, short_name, long_name;
1380 struct Lisp_Charset *cs;
1382 charset = Fget_charset (charset);
1383 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1384 signal_simple_error ("Charset already has reverse-direction charset",
1387 CHECK_SYMBOL (new_name);
1388 if (!NILP (Ffind_charset (new_name)))
1389 signal_simple_error ("Cannot redefine existing charset", new_name);
1391 cs = XCHARSET (charset);
1393 type = CHARSET_TYPE (cs);
1394 columns = CHARSET_COLUMNS (cs);
1395 dimension = CHARSET_DIMENSION (cs);
1396 id = get_unallocated_leading_byte (dimension);
1398 graphic = CHARSET_GRAPHIC (cs);
1399 final = CHARSET_FINAL (cs);
1400 direction = CHARSET_RIGHT_TO_LEFT;
1401 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1402 direction = CHARSET_LEFT_TO_RIGHT;
1403 doc_string = CHARSET_DOC_STRING (cs);
1404 short_name = CHARSET_SHORT_NAME (cs);
1405 long_name = CHARSET_LONG_NAME (cs);
1406 registry = CHARSET_REGISTRY (cs);
1408 new_charset = make_charset (id, new_name, type, columns,
1409 graphic, final, direction, short_name, long_name,
1410 doc_string, registry,
1412 CHARSET_DECODING_TABLE(cs),
1413 CHARSET_UCS_MIN(cs),
1414 CHARSET_UCS_MAX(cs),
1415 CHARSET_CODE_OFFSET(cs),
1416 CHARSET_BYTE_OFFSET(cs)
1422 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1423 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1428 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1429 Define symbol ALIAS as an alias for CHARSET.
1433 CHECK_SYMBOL (alias);
1434 charset = Fget_charset (charset);
1435 return Fputhash (alias, charset, Vcharset_hash_table);
1438 /* #### Reverse direction charsets not yet implemented. */
1440 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1442 Return the reverse-direction charset parallel to CHARSET, if any.
1443 This is the charset with the same properties (in particular, the same
1444 dimension, number of characters per dimension, and final byte) as
1445 CHARSET but whose characters are displayed in the opposite direction.
1449 charset = Fget_charset (charset);
1450 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1454 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1455 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1456 If DIRECTION is omitted, both directions will be checked (left-to-right
1457 will be returned if character sets exist for both directions).
1459 (dimension, chars, final, direction))
1461 int dm, ch, fi, di = -1;
1463 Lisp_Object obj = Qnil;
1465 CHECK_INT (dimension);
1466 dm = XINT (dimension);
1467 if (dm < 1 || dm > 2)
1468 signal_simple_error ("Invalid value for DIMENSION", dimension);
1472 if (ch != 94 && ch != 96)
1473 signal_simple_error ("Invalid value for CHARS", chars);
1475 CHECK_CHAR_COERCE_INT (final);
1477 if (fi < '0' || fi > '~')
1478 signal_simple_error ("Invalid value for FINAL", final);
1480 if (EQ (direction, Ql2r))
1481 di = CHARSET_LEFT_TO_RIGHT;
1482 else if (EQ (direction, Qr2l))
1483 di = CHARSET_RIGHT_TO_LEFT;
1484 else if (!NILP (direction))
1485 signal_simple_error ("Invalid value for DIRECTION", direction);
1487 if (dm == 2 && fi > 0x5F)
1489 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1492 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1494 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1498 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1500 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1503 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1506 return XCHARSET_NAME (obj);
1510 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1511 Return short name of CHARSET.
1515 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1518 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1519 Return long name of CHARSET.
1523 return XCHARSET_LONG_NAME (Fget_charset (charset));
1526 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1527 Return description of CHARSET.
1531 return XCHARSET_DOC_STRING (Fget_charset (charset));
1534 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1535 Return dimension of CHARSET.
1539 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1542 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1543 Return property PROP of CHARSET.
1544 Recognized properties are those listed in `make-charset', as well as
1545 'name and 'doc-string.
1549 struct Lisp_Charset *cs;
1551 charset = Fget_charset (charset);
1552 cs = XCHARSET (charset);
1554 CHECK_SYMBOL (prop);
1555 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1556 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1557 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1558 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1559 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1560 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1561 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1562 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1563 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1564 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1565 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1566 if (EQ (prop, Qdirection))
1567 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1568 if (EQ (prop, Qreverse_direction_charset))
1570 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1574 return XCHARSET_NAME (obj);
1576 signal_simple_error ("Unrecognized charset property name", prop);
1577 return Qnil; /* not reached */
1580 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1581 Return charset identification number of CHARSET.
1585 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1588 /* #### We need to figure out which properties we really want to
1591 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1592 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1594 (charset, ccl_program))
1596 charset = Fget_charset (charset);
1597 CHECK_VECTOR (ccl_program);
1598 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1603 invalidate_charset_font_caches (Lisp_Object charset)
1605 /* Invalidate font cache entries for charset on all devices. */
1606 Lisp_Object devcons, concons, hash_table;
1607 DEVICE_LOOP_NO_BREAK (devcons, concons)
1609 struct device *d = XDEVICE (XCAR (devcons));
1610 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1611 if (!UNBOUNDP (hash_table))
1612 Fclrhash (hash_table);
1616 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1617 Set the 'registry property of CHARSET to REGISTRY.
1619 (charset, registry))
1621 charset = Fget_charset (charset);
1622 CHECK_STRING (registry);
1623 XCHARSET_REGISTRY (charset) = registry;
1624 invalidate_charset_font_caches (charset);
1625 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1630 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1631 Return mapping-table of CHARSET.
1635 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1638 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1639 Set mapping-table of CHARSET to TABLE.
1643 struct Lisp_Charset *cs;
1644 Lisp_Object old_table;
1647 charset = Fget_charset (charset);
1648 cs = XCHARSET (charset);
1650 if (EQ (table, Qnil))
1652 CHARSET_DECODING_TABLE(cs) = table;
1655 else if (VECTORP (table))
1657 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1658 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1659 old_table = CHARSET_DECODING_TABLE(cs);
1660 CHARSET_DECODING_TABLE(cs) = table;
1663 signal_error (Qwrong_type_argument,
1664 list2 (build_translated_string ("vector-or-nil-p"),
1666 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1668 switch (CHARSET_DIMENSION (cs))
1671 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1673 Lisp_Object c = XVECTOR_DATA(table)[i];
1678 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1682 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1684 Lisp_Object v = XVECTOR_DATA(table)[i];
1690 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1692 CHARSET_DECODING_TABLE(cs) = old_table;
1693 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1695 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1697 Lisp_Object c = XVECTOR_DATA(v)[j];
1700 Fput_char_attribute (c, charset,
1703 (i + CHARSET_BYTE_OFFSET (cs)),
1705 (j + CHARSET_BYTE_OFFSET (cs))));
1709 Fput_char_attribute (v, charset,
1711 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1720 /************************************************************************/
1721 /* Lisp primitives for working with characters */
1722 /************************************************************************/
1724 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1725 Make a character from CHARSET and octets ARG1 and ARG2.
1726 ARG2 is required only for characters from two-dimensional charsets.
1727 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1728 character s with caron.
1730 (charset, arg1, arg2))
1732 struct Lisp_Charset *cs;
1734 int lowlim, highlim;
1736 charset = Fget_charset (charset);
1737 cs = XCHARSET (charset);
1739 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1740 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1742 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1744 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1745 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1748 /* It is useful (and safe, according to Olivier Galibert) to strip
1749 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1750 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1751 Latin 2 code of the character. */
1759 if (a1 < lowlim || a1 > highlim)
1760 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1762 if (CHARSET_DIMENSION (cs) == 1)
1766 ("Charset is of dimension one; second octet must be nil", arg2);
1767 return make_char (MAKE_CHAR (charset, a1, 0));
1776 a2 = XINT (arg2) & 0x7f;
1778 if (a2 < lowlim || a2 > highlim)
1779 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1781 return make_char (MAKE_CHAR (charset, a1, a2));
1784 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1785 Return the character set of char CH.
1789 CHECK_CHAR_COERCE_INT (ch);
1791 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1794 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1795 Return list of charset and one or two position-codes of CHAR.
1799 /* This function can GC */
1800 struct gcpro gcpro1, gcpro2;
1801 Lisp_Object charset = Qnil;
1802 Lisp_Object rc = Qnil;
1805 GCPRO2 (charset, rc);
1806 CHECK_CHAR_COERCE_INT (character);
1808 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1810 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1812 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1816 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1824 #ifdef ENABLE_COMPOSITE_CHARS
1825 /************************************************************************/
1826 /* composite character functions */
1827 /************************************************************************/
1830 lookup_composite_char (Bufbyte *str, int len)
1832 Lisp_Object lispstr = make_string (str, len);
1833 Lisp_Object ch = Fgethash (lispstr,
1834 Vcomposite_char_string2char_hash_table,
1840 if (composite_char_row_next >= 128)
1841 signal_simple_error ("No more composite chars available", lispstr);
1842 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1843 composite_char_col_next);
1844 Fputhash (make_char (emch), lispstr,
1845 Vcomposite_char_char2string_hash_table);
1846 Fputhash (lispstr, make_char (emch),
1847 Vcomposite_char_string2char_hash_table);
1848 composite_char_col_next++;
1849 if (composite_char_col_next >= 128)
1851 composite_char_col_next = 32;
1852 composite_char_row_next++;
1861 composite_char_string (Emchar ch)
1863 Lisp_Object str = Fgethash (make_char (ch),
1864 Vcomposite_char_char2string_hash_table,
1866 assert (!UNBOUNDP (str));
1870 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1871 Convert a string into a single composite character.
1872 The character is the result of overstriking all the characters in
1877 CHECK_STRING (string);
1878 return make_char (lookup_composite_char (XSTRING_DATA (string),
1879 XSTRING_LENGTH (string)));
1882 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1883 Return a string of the characters comprising a composite character.
1891 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1892 signal_simple_error ("Must be composite char", ch);
1893 return composite_char_string (emch);
1895 #endif /* ENABLE_COMPOSITE_CHARS */
1898 /************************************************************************/
1899 /* initialization */
1900 /************************************************************************/
1903 syms_of_mule_charset (void)
1905 DEFSUBR (Fcharsetp);
1906 DEFSUBR (Ffind_charset);
1907 DEFSUBR (Fget_charset);
1908 DEFSUBR (Fcharset_list);
1909 DEFSUBR (Fcharset_name);
1910 DEFSUBR (Fmake_charset);
1911 DEFSUBR (Fmake_reverse_direction_charset);
1912 /* DEFSUBR (Freverse_direction_charset); */
1913 DEFSUBR (Fdefine_charset_alias);
1914 DEFSUBR (Fcharset_from_attributes);
1915 DEFSUBR (Fcharset_short_name);
1916 DEFSUBR (Fcharset_long_name);
1917 DEFSUBR (Fcharset_description);
1918 DEFSUBR (Fcharset_dimension);
1919 DEFSUBR (Fcharset_property);
1920 DEFSUBR (Fcharset_id);
1921 DEFSUBR (Fset_charset_ccl_program);
1922 DEFSUBR (Fset_charset_registry);
1924 DEFSUBR (Fchar_attribute_alist);
1925 DEFSUBR (Fget_char_attribute);
1926 DEFSUBR (Fput_char_attribute);
1927 DEFSUBR (Fcharset_mapping_table);
1928 DEFSUBR (Fset_charset_mapping_table);
1931 DEFSUBR (Fmake_char);
1932 DEFSUBR (Fchar_charset);
1933 DEFSUBR (Fsplit_char);
1935 #ifdef ENABLE_COMPOSITE_CHARS
1936 DEFSUBR (Fmake_composite_char);
1937 DEFSUBR (Fcomposite_char_string);
1940 defsymbol (&Qcharsetp, "charsetp");
1941 defsymbol (&Qregistry, "registry");
1942 defsymbol (&Qfinal, "final");
1943 defsymbol (&Qgraphic, "graphic");
1944 defsymbol (&Qdirection, "direction");
1945 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1946 defsymbol (&Qshort_name, "short-name");
1947 defsymbol (&Qlong_name, "long-name");
1949 defsymbol (&Ql2r, "l2r");
1950 defsymbol (&Qr2l, "r2l");
1952 /* Charsets, compatible with FSF 20.3
1953 Naming convention is Script-Charset[-Edition] */
1954 defsymbol (&Qascii, "ascii");
1955 defsymbol (&Qcontrol_1, "control-1");
1956 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1957 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1958 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1959 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1960 defsymbol (&Qthai_tis620, "thai-tis620");
1961 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1962 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1963 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1964 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1965 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1966 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1967 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1968 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1969 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1970 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1971 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1972 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1973 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1974 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1976 defsymbol (&Qucs_bmp, "ucs-bmp");
1977 defsymbol (&Qlatin_viscii, "latin-viscii");
1978 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
1979 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
1980 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
1981 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
1982 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
1983 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
1985 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1986 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1988 defsymbol (&Qcomposite, "composite");
1992 vars_of_mule_charset (void)
1999 /* Table of charsets indexed by leading byte. */
2000 for (i = 0; i < countof (charset_by_leading_byte); i++)
2001 charset_by_leading_byte[i] = Qnil;
2004 /* Table of charsets indexed by type/final-byte. */
2005 for (i = 0; i < countof (charset_by_attributes); i++)
2006 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2007 charset_by_attributes[i][j] = Qnil;
2009 /* Table of charsets indexed by type/final-byte/direction. */
2010 for (i = 0; i < countof (charset_by_attributes); i++)
2011 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2012 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2013 charset_by_attributes[i][j][k] = Qnil;
2017 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2019 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2020 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2024 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2025 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2026 Leading-code of private TYPE9N charset of column-width 1.
2028 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2032 Vutf_2000_version = build_string("0.10 (Yao)");
2033 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2034 Version number of UTF-2000.
2037 staticpro (&Vcharacter_attribute_table);
2038 Vcharacter_attribute_table = make_char_code_table (Qnil);
2040 Vdefault_coded_charset_priority_list = Qnil;
2041 DEFVAR_LISP ("default-coded-charset-priority-list",
2042 &Vdefault_coded_charset_priority_list /*
2043 Default order of preferred coded-character-sets.
2049 complex_vars_of_mule_charset (void)
2051 staticpro (&Vcharset_hash_table);
2052 Vcharset_hash_table =
2053 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2055 /* Predefined character sets. We store them into variables for
2060 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2061 CHARSET_TYPE_256X256, 1, 2, 0,
2062 CHARSET_LEFT_TO_RIGHT,
2063 build_string ("BMP"),
2064 build_string ("BMP"),
2065 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2066 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2067 Qnil, 0, 0xFFFF, 0, 0);
2069 # define MIN_CHAR_THAI 0
2070 # define MAX_CHAR_THAI 0
2071 # define MIN_CHAR_GREEK 0
2072 # define MAX_CHAR_GREEK 0
2073 # define MIN_CHAR_HEBREW 0
2074 # define MAX_CHAR_HEBREW 0
2075 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2076 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2077 # define MIN_CHAR_CYRILLIC 0
2078 # define MAX_CHAR_CYRILLIC 0
2081 make_charset (LEADING_BYTE_ASCII, Qascii,
2082 CHARSET_TYPE_94, 1, 0, 'B',
2083 CHARSET_LEFT_TO_RIGHT,
2084 build_string ("ASCII"),
2085 build_string ("ASCII)"),
2086 build_string ("ASCII (ISO646 IRV)"),
2087 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2088 Qnil, 0, 0x7F, 0, 0);
2089 Vcharset_control_1 =
2090 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2091 CHARSET_TYPE_94, 1, 1, 0,
2092 CHARSET_LEFT_TO_RIGHT,
2093 build_string ("C1"),
2094 build_string ("Control characters"),
2095 build_string ("Control characters 128-191"),
2097 Qnil, 0x80, 0x9F, 0, 0);
2098 Vcharset_latin_iso8859_1 =
2099 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2100 CHARSET_TYPE_96, 1, 1, 'A',
2101 CHARSET_LEFT_TO_RIGHT,
2102 build_string ("Latin-1"),
2103 build_string ("ISO8859-1 (Latin-1)"),
2104 build_string ("ISO8859-1 (Latin-1)"),
2105 build_string ("iso8859-1"),
2106 Qnil, 0xA0, 0xFF, 0, 32);
2107 Vcharset_latin_iso8859_2 =
2108 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2109 CHARSET_TYPE_96, 1, 1, 'B',
2110 CHARSET_LEFT_TO_RIGHT,
2111 build_string ("Latin-2"),
2112 build_string ("ISO8859-2 (Latin-2)"),
2113 build_string ("ISO8859-2 (Latin-2)"),
2114 build_string ("iso8859-2"),
2116 Vcharset_latin_iso8859_3 =
2117 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2118 CHARSET_TYPE_96, 1, 1, 'C',
2119 CHARSET_LEFT_TO_RIGHT,
2120 build_string ("Latin-3"),
2121 build_string ("ISO8859-3 (Latin-3)"),
2122 build_string ("ISO8859-3 (Latin-3)"),
2123 build_string ("iso8859-3"),
2125 Vcharset_latin_iso8859_4 =
2126 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2127 CHARSET_TYPE_96, 1, 1, 'D',
2128 CHARSET_LEFT_TO_RIGHT,
2129 build_string ("Latin-4"),
2130 build_string ("ISO8859-4 (Latin-4)"),
2131 build_string ("ISO8859-4 (Latin-4)"),
2132 build_string ("iso8859-4"),
2134 Vcharset_thai_tis620 =
2135 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2136 CHARSET_TYPE_96, 1, 1, 'T',
2137 CHARSET_LEFT_TO_RIGHT,
2138 build_string ("TIS620"),
2139 build_string ("TIS620 (Thai)"),
2140 build_string ("TIS620.2529 (Thai)"),
2141 build_string ("tis620"),
2142 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2143 Vcharset_greek_iso8859_7 =
2144 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2145 CHARSET_TYPE_96, 1, 1, 'F',
2146 CHARSET_LEFT_TO_RIGHT,
2147 build_string ("ISO8859-7"),
2148 build_string ("ISO8859-7 (Greek)"),
2149 build_string ("ISO8859-7 (Greek)"),
2150 build_string ("iso8859-7"),
2151 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2152 Vcharset_arabic_iso8859_6 =
2153 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2154 CHARSET_TYPE_96, 1, 1, 'G',
2155 CHARSET_RIGHT_TO_LEFT,
2156 build_string ("ISO8859-6"),
2157 build_string ("ISO8859-6 (Arabic)"),
2158 build_string ("ISO8859-6 (Arabic)"),
2159 build_string ("iso8859-6"),
2161 Vcharset_hebrew_iso8859_8 =
2162 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2163 CHARSET_TYPE_96, 1, 1, 'H',
2164 CHARSET_RIGHT_TO_LEFT,
2165 build_string ("ISO8859-8"),
2166 build_string ("ISO8859-8 (Hebrew)"),
2167 build_string ("ISO8859-8 (Hebrew)"),
2168 build_string ("iso8859-8"),
2169 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2170 Vcharset_katakana_jisx0201 =
2171 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2172 CHARSET_TYPE_94, 1, 1, 'I',
2173 CHARSET_LEFT_TO_RIGHT,
2174 build_string ("JISX0201 Kana"),
2175 build_string ("JISX0201.1976 (Japanese Kana)"),
2176 build_string ("JISX0201.1976 Japanese Kana"),
2177 build_string ("jisx0201\\.1976"),
2179 MIN_CHAR_HALFWIDTH_KATAKANA,
2180 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2181 Vcharset_latin_jisx0201 =
2182 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2183 CHARSET_TYPE_94, 1, 0, 'J',
2184 CHARSET_LEFT_TO_RIGHT,
2185 build_string ("JISX0201 Roman"),
2186 build_string ("JISX0201.1976 (Japanese Roman)"),
2187 build_string ("JISX0201.1976 Japanese Roman"),
2188 build_string ("jisx0201\\.1976"),
2190 Vcharset_cyrillic_iso8859_5 =
2191 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2192 CHARSET_TYPE_96, 1, 1, 'L',
2193 CHARSET_LEFT_TO_RIGHT,
2194 build_string ("ISO8859-5"),
2195 build_string ("ISO8859-5 (Cyrillic)"),
2196 build_string ("ISO8859-5 (Cyrillic)"),
2197 build_string ("iso8859-5"),
2198 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2199 Vcharset_latin_iso8859_9 =
2200 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2201 CHARSET_TYPE_96, 1, 1, 'M',
2202 CHARSET_LEFT_TO_RIGHT,
2203 build_string ("Latin-5"),
2204 build_string ("ISO8859-9 (Latin-5)"),
2205 build_string ("ISO8859-9 (Latin-5)"),
2206 build_string ("iso8859-9"),
2208 Vcharset_japanese_jisx0208_1978 =
2209 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2210 CHARSET_TYPE_94X94, 2, 0, '@',
2211 CHARSET_LEFT_TO_RIGHT,
2212 build_string ("JIS X0208:1978"),
2213 build_string ("JIS X0208:1978 (Japanese)"),
2215 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2216 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2218 Vcharset_chinese_gb2312 =
2219 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2220 CHARSET_TYPE_94X94, 2, 0, 'A',
2221 CHARSET_LEFT_TO_RIGHT,
2222 build_string ("GB2312"),
2223 build_string ("GB2312)"),
2224 build_string ("GB2312 Chinese simplified"),
2225 build_string ("gb2312"),
2227 Vcharset_japanese_jisx0208 =
2228 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2229 CHARSET_TYPE_94X94, 2, 0, 'B',
2230 CHARSET_LEFT_TO_RIGHT,
2231 build_string ("JISX0208"),
2232 build_string ("JIS X0208:1983 (Japanese)"),
2233 build_string ("JIS X0208:1983 Japanese Kanji"),
2234 build_string ("jisx0208\\.1983"),
2236 Vcharset_korean_ksc5601 =
2237 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2238 CHARSET_TYPE_94X94, 2, 0, 'C',
2239 CHARSET_LEFT_TO_RIGHT,
2240 build_string ("KSC5601"),
2241 build_string ("KSC5601 (Korean"),
2242 build_string ("KSC5601 Korean Hangul and Hanja"),
2243 build_string ("ksc5601"),
2245 Vcharset_japanese_jisx0212 =
2246 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2247 CHARSET_TYPE_94X94, 2, 0, 'D',
2248 CHARSET_LEFT_TO_RIGHT,
2249 build_string ("JISX0212"),
2250 build_string ("JISX0212 (Japanese)"),
2251 build_string ("JISX0212 Japanese Supplement"),
2252 build_string ("jisx0212"),
2255 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2256 Vcharset_chinese_cns11643_1 =
2257 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2258 CHARSET_TYPE_94X94, 2, 0, 'G',
2259 CHARSET_LEFT_TO_RIGHT,
2260 build_string ("CNS11643-1"),
2261 build_string ("CNS11643-1 (Chinese traditional)"),
2263 ("CNS 11643 Plane 1 Chinese traditional"),
2264 build_string (CHINESE_CNS_PLANE_RE("1")),
2266 Vcharset_chinese_cns11643_2 =
2267 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2268 CHARSET_TYPE_94X94, 2, 0, 'H',
2269 CHARSET_LEFT_TO_RIGHT,
2270 build_string ("CNS11643-2"),
2271 build_string ("CNS11643-2 (Chinese traditional)"),
2273 ("CNS 11643 Plane 2 Chinese traditional"),
2274 build_string (CHINESE_CNS_PLANE_RE("2")),
2277 Vcharset_latin_viscii_lower =
2278 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2279 CHARSET_TYPE_96, 1, 1, '1',
2280 CHARSET_LEFT_TO_RIGHT,
2281 build_string ("VISCII lower"),
2282 build_string ("VISCII lower (Vietnamese)"),
2283 build_string ("VISCII lower (Vietnamese)"),
2284 build_string ("MULEVISCII-LOWER"),
2286 Vcharset_latin_viscii_upper =
2287 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2288 CHARSET_TYPE_96, 1, 1, '2',
2289 CHARSET_LEFT_TO_RIGHT,
2290 build_string ("VISCII upper"),
2291 build_string ("VISCII upper (Vietnamese)"),
2292 build_string ("VISCII upper (Vietnamese)"),
2293 build_string ("MULEVISCII-UPPER"),
2295 Vcharset_latin_viscii =
2296 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2297 CHARSET_TYPE_256, 1, 2, 0,
2298 CHARSET_LEFT_TO_RIGHT,
2299 build_string ("VISCII"),
2300 build_string ("VISCII 1.1 (Vietnamese)"),
2301 build_string ("VISCII 1.1 (Vietnamese)"),
2302 build_string ("VISCII1\\.1"),
2304 Vcharset_hiragana_jisx0208 =
2305 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2306 CHARSET_TYPE_94X94, 2, 0, 'B',
2307 CHARSET_LEFT_TO_RIGHT,
2308 build_string ("Hiragana"),
2309 build_string ("Hiragana of JIS X0208"),
2310 build_string ("Japanese Hiragana of JIS X0208"),
2311 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2312 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2313 (0x24 - 33) * 94 + (0x21 - 33), 33);
2314 Vcharset_katakana_jisx0208 =
2315 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2316 CHARSET_TYPE_94X94, 2, 0, 'B',
2317 CHARSET_LEFT_TO_RIGHT,
2318 build_string ("Katakana"),
2319 build_string ("Katakana of JIS X0208"),
2320 build_string ("Japanese Katakana of JIS X0208"),
2321 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2322 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2323 (0x25 - 33) * 94 + (0x21 - 33), 33);
2325 Vcharset_chinese_big5_1 =
2326 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2327 CHARSET_TYPE_94X94, 2, 0, '0',
2328 CHARSET_LEFT_TO_RIGHT,
2329 build_string ("Big5"),
2330 build_string ("Big5 (Level-1)"),
2332 ("Big5 Level-1 Chinese traditional"),
2333 build_string ("big5"),
2335 Vcharset_chinese_big5_2 =
2336 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2337 CHARSET_TYPE_94X94, 2, 0, '1',
2338 CHARSET_LEFT_TO_RIGHT,
2339 build_string ("Big5"),
2340 build_string ("Big5 (Level-2)"),
2342 ("Big5 Level-2 Chinese traditional"),
2343 build_string ("big5"),
2346 #ifdef ENABLE_COMPOSITE_CHARS
2347 /* #### For simplicity, we put composite chars into a 96x96 charset.
2348 This is going to lead to problems because you can run out of
2349 room, esp. as we don't yet recycle numbers. */
2350 Vcharset_composite =
2351 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2352 CHARSET_TYPE_96X96, 2, 0, 0,
2353 CHARSET_LEFT_TO_RIGHT,
2354 build_string ("Composite"),
2355 build_string ("Composite characters"),
2356 build_string ("Composite characters"),
2359 composite_char_row_next = 32;
2360 composite_char_col_next = 32;
2362 Vcomposite_char_string2char_hash_table =
2363 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2364 Vcomposite_char_char2string_hash_table =
2365 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2366 staticpro (&Vcomposite_char_string2char_hash_table);
2367 staticpro (&Vcomposite_char_char2string_hash_table);
2368 #endif /* ENABLE_COMPOSITE_CHARS */