1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
227 #define make_char_code_table(initval) make_char_byte_table(initval)
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vcharacter_attribute_table;
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
332 return Fcdr (Fassq (attribute, ret));
335 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
336 Store CHARACTER's ATTRIBUTE with VALUE.
338 (character, attribute, value))
340 Emchar char_code = XCHAR (character);
342 = get_char_code_table (char_code, Vcharacter_attribute_table);
343 Lisp_Object cell = Fassq (attribute, ret);
346 ret = Fcons (Fcons (attribute, value), ret);
348 Fsetcdr (cell, value);
349 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
354 Lisp_Object Vutf_2000_version;
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
384 Qjapanese_jisx0208_1978,
396 Qvietnamese_viscii_lower,
397 Qvietnamese_viscii_upper,
405 Lisp_Object Ql2r, Qr2l;
407 Lisp_Object Vcharset_hash_table;
410 static Charset_ID next_allocated_leading_byte;
412 static Charset_ID next_allocated_1_byte_leading_byte;
413 static Charset_ID next_allocated_2_byte_leading_byte;
416 /* Composite characters are characters constructed by overstriking two
417 or more regular characters.
419 1) The old Mule implementation involves storing composite characters
420 in a buffer as a tag followed by all of the actual characters
421 used to make up the composite character. I think this is a bad
422 idea; it greatly complicates code that wants to handle strings
423 one character at a time because it has to deal with the possibility
424 of great big ungainly characters. It's much more reasonable to
425 simply store an index into a table of composite characters.
427 2) The current implementation only allows for 16,384 separate
428 composite characters over the lifetime of the XEmacs process.
429 This could become a potential problem if the user
430 edited lots of different files that use composite characters.
431 Due to FSF bogosity, increasing the number of allowable
432 composite characters under Mule would decrease the number
433 of possible faces that can exist. Mule already has shrunk
434 this to 2048, and further shrinkage would become uncomfortable.
435 No such problems exist in XEmacs.
437 Composite characters could be represented as 0x80 C1 C2 C3,
438 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
439 for slightly under 2^20 (one million) composite characters
440 over the XEmacs process lifetime, and you only need to
441 increase the size of a Mule character from 19 to 21 bits.
442 Or you could use 0x80 C1 C2 C3 C4, allowing for about
443 85 million (slightly over 2^26) composite characters. */
446 /************************************************************************/
447 /* Basic Emchar functions */
448 /************************************************************************/
450 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
451 string in STR. Returns the number of bytes stored.
452 Do not call this directly. Use the macro set_charptr_emchar() instead.
456 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
471 else if ( c <= 0x7ff )
473 *p++ = (c >> 6) | 0xc0;
474 *p++ = (c & 0x3f) | 0x80;
476 else if ( c <= 0xffff )
478 *p++ = (c >> 12) | 0xe0;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
482 else if ( c <= 0x1fffff )
484 *p++ = (c >> 18) | 0xf0;
485 *p++ = ((c >> 12) & 0x3f) | 0x80;
486 *p++ = ((c >> 6) & 0x3f) | 0x80;
487 *p++ = (c & 0x3f) | 0x80;
489 else if ( c <= 0x3ffffff )
491 *p++ = (c >> 24) | 0xf8;
492 *p++ = ((c >> 18) & 0x3f) | 0x80;
493 *p++ = ((c >> 12) & 0x3f) | 0x80;
494 *p++ = ((c >> 6) & 0x3f) | 0x80;
495 *p++ = (c & 0x3f) | 0x80;
499 *p++ = (c >> 30) | 0xfc;
500 *p++ = ((c >> 24) & 0x3f) | 0x80;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
507 BREAKUP_CHAR (c, charset, c1, c2);
508 lb = CHAR_LEADING_BYTE (c);
509 if (LEADING_BYTE_PRIVATE_P (lb))
510 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
512 if (EQ (charset, Vcharset_control_1))
521 /* Return the first character from a Mule-encoded string in STR,
522 assuming it's non-ASCII. Do not call this directly.
523 Use the macro charptr_emchar() instead. */
526 non_ascii_charptr_emchar (CONST Bufbyte *str)
539 else if ( b >= 0xf8 )
544 else if ( b >= 0xf0 )
549 else if ( b >= 0xe0 )
554 else if ( b >= 0xc0 )
564 for( ; len > 0; len-- )
567 ch = ( ch << 6 ) | ( b & 0x3f );
571 Bufbyte i0 = *str, i1, i2 = 0;
574 if (i0 == LEADING_BYTE_CONTROL_1)
575 return (Emchar) (*++str - 0x20);
577 if (LEADING_BYTE_PREFIX_P (i0))
582 charset = CHARSET_BY_LEADING_BYTE (i0);
583 if (XCHARSET_DIMENSION (charset) == 2)
586 return MAKE_CHAR (charset, i1, i2);
590 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
591 Do not call this directly. Use the macro valid_char_p() instead. */
595 non_ascii_valid_char_p (Emchar ch)
599 /* Must have only lowest 19 bits set */
603 f1 = CHAR_FIELD1 (ch);
604 f2 = CHAR_FIELD2 (ch);
605 f3 = CHAR_FIELD3 (ch);
611 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
612 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
613 f2 > MAX_CHAR_FIELD2_PRIVATE)
618 if (f3 != 0x20 && f3 != 0x7F)
622 NOTE: This takes advantage of the fact that
623 FIELD2_TO_OFFICIAL_LEADING_BYTE and
624 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
626 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
627 return (XCHARSET_CHARS (charset) == 96);
633 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
634 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
635 f1 > MAX_CHAR_FIELD1_PRIVATE)
637 if (f2 < 0x20 || f3 < 0x20)
640 #ifdef ENABLE_COMPOSITE_CHARS
641 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
643 if (UNBOUNDP (Fgethash (make_int (ch),
644 Vcomposite_char_char2string_hash_table,
649 #endif /* ENABLE_COMPOSITE_CHARS */
651 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
654 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
656 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
659 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
661 return (XCHARSET_CHARS (charset) == 96);
667 /************************************************************************/
668 /* Basic string functions */
669 /************************************************************************/
671 /* Copy the character pointed to by PTR into STR, assuming it's
672 non-ASCII. Do not call this directly. Use the macro
673 charptr_copy_char() instead. */
676 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
678 Bufbyte *strptr = str;
680 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
682 /* Notice fallthrough. */
684 case 6: *++strptr = *ptr++;
685 case 5: *++strptr = *ptr++;
687 case 4: *++strptr = *ptr++;
688 case 3: *++strptr = *ptr++;
689 case 2: *++strptr = *ptr;
694 return strptr + 1 - str;
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
712 str[0] = (Bufbyte) ch;
713 switch (REP_BYTES_BY_FIRST_BYTE (ch))
715 /* Notice fallthrough. */
718 ch = Lstream_getc (stream);
720 *++strptr = (Bufbyte) ch;
722 ch = Lstream_getc (stream);
724 *++strptr = (Bufbyte) ch;
727 ch = Lstream_getc (stream);
729 *++strptr = (Bufbyte) ch;
731 ch = Lstream_getc (stream);
733 *++strptr = (Bufbyte) ch;
735 ch = Lstream_getc (stream);
737 *++strptr = (Bufbyte) ch;
742 return charptr_emchar (str);
746 Lstream_fput_emchar (Lstream *stream, Emchar ch)
748 Bufbyte str[MAX_EMCHAR_LEN];
749 Bytecount len = set_charptr_emchar (str, ch);
750 return Lstream_write (stream, str, len);
754 Lstream_funget_emchar (Lstream *stream, Emchar ch)
756 Bufbyte str[MAX_EMCHAR_LEN];
757 Bytecount len = set_charptr_emchar (str, ch);
758 Lstream_unread (stream, str, len);
762 /************************************************************************/
764 /************************************************************************/
767 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
769 struct Lisp_Charset *cs = XCHARSET (obj);
771 markobj (cs->short_name);
772 markobj (cs->long_name);
773 markobj (cs->doc_string);
774 markobj (cs->registry);
775 markobj (cs->ccl_program);
777 markobj (cs->decoding_table);
778 markobj (cs->encoding_table);
784 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
786 struct Lisp_Charset *cs = XCHARSET (obj);
790 error ("printing unreadable object #<charset %s 0x%x>",
791 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
794 write_c_string ("#<charset ", printcharfun);
795 print_internal (CHARSET_NAME (cs), printcharfun, 0);
796 write_c_string (" ", printcharfun);
797 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
798 write_c_string (" ", printcharfun);
799 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
800 write_c_string (" ", printcharfun);
801 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
802 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
803 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
804 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
805 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
807 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
808 CHARSET_COLUMNS (cs),
809 CHARSET_GRAPHIC (cs),
811 write_c_string (buf, printcharfun);
812 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
813 sprintf (buf, " 0x%x>", cs->header.uid);
814 write_c_string (buf, printcharfun);
817 static const struct lrecord_description charset_description[] = {
818 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
820 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
825 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
826 mark_charset, print_charset, 0, 0, 0,
828 struct Lisp_Charset);
830 /* Make a new charset. */
833 make_charset (Charset_ID id, Lisp_Object name,
834 unsigned char type, unsigned char columns, unsigned char graphic,
835 Bufbyte final, unsigned char direction, Lisp_Object short_name,
836 Lisp_Object long_name, Lisp_Object doc,
838 Lisp_Object decoding_table,
839 Emchar ucs_min, Emchar ucs_max,
840 Emchar code_offset, unsigned char byte_offset)
843 struct Lisp_Charset *cs =
844 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
845 XSETCHARSET (obj, cs);
847 CHARSET_ID (cs) = id;
848 CHARSET_NAME (cs) = name;
849 CHARSET_SHORT_NAME (cs) = short_name;
850 CHARSET_LONG_NAME (cs) = long_name;
851 CHARSET_DIRECTION (cs) = direction;
852 CHARSET_TYPE (cs) = type;
853 CHARSET_COLUMNS (cs) = columns;
854 CHARSET_GRAPHIC (cs) = graphic;
855 CHARSET_FINAL (cs) = final;
856 CHARSET_DOC_STRING (cs) = doc;
857 CHARSET_REGISTRY (cs) = reg;
858 CHARSET_CCL_PROGRAM (cs) = Qnil;
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
861 CHARSET_DECODING_TABLE(cs) = Qnil;
862 CHARSET_ENCODING_TABLE(cs) = Qnil;
863 CHARSET_UCS_MIN(cs) = ucs_min;
864 CHARSET_UCS_MAX(cs) = ucs_max;
865 CHARSET_CODE_OFFSET(cs) = code_offset;
866 CHARSET_BYTE_OFFSET(cs) = byte_offset;
869 switch (CHARSET_TYPE (cs))
871 case CHARSET_TYPE_94:
872 CHARSET_DIMENSION (cs) = 1;
873 CHARSET_CHARS (cs) = 94;
875 case CHARSET_TYPE_96:
876 CHARSET_DIMENSION (cs) = 1;
877 CHARSET_CHARS (cs) = 96;
879 case CHARSET_TYPE_94X94:
880 CHARSET_DIMENSION (cs) = 2;
881 CHARSET_CHARS (cs) = 94;
883 case CHARSET_TYPE_96X96:
884 CHARSET_DIMENSION (cs) = 2;
885 CHARSET_CHARS (cs) = 96;
888 case CHARSET_TYPE_128:
889 CHARSET_DIMENSION (cs) = 1;
890 CHARSET_CHARS (cs) = 128;
892 case CHARSET_TYPE_128X128:
893 CHARSET_DIMENSION (cs) = 2;
894 CHARSET_CHARS (cs) = 128;
896 case CHARSET_TYPE_256:
897 CHARSET_DIMENSION (cs) = 1;
898 CHARSET_CHARS (cs) = 256;
900 case CHARSET_TYPE_256X256:
901 CHARSET_DIMENSION (cs) = 2;
902 CHARSET_CHARS (cs) = 256;
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private
922 if (code_offset == 0)
924 assert (NILP (charset_by_attributes[type][final]));
925 charset_by_attributes[type][final] = obj;
928 assert (NILP (charset_by_attributes[type][final][direction]));
929 charset_by_attributes[type][final][direction] = obj;
933 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
934 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
937 /* official leading byte */
938 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
941 /* Some charsets are "faux" and don't have names or really exist at
942 all except in the leading-byte table. */
944 Fputhash (name, obj, Vcharset_hash_table);
949 get_unallocated_leading_byte (int dimension)
954 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
957 lb = next_allocated_leading_byte++;
961 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
964 lb = next_allocated_1_byte_leading_byte++;
968 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
971 lb = next_allocated_2_byte_leading_byte++;
977 ("No more character sets free for this dimension",
978 make_int (dimension));
985 range_charset_code_point (Lisp_Object charset, Emchar ch)
989 if ((XCHARSET_UCS_MIN (charset) <= ch)
990 && (ch <= XCHARSET_UCS_MAX (charset)))
992 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
994 if (XCHARSET_DIMENSION (charset) == 1)
995 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
996 else if (XCHARSET_DIMENSION (charset) == 2)
997 return list2 (make_int (d / XCHARSET_CHARS (charset)
998 + XCHARSET_BYTE_OFFSET (charset)),
999 make_int (d % XCHARSET_CHARS (charset)
1000 + XCHARSET_BYTE_OFFSET (charset)));
1001 else if (XCHARSET_DIMENSION (charset) == 3)
1002 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1003 * XCHARSET_CHARS (charset))
1004 + XCHARSET_BYTE_OFFSET (charset)),
1005 make_int (d / XCHARSET_CHARS (charset)
1006 % XCHARSET_CHARS (charset)
1007 + XCHARSET_BYTE_OFFSET (charset)),
1008 make_int (d % XCHARSET_CHARS (charset)
1009 + XCHARSET_BYTE_OFFSET (charset)));
1010 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1011 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1012 * XCHARSET_CHARS (charset)
1013 * XCHARSET_CHARS (charset))
1014 + XCHARSET_BYTE_OFFSET (charset)),
1015 make_int (d / (XCHARSET_CHARS (charset)
1016 * XCHARSET_CHARS (charset))
1017 % XCHARSET_CHARS (charset)
1018 + XCHARSET_BYTE_OFFSET (charset)),
1019 make_int (d / XCHARSET_CHARS (charset)
1020 % XCHARSET_CHARS (charset)
1021 + XCHARSET_BYTE_OFFSET (charset)),
1022 make_int (d % XCHARSET_CHARS (charset)
1023 + XCHARSET_BYTE_OFFSET (charset)));
1025 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1027 if (XCHARSET_DIMENSION (charset) == 1)
1029 if (XCHARSET_CHARS (charset) == 94)
1031 if (((d = ch - (MIN_CHAR_94
1032 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1034 return list1 (make_int (d + 33));
1036 else if (XCHARSET_CHARS (charset) == 96)
1038 if (((d = ch - (MIN_CHAR_96
1039 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1041 return list1 (make_int (d + 32));
1046 else if (XCHARSET_DIMENSION (charset) == 2)
1048 if (XCHARSET_CHARS (charset) == 94)
1050 if (((d = ch - (MIN_CHAR_94x94
1051 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1054 return list2 ((d / 94) + 33, d % 94 + 33);
1056 else if (XCHARSET_CHARS (charset) == 96)
1058 if (((d = ch - (MIN_CHAR_96x96
1059 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1062 return list2 ((d / 96) + 32, d % 96 + 32);
1070 charset_code_point (Lisp_Object charset, Emchar ch)
1072 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1074 if (!EQ (cdef, Qnil))
1076 Lisp_Object field = Fassq (charset, cdef);
1078 if (!EQ (field, Qnil))
1079 return Fcdr (field);
1081 return range_charset_code_point (charset, ch);
1085 charset_get_byte1 (Lisp_Object charset, Emchar ch)
1090 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1092 Lisp_Object value = get_char_code_table (ch, table);
1096 Emchar code = XINT (value);
1098 if (code < (1 << 8))
1100 else if (code < (1 << 16))
1102 else if (code < (1 << 24))
1108 if ((XCHARSET_UCS_MIN (charset) <= ch)
1109 && (ch <= XCHARSET_UCS_MAX (charset)))
1110 return (ch - XCHARSET_UCS_MIN (charset)
1111 + XCHARSET_CODE_OFFSET (charset))
1112 / (XCHARSET_DIMENSION (charset) == 1 ?
1115 XCHARSET_DIMENSION (charset) == 2 ?
1116 XCHARSET_CHARS (charset)
1118 XCHARSET_DIMENSION (charset) == 3 ?
1119 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
1121 XCHARSET_CHARS (charset)
1122 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1123 + XCHARSET_BYTE_OFFSET (charset);
1124 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1126 if (XCHARSET_DIMENSION (charset) == 1)
1128 if (XCHARSET_CHARS (charset) == 94)
1130 if (((d = ch - (MIN_CHAR_94
1131 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1135 else if (XCHARSET_CHARS (charset) == 96)
1137 if (((d = ch - (MIN_CHAR_96
1138 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1145 else if (XCHARSET_DIMENSION (charset) == 2)
1147 if (XCHARSET_CHARS (charset) == 94)
1149 if (((d = ch - (MIN_CHAR_94x94
1150 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1153 return (d / 94) + 33;
1155 else if (XCHARSET_CHARS (charset) == 96)
1157 if (((d = ch - (MIN_CHAR_96x96
1158 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1161 return (d / 96) + 32;
1169 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1171 if (XCHARSET_DIMENSION (charset) == 1)
1177 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1179 Lisp_Object value = get_char_code_table (ch, table);
1183 Emchar code = XINT (value);
1185 if (code < (1 << 16))
1186 return (unsigned char)code;
1187 else if (code < (1 << 24))
1188 return (unsigned char)(code >> 16);
1190 return (unsigned char)(code >> 24);
1193 if ((XCHARSET_UCS_MIN (charset) <= ch)
1194 && (ch <= XCHARSET_UCS_MAX (charset)))
1195 return ((ch - XCHARSET_UCS_MIN (charset)
1196 + XCHARSET_CODE_OFFSET (charset))
1197 / (XCHARSET_DIMENSION (charset) == 2 ?
1200 XCHARSET_DIMENSION (charset) == 3 ?
1201 XCHARSET_CHARS (charset)
1203 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1204 % XCHARSET_CHARS (charset)
1205 + XCHARSET_BYTE_OFFSET (charset);
1206 else if (XCHARSET_CHARS (charset) == 94)
1207 return (MIN_CHAR_94x94
1208 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1209 && (ch < MIN_CHAR_94x94
1210 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1211 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1212 else /* if (XCHARSET_CHARS (charset) == 96) */
1213 return (MIN_CHAR_96x96
1214 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1215 && (ch < MIN_CHAR_96x96
1216 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1217 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1221 Lisp_Object Vdefault_coded_charset_priority_list;
1225 /************************************************************************/
1226 /* Basic charset Lisp functions */
1227 /************************************************************************/
1229 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1230 Return non-nil if OBJECT is a charset.
1234 return CHARSETP (object) ? Qt : Qnil;
1237 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1238 Retrieve the charset of the given name.
1239 If CHARSET-OR-NAME is a charset object, it is simply returned.
1240 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1241 nil is returned. Otherwise the associated charset object is returned.
1245 if (CHARSETP (charset_or_name))
1246 return charset_or_name;
1248 CHECK_SYMBOL (charset_or_name);
1249 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1252 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1253 Retrieve the charset of the given name.
1254 Same as `find-charset' except an error is signalled if there is no such
1255 charset instead of returning nil.
1259 Lisp_Object charset = Ffind_charset (name);
1262 signal_simple_error ("No such charset", name);
1266 /* We store the charsets in hash tables with the names as the key and the
1267 actual charset object as the value. Occasionally we need to use them
1268 in a list format. These routines provide us with that. */
1269 struct charset_list_closure
1271 Lisp_Object *charset_list;
1275 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1276 void *charset_list_closure)
1278 /* This function can GC */
1279 struct charset_list_closure *chcl =
1280 (struct charset_list_closure*) charset_list_closure;
1281 Lisp_Object *charset_list = chcl->charset_list;
1283 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1287 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1288 Return a list of the names of all defined charsets.
1292 Lisp_Object charset_list = Qnil;
1293 struct gcpro gcpro1;
1294 struct charset_list_closure charset_list_closure;
1296 GCPRO1 (charset_list);
1297 charset_list_closure.charset_list = &charset_list;
1298 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1299 &charset_list_closure);
1302 return charset_list;
1305 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1306 Return the name of the given charset.
1310 return XCHARSET_NAME (Fget_charset (charset));
1313 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1314 Define a new character set.
1315 This function is for use with Mule support.
1316 NAME is a symbol, the name by which the character set is normally referred.
1317 DOC-STRING is a string describing the character set.
1318 PROPS is a property list, describing the specific nature of the
1319 character set. Recognized properties are:
1321 'short-name Short version of the charset name (ex: Latin-1)
1322 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1323 'registry A regular expression matching the font registry field for
1325 'dimension Number of octets used to index a character in this charset.
1326 Either 1 or 2. Defaults to 1.
1327 'columns Number of columns used to display a character in this charset.
1328 Only used in TTY mode. (Under X, the actual width of a
1329 character can be derived from the font used to display the
1330 characters.) If unspecified, defaults to the dimension
1331 (this is almost always the correct value).
1332 'chars Number of characters in each dimension (94 or 96).
1333 Defaults to 94. Note that if the dimension is 2, the
1334 character set thus described is 94x94 or 96x96.
1335 'final Final byte of ISO 2022 escape sequence. Must be
1336 supplied. Each combination of (DIMENSION, CHARS) defines a
1337 separate namespace for final bytes. Note that ISO
1338 2022 restricts the final byte to the range
1339 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1340 dimension == 2. Note also that final bytes in the range
1341 0x30 - 0x3F are reserved for user-defined (not official)
1343 'graphic 0 (use left half of font on output) or 1 (use right half
1344 of font on output). Defaults to 0. For example, for
1345 a font whose registry is ISO8859-1, the left half
1346 (octets 0x20 - 0x7F) is the `ascii' character set, while
1347 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1348 character set. With 'graphic set to 0, the octets
1349 will have their high bit cleared; with it set to 1,
1350 the octets will have their high bit set.
1351 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1353 'ccl-program A compiled CCL program used to convert a character in
1354 this charset into an index into the font. This is in
1355 addition to the 'graphic property. The CCL program
1356 is passed the octets of the character, with the high
1357 bit cleared and set depending upon whether the value
1358 of the 'graphic property is 0 or 1.
1360 (name, doc_string, props))
1362 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1363 int direction = CHARSET_LEFT_TO_RIGHT;
1365 Lisp_Object registry = Qnil;
1366 Lisp_Object charset;
1367 Lisp_Object rest, keyword, value;
1368 Lisp_Object ccl_program = Qnil;
1369 Lisp_Object short_name = Qnil, long_name = Qnil;
1371 unsigned char byte_offset = 0;
1374 CHECK_SYMBOL (name);
1375 if (!NILP (doc_string))
1376 CHECK_STRING (doc_string);
1378 charset = Ffind_charset (name);
1379 if (!NILP (charset))
1380 signal_simple_error ("Cannot redefine existing charset", name);
1382 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1384 if (EQ (keyword, Qshort_name))
1386 CHECK_STRING (value);
1390 if (EQ (keyword, Qlong_name))
1392 CHECK_STRING (value);
1396 else if (EQ (keyword, Qdimension))
1399 dimension = XINT (value);
1400 if (dimension < 1 || dimension > 2)
1401 signal_simple_error ("Invalid value for 'dimension", value);
1404 else if (EQ (keyword, Qchars))
1407 chars = XINT (value);
1408 if (chars != 94 && chars != 96)
1409 signal_simple_error ("Invalid value for 'chars", value);
1412 else if (EQ (keyword, Qcolumns))
1415 columns = XINT (value);
1416 if (columns != 1 && columns != 2)
1417 signal_simple_error ("Invalid value for 'columns", value);
1420 else if (EQ (keyword, Qgraphic))
1423 graphic = XINT (value);
1425 if (graphic < 0 || graphic > 2)
1427 if (graphic < 0 || graphic > 1)
1429 signal_simple_error ("Invalid value for 'graphic", value);
1432 else if (EQ (keyword, Qregistry))
1434 CHECK_STRING (value);
1438 else if (EQ (keyword, Qdirection))
1440 if (EQ (value, Ql2r))
1441 direction = CHARSET_LEFT_TO_RIGHT;
1442 else if (EQ (value, Qr2l))
1443 direction = CHARSET_RIGHT_TO_LEFT;
1445 signal_simple_error ("Invalid value for 'direction", value);
1448 else if (EQ (keyword, Qfinal))
1450 CHECK_CHAR_COERCE_INT (value);
1451 final = XCHAR (value);
1452 if (final < '0' || final > '~')
1453 signal_simple_error ("Invalid value for 'final", value);
1456 else if (EQ (keyword, Qccl_program))
1458 CHECK_VECTOR (value);
1459 ccl_program = value;
1463 signal_simple_error ("Unrecognized property", keyword);
1467 error ("'final must be specified");
1468 if (dimension == 2 && final > 0x5F)
1470 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1474 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1476 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1478 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1479 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1481 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1483 id = get_unallocated_leading_byte (dimension);
1485 if (NILP (doc_string))
1486 doc_string = build_string ("");
1488 if (NILP (registry))
1489 registry = build_string ("");
1491 if (NILP (short_name))
1492 XSETSTRING (short_name, XSYMBOL (name)->name);
1494 if (NILP (long_name))
1495 long_name = doc_string;
1498 columns = dimension;
1499 charset = make_charset (id, name, type, columns, graphic,
1500 final, direction, short_name, long_name,
1501 doc_string, registry,
1502 Qnil, 0, 0, 0, byte_offset);
1503 if (!NILP (ccl_program))
1504 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1508 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1510 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1511 NEW-NAME is the name of the new charset. Return the new charset.
1513 (charset, new_name))
1515 Lisp_Object new_charset = Qnil;
1516 int id, dimension, columns, graphic, final;
1517 int direction, type;
1518 Lisp_Object registry, doc_string, short_name, long_name;
1519 struct Lisp_Charset *cs;
1521 charset = Fget_charset (charset);
1522 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1523 signal_simple_error ("Charset already has reverse-direction charset",
1526 CHECK_SYMBOL (new_name);
1527 if (!NILP (Ffind_charset (new_name)))
1528 signal_simple_error ("Cannot redefine existing charset", new_name);
1530 cs = XCHARSET (charset);
1532 type = CHARSET_TYPE (cs);
1533 columns = CHARSET_COLUMNS (cs);
1534 dimension = CHARSET_DIMENSION (cs);
1535 id = get_unallocated_leading_byte (dimension);
1537 graphic = CHARSET_GRAPHIC (cs);
1538 final = CHARSET_FINAL (cs);
1539 direction = CHARSET_RIGHT_TO_LEFT;
1540 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1541 direction = CHARSET_LEFT_TO_RIGHT;
1542 doc_string = CHARSET_DOC_STRING (cs);
1543 short_name = CHARSET_SHORT_NAME (cs);
1544 long_name = CHARSET_LONG_NAME (cs);
1545 registry = CHARSET_REGISTRY (cs);
1547 new_charset = make_charset (id, new_name, type, columns,
1548 graphic, final, direction, short_name, long_name,
1549 doc_string, registry,
1551 CHARSET_DECODING_TABLE(cs),
1552 CHARSET_UCS_MIN(cs),
1553 CHARSET_UCS_MAX(cs),
1554 CHARSET_CODE_OFFSET(cs),
1555 CHARSET_BYTE_OFFSET(cs)
1561 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1562 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1567 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1568 Define symbol ALIAS as an alias for CHARSET.
1572 CHECK_SYMBOL (alias);
1573 charset = Fget_charset (charset);
1574 return Fputhash (alias, charset, Vcharset_hash_table);
1577 /* #### Reverse direction charsets not yet implemented. */
1579 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1581 Return the reverse-direction charset parallel to CHARSET, if any.
1582 This is the charset with the same properties (in particular, the same
1583 dimension, number of characters per dimension, and final byte) as
1584 CHARSET but whose characters are displayed in the opposite direction.
1588 charset = Fget_charset (charset);
1589 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1593 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1594 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1595 If DIRECTION is omitted, both directions will be checked (left-to-right
1596 will be returned if character sets exist for both directions).
1598 (dimension, chars, final, direction))
1600 int dm, ch, fi, di = -1;
1602 Lisp_Object obj = Qnil;
1604 CHECK_INT (dimension);
1605 dm = XINT (dimension);
1606 if (dm < 1 || dm > 2)
1607 signal_simple_error ("Invalid value for DIMENSION", dimension);
1611 if (ch != 94 && ch != 96)
1612 signal_simple_error ("Invalid value for CHARS", chars);
1614 CHECK_CHAR_COERCE_INT (final);
1616 if (fi < '0' || fi > '~')
1617 signal_simple_error ("Invalid value for FINAL", final);
1619 if (EQ (direction, Ql2r))
1620 di = CHARSET_LEFT_TO_RIGHT;
1621 else if (EQ (direction, Qr2l))
1622 di = CHARSET_RIGHT_TO_LEFT;
1623 else if (!NILP (direction))
1624 signal_simple_error ("Invalid value for DIRECTION", direction);
1626 if (dm == 2 && fi > 0x5F)
1628 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1631 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1633 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1637 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1639 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1642 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1645 return XCHARSET_NAME (obj);
1649 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1650 Return short name of CHARSET.
1654 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1657 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1658 Return long name of CHARSET.
1662 return XCHARSET_LONG_NAME (Fget_charset (charset));
1665 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1666 Return description of CHARSET.
1670 return XCHARSET_DOC_STRING (Fget_charset (charset));
1673 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1674 Return dimension of CHARSET.
1678 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1681 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1682 Return property PROP of CHARSET.
1683 Recognized properties are those listed in `make-charset', as well as
1684 'name and 'doc-string.
1688 struct Lisp_Charset *cs;
1690 charset = Fget_charset (charset);
1691 cs = XCHARSET (charset);
1693 CHECK_SYMBOL (prop);
1694 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1695 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1696 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1697 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1698 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1699 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1700 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1701 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1702 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1703 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1704 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1705 if (EQ (prop, Qdirection))
1706 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1707 if (EQ (prop, Qreverse_direction_charset))
1709 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1713 return XCHARSET_NAME (obj);
1715 signal_simple_error ("Unrecognized charset property name", prop);
1716 return Qnil; /* not reached */
1719 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1720 Return charset identification number of CHARSET.
1724 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1727 /* #### We need to figure out which properties we really want to
1730 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1731 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1733 (charset, ccl_program))
1735 charset = Fget_charset (charset);
1736 CHECK_VECTOR (ccl_program);
1737 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1742 invalidate_charset_font_caches (Lisp_Object charset)
1744 /* Invalidate font cache entries for charset on all devices. */
1745 Lisp_Object devcons, concons, hash_table;
1746 DEVICE_LOOP_NO_BREAK (devcons, concons)
1748 struct device *d = XDEVICE (XCAR (devcons));
1749 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1750 if (!UNBOUNDP (hash_table))
1751 Fclrhash (hash_table);
1755 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1756 Set the 'registry property of CHARSET to REGISTRY.
1758 (charset, registry))
1760 charset = Fget_charset (charset);
1761 CHECK_STRING (registry);
1762 XCHARSET_REGISTRY (charset) = registry;
1763 invalidate_charset_font_caches (charset);
1764 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1769 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1770 Return mapping-table of CHARSET.
1774 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1777 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1778 Set mapping-table of CHARSET to TABLE.
1782 struct Lisp_Charset *cs;
1783 Lisp_Object old_table;
1786 charset = Fget_charset (charset);
1787 cs = XCHARSET (charset);
1789 if (EQ (table, Qnil))
1791 CHARSET_DECODING_TABLE(cs) = table;
1792 CHARSET_ENCODING_TABLE(cs) = Qnil;
1795 else if (VECTORP (table))
1797 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1798 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1799 old_table = CHARSET_ENCODING_TABLE(cs);
1800 CHARSET_DECODING_TABLE(cs) = table;
1803 signal_error (Qwrong_type_argument,
1804 list2 (build_translated_string ("vector-or-nil-p"),
1806 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1808 switch (CHARSET_DIMENSION (cs))
1811 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1812 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1814 Lisp_Object c = XVECTOR_DATA(table)[i];
1818 put_char_code_table (XCHAR (c),
1819 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1820 CHARSET_ENCODING_TABLE(cs));
1821 Fput_char_attribute (c, charset,
1823 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1828 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1829 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1831 Lisp_Object v = XVECTOR_DATA(table)[i];
1837 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1839 CHARSET_DECODING_TABLE(cs) = old_table;
1840 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1842 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1844 Lisp_Object c = XVECTOR_DATA(v)[j];
1850 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
1851 | (j + CHARSET_BYTE_OFFSET (cs))),
1852 CHARSET_ENCODING_TABLE(cs));
1853 Fput_char_attribute (c, charset,
1856 (i + CHARSET_BYTE_OFFSET (cs)),
1858 (j + CHARSET_BYTE_OFFSET (cs))));
1864 put_char_code_table (XCHAR (v),
1865 make_int (i + CHARSET_BYTE_OFFSET (cs)),
1866 CHARSET_ENCODING_TABLE(cs));
1867 Fput_char_attribute (v, charset,
1869 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1879 /************************************************************************/
1880 /* Lisp primitives for working with characters */
1881 /************************************************************************/
1883 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1884 Make a character from CHARSET and octets ARG1 and ARG2.
1885 ARG2 is required only for characters from two-dimensional charsets.
1886 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1887 character s with caron.
1889 (charset, arg1, arg2))
1891 struct Lisp_Charset *cs;
1893 int lowlim, highlim;
1895 charset = Fget_charset (charset);
1896 cs = XCHARSET (charset);
1898 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1899 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1901 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1903 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1904 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1907 /* It is useful (and safe, according to Olivier Galibert) to strip
1908 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1909 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1910 Latin 2 code of the character. */
1918 if (a1 < lowlim || a1 > highlim)
1919 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1921 if (CHARSET_DIMENSION (cs) == 1)
1925 ("Charset is of dimension one; second octet must be nil", arg2);
1926 return make_char (MAKE_CHAR (charset, a1, 0));
1935 a2 = XINT (arg2) & 0x7f;
1937 if (a2 < lowlim || a2 > highlim)
1938 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1940 return make_char (MAKE_CHAR (charset, a1, a2));
1943 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1944 Return the character set of char CH.
1948 CHECK_CHAR_COERCE_INT (ch);
1950 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1953 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1954 Return list of charset and one or two position-codes of CHAR.
1958 /* This function can GC */
1959 struct gcpro gcpro1, gcpro2;
1960 Lisp_Object charset = Qnil;
1961 Lisp_Object rc = Qnil;
1964 GCPRO2 (charset, rc);
1965 CHECK_CHAR_COERCE_INT (character);
1967 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1969 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1971 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1975 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1983 #ifdef ENABLE_COMPOSITE_CHARS
1984 /************************************************************************/
1985 /* composite character functions */
1986 /************************************************************************/
1989 lookup_composite_char (Bufbyte *str, int len)
1991 Lisp_Object lispstr = make_string (str, len);
1992 Lisp_Object ch = Fgethash (lispstr,
1993 Vcomposite_char_string2char_hash_table,
1999 if (composite_char_row_next >= 128)
2000 signal_simple_error ("No more composite chars available", lispstr);
2001 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2002 composite_char_col_next);
2003 Fputhash (make_char (emch), lispstr,
2004 Vcomposite_char_char2string_hash_table);
2005 Fputhash (lispstr, make_char (emch),
2006 Vcomposite_char_string2char_hash_table);
2007 composite_char_col_next++;
2008 if (composite_char_col_next >= 128)
2010 composite_char_col_next = 32;
2011 composite_char_row_next++;
2020 composite_char_string (Emchar ch)
2022 Lisp_Object str = Fgethash (make_char (ch),
2023 Vcomposite_char_char2string_hash_table,
2025 assert (!UNBOUNDP (str));
2029 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2030 Convert a string into a single composite character.
2031 The character is the result of overstriking all the characters in
2036 CHECK_STRING (string);
2037 return make_char (lookup_composite_char (XSTRING_DATA (string),
2038 XSTRING_LENGTH (string)));
2041 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2042 Return a string of the characters comprising a composite character.
2050 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2051 signal_simple_error ("Must be composite char", ch);
2052 return composite_char_string (emch);
2054 #endif /* ENABLE_COMPOSITE_CHARS */
2057 /************************************************************************/
2058 /* initialization */
2059 /************************************************************************/
2062 syms_of_mule_charset (void)
2064 DEFSUBR (Fcharsetp);
2065 DEFSUBR (Ffind_charset);
2066 DEFSUBR (Fget_charset);
2067 DEFSUBR (Fcharset_list);
2068 DEFSUBR (Fcharset_name);
2069 DEFSUBR (Fmake_charset);
2070 DEFSUBR (Fmake_reverse_direction_charset);
2071 /* DEFSUBR (Freverse_direction_charset); */
2072 DEFSUBR (Fdefine_charset_alias);
2073 DEFSUBR (Fcharset_from_attributes);
2074 DEFSUBR (Fcharset_short_name);
2075 DEFSUBR (Fcharset_long_name);
2076 DEFSUBR (Fcharset_description);
2077 DEFSUBR (Fcharset_dimension);
2078 DEFSUBR (Fcharset_property);
2079 DEFSUBR (Fcharset_id);
2080 DEFSUBR (Fset_charset_ccl_program);
2081 DEFSUBR (Fset_charset_registry);
2083 DEFSUBR (Fchar_attribute_alist);
2084 DEFSUBR (Fget_char_attribute);
2085 DEFSUBR (Fput_char_attribute);
2086 DEFSUBR (Fcharset_mapping_table);
2087 DEFSUBR (Fset_charset_mapping_table);
2090 DEFSUBR (Fmake_char);
2091 DEFSUBR (Fchar_charset);
2092 DEFSUBR (Fsplit_char);
2094 #ifdef ENABLE_COMPOSITE_CHARS
2095 DEFSUBR (Fmake_composite_char);
2096 DEFSUBR (Fcomposite_char_string);
2099 defsymbol (&Qcharsetp, "charsetp");
2100 defsymbol (&Qregistry, "registry");
2101 defsymbol (&Qfinal, "final");
2102 defsymbol (&Qgraphic, "graphic");
2103 defsymbol (&Qdirection, "direction");
2104 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2105 defsymbol (&Qshort_name, "short-name");
2106 defsymbol (&Qlong_name, "long-name");
2108 defsymbol (&Ql2r, "l2r");
2109 defsymbol (&Qr2l, "r2l");
2111 /* Charsets, compatible with FSF 20.3
2112 Naming convention is Script-Charset[-Edition] */
2113 defsymbol (&Qascii, "ascii");
2114 defsymbol (&Qcontrol_1, "control-1");
2115 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2116 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2117 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2118 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2119 defsymbol (&Qthai_tis620, "thai-tis620");
2120 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2121 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2122 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2123 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2124 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2125 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2126 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2127 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2128 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2129 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2130 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2131 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2132 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2133 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2135 defsymbol (&Qucs_bmp, "ucs-bmp");
2136 defsymbol (&Qlatin_viscii, "latin-viscii");
2137 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2138 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2139 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2140 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2141 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2142 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2144 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2145 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2147 defsymbol (&Qcomposite, "composite");
2151 vars_of_mule_charset (void)
2158 /* Table of charsets indexed by leading byte. */
2159 for (i = 0; i < countof (charset_by_leading_byte); i++)
2160 charset_by_leading_byte[i] = Qnil;
2163 /* Table of charsets indexed by type/final-byte. */
2164 for (i = 0; i < countof (charset_by_attributes); i++)
2165 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2166 charset_by_attributes[i][j] = Qnil;
2168 /* Table of charsets indexed by type/final-byte/direction. */
2169 for (i = 0; i < countof (charset_by_attributes); i++)
2170 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2171 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2172 charset_by_attributes[i][j][k] = Qnil;
2176 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2178 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2179 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2183 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2184 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2185 Leading-code of private TYPE9N charset of column-width 1.
2187 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2191 Vutf_2000_version = build_string("0.10 (Yao)");
2192 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2193 Version number of UTF-2000.
2196 staticpro (&Vcharacter_attribute_table);
2197 Vcharacter_attribute_table = make_char_code_table (Qnil);
2199 Vdefault_coded_charset_priority_list = Qnil;
2200 DEFVAR_LISP ("default-coded-charset-priority-list",
2201 &Vdefault_coded_charset_priority_list /*
2202 Default order of preferred coded-character-sets.
2208 complex_vars_of_mule_charset (void)
2210 staticpro (&Vcharset_hash_table);
2211 Vcharset_hash_table =
2212 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2214 /* Predefined character sets. We store them into variables for
2219 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2220 CHARSET_TYPE_256X256, 1, 2, 0,
2221 CHARSET_LEFT_TO_RIGHT,
2222 build_string ("BMP"),
2223 build_string ("BMP"),
2224 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2225 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2226 Qnil, 0, 0xFFFF, 0, 0);
2228 # define MIN_CHAR_THAI 0
2229 # define MAX_CHAR_THAI 0
2230 # define MIN_CHAR_GREEK 0
2231 # define MAX_CHAR_GREEK 0
2232 # define MIN_CHAR_HEBREW 0
2233 # define MAX_CHAR_HEBREW 0
2234 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2235 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2236 # define MIN_CHAR_CYRILLIC 0
2237 # define MAX_CHAR_CYRILLIC 0
2240 make_charset (LEADING_BYTE_ASCII, Qascii,
2241 CHARSET_TYPE_94, 1, 0, 'B',
2242 CHARSET_LEFT_TO_RIGHT,
2243 build_string ("ASCII"),
2244 build_string ("ASCII)"),
2245 build_string ("ASCII (ISO646 IRV)"),
2246 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2247 Qnil, 0, 0x7F, 0, 0);
2248 Vcharset_control_1 =
2249 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2250 CHARSET_TYPE_94, 1, 1, 0,
2251 CHARSET_LEFT_TO_RIGHT,
2252 build_string ("C1"),
2253 build_string ("Control characters"),
2254 build_string ("Control characters 128-191"),
2256 Qnil, 0x80, 0x9F, 0, 0);
2257 Vcharset_latin_iso8859_1 =
2258 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2259 CHARSET_TYPE_96, 1, 1, 'A',
2260 CHARSET_LEFT_TO_RIGHT,
2261 build_string ("Latin-1"),
2262 build_string ("ISO8859-1 (Latin-1)"),
2263 build_string ("ISO8859-1 (Latin-1)"),
2264 build_string ("iso8859-1"),
2265 Qnil, 0xA0, 0xFF, 0, 32);
2266 Vcharset_latin_iso8859_2 =
2267 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2268 CHARSET_TYPE_96, 1, 1, 'B',
2269 CHARSET_LEFT_TO_RIGHT,
2270 build_string ("Latin-2"),
2271 build_string ("ISO8859-2 (Latin-2)"),
2272 build_string ("ISO8859-2 (Latin-2)"),
2273 build_string ("iso8859-2"),
2275 Vcharset_latin_iso8859_3 =
2276 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2277 CHARSET_TYPE_96, 1, 1, 'C',
2278 CHARSET_LEFT_TO_RIGHT,
2279 build_string ("Latin-3"),
2280 build_string ("ISO8859-3 (Latin-3)"),
2281 build_string ("ISO8859-3 (Latin-3)"),
2282 build_string ("iso8859-3"),
2284 Vcharset_latin_iso8859_4 =
2285 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2286 CHARSET_TYPE_96, 1, 1, 'D',
2287 CHARSET_LEFT_TO_RIGHT,
2288 build_string ("Latin-4"),
2289 build_string ("ISO8859-4 (Latin-4)"),
2290 build_string ("ISO8859-4 (Latin-4)"),
2291 build_string ("iso8859-4"),
2293 Vcharset_thai_tis620 =
2294 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2295 CHARSET_TYPE_96, 1, 1, 'T',
2296 CHARSET_LEFT_TO_RIGHT,
2297 build_string ("TIS620"),
2298 build_string ("TIS620 (Thai)"),
2299 build_string ("TIS620.2529 (Thai)"),
2300 build_string ("tis620"),
2301 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2302 Vcharset_greek_iso8859_7 =
2303 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2304 CHARSET_TYPE_96, 1, 1, 'F',
2305 CHARSET_LEFT_TO_RIGHT,
2306 build_string ("ISO8859-7"),
2307 build_string ("ISO8859-7 (Greek)"),
2308 build_string ("ISO8859-7 (Greek)"),
2309 build_string ("iso8859-7"),
2310 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2311 Vcharset_arabic_iso8859_6 =
2312 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2313 CHARSET_TYPE_96, 1, 1, 'G',
2314 CHARSET_RIGHT_TO_LEFT,
2315 build_string ("ISO8859-6"),
2316 build_string ("ISO8859-6 (Arabic)"),
2317 build_string ("ISO8859-6 (Arabic)"),
2318 build_string ("iso8859-6"),
2320 Vcharset_hebrew_iso8859_8 =
2321 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2322 CHARSET_TYPE_96, 1, 1, 'H',
2323 CHARSET_RIGHT_TO_LEFT,
2324 build_string ("ISO8859-8"),
2325 build_string ("ISO8859-8 (Hebrew)"),
2326 build_string ("ISO8859-8 (Hebrew)"),
2327 build_string ("iso8859-8"),
2328 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2329 Vcharset_katakana_jisx0201 =
2330 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2331 CHARSET_TYPE_94, 1, 1, 'I',
2332 CHARSET_LEFT_TO_RIGHT,
2333 build_string ("JISX0201 Kana"),
2334 build_string ("JISX0201.1976 (Japanese Kana)"),
2335 build_string ("JISX0201.1976 Japanese Kana"),
2336 build_string ("jisx0201\\.1976"),
2338 MIN_CHAR_HALFWIDTH_KATAKANA,
2339 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2340 Vcharset_latin_jisx0201 =
2341 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2342 CHARSET_TYPE_94, 1, 0, 'J',
2343 CHARSET_LEFT_TO_RIGHT,
2344 build_string ("JISX0201 Roman"),
2345 build_string ("JISX0201.1976 (Japanese Roman)"),
2346 build_string ("JISX0201.1976 Japanese Roman"),
2347 build_string ("jisx0201\\.1976"),
2349 Vcharset_cyrillic_iso8859_5 =
2350 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2351 CHARSET_TYPE_96, 1, 1, 'L',
2352 CHARSET_LEFT_TO_RIGHT,
2353 build_string ("ISO8859-5"),
2354 build_string ("ISO8859-5 (Cyrillic)"),
2355 build_string ("ISO8859-5 (Cyrillic)"),
2356 build_string ("iso8859-5"),
2357 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2358 Vcharset_latin_iso8859_9 =
2359 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2360 CHARSET_TYPE_96, 1, 1, 'M',
2361 CHARSET_LEFT_TO_RIGHT,
2362 build_string ("Latin-5"),
2363 build_string ("ISO8859-9 (Latin-5)"),
2364 build_string ("ISO8859-9 (Latin-5)"),
2365 build_string ("iso8859-9"),
2367 Vcharset_japanese_jisx0208_1978 =
2368 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2369 CHARSET_TYPE_94X94, 2, 0, '@',
2370 CHARSET_LEFT_TO_RIGHT,
2371 build_string ("JIS X0208:1978"),
2372 build_string ("JIS X0208:1978 (Japanese)"),
2374 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2375 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2377 Vcharset_chinese_gb2312 =
2378 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2379 CHARSET_TYPE_94X94, 2, 0, 'A',
2380 CHARSET_LEFT_TO_RIGHT,
2381 build_string ("GB2312"),
2382 build_string ("GB2312)"),
2383 build_string ("GB2312 Chinese simplified"),
2384 build_string ("gb2312"),
2386 Vcharset_japanese_jisx0208 =
2387 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2388 CHARSET_TYPE_94X94, 2, 0, 'B',
2389 CHARSET_LEFT_TO_RIGHT,
2390 build_string ("JISX0208"),
2391 build_string ("JIS X0208:1983 (Japanese)"),
2392 build_string ("JIS X0208:1983 Japanese Kanji"),
2393 build_string ("jisx0208\\.1983"),
2395 Vcharset_korean_ksc5601 =
2396 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2397 CHARSET_TYPE_94X94, 2, 0, 'C',
2398 CHARSET_LEFT_TO_RIGHT,
2399 build_string ("KSC5601"),
2400 build_string ("KSC5601 (Korean"),
2401 build_string ("KSC5601 Korean Hangul and Hanja"),
2402 build_string ("ksc5601"),
2404 Vcharset_japanese_jisx0212 =
2405 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2406 CHARSET_TYPE_94X94, 2, 0, 'D',
2407 CHARSET_LEFT_TO_RIGHT,
2408 build_string ("JISX0212"),
2409 build_string ("JISX0212 (Japanese)"),
2410 build_string ("JISX0212 Japanese Supplement"),
2411 build_string ("jisx0212"),
2414 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2415 Vcharset_chinese_cns11643_1 =
2416 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2417 CHARSET_TYPE_94X94, 2, 0, 'G',
2418 CHARSET_LEFT_TO_RIGHT,
2419 build_string ("CNS11643-1"),
2420 build_string ("CNS11643-1 (Chinese traditional)"),
2422 ("CNS 11643 Plane 1 Chinese traditional"),
2423 build_string (CHINESE_CNS_PLANE_RE("1")),
2425 Vcharset_chinese_cns11643_2 =
2426 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2427 CHARSET_TYPE_94X94, 2, 0, 'H',
2428 CHARSET_LEFT_TO_RIGHT,
2429 build_string ("CNS11643-2"),
2430 build_string ("CNS11643-2 (Chinese traditional)"),
2432 ("CNS 11643 Plane 2 Chinese traditional"),
2433 build_string (CHINESE_CNS_PLANE_RE("2")),
2436 Vcharset_latin_viscii_lower =
2437 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2438 CHARSET_TYPE_96, 1, 1, '1',
2439 CHARSET_LEFT_TO_RIGHT,
2440 build_string ("VISCII lower"),
2441 build_string ("VISCII lower (Vietnamese)"),
2442 build_string ("VISCII lower (Vietnamese)"),
2443 build_string ("MULEVISCII-LOWER"),
2445 Vcharset_latin_viscii_upper =
2446 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2447 CHARSET_TYPE_96, 1, 1, '2',
2448 CHARSET_LEFT_TO_RIGHT,
2449 build_string ("VISCII upper"),
2450 build_string ("VISCII upper (Vietnamese)"),
2451 build_string ("VISCII upper (Vietnamese)"),
2452 build_string ("MULEVISCII-UPPER"),
2454 Vcharset_latin_viscii =
2455 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2456 CHARSET_TYPE_256, 1, 2, 0,
2457 CHARSET_LEFT_TO_RIGHT,
2458 build_string ("VISCII"),
2459 build_string ("VISCII 1.1 (Vietnamese)"),
2460 build_string ("VISCII 1.1 (Vietnamese)"),
2461 build_string ("VISCII1\\.1"),
2463 Vcharset_hiragana_jisx0208 =
2464 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2465 CHARSET_TYPE_94X94, 2, 0, 'B',
2466 CHARSET_LEFT_TO_RIGHT,
2467 build_string ("Hiragana"),
2468 build_string ("Hiragana of JIS X0208"),
2469 build_string ("Japanese Hiragana of JIS X0208"),
2470 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2471 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2472 (0x24 - 33) * 94 + (0x21 - 33), 33);
2473 Vcharset_katakana_jisx0208 =
2474 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2475 CHARSET_TYPE_94X94, 2, 0, 'B',
2476 CHARSET_LEFT_TO_RIGHT,
2477 build_string ("Katakana"),
2478 build_string ("Katakana of JIS X0208"),
2479 build_string ("Japanese Katakana of JIS X0208"),
2480 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2481 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2482 (0x25 - 33) * 94 + (0x21 - 33), 33);
2484 Vcharset_chinese_big5_1 =
2485 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2486 CHARSET_TYPE_94X94, 2, 0, '0',
2487 CHARSET_LEFT_TO_RIGHT,
2488 build_string ("Big5"),
2489 build_string ("Big5 (Level-1)"),
2491 ("Big5 Level-1 Chinese traditional"),
2492 build_string ("big5"),
2494 Vcharset_chinese_big5_2 =
2495 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2496 CHARSET_TYPE_94X94, 2, 0, '1',
2497 CHARSET_LEFT_TO_RIGHT,
2498 build_string ("Big5"),
2499 build_string ("Big5 (Level-2)"),
2501 ("Big5 Level-2 Chinese traditional"),
2502 build_string ("big5"),
2505 #ifdef ENABLE_COMPOSITE_CHARS
2506 /* #### For simplicity, we put composite chars into a 96x96 charset.
2507 This is going to lead to problems because you can run out of
2508 room, esp. as we don't yet recycle numbers. */
2509 Vcharset_composite =
2510 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2511 CHARSET_TYPE_96X96, 2, 0, 0,
2512 CHARSET_LEFT_TO_RIGHT,
2513 build_string ("Composite"),
2514 build_string ("Composite characters"),
2515 build_string ("Composite characters"),
2518 composite_char_row_next = 32;
2519 composite_char_col_next = 32;
2521 Vcomposite_char_string2char_hash_table =
2522 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2523 Vcomposite_char_char2string_hash_table =
2524 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2525 staticpro (&Vcomposite_char_string2char_hash_table);
2526 staticpro (&Vcomposite_char_char2string_hash_table);
2527 #endif /* ENABLE_COMPOSITE_CHARS */