1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* One global Lisp_Object per predefined charset.  Only the declarations are
   visible in this listing; the code that initialises them is not shown.
   NOTE(review): the listing numbers skip 60 and 67, so the declarations from
   Vcharset_ucs_bmp to Vcharset_katakana_jisx0208 may sit inside a
   preprocessor conditional whose lines were dropped -- confirm. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
/* Charset object for composite characters; declared only when
   ENABLE_COMPOSITE_CHARS is defined. */
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the other way. */
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
/* NOTE(review): presumably the next free row/column used when allocating a
   new composite char -- no use of these counters is visible here; confirm. */
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): the two declarations below conflict and cannot both be
   compiled.  The listing numbers skip a line before, between and after
   them, so they are presumably alternatives under a preprocessor
   conditional whose lines were dropped.  The first form has no direction
   index.  make_charset() contains matching [type][final] and
   [type][final][direction] stores. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* NOTE(review): the array is declared with 0xA0 (160) entries, but this
   listing shows 176 initialisers: two different rows follow the
   "0x80 - 0x8f" heading.  They are presumably alternatives selected by a
   preprocessor conditional whose lines are missing; the closing "};" is
   missing as well.  The table is not constant: make_charset() stores into
   rep_bytes_by_first_byte[id]. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* First alternative: the entry for 0x8f is 3 instead of 2. */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
/* Second alternative: all sixteen entries are 2. */
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method registered for the char_byte_table lrecord type: passes each
   of the 256 property slots of OBJ to MARKOBJ.
   NOTE(review): the return type, braces, the declaration of `i' and any
   return statement are missing from this listing. */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method for char-byte-tables.  Walks all 256 slots of OBJ1 and
   OBJ2; where both slots hold nested char-byte-tables it recurses with
   DEPTH + 1, and the last visible test compares slots with internal_equal().
   NOTE(review): braces, else branches and all return statements are missing
   from this listing, so the value returned on a mismatch, and the handling
   of a slot that is a table on one side only, cannot be confirmed here. */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for char-byte-tables: hashes the 256-element property array
   with internal_array_hash() at the given DEPTH.
   NOTE(review): return type and braces are missing from this listing. */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* Layout description for struct Lisp_Char_Byte_Table: 256 Lisp_Object slots
   starting at the `property' member.
   NOTE(review): any terminating entry and the closing "};" are missing from
   this listing. */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Defines the lrecord implementation named "char-code-table", wiring in the
   mark, equal and hash functions and the description above; printing uses
   internal_object_printer. */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table and set all 256 property slots to INITVAL.
   The new record is wrapped into `obj' with XSETCHAR_BYTE_TABLE.
   NOTE(review): the return type, braces, the declarations of `obj' and `i'
   and the return statement are missing from this listing. */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a freshly allocated table.  Slots that
   hold nested char-byte-tables are copied recursively; every other slot value
   is stored as-is, so the copy shares those objects with the original.
   NOTE(review): the return type, braces, the `else' line, the declarations of
   `obj' and `i' and the return statement are missing from this listing. */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table is created exactly like a char-byte-table. */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up CH in TABLE.  TABLE is the root of a tree of char-byte-tables that
   is indexed by the four bytes of CH, most significant first; the last level
   is indexed by the low byte and its slot is returned.
   NOTE(review): the lines between the levels (presumably `else return ret;')
   are missing from this listing, so what is returned when a slot is not a
   nested table cannot be confirmed here.
   NOTE(review): the first index, ch >> 24, is not cast to unsigned char the
   way the other three are; this assumes CH is non-negative -- confirm. */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for CH in TABLE.  Descends the table tree by the bytes of CH,
   most significant first.  When a level holds a nested table the descent
   continues; when it holds a plain value that is not EQ to VALUE, new
   sub-tables initialised with that old value are created down to the last
   level, so only the slot for CH changes.  No store is visible for the case
   where the existing value is already EQ to VALUE.
   NOTE(review): the read at the first level uses ch >> 24 without the
   unsigned char cast that the matching store at the end applies.
   NOTE(review): braces are missing from this listing, as is the right-hand
   side of the assignment that starts on listing line 292. */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
/* Third level holds a plain value: add one new last-level table. */
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* Second level holds a plain value: add two new levels. */
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* Root level holds a plain value: add three new levels. */
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
/* Char-code-table read and written by the char-attribute primitives through
   get_char_code_table() and put_char_code_table(). */
311 Lisp_Object Vcharacter_attribute_table;
/* Returns whatever Vcharacter_attribute_table stores for CHARACTER.
   NOTE(review): the end of the doc comment, the argument list and any
   argument check are missing from this listing. */
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Looks up CHARACTER's entry in Vcharacter_attribute_table and returns the
   cdr of the Fassq match for ATTRIBUTE.
   NOTE(review): the declaration that the lookup initialises and the lines
   between the lookup and the return are missing from this listing. */
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
332 return Fcdr (Fassq (attribute, ret));
/* Fetches CHARACTER's alist from Vcharacter_attribute_table and looks for an
   existing ATTRIBUTE cell with Fassq.  Two updates are visible: consing a new
   (ATTRIBUTE . VALUE) pair onto the front of the alist, and overwriting the
   cdr of the existing cell with Fsetcdr.  The alist is then stored back.
   NOTE(review): the condition that chooses between the two updates, the
   braces and the return statement are missing from this listing. */
335 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
336 Store CHARACTER's ATTRIBUTE with VALUE.
338 (character, attribute, value))
340 Emchar char_code = XCHAR (character);
342 = get_char_code_table (char_code, Vcharacter_attribute_table);
343 Lisp_Object cell = Fassq (attribute, ret);
346 ret = Fcons (Fcons (attribute, value), ret);
348 Fsetcdr (cell, value);
349 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* File-scope variables and symbols used by the charset code.
   NOTE(review): most of this region is missing from the listing.  Only three
   names survive from what looks like a longer comma-separated symbol list
   (listing lines 384, 396, 397), and the preprocessor conditionals are gone. */
354 Lisp_Object Vutf_2000_version;
358 int leading_code_private_11;
361 Lisp_Object Qcharsetp;
363 /* Qdoc_string, Qdimension, Qchars defined in general.c */
364 Lisp_Object Qregistry, Qfinal, Qgraphic;
365 Lisp_Object Qdirection;
366 Lisp_Object Qreverse_direction_charset;
367 Lisp_Object Qleading_byte;
368 Lisp_Object Qshort_name, Qlong_name;
384 Qjapanese_jisx0208_1978,
396 Qvietnamese_viscii_lower,
397 Qvietnamese_viscii_upper,
405 Lisp_Object Ql2r, Qr2l;
/* Hash table filled by make_charset() with charset name as key and charset
   object as value. */
407 Lisp_Object Vcharset_hash_table;
/* Next leading byte to hand out; consumed by get_unallocated_leading_byte().
   NOTE(review): the single counter and the 1-byte/2-byte pair are presumably
   alternatives under a dropped preprocessor conditional -- confirm. */
410 static Charset_ID next_allocated_leading_byte;
412 static Charset_ID next_allocated_1_byte_leading_byte;
413 static Charset_ID next_allocated_2_byte_leading_byte;
416 /* Composite characters are characters constructed by overstriking two
417 or more regular characters.
419 1) The old Mule implementation involves storing composite characters
420 in a buffer as a tag followed by all of the actual characters
421 used to make up the composite character. I think this is a bad
422 idea; it greatly complicates code that wants to handle strings
423 one character at a time because it has to deal with the possibility
424 of great big ungainly characters. It's much more reasonable to
425 simply store an index into a table of composite characters.
427 2) The current implementation only allows for 16,384 separate
428 composite characters over the lifetime of the XEmacs process.
429 This could become a potential problem if the user
430 edited lots of different files that use composite characters.
431 Due to FSF bogosity, increasing the number of allowable
432 composite characters under Mule would decrease the number
433 of possible faces that can exist. Mule already has shrunk
434 this to 2048, and further shrinkage would become uncomfortable.
435 No such problems exist in XEmacs.
437 Composite characters could be represented as 0x80 C1 C2 C3,
438 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
439 for slightly under 2^20 (one million) composite characters
440 over the XEmacs process lifetime, and you only need to
441 increase the size of a Mule character from 19 to 21 bits.
442 Or you could use 0x80 C1 C2 C3 C4, allowing for about
443 85 million (slightly over 2^26) composite characters. */
446 /************************************************************************/
447 /* Basic Emchar functions */
448 /************************************************************************/
450 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
451 string in STR. Returns the number of bytes stored.
452 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* NOTE(review): this listing interleaves two encoders, presumably the two
   arms of a preprocessor conditional whose lines were dropped.  The return
   type, braces, local declarations, the first `if' of the chain and the
   return statement are missing as well. */
456 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Encoder 1: a lead byte carrying a length prefix (0xc0, 0xe0, 0xf0, 0xf8,
   0xfc) followed by continuation bytes of the form 0x80 | six payload bits.
   Values up to 0x7ff take 2 bytes, up to 0xffff 3, up to 0x1fffff 4, up to
   0x3ffffff 5, and anything larger 6. */
471 else if ( c <= 0x7ff )
473 *p++ = (c >> 6) | 0xc0;
474 *p++ = (c & 0x3f) | 0x80;
476 else if ( c <= 0xffff )
478 *p++ = (c >> 12) | 0xe0;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
482 else if ( c <= 0x1fffff )
484 *p++ = (c >> 18) | 0xf0;
485 *p++ = ((c >> 12) & 0x3f) | 0x80;
486 *p++ = ((c >> 6) & 0x3f) | 0x80;
487 *p++ = (c & 0x3f) | 0x80;
489 else if ( c <= 0x3ffffff )
491 *p++ = (c >> 24) | 0xf8;
492 *p++ = ((c >> 18) & 0x3f) | 0x80;
493 *p++ = ((c >> 12) & 0x3f) | 0x80;
494 *p++ = ((c >> 6) & 0x3f) | 0x80;
495 *p++ = (c & 0x3f) | 0x80;
499 *p++ = (c >> 30) | 0xfc;
500 *p++ = ((c >> 24) & 0x3f) | 0x80;
501 *p++ = ((c >> 18) & 0x3f) | 0x80;
502 *p++ = ((c >> 12) & 0x3f) | 0x80;
503 *p++ = ((c >> 6) & 0x3f) | 0x80;
504 *p++ = (c & 0x3f) | 0x80;
/* Encoder 2: split C into charset and position codes, then emit a prefix
   byte first when the leading byte is a private one.  The stores of the
   leading byte and position codes are not visible here. */
507 BREAKUP_CHAR (c, charset, c1, c2);
508 lb = CHAR_LEADING_BYTE (c);
509 if (LEADING_BYTE_PRIVATE_P (lb))
510 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
512 if (EQ (charset, Vcharset_control_1))
521 /* Return the first character from a Mule-encoded string in STR,
522 assuming it's non-ASCII. Do not call this directly.
523 Use the macro charptr_emchar() instead. */
/* NOTE(review): two decoders are interleaved here, presumably the arms of a
   dropped preprocessor conditional.  The return type, braces, declarations
   and the bodies of the lead-byte tests are missing from this listing. */
526 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* Decoder 1: classify the lead byte `b' by its high bits, then fold `len'
   further bytes into `ch', six payload bits each. */
539 else if ( b >= 0xf8 )
544 else if ( b >= 0xf0 )
549 else if ( b >= 0xe0 )
554 else if ( b >= 0xc0 )
564 for( ; len > 0; len-- )
567 ch = ( ch << 6 ) | ( b & 0x3f );
/* Decoder 2: leading-byte scheme.  A Control-1 character is the next byte
   minus 0x20; otherwise the charset is found from the leading byte and the
   character is built from one or two position bytes with MAKE_CHAR. */
571 Bufbyte i0 = *str, i1, i2 = 0;
574 if (i0 == LEADING_BYTE_CONTROL_1)
575 return (Emchar) (*++str - 0x20);
577 if (LEADING_BYTE_PREFIX_P (i0))
582 charset = CHARSET_BY_LEADING_BYTE (i0);
583 if (XCHARSET_DIMENSION (charset) == 2)
586 return MAKE_CHAR (charset, i1, i2);
590 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
591 Do not call this directly. Use the macro valid_char_p() instead. */
/* Splits CH into three fields and range-checks them against the official and
   private field ranges.  A field value of 0x20 or 0x7F is accepted only when
   the charset found for the leading byte has 96 chars.
   NOTE(review): the return type, braces, declarations, the tests that pick
   the one- or two-dimensional branch, and most return statements are missing
   from this listing.  The "NOTE:" text inside the first branch has lost its
   comment delimiters. */
595 non_ascii_valid_char_p (Emchar ch)
599 /* Must have only lowest 19 bits set */
603 f1 = CHAR_FIELD1 (ch);
604 f2 = CHAR_FIELD2 (ch);
605 f3 = CHAR_FIELD3 (ch);
/* First branch: f2 is range-checked and converted to a leading byte. */
611 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
612 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
613 f2 > MAX_CHAR_FIELD2_PRIVATE)
618 if (f3 != 0x20 && f3 != 0x7F)
622 NOTE: This takes advantage of the fact that
623 FIELD2_TO_OFFICIAL_LEADING_BYTE and
624 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
626 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
627 return (XCHARSET_CHARS (charset) == 96);
/* Second branch: f1 is range-checked and converted to a leading byte; f2 and
   f3 are both checked. */
633 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
634 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
635 f1 > MAX_CHAR_FIELD1_PRIVATE)
637 if (f2 < 0x20 || f3 < 0x20)
640 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite chars are checked against the char-to-string hash table. */
641 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
643 if (UNBOUNDP (Fgethash (make_int (ch),
644 Vcomposite_char_char2string_hash_table,
649 #endif /* ENABLE_COMPOSITE_CHARS */
651 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
654 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
656 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
659 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
661 return (XCHARSET_CHARS (charset) == 96);
667 /************************************************************************/
668 /* Basic string functions */
669 /************************************************************************/
671 /* Copy the character pointed to by PTR into STR, assuming it's
672 non-ASCII. Do not call this directly. Use the macro
673 charptr_copy_char() instead. */
/* Returns the number of bytes in the copied character (strptr + 1 - str).
   NOTE(review): the switch inspects *strptr, a byte of the destination.
   Listing line 679 is missing and presumably copies the lead byte from PTR
   first; if it does not, the switch reads whatever STR held -- confirm.
   NOTE(review): the return type, braces, `default' handling and possibly
   preprocessor guards around cases 6 and 5 are missing from this listing. */
676 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
678 Bufbyte *strptr = str;
680 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
682 /* Notice fallthrough. */
684 case 6: *++strptr = *ptr++;
685 case 5: *++strptr = *ptr++;
687 case 4: *++strptr = *ptr++;
688 case 3: *++strptr = *ptr++;
689 case 2: *++strptr = *ptr;
694 return strptr + 1 - str;
698 /************************************************************************/
699 /* streams of Emchars */
700 /************************************************************************/
702 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
703 The functions below are not meant to be called directly; use
704 the macros in insdel.h. */
/* CH is the first byte, already read from STREAM.  The remaining bytes of
   the character, as counted by REP_BYTES_BY_FIRST_BYTE (ch), are read with
   Lstream_getc into a local buffer, which is then decoded with
   charptr_emchar().
   NOTE(review): the return type, braces, `case' labels and the lines between
   each Lstream_getc and its store are missing from this listing, so how a
   short read is handled cannot be confirmed here. */
707 Lstream_get_emchar_1 (Lstream *stream, int ch)
709 Bufbyte str[MAX_EMCHAR_LEN];
710 Bufbyte *strptr = str;
712 str[0] = (Bufbyte) ch;
713 switch (REP_BYTES_BY_FIRST_BYTE (ch))
715 /* Notice fallthrough. */
718 ch = Lstream_getc (stream);
720 *++strptr = (Bufbyte) ch;
722 ch = Lstream_getc (stream);
724 *++strptr = (Bufbyte) ch;
727 ch = Lstream_getc (stream);
729 *++strptr = (Bufbyte) ch;
731 ch = Lstream_getc (stream);
733 *++strptr = (Bufbyte) ch;
735 ch = Lstream_getc (stream);
737 *++strptr = (Bufbyte) ch;
742 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write those
   bytes to STREAM; returns the result of Lstream_write().
   NOTE(review): return type and braces are missing from this listing. */
746 Lstream_fput_emchar (Lstream *stream, Emchar ch)
748 Bufbyte str[MAX_EMCHAR_LEN];
749 Bytecount len = set_charptr_emchar (str, ch);
750 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and push those
   bytes back onto STREAM with Lstream_unread().
   NOTE(review): return type and braces are missing from this listing. */
754 Lstream_funget_emchar (Lstream *stream, Emchar ch)
756 Bufbyte str[MAX_EMCHAR_LEN];
757 Bytecount len = set_charptr_emchar (str, ch);
758 Lstream_unread (stream, str, len);
762 /************************************************************************/
764 /************************************************************************/
/* Mark method registered for the charset lrecord type: passes the Lisp
   fields of the charset to MARKOBJ.
   NOTE(review): cs->name is not marked in the visible lines.  The return
   statement is missing from this listing; presumably it returns the name for
   the caller to mark -- confirm.  The decoding/encoding table lines are
   presumably under a dropped preprocessor conditional. */
767 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
769 struct Lisp_Charset *cs = XCHARSET (obj);
771 markobj (cs->short_name);
772 markobj (cs->long_name);
773 markobj (cs->doc_string);
774 markobj (cs->registry);
775 markobj (cs->ccl_program);
777 markobj (cs->decoding_table);
778 markobj (cs->encoding_table);
/* Print method for charsets.  Writes "#<charset NAME SHORT LONG DOC", then
   type, direction, columns, graphic and final byte, then the registry and
   the object's uid.  The error() call reports the object as unreadable.
   NOTE(review): `buf' is filled with unbounded sprintf; its declaration and
   size are not visible here, so overflow safety cannot be confirmed.
   NOTE(review): the return type, braces, the condition guarding error(), and
   the last argument lines of the error() and first sprintf() calls are
   missing from this listing. */
784 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
786 struct Lisp_Charset *cs = XCHARSET (obj);
790 error ("printing unreadable object #<charset %s 0x%x>",
791 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
794 write_c_string ("#<charset ", printcharfun);
795 print_internal (CHARSET_NAME (cs), printcharfun, 0);
796 write_c_string (" ", printcharfun);
797 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
798 write_c_string (" ", printcharfun);
799 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
800 write_c_string (" ", printcharfun);
801 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
802 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
803 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
804 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
805 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
807 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
808 CHARSET_COLUMNS (cs),
809 CHARSET_GRAPHIC (cs),
811 write_c_string (buf, printcharfun);
812 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
813 sprintf (buf, " 0x%x>", cs->header.uid);
814 write_c_string (buf, printcharfun);
/* Layout description for struct Lisp_Charset: 7 Lisp_Object slots starting
   at `name' and 2 starting at `decoding_table'.
   NOTE(review): any terminating entry, the closing "};" and the description
   argument of DEFINE_LRECORD_IMPLEMENTATION are missing from this listing. */
817 static const struct lrecord_description charset_description[] = {
818 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
820 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Defines the lrecord implementation named "charset" with mark_charset and
   print_charset; the three 0 arguments leave the remaining method slots
   empty. */
825 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
826 mark_charset, print_charset, 0, 0, 0,
828 struct Lisp_Charset);
830 /* Make a new charset. */
/* Allocates a charset record, fills in its fields from the arguments,
   derives dimension and chars from TYPE, sets the representation byte
   count, and registers the charset in the by-attributes table, the
   by-leading-byte table and Vcharset_hash_table.
   NOTE(review): the return type, braces, the `reg' parameter line, `break'
   statements, several conditions and the return statement are missing from
   this listing.
   NOTE(review): the decoding_table parameter is not used in the visible
   lines; the field is set to Qnil -- confirm that is intended. */
833 make_charset (Charset_ID id, Lisp_Object name,
834 unsigned char type, unsigned char columns, unsigned char graphic,
835 Bufbyte final, unsigned char direction, Lisp_Object short_name,
836 Lisp_Object long_name, Lisp_Object doc,
838 Lisp_Object decoding_table,
839 Emchar ucs_min, Emchar ucs_max,
840 Emchar code_offset, unsigned char byte_offset)
843 struct Lisp_Charset *cs =
844 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
845 XSETCHARSET (obj, cs);
847 CHARSET_ID (cs) = id;
848 CHARSET_NAME (cs) = name;
849 CHARSET_SHORT_NAME (cs) = short_name;
850 CHARSET_LONG_NAME (cs) = long_name;
851 CHARSET_DIRECTION (cs) = direction;
852 CHARSET_TYPE (cs) = type;
853 CHARSET_COLUMNS (cs) = columns;
854 CHARSET_GRAPHIC (cs) = graphic;
855 CHARSET_FINAL (cs) = final;
856 CHARSET_DOC_STRING (cs) = doc;
857 CHARSET_REGISTRY (cs) = reg;
858 CHARSET_CCL_PROGRAM (cs) = Qnil;
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
861 CHARSET_DECODING_TABLE(cs) = Qnil;
862 CHARSET_ENCODING_TABLE(cs) = Qnil;
863 CHARSET_UCS_MIN(cs) = ucs_min;
864 CHARSET_UCS_MAX(cs) = ucs_max;
865 CHARSET_CODE_OFFSET(cs) = code_offset;
866 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Derive dimension and characters-per-dimension from the charset type. */
869 switch (CHARSET_TYPE (cs))
871 case CHARSET_TYPE_94:
872 CHARSET_DIMENSION (cs) = 1;
873 CHARSET_CHARS (cs) = 94;
875 case CHARSET_TYPE_96:
876 CHARSET_DIMENSION (cs) = 1;
877 CHARSET_CHARS (cs) = 96;
879 case CHARSET_TYPE_94X94:
880 CHARSET_DIMENSION (cs) = 2;
881 CHARSET_CHARS (cs) = 94;
883 case CHARSET_TYPE_96X96:
884 CHARSET_DIMENSION (cs) = 2;
885 CHARSET_CHARS (cs) = 96;
888 case CHARSET_TYPE_128:
889 CHARSET_DIMENSION (cs) = 1;
890 CHARSET_CHARS (cs) = 128;
892 case CHARSET_TYPE_128X128:
893 CHARSET_DIMENSION (cs) = 2;
894 CHARSET_CHARS (cs) = 128;
896 case CHARSET_TYPE_256:
897 CHARSET_DIMENSION (cs) = 1;
898 CHARSET_CHARS (cs) = 256;
900 case CHARSET_TYPE_256X256:
901 CHARSET_DIMENSION (cs) = 2;
902 CHARSET_CHARS (cs) = 256;
/* ASCII takes 1 byte.  Other charsets take dimension + 1 or dimension + 2
   bytes; the conditions that pick between those two are not visible here. */
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private charsets. */
/* NOTE(review): two forms of the by-attributes store follow, one without
   and one with the direction index; presumably the arms of a dropped
   preprocessor conditional.  Each asserts the slot is still empty. */
922 if (code_offset == 0)
924 assert (NILP (charset_by_attributes[type][final]));
925 charset_by_attributes[type][final] = obj;
928 assert (NILP (charset_by_attributes[type][final][direction]));
929 charset_by_attributes[type][final][direction] = obj;
933 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
934 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
937 /* official leading byte */
938 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
941 /* Some charsets are "faux" and don't have names or really exist at
942 all except in the leading-byte table. */
944 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given DIMENSION
   by post-incrementing a file-scope counter.  The error message "No more
   character sets free for this dimension" is raised with DIMENSION as data.
   NOTE(review): three counter/limit pairs are visible (a single counter, a
   1-byte and a 2-byte one); the single-counter form is presumably the other
   arm of a dropped preprocessor conditional.  The return type, braces, the
   dimension tests, the statements run when a limit is exceeded, the name of
   the error function and the return statement are missing from this
   listing. */
949 get_unallocated_leading_byte (int dimension)
954 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
957 lb = next_allocated_leading_byte++;
961 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
964 lb = next_allocated_1_byte_leading_byte++;
968 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
971 lb = next_allocated_2_byte_leading_byte++;
977 ("No more character sets free for this dimension",
978 make_int (dimension));
/* Compute the first position byte of CH within CHARSET.  Three sources are
   tried in order: the charset's encoding table when it is not nil, the
   UCS_MIN..UCS_MAX range mapping, and, when the code offset is 0, a formula
   based on the final byte and the MIN_CHAR_94 / _96 / _94x94 / _96x96 bases.
   NOTE(review): the return type, braces, declarations of `table' and `d',
   the upper-bound halves of the range tests and most return statements
   (including those for the encoding-table case) are missing from this
   listing. */
985 charset_get_byte1 (Lisp_Object charset, Emchar ch)
990 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
992 Lisp_Object value = get_char_code_table (ch, table);
996 Emchar code = XINT (value);
1000 else if (code < (1 << 16))
1002 else if (code < (1 << 24))
/* Range mapping: offset of CH within the charset's UCS range, divided by a
   power of the chars-per-dimension count chosen by dimension, plus the byte
   offset. */
1008 if ((XCHARSET_UCS_MIN (charset) <= ch)
1009 && (ch <= XCHARSET_UCS_MAX (charset)))
1010 return (ch - XCHARSET_UCS_MIN (charset)
1011 + XCHARSET_CODE_OFFSET (charset))
1012 / (XCHARSET_DIMENSION (charset) == 1 ?
1015 XCHARSET_DIMENSION (charset) == 2 ?
1016 XCHARSET_CHARS (charset)
1018 XCHARSET_DIMENSION (charset) == 3 ?
1019 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
1021 XCHARSET_CHARS (charset)
1022 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1023 + XCHARSET_BYTE_OFFSET (charset);
/* Final-byte formula: each final byte counted from '0' selects a block of
   94, 96, 94*94 or 96*96 code points above the matching MIN_CHAR_* base. */
1024 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1026 if (XCHARSET_DIMENSION (charset) == 1)
1028 if (XCHARSET_CHARS (charset) == 94)
1030 if (((d = ch - (MIN_CHAR_94
1031 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1035 else if (XCHARSET_CHARS (charset) == 96)
1037 if (((d = ch - (MIN_CHAR_96
1038 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1045 else if (XCHARSET_DIMENSION (charset) == 2)
1047 if (XCHARSET_CHARS (charset) == 94)
1049 if (((d = ch - (MIN_CHAR_94x94
1050 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1053 return (d / 94) + 33;
1055 else if (XCHARSET_CHARS (charset) == 96)
1057 if (((d = ch - (MIN_CHAR_96x96
1058 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1061 return (d / 96) + 32;
/* Compute the second position byte of CH within CHARSET, using the same
   three sources as charset_get_byte1(): encoding table, UCS range mapping,
   then the final-byte formula for 94x94 and 96x96 charsets (0 when CH is
   outside the charset's block).
   NOTE(review): the return type, braces, the statement for the dimension-1
   case and the declaration of `table' are missing from this listing. */
1069 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1071 if (XCHARSET_DIMENSION (charset) == 1)
1077 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1079 Lisp_Object value = get_char_code_table (ch, table);
1083 Emchar code = XINT (value);
/* NOTE(review): for a code below 1 << 24 this returns bits 16-23 and
   otherwise bits 24-31.  Those look like the first byte of a 3- or 4-byte
   code rather than the second; `code >> 8' and `code >> 16' may have been
   intended -- confirm against charset_get_byte1(). */
1085 if (code < (1 << 16))
1086 return (unsigned char)code;
1087 else if (code < (1 << 24))
1088 return (unsigned char)(code >> 16);
1090 return (unsigned char)(code >> 24);
1093 if ((XCHARSET_UCS_MIN (charset) <= ch)
1094 && (ch <= XCHARSET_UCS_MAX (charset)))
1095 return ((ch - XCHARSET_UCS_MIN (charset)
1096 + XCHARSET_CODE_OFFSET (charset))
1097 / (XCHARSET_DIMENSION (charset) == 2 ?
1100 XCHARSET_DIMENSION (charset) == 3 ?
1101 XCHARSET_CHARS (charset)
1103 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1104 % XCHARSET_CHARS (charset)
1105 + XCHARSET_BYTE_OFFSET (charset);
1106 else if (XCHARSET_CHARS (charset) == 94)
1107 return (MIN_CHAR_94x94
1108 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1109 && (ch < MIN_CHAR_94x94
1110 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1111 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1112 else /* if (XCHARSET_CHARS (charset) == 96) */
1113 return (MIN_CHAR_96x96
1114 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1115 && (ch < MIN_CHAR_96x96
1116 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1117 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
/* NOTE(review): presumably a priority-ordered list of coded charsets, going
   by the name; it is not referenced anywhere in this listing -- confirm. */
1121 Lisp_Object Vdefault_coded_charset_priority_list;
1125 /************************************************************************/
1126 /* Basic charset Lisp functions */
1127 /************************************************************************/
/* Type predicate: Qt when OBJECT satisfies CHARSETP, Qnil otherwise.
   NOTE(review): the end of the doc comment, the argument list and the braces
   are missing from this listing. */
1129 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1130 Return non-nil if OBJECT is a charset.
1134 return CHARSETP (object) ? Qt : Qnil;
/* A charset object is returned unchanged.  Anything else must be a symbol
   (CHECK_SYMBOL) and is looked up in Vcharset_hash_table, with Qnil as the
   default when absent.
   NOTE(review): the end of the doc comment, the argument list and the braces
   are missing from this listing. */
1137 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1138 Retrieve the charset of the given name.
1139 If CHARSET-OR-NAME is a charset object, it is simply returned.
1140 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1141 nil is returned. Otherwise the associated charset object is returned.
1145 if (CHARSETP (charset_or_name))
1146 return charset_or_name;
1148 CHECK_SYMBOL (charset_or_name);
1149 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Calls Ffind_charset and signals "No such charset" with NAME as data.
   NOTE(review): the condition guarding the error (presumably a nil test on
   the result), the return statement, the end of the doc comment and the
   argument list are missing from this listing. */
1152 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1153 Retrieve the charset of the given name.
1154 Same as `find-charset' except an error is signalled if there is no such
1155 charset instead of returning nil.
1159 Lisp_Object charset = Ffind_charset (name);
1162 signal_simple_error ("No such charset", name);
1166 /* We store the charsets in hash tables with the names as the key and the
1167 actual charset object as the value. Occasionally we need to use them
1168 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: points at the list being built. */
1169 struct charset_list_closure
1171 Lisp_Object *charset_list;
/* Mapper called for each hash-table entry: conses the name of the charset in
   VALUE onto the front of the list that the closure points at.  KEY is not
   used in the visible lines.
   NOTE(review): the return type, braces and return statement are missing
   from this listing. */
1175 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1176 void *charset_list_closure)
1178 /* This function can GC */
1179 struct charset_list_closure *chcl =
1180 (struct charset_list_closure*) charset_list_closure;
1181 Lisp_Object *charset_list = chcl->charset_list;
1183 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Builds the result by mapping add_charset_to_list_mapper over
   Vcharset_hash_table.  The list is GC-protected with GCPRO1 while the
   mapper runs.
   NOTE(review): the matching UNGCPRO, the end of the doc comment, the
   argument list and the braces are missing from this listing. */
1187 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1188 Return a list of the names of all defined charsets.
1192 Lisp_Object charset_list = Qnil;
1193 struct gcpro gcpro1;
1194 struct charset_list_closure charset_list_closure;
1196 GCPRO1 (charset_list);
1197 charset_list_closure.charset_list = &charset_list;
1198 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1199 &charset_list_closure);
1202 return charset_list;
/* Resolves CHARSET through Fget_charset and returns that charset's name.
   NOTE(review): the end of the doc comment, the argument list and the braces
   are missing from this listing. */
1205 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1206 Return the name of the given charset.
1210 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive that defines a new charset.  Validates NAME and DOC-STRING,
   rejects a NAME that already names a charset, parses the PROPS property
   list, derives the charset type from dimension and chars, rejects a
   type/final combination that is already taken in either direction,
   allocates a leading byte and calls make_charset().
   NOTE(review): many lines are missing from this listing: the end of the doc
   comment, braces, the assignments after several CHECK_STRING calls, the
   value checks before several XINT calls, `else' lines, the tests on `final',
   `dimension' and `columns', the declaration of `type' and the return
   statement.  Three doc-string entries ('registry, 'final, 'direction) are
   truncated for the same reason. */
1213 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1214 Define a new character set.
1215 This function is for use with Mule support.
1216 NAME is a symbol, the name by which the character set is normally referred.
1217 DOC-STRING is a string describing the character set.
1218 PROPS is a property list, describing the specific nature of the
1219 character set. Recognized properties are:
1221 'short-name Short version of the charset name (ex: Latin-1)
1222 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1223 'registry A regular expression matching the font registry field for
1225 'dimension Number of octets used to index a character in this charset.
1226 Either 1 or 2. Defaults to 1.
1227 'columns Number of columns used to display a character in this charset.
1228 Only used in TTY mode. (Under X, the actual width of a
1229 character can be derived from the font used to display the
1230 characters.) If unspecified, defaults to the dimension
1231 (this is almost always the correct value).
1232 'chars Number of characters in each dimension (94 or 96).
1233 Defaults to 94. Note that if the dimension is 2, the
1234 character set thus described is 94x94 or 96x96.
1235 'final Final byte of ISO 2022 escape sequence. Must be
1236 supplied. Each combination of (DIMENSION, CHARS) defines a
1237 separate namespace for final bytes. Note that ISO
1238 2022 restricts the final byte to the range
1239 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1240 dimension == 2. Note also that final bytes in the range
1241 0x30 - 0x3F are reserved for user-defined (not official)
1243 'graphic 0 (use left half of font on output) or 1 (use right half
1244 of font on output). Defaults to 0. For example, for
1245 a font whose registry is ISO8859-1, the left half
1246 (octets 0x20 - 0x7F) is the `ascii' character set, while
1247 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1248 character set. With 'graphic set to 0, the octets
1249 will have their high bit cleared; with it set to 1,
1250 the octets will have their high bit set.
1251 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1253 'ccl-program A compiled CCL program used to convert a character in
1254 this charset into an index into the font. This is in
1255 addition to the 'graphic property. The CCL program
1256 is passed the octets of the character, with the high
1257 bit cleared and set depending upon whether the value
1258 of the 'graphic property is 0 or 1.
1260 (name, doc_string, props))
1262 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1263 int direction = CHARSET_LEFT_TO_RIGHT;
1265 Lisp_Object registry = Qnil;
1266 Lisp_Object charset;
1267 Lisp_Object rest, keyword, value;
1268 Lisp_Object ccl_program = Qnil;
1269 Lisp_Object short_name = Qnil, long_name = Qnil;
/* NOTE(review): code_offset is not used in the visible lines; the
   make_charset() call passes a literal 0 in that position. */
1271 Emchar code_offset = 0;
1272 unsigned char byte_offset = 0;
1275 CHECK_SYMBOL (name);
1276 if (!NILP (doc_string))
1277 CHECK_STRING (doc_string);
1279 charset = Ffind_charset (name);
1280 if (!NILP (charset))
1281 signal_simple_error ("Cannot redefine existing charset", name);
1283 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1285 if (EQ (keyword, Qshort_name))
1287 CHECK_STRING (value);
/* NOTE(review): the next line is a plain `if', not `else if', so it starts a
   second chain.  A 'short-name keyword handled above would still run through
   that chain and reach its last branch.  If the "Unrecognized property" call
   sits under a trailing `else' (not visible here), 'short-name is always
   rejected.  This likely should be `else if' -- confirm. */
1291 if (EQ (keyword, Qlong_name))
1293 CHECK_STRING (value);
1297 else if (EQ (keyword, Qdimension))
1300 dimension = XINT (value);
1301 if (dimension < 1 || dimension > 2)
1302 signal_simple_error ("Invalid value for 'dimension", value);
1305 else if (EQ (keyword, Qchars))
1308 chars = XINT (value);
1309 if (chars != 94 && chars != 96)
1310 signal_simple_error ("Invalid value for 'chars", value);
1313 else if (EQ (keyword, Qcolumns))
1316 columns = XINT (value);
1317 if (columns != 1 && columns != 2)
1318 signal_simple_error ("Invalid value for 'columns", value);
1321 else if (EQ (keyword, Qgraphic))
1324 graphic = XINT (value);
/* NOTE(review): two range checks follow, with upper limits 2 and 1; they are
   presumably the arms of a dropped preprocessor conditional. */
1326 if (graphic < 0 || graphic > 2)
1328 if (graphic < 0 || graphic > 1)
1330 signal_simple_error ("Invalid value for 'graphic", value);
1333 else if (EQ (keyword, Qregistry))
1335 CHECK_STRING (value);
1339 else if (EQ (keyword, Qdirection))
1341 if (EQ (value, Ql2r))
1342 direction = CHARSET_LEFT_TO_RIGHT;
1343 else if (EQ (value, Qr2l))
1344 direction = CHARSET_RIGHT_TO_LEFT;
1346 signal_simple_error ("Invalid value for 'direction", value);
1349 else if (EQ (keyword, Qfinal))
1351 CHECK_CHAR_COERCE_INT (value);
1352 final = XCHAR (value);
1353 if (final < '0' || final > '~')
1354 signal_simple_error ("Invalid value for 'final", value);
1357 else if (EQ (keyword, Qccl_program))
1359 CHECK_VECTOR (value);
1360 ccl_program = value;
1364 signal_simple_error ("Unrecognized property", keyword);
1368 error ("'final must be specified");
1369 if (dimension == 2 && final > 0x5F)
1371 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
/* The charset type follows from dimension and chars; the dimension test that
   selects between the next two assignments is not visible here. */
1375 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1377 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1379 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1380 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1382 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1384 id = get_unallocated_leading_byte (dimension);
/* Defaults: empty doc string and registry, short name taken from the symbol
   name, long name taken from the doc string. */
1386 if (NILP (doc_string))
1387 doc_string = build_string ("");
1389 if (NILP (registry))
1390 registry = build_string ("");
1392 if (NILP (short_name))
1393 XSETSTRING (short_name, XSYMBOL (name)->name);
1395 if (NILP (long_name))
1396 long_name = doc_string;
1399 columns = dimension;
1400 charset = make_charset (id, name, type, columns, graphic,
1401 final, direction, short_name, long_name,
1402 doc_string, registry,
1403 Qnil, 0, 0, 0, byte_offset);
1404 if (!NILP (ccl_program))
1405 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1409 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1411 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1412 NEW-NAME is the name of the new charset. Return the new charset.
1414 (charset, new_name))
1416 Lisp_Object new_charset = Qnil;
1417 int id, dimension, columns, graphic, final;
1418 int direction, type;
1419 Lisp_Object registry, doc_string, short_name, long_name;
1420 struct Lisp_Charset *cs;
1422 charset = Fget_charset (charset);
1423 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1424 signal_simple_error ("Charset already has reverse-direction charset",
1427 CHECK_SYMBOL (new_name);
1428 if (!NILP (Ffind_charset (new_name)))
1429 signal_simple_error ("Cannot redefine existing charset", new_name);
1431 cs = XCHARSET (charset);
1433 type = CHARSET_TYPE (cs);
1434 columns = CHARSET_COLUMNS (cs);
1435 dimension = CHARSET_DIMENSION (cs);
1436 id = get_unallocated_leading_byte (dimension);
1438 graphic = CHARSET_GRAPHIC (cs);
1439 final = CHARSET_FINAL (cs);
1440 direction = CHARSET_RIGHT_TO_LEFT;
1441 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1442 direction = CHARSET_LEFT_TO_RIGHT;
1443 doc_string = CHARSET_DOC_STRING (cs);
1444 short_name = CHARSET_SHORT_NAME (cs);
1445 long_name = CHARSET_LONG_NAME (cs);
1446 registry = CHARSET_REGISTRY (cs);
1448 new_charset = make_charset (id, new_name, type, columns,
1449 graphic, final, direction, short_name, long_name,
1450 doc_string, registry,
1452 CHARSET_DECODING_TABLE(cs),
1453 CHARSET_UCS_MIN(cs),
1454 CHARSET_UCS_MAX(cs),
1455 CHARSET_CODE_OFFSET(cs),
1456 CHARSET_BYTE_OFFSET(cs)
1462 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1463 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1468 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1469 Define symbol ALIAS as an alias for CHARSET.
1473 CHECK_SYMBOL (alias);
1474 charset = Fget_charset (charset);
1475 return Fputhash (alias, charset, Vcharset_hash_table);
/* #### Reverse direction charsets not yet implemented. */
/* NOTE(review): the `#if 0' / `#endif' pair is reconstructed from the gaps
   in the extracted text and from the commented-out DEFSUBR in
   syms_of_mule_charset -- confirm against the upstream file. */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
       1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  charset = Fget_charset (charset);
  return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
}
#endif
1494 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1495 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1496 If DIRECTION is omitted, both directions will be checked (left-to-right
1497 will be returned if character sets exist for both directions).
1499 (dimension, chars, final, direction))
1501 int dm, ch, fi, di = -1;
1503 Lisp_Object obj = Qnil;
1505 CHECK_INT (dimension);
1506 dm = XINT (dimension);
1507 if (dm < 1 || dm > 2)
1508 signal_simple_error ("Invalid value for DIMENSION", dimension);
1512 if (ch != 94 && ch != 96)
1513 signal_simple_error ("Invalid value for CHARS", chars);
1515 CHECK_CHAR_COERCE_INT (final);
1517 if (fi < '0' || fi > '~')
1518 signal_simple_error ("Invalid value for FINAL", final);
1520 if (EQ (direction, Ql2r))
1521 di = CHARSET_LEFT_TO_RIGHT;
1522 else if (EQ (direction, Qr2l))
1523 di = CHARSET_RIGHT_TO_LEFT;
1524 else if (!NILP (direction))
1525 signal_simple_error ("Invalid value for DIRECTION", direction);
1527 if (dm == 2 && fi > 0x5F)
1529 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1532 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1534 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1538 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1540 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1543 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1546 return XCHARSET_NAME (obj);
1550 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1551 Return short name of CHARSET.
1555 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1558 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1559 Return long name of CHARSET.
1563 return XCHARSET_LONG_NAME (Fget_charset (charset));
1566 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1567 Return description of CHARSET.
1571 return XCHARSET_DOC_STRING (Fget_charset (charset));
1574 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1575 Return dimension of CHARSET.
1579 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1582 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1583 Return property PROP of CHARSET.
1584 Recognized properties are those listed in `make-charset', as well as
1585 'name and 'doc-string.
1589 struct Lisp_Charset *cs;
1591 charset = Fget_charset (charset);
1592 cs = XCHARSET (charset);
1594 CHECK_SYMBOL (prop);
1595 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1596 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1597 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1598 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1599 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1600 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1601 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1602 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1603 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1604 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1605 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1606 if (EQ (prop, Qdirection))
1607 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1608 if (EQ (prop, Qreverse_direction_charset))
1610 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1614 return XCHARSET_NAME (obj);
1616 signal_simple_error ("Unrecognized charset property name", prop);
1617 return Qnil; /* not reached */
1620 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1621 Return charset identification number of CHARSET.
1625 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* #### We need to figure out which properties we really want to
   allow to be set. */
1631 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1632 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1634 (charset, ccl_program))
1636 charset = Fget_charset (charset);
1637 CHECK_VECTOR (ccl_program);
1638 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1643 invalidate_charset_font_caches (Lisp_Object charset)
1645 /* Invalidate font cache entries for charset on all devices. */
1646 Lisp_Object devcons, concons, hash_table;
1647 DEVICE_LOOP_NO_BREAK (devcons, concons)
1649 struct device *d = XDEVICE (XCAR (devcons));
1650 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1651 if (!UNBOUNDP (hash_table))
1652 Fclrhash (hash_table);
1656 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1657 Set the 'registry property of CHARSET to REGISTRY.
1659 (charset, registry))
1661 charset = Fget_charset (charset);
1662 CHECK_STRING (registry);
1663 XCHARSET_REGISTRY (charset) = registry;
1664 invalidate_charset_font_caches (charset);
1665 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* NOTE(review): the UTF2000 guard is reconstructed (the matching DEFSUBR
   sits in a gap consistent with `#ifdef UTF2000') -- confirm upstream. */
#ifdef UTF2000
DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
Return mapping-table of CHARSET.
*/
       (charset))
{
  /* The mapping table is the charset's decoding table. */
  return XCHARSET_DECODING_TABLE (Fget_charset (charset));
}
#endif /* UTF2000 */
/* NOTE(review): the UTF2000 guard is reconstructed (the matching DEFSUBR
   sits in a gap consistent with `#ifdef UTF2000') -- confirm upstream. */
#ifdef UTF2000
DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
Set mapping-table of CHARSET to TABLE.
*/
       (charset, table))
{
  struct Lisp_Charset *cs;
  size_t i;

  charset = Fget_charset (charset);
  cs = XCHARSET (charset);

  if (EQ (table, Qnil))
    {
      /* nil removes both the decoding and the encoding table. */
      CHARSET_DECODING_TABLE(cs) = table;
      CHARSET_ENCODING_TABLE(cs) = Qnil;
      return table;
    }
  else if (VECTORP (table))
    {
      if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
	args_out_of_range (table, make_int (CHARSET_CHARS (cs)));

      /* Validate every row before touching the charset.  The previous
	 code detected an over-long row only after the encoding table had
	 been replaced and earlier rows registered, and its rollback stored
	 the old *encoding* table into the *decoding* table slot. */
      if (CHARSET_DIMENSION (cs) == 2)
	for (i = 0; i < XVECTOR_LENGTH (table); i++)
	  {
	    Lisp_Object row = XVECTOR_DATA(table)[i];

	    if (VECTORP (row) && XVECTOR_LENGTH (row) > CHARSET_CHARS (cs))
	      args_out_of_range (row, make_int (CHARSET_CHARS (cs)));
	  }

      CHARSET_DECODING_TABLE(cs) = table;
    }
  else
    signal_error (Qwrong_type_argument,
		  list2 (build_translated_string ("vector-or-nil-p"),
			 table));
  /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */

  /* Build the reverse (character -> code point) table and record each
     character's code point in this charset as a character attribute.
     Code points are the vector index plus the charset's byte offset. */
  switch (CHARSET_DIMENSION (cs))
    {
    case 1:
      CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
      for (i = 0; i < XVECTOR_LENGTH (table); i++)
	{
	  Lisp_Object c = XVECTOR_DATA(table)[i];

	  if (CHARP (c))
	    {
	      put_char_code_table (XCHAR (c),
				   make_int (i + CHARSET_BYTE_OFFSET (cs)),
				   CHARSET_ENCODING_TABLE(cs));
	      Fput_char_attribute (c, charset,
				   list1
				   (make_int (i + CHARSET_BYTE_OFFSET (cs))));
	    }
	}
      break;
    case 2:
      CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
      for (i = 0; i < XVECTOR_LENGTH (table); i++)
	{
	  Lisp_Object v = XVECTOR_DATA(table)[i];

	  if (VECTORP (v))
	    {
	      size_t j;

	      /* Row lengths were already checked above. */
	      for (j = 0; j < XVECTOR_LENGTH (v); j++)
		{
		  Lisp_Object c = XVECTOR_DATA(v)[j];

		  if (CHARP (c))
		    {
		      /* Two-octet code point: row in the high byte,
			 column in the low byte. */
		      put_char_code_table
			(XCHAR (c),
			 make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
				   | (j + CHARSET_BYTE_OFFSET (cs))),
			 CHARSET_ENCODING_TABLE(cs));
		      Fput_char_attribute (c, charset,
					   list2
					   (make_int
					    (i + CHARSET_BYTE_OFFSET (cs)),
					    make_int
					    (j + CHARSET_BYTE_OFFSET (cs))));
		    }
		}
	    }
	  else if (CHARP (v))
	    {
	      /* A bare character in a row slot is registered with a
		 single-octet code point. */
	      put_char_code_table (XCHAR (v),
				   make_int (i + CHARSET_BYTE_OFFSET (cs)),
				   CHARSET_ENCODING_TABLE(cs));
	      Fput_char_attribute (v, charset,
				   list1
				   (make_int (i + CHARSET_BYTE_OFFSET (cs))));
	    }
	}
      break;
    }
  return table;
}
#endif /* UTF2000 */
1780 /************************************************************************/
1781 /* Lisp primitives for working with characters */
1782 /************************************************************************/
1784 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1785 Make a character from CHARSET and octets ARG1 and ARG2.
1786 ARG2 is required only for characters from two-dimensional charsets.
1787 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1788 character s with caron.
1790 (charset, arg1, arg2))
1792 struct Lisp_Charset *cs;
1794 int lowlim, highlim;
1796 charset = Fget_charset (charset);
1797 cs = XCHARSET (charset);
1799 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1800 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1802 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1804 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1805 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1808 /* It is useful (and safe, according to Olivier Galibert) to strip
1809 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1810 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1811 Latin 2 code of the character. */
1819 if (a1 < lowlim || a1 > highlim)
1820 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1822 if (CHARSET_DIMENSION (cs) == 1)
1826 ("Charset is of dimension one; second octet must be nil", arg2);
1827 return make_char (MAKE_CHAR (charset, a1, 0));
1836 a2 = XINT (arg2) & 0x7f;
1838 if (a2 < lowlim || a2 > highlim)
1839 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1841 return make_char (MAKE_CHAR (charset, a1, a2));
1844 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1845 Return the character set of char CH.
1849 CHECK_CHAR_COERCE_INT (ch);
1851 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1854 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1855 Return list of charset and one or two position-codes of CHAR.
1859 /* This function can GC */
1860 struct gcpro gcpro1, gcpro2;
1861 Lisp_Object charset = Qnil;
1862 Lisp_Object rc = Qnil;
1865 GCPRO2 (charset, rc);
1866 CHECK_CHAR_COERCE_INT (character);
1868 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1870 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1872 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1876 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1884 #ifdef ENABLE_COMPOSITE_CHARS
1885 /************************************************************************/
1886 /* composite character functions */
1887 /************************************************************************/
1890 lookup_composite_char (Bufbyte *str, int len)
1892 Lisp_Object lispstr = make_string (str, len);
1893 Lisp_Object ch = Fgethash (lispstr,
1894 Vcomposite_char_string2char_hash_table,
1900 if (composite_char_row_next >= 128)
1901 signal_simple_error ("No more composite chars available", lispstr);
1902 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1903 composite_char_col_next);
1904 Fputhash (make_char (emch), lispstr,
1905 Vcomposite_char_char2string_hash_table);
1906 Fputhash (lispstr, make_char (emch),
1907 Vcomposite_char_string2char_hash_table);
1908 composite_char_col_next++;
1909 if (composite_char_col_next >= 128)
1911 composite_char_col_next = 32;
1912 composite_char_row_next++;
1921 composite_char_string (Emchar ch)
1923 Lisp_Object str = Fgethash (make_char (ch),
1924 Vcomposite_char_char2string_hash_table,
1926 assert (!UNBOUNDP (str));
1930 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1931 Convert a string into a single composite character.
1932 The character is the result of overstriking all the characters in
1937 CHECK_STRING (string);
1938 return make_char (lookup_composite_char (XSTRING_DATA (string),
1939 XSTRING_LENGTH (string)));
1942 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1943 Return a string of the characters comprising a composite character.
1951 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1952 signal_simple_error ("Must be composite char", ch);
1953 return composite_char_string (emch);
1955 #endif /* ENABLE_COMPOSITE_CHARS */
1958 /************************************************************************/
1959 /* initialization */
1960 /************************************************************************/
1963 syms_of_mule_charset (void)
1965 DEFSUBR (Fcharsetp);
1966 DEFSUBR (Ffind_charset);
1967 DEFSUBR (Fget_charset);
1968 DEFSUBR (Fcharset_list);
1969 DEFSUBR (Fcharset_name);
1970 DEFSUBR (Fmake_charset);
1971 DEFSUBR (Fmake_reverse_direction_charset);
1972 /* DEFSUBR (Freverse_direction_charset); */
1973 DEFSUBR (Fdefine_charset_alias);
1974 DEFSUBR (Fcharset_from_attributes);
1975 DEFSUBR (Fcharset_short_name);
1976 DEFSUBR (Fcharset_long_name);
1977 DEFSUBR (Fcharset_description);
1978 DEFSUBR (Fcharset_dimension);
1979 DEFSUBR (Fcharset_property);
1980 DEFSUBR (Fcharset_id);
1981 DEFSUBR (Fset_charset_ccl_program);
1982 DEFSUBR (Fset_charset_registry);
1984 DEFSUBR (Fchar_attribute_alist);
1985 DEFSUBR (Fget_char_attribute);
1986 DEFSUBR (Fput_char_attribute);
1987 DEFSUBR (Fcharset_mapping_table);
1988 DEFSUBR (Fset_charset_mapping_table);
1991 DEFSUBR (Fmake_char);
1992 DEFSUBR (Fchar_charset);
1993 DEFSUBR (Fsplit_char);
1995 #ifdef ENABLE_COMPOSITE_CHARS
1996 DEFSUBR (Fmake_composite_char);
1997 DEFSUBR (Fcomposite_char_string);
2000 defsymbol (&Qcharsetp, "charsetp");
2001 defsymbol (&Qregistry, "registry");
2002 defsymbol (&Qfinal, "final");
2003 defsymbol (&Qgraphic, "graphic");
2004 defsymbol (&Qdirection, "direction");
2005 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2006 defsymbol (&Qshort_name, "short-name");
2007 defsymbol (&Qlong_name, "long-name");
2009 defsymbol (&Ql2r, "l2r");
2010 defsymbol (&Qr2l, "r2l");
2012 /* Charsets, compatible with FSF 20.3
2013 Naming convention is Script-Charset[-Edition] */
2014 defsymbol (&Qascii, "ascii");
2015 defsymbol (&Qcontrol_1, "control-1");
2016 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2017 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2018 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2019 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2020 defsymbol (&Qthai_tis620, "thai-tis620");
2021 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2022 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2023 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2024 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2025 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2026 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2027 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2028 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2029 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2030 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2031 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2032 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2033 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2034 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2036 defsymbol (&Qucs_bmp, "ucs-bmp");
2037 defsymbol (&Qlatin_viscii, "latin-viscii");
2038 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2039 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2040 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2041 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2042 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2043 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2045 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2046 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2048 defsymbol (&Qcomposite, "composite");
2052 vars_of_mule_charset (void)
2059 /* Table of charsets indexed by leading byte. */
2060 for (i = 0; i < countof (charset_by_leading_byte); i++)
2061 charset_by_leading_byte[i] = Qnil;
2064 /* Table of charsets indexed by type/final-byte. */
2065 for (i = 0; i < countof (charset_by_attributes); i++)
2066 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2067 charset_by_attributes[i][j] = Qnil;
2069 /* Table of charsets indexed by type/final-byte/direction. */
2070 for (i = 0; i < countof (charset_by_attributes); i++)
2071 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2072 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2073 charset_by_attributes[i][j][k] = Qnil;
2077 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2079 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2080 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2084 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2085 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2086 Leading-code of private TYPE9N charset of column-width 1.
2088 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2092 Vutf_2000_version = build_string("0.9 (Kyūhōji)");
2093 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2094 Version number of UTF-2000.
2097 staticpro (&Vcharacter_attribute_table);
2098 Vcharacter_attribute_table = make_char_code_table (Qnil);
2100 Vdefault_coded_charset_priority_list = Qnil;
2101 DEFVAR_LISP ("default-coded-charset-priority-list",
2102 &Vdefault_coded_charset_priority_list /*
2103 Default order of preferred coded-character-sets.
2109 complex_vars_of_mule_charset (void)
2111 staticpro (&Vcharset_hash_table);
2112 Vcharset_hash_table =
2113 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2115 /* Predefined character sets. We store them into variables for
2120 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2121 CHARSET_TYPE_256X256, 1, 2, 0,
2122 CHARSET_LEFT_TO_RIGHT,
2123 build_string ("BMP"),
2124 build_string ("BMP"),
2125 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2126 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2127 Qnil, 0, 0xFFFF, 0, 0);
2129 # define MIN_CHAR_THAI 0
2130 # define MAX_CHAR_THAI 0
2131 # define MIN_CHAR_GREEK 0
2132 # define MAX_CHAR_GREEK 0
2133 # define MIN_CHAR_HEBREW 0
2134 # define MAX_CHAR_HEBREW 0
2135 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2136 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2137 # define MIN_CHAR_CYRILLIC 0
2138 # define MAX_CHAR_CYRILLIC 0
2141 make_charset (LEADING_BYTE_ASCII, Qascii,
2142 CHARSET_TYPE_94, 1, 0, 'B',
2143 CHARSET_LEFT_TO_RIGHT,
2144 build_string ("ASCII"),
2145 build_string ("ASCII)"),
2146 build_string ("ASCII (ISO646 IRV)"),
2147 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2148 Qnil, 0, 0x7F, 0, 0);
2149 Vcharset_control_1 =
2150 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2151 CHARSET_TYPE_94, 1, 1, 0,
2152 CHARSET_LEFT_TO_RIGHT,
2153 build_string ("C1"),
2154 build_string ("Control characters"),
2155 build_string ("Control characters 128-191"),
2157 Qnil, 0x80, 0x9F, 0, 0);
2158 Vcharset_latin_iso8859_1 =
2159 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2160 CHARSET_TYPE_96, 1, 1, 'A',
2161 CHARSET_LEFT_TO_RIGHT,
2162 build_string ("Latin-1"),
2163 build_string ("ISO8859-1 (Latin-1)"),
2164 build_string ("ISO8859-1 (Latin-1)"),
2165 build_string ("iso8859-1"),
2166 Qnil, 0xA0, 0xFF, 0, 32);
2167 Vcharset_latin_iso8859_2 =
2168 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2169 CHARSET_TYPE_96, 1, 1, 'B',
2170 CHARSET_LEFT_TO_RIGHT,
2171 build_string ("Latin-2"),
2172 build_string ("ISO8859-2 (Latin-2)"),
2173 build_string ("ISO8859-2 (Latin-2)"),
2174 build_string ("iso8859-2"),
2176 Vcharset_latin_iso8859_3 =
2177 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2178 CHARSET_TYPE_96, 1, 1, 'C',
2179 CHARSET_LEFT_TO_RIGHT,
2180 build_string ("Latin-3"),
2181 build_string ("ISO8859-3 (Latin-3)"),
2182 build_string ("ISO8859-3 (Latin-3)"),
2183 build_string ("iso8859-3"),
2185 Vcharset_latin_iso8859_4 =
2186 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2187 CHARSET_TYPE_96, 1, 1, 'D',
2188 CHARSET_LEFT_TO_RIGHT,
2189 build_string ("Latin-4"),
2190 build_string ("ISO8859-4 (Latin-4)"),
2191 build_string ("ISO8859-4 (Latin-4)"),
2192 build_string ("iso8859-4"),
2194 Vcharset_thai_tis620 =
2195 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2196 CHARSET_TYPE_96, 1, 1, 'T',
2197 CHARSET_LEFT_TO_RIGHT,
2198 build_string ("TIS620"),
2199 build_string ("TIS620 (Thai)"),
2200 build_string ("TIS620.2529 (Thai)"),
2201 build_string ("tis620"),
2202 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2203 Vcharset_greek_iso8859_7 =
2204 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2205 CHARSET_TYPE_96, 1, 1, 'F',
2206 CHARSET_LEFT_TO_RIGHT,
2207 build_string ("ISO8859-7"),
2208 build_string ("ISO8859-7 (Greek)"),
2209 build_string ("ISO8859-7 (Greek)"),
2210 build_string ("iso8859-7"),
2211 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2212 Vcharset_arabic_iso8859_6 =
2213 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2214 CHARSET_TYPE_96, 1, 1, 'G',
2215 CHARSET_RIGHT_TO_LEFT,
2216 build_string ("ISO8859-6"),
2217 build_string ("ISO8859-6 (Arabic)"),
2218 build_string ("ISO8859-6 (Arabic)"),
2219 build_string ("iso8859-6"),
2221 Vcharset_hebrew_iso8859_8 =
2222 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2223 CHARSET_TYPE_96, 1, 1, 'H',
2224 CHARSET_RIGHT_TO_LEFT,
2225 build_string ("ISO8859-8"),
2226 build_string ("ISO8859-8 (Hebrew)"),
2227 build_string ("ISO8859-8 (Hebrew)"),
2228 build_string ("iso8859-8"),
2229 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2230 Vcharset_katakana_jisx0201 =
2231 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2232 CHARSET_TYPE_94, 1, 1, 'I',
2233 CHARSET_LEFT_TO_RIGHT,
2234 build_string ("JISX0201 Kana"),
2235 build_string ("JISX0201.1976 (Japanese Kana)"),
2236 build_string ("JISX0201.1976 Japanese Kana"),
2237 build_string ("jisx0201\\.1976"),
2239 MIN_CHAR_HALFWIDTH_KATAKANA,
2240 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2241 Vcharset_latin_jisx0201 =
2242 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2243 CHARSET_TYPE_94, 1, 0, 'J',
2244 CHARSET_LEFT_TO_RIGHT,
2245 build_string ("JISX0201 Roman"),
2246 build_string ("JISX0201.1976 (Japanese Roman)"),
2247 build_string ("JISX0201.1976 Japanese Roman"),
2248 build_string ("jisx0201\\.1976"),
2250 Vcharset_cyrillic_iso8859_5 =
2251 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2252 CHARSET_TYPE_96, 1, 1, 'L',
2253 CHARSET_LEFT_TO_RIGHT,
2254 build_string ("ISO8859-5"),
2255 build_string ("ISO8859-5 (Cyrillic)"),
2256 build_string ("ISO8859-5 (Cyrillic)"),
2257 build_string ("iso8859-5"),
2258 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2259 Vcharset_latin_iso8859_9 =
2260 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2261 CHARSET_TYPE_96, 1, 1, 'M',
2262 CHARSET_LEFT_TO_RIGHT,
2263 build_string ("Latin-5"),
2264 build_string ("ISO8859-9 (Latin-5)"),
2265 build_string ("ISO8859-9 (Latin-5)"),
2266 build_string ("iso8859-9"),
2268 Vcharset_japanese_jisx0208_1978 =
2269 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2270 CHARSET_TYPE_94X94, 2, 0, '@',
2271 CHARSET_LEFT_TO_RIGHT,
2272 build_string ("JIS X0208:1978"),
2273 build_string ("JIS X0208:1978 (Japanese)"),
2275 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2276 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2278 Vcharset_chinese_gb2312 =
2279 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2280 CHARSET_TYPE_94X94, 2, 0, 'A',
2281 CHARSET_LEFT_TO_RIGHT,
2282 build_string ("GB2312"),
2283 build_string ("GB2312)"),
2284 build_string ("GB2312 Chinese simplified"),
2285 build_string ("gb2312"),
2287 Vcharset_japanese_jisx0208 =
2288 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2289 CHARSET_TYPE_94X94, 2, 0, 'B',
2290 CHARSET_LEFT_TO_RIGHT,
2291 build_string ("JISX0208"),
2292 build_string ("JIS X0208:1983 (Japanese)"),
2293 build_string ("JIS X0208:1983 Japanese Kanji"),
2294 build_string ("jisx0208\\.1983"),
2296 Vcharset_korean_ksc5601 =
2297 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2298 CHARSET_TYPE_94X94, 2, 0, 'C',
2299 CHARSET_LEFT_TO_RIGHT,
2300 build_string ("KSC5601"),
2301 build_string ("KSC5601 (Korean"),
2302 build_string ("KSC5601 Korean Hangul and Hanja"),
2303 build_string ("ksc5601"),
2305 Vcharset_japanese_jisx0212 =
2306 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2307 CHARSET_TYPE_94X94, 2, 0, 'D',
2308 CHARSET_LEFT_TO_RIGHT,
2309 build_string ("JISX0212"),
2310 build_string ("JISX0212 (Japanese)"),
2311 build_string ("JISX0212 Japanese Supplement"),
2312 build_string ("jisx0212"),
2315 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2316 Vcharset_chinese_cns11643_1 =
2317 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2318 CHARSET_TYPE_94X94, 2, 0, 'G',
2319 CHARSET_LEFT_TO_RIGHT,
2320 build_string ("CNS11643-1"),
2321 build_string ("CNS11643-1 (Chinese traditional)"),
2323 ("CNS 11643 Plane 1 Chinese traditional"),
2324 build_string (CHINESE_CNS_PLANE_RE("1")),
2326 Vcharset_chinese_cns11643_2 =
2327 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2328 CHARSET_TYPE_94X94, 2, 0, 'H',
2329 CHARSET_LEFT_TO_RIGHT,
2330 build_string ("CNS11643-2"),
2331 build_string ("CNS11643-2 (Chinese traditional)"),
2333 ("CNS 11643 Plane 2 Chinese traditional"),
2334 build_string (CHINESE_CNS_PLANE_RE("2")),
2337 Vcharset_latin_viscii_lower =
2338 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2339 CHARSET_TYPE_96, 1, 1, '1',
2340 CHARSET_LEFT_TO_RIGHT,
2341 build_string ("VISCII lower"),
2342 build_string ("VISCII lower (Vietnamese)"),
2343 build_string ("VISCII lower (Vietnamese)"),
2344 build_string ("MULEVISCII-LOWER"),
2346 Vcharset_latin_viscii_upper =
2347 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2348 CHARSET_TYPE_96, 1, 1, '2',
2349 CHARSET_LEFT_TO_RIGHT,
2350 build_string ("VISCII upper"),
2351 build_string ("VISCII upper (Vietnamese)"),
2352 build_string ("VISCII upper (Vietnamese)"),
2353 build_string ("MULEVISCII-UPPER"),
2355 Vcharset_latin_viscii =
2356 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2357 CHARSET_TYPE_256, 1, 2, 0,
2358 CHARSET_LEFT_TO_RIGHT,
2359 build_string ("VISCII"),
2360 build_string ("VISCII 1.1 (Vietnamese)"),
2361 build_string ("VISCII 1.1 (Vietnamese)"),
2362 build_string ("VISCII1\\.1"),
2364 Vcharset_hiragana_jisx0208 =
2365 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2366 CHARSET_TYPE_94X94, 2, 0, 'B',
2367 CHARSET_LEFT_TO_RIGHT,
2368 build_string ("Hiragana"),
2369 build_string ("Hiragana of JIS X0208"),
2370 build_string ("Japanese Hiragana of JIS X0208"),
2371 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2372 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2373 (0x24 - 33) * 94 + (0x21 - 33), 33);
2374 Vcharset_katakana_jisx0208 =
2375 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2376 CHARSET_TYPE_94X94, 2, 0, 'B',
2377 CHARSET_LEFT_TO_RIGHT,
2378 build_string ("Katakana"),
2379 build_string ("Katakana of JIS X0208"),
2380 build_string ("Japanese Katakana of JIS X0208"),
2381 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2382 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2383 (0x25 - 33) * 94 + (0x21 - 33), 33);
2385 Vcharset_chinese_big5_1 =
2386 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2387 CHARSET_TYPE_94X94, 2, 0, '0',
2388 CHARSET_LEFT_TO_RIGHT,
2389 build_string ("Big5"),
2390 build_string ("Big5 (Level-1)"),
2392 ("Big5 Level-1 Chinese traditional"),
2393 build_string ("big5"),
2395 Vcharset_chinese_big5_2 =
2396 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2397 CHARSET_TYPE_94X94, 2, 0, '1',
2398 CHARSET_LEFT_TO_RIGHT,
2399 build_string ("Big5"),
2400 build_string ("Big5 (Level-2)"),
2402 ("Big5 Level-2 Chinese traditional"),
2403 build_string ("big5"),
2406 #ifdef ENABLE_COMPOSITE_CHARS
2407 /* #### For simplicity, we put composite chars into a 96x96 charset.
2408 This is going to lead to problems because you can run out of
2409 room, esp. as we don't yet recycle numbers. */
2410 Vcharset_composite =
2411 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2412 CHARSET_TYPE_96X96, 2, 0, 0,
2413 CHARSET_LEFT_TO_RIGHT,
2414 build_string ("Composite"),
2415 build_string ("Composite characters"),
2416 build_string ("Composite characters"),
2419 composite_char_row_next = 32;
2420 composite_char_col_next = 32;
2422 Vcomposite_char_string2char_hash_table =
2423 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2424 Vcomposite_char_char2string_hash_table =
2425 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2426 staticpro (&Vcomposite_char_string2char_hash_table);
2427 staticpro (&Vcomposite_char_char2string_hash_table);
2428 #endif /* ENABLE_COMPOSITE_CHARS */