1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
64 Lisp_Object Vcharset_hiragana_jisx0208;
65 Lisp_Object Vcharset_katakana_jisx0208;
67 Lisp_Object Vcharset_chinese_big5_1;
68 Lisp_Object Vcharset_chinese_big5_2;
70 #ifdef ENABLE_COMPOSITE_CHARS
71 Lisp_Object Vcharset_composite;
73 /* Hash tables for composite chars. One maps string representing
74 composed chars to their equivalent chars; one goes the
76 Lisp_Object Vcomposite_char_char2string_hash_table;
77 Lisp_Object Vcomposite_char_string2char_hash_table;
79 static int composite_char_row_next;
80 static int composite_char_col_next;
82 #endif /* ENABLE_COMPOSITE_CHARS */
84 /* Table of charsets indexed by leading byte. */
85 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
87 /* Table of charsets indexed by type/final-byte/direction. */
89 Lisp_Object charset_by_attributes[4][128];
91 Lisp_Object charset_by_attributes[4][128][2];
95 /* Table of number of bytes in the string representation of a character
96 indexed by the first byte of that representation.
98 rep_bytes_by_first_byte(c) is more efficient than the equivalent
99 canonical computation:
101 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
103 Bytecount rep_bytes_by_first_byte[0xA0] =
104 { /* 0x00 - 0x7f are for straight ASCII */
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 /* 0x80 - 0x8f are for Dimension-1 official charsets */
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 /* 0x90 - 0x9d are for Dimension-2 official charsets */
120 /* 0x9e is for Dimension-1 private charsets */
121 /* 0x9f is for Dimension-2 private charsets */
122 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
127 Emchar_to_byte_table*
128 make_byte_from_character_table ()
130 Emchar_to_byte_table* table
131 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
137 #define destroy_byte_from_character_table(table) xfree(table)
140 put_byte_from_character_table (Emchar ch, unsigned char val,
141 Emchar_to_byte_table* table)
143 if (table->base == NULL)
145 table->base = xmalloc (256);
146 table->offset = ch - (ch % 256);
148 table->base[ch - table->offset] = val;
152 int i = ch - table->offset;
156 size_t new_size = table->size - i;
159 new_size += 256 - (new_size % 256);
160 table->base = xrealloc (table->base, new_size);
161 memmove (table->base + (new_size - table->size), table->base,
163 for (j = 0; j < (new_size - table->size); j++)
165 table->offset -= (new_size - table->size);
166 table->base[ch - table->offset] = val;
167 table->size = new_size;
169 else if (i >= table->size)
171 size_t new_size = i + 1;
174 new_size += 256 - (new_size % 256);
175 table->base = xrealloc (table->base, new_size);
176 for (j = table->size; j < new_size; j++)
178 table->base[i] = val;
179 table->size = new_size;
183 table->base[i] = val;
189 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
191 size_t i = ch - table->offset;
193 return table->base[i];
199 Lisp_Object Vutf_2000_version;
203 int leading_code_private_11;
206 Lisp_Object Qcharsetp;
208 /* Qdoc_string, Qdimension, Qchars defined in general.c */
209 Lisp_Object Qregistry, Qfinal, Qgraphic;
210 Lisp_Object Qdirection;
211 Lisp_Object Qreverse_direction_charset;
212 Lisp_Object Qleading_byte;
213 Lisp_Object Qshort_name, Qlong_name;
229 Qjapanese_jisx0208_1978,
247 Lisp_Object Ql2r, Qr2l;
249 Lisp_Object Vcharset_hash_table;
251 static Charset_ID next_allocated_1_byte_leading_byte;
252 static Charset_ID next_allocated_2_byte_leading_byte;
254 /* Composite characters are characters constructed by overstriking two
255 or more regular characters.
257 1) The old Mule implementation involves storing composite characters
258 in a buffer as a tag followed by all of the actual characters
259 used to make up the composite character. I think this is a bad
260 idea; it greatly complicates code that wants to handle strings
261 one character at a time because it has to deal with the possibility
262 of great big ungainly characters. It's much more reasonable to
263 simply store an index into a table of composite characters.
265 2) The current implementation only allows for 16,384 separate
266 composite characters over the lifetime of the XEmacs process.
267 This could become a potential problem if the user
268 edited lots of different files that use composite characters.
269 Due to FSF bogosity, increasing the number of allowable
270 composite characters under Mule would decrease the number
271 of possible faces that can exist. Mule already has shrunk
272 this to 2048, and further shrinkage would become uncomfortable.
273 No such problems exist in XEmacs.
275 Composite characters could be represented as 0x80 C1 C2 C3,
276 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
277 for slightly under 2^20 (one million) composite characters
278 over the XEmacs process lifetime, and you only need to
279 increase the size of a Mule character from 19 to 21 bits.
280 Or you could use 0x80 C1 C2 C3 C4, allowing for about
281 85 million (slightly over 2^26) composite characters. */
284 /************************************************************************/
285 /* Basic Emchar functions */
286 /************************************************************************/
288 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
289 string in STR. Returns the number of bytes stored.
290 Do not call this directly. Use the macro set_charptr_emchar() instead.
294 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
309 else if ( c <= 0x7ff )
311 *p++ = (c >> 6) | 0xc0;
312 *p++ = (c & 0x3f) | 0x80;
314 else if ( c <= 0xffff )
316 *p++ = (c >> 12) | 0xe0;
317 *p++ = ((c >> 6) & 0x3f) | 0x80;
318 *p++ = (c & 0x3f) | 0x80;
320 else if ( c <= 0x1fffff )
322 *p++ = (c >> 18) | 0xf0;
323 *p++ = ((c >> 12) & 0x3f) | 0x80;
324 *p++ = ((c >> 6) & 0x3f) | 0x80;
325 *p++ = (c & 0x3f) | 0x80;
327 else if ( c <= 0x3ffffff )
329 *p++ = (c >> 24) | 0xf8;
330 *p++ = ((c >> 18) & 0x3f) | 0x80;
331 *p++ = ((c >> 12) & 0x3f) | 0x80;
332 *p++ = ((c >> 6) & 0x3f) | 0x80;
333 *p++ = (c & 0x3f) | 0x80;
337 *p++ = (c >> 30) | 0xfc;
338 *p++ = ((c >> 24) & 0x3f) | 0x80;
339 *p++ = ((c >> 18) & 0x3f) | 0x80;
340 *p++ = ((c >> 12) & 0x3f) | 0x80;
341 *p++ = ((c >> 6) & 0x3f) | 0x80;
342 *p++ = (c & 0x3f) | 0x80;
345 BREAKUP_CHAR (c, charset, c1, c2);
346 lb = CHAR_LEADING_BYTE (c);
347 if (LEADING_BYTE_PRIVATE_P (lb))
348 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
350 if (EQ (charset, Vcharset_control_1))
359 /* Return the first character from a Mule-encoded string in STR,
360 assuming it's non-ASCII. Do not call this directly.
361 Use the macro charptr_emchar() instead. */
364 non_ascii_charptr_emchar (CONST Bufbyte *str)
377 else if ( b >= 0xf8 )
382 else if ( b >= 0xf0 )
387 else if ( b >= 0xe0 )
392 else if ( b >= 0xc0 )
402 for( ; len > 0; len-- )
405 ch = ( ch << 6 ) | ( b & 0x3f );
409 Bufbyte i0 = *str, i1, i2 = 0;
412 if (i0 == LEADING_BYTE_CONTROL_1)
413 return (Emchar) (*++str - 0x20);
415 if (LEADING_BYTE_PREFIX_P (i0))
420 charset = CHARSET_BY_LEADING_BYTE (i0);
421 if (XCHARSET_DIMENSION (charset) == 2)
424 return MAKE_CHAR (charset, i1, i2);
428 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
429 Do not call this directly. Use the macro valid_char_p() instead. */
433 non_ascii_valid_char_p (Emchar ch)
437 /* Must have only lowest 19 bits set */
441 f1 = CHAR_FIELD1 (ch);
442 f2 = CHAR_FIELD2 (ch);
443 f3 = CHAR_FIELD3 (ch);
449 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
450 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
451 f2 > MAX_CHAR_FIELD2_PRIVATE)
456 if (f3 != 0x20 && f3 != 0x7F)
460 NOTE: This takes advantage of the fact that
461 FIELD2_TO_OFFICIAL_LEADING_BYTE and
462 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
464 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
465 return (XCHARSET_CHARS (charset) == 96);
471 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
472 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
473 f1 > MAX_CHAR_FIELD1_PRIVATE)
475 if (f2 < 0x20 || f3 < 0x20)
478 #ifdef ENABLE_COMPOSITE_CHARS
479 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
481 if (UNBOUNDP (Fgethash (make_int (ch),
482 Vcomposite_char_char2string_hash_table,
487 #endif /* ENABLE_COMPOSITE_CHARS */
489 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
492 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
494 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
497 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
499 return (XCHARSET_CHARS (charset) == 96);
505 /************************************************************************/
506 /* Basic string functions */
507 /************************************************************************/
509 /* Copy the character pointed to by PTR into STR, assuming it's
510 non-ASCII. Do not call this directly. Use the macro
511 charptr_copy_char() instead. */
514 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
516 Bufbyte *strptr = str;
518 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
520 /* Notice fallthrough. */
522 case 6: *++strptr = *ptr++;
523 case 5: *++strptr = *ptr++;
525 case 4: *++strptr = *ptr++;
526 case 3: *++strptr = *ptr++;
527 case 2: *++strptr = *ptr;
532 return strptr + 1 - str;
536 /************************************************************************/
537 /* streams of Emchars */
538 /************************************************************************/
540 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
541 The functions below are not meant to be called directly; use
542 the macros in insdel.h. */
545 Lstream_get_emchar_1 (Lstream *stream, int ch)
547 Bufbyte str[MAX_EMCHAR_LEN];
548 Bufbyte *strptr = str;
550 str[0] = (Bufbyte) ch;
551 switch (REP_BYTES_BY_FIRST_BYTE (ch))
553 /* Notice fallthrough. */
556 ch = Lstream_getc (stream);
558 *++strptr = (Bufbyte) ch;
560 ch = Lstream_getc (stream);
562 *++strptr = (Bufbyte) ch;
565 ch = Lstream_getc (stream);
567 *++strptr = (Bufbyte) ch;
569 ch = Lstream_getc (stream);
571 *++strptr = (Bufbyte) ch;
573 ch = Lstream_getc (stream);
575 *++strptr = (Bufbyte) ch;
580 return charptr_emchar (str);
584 Lstream_fput_emchar (Lstream *stream, Emchar ch)
586 Bufbyte str[MAX_EMCHAR_LEN];
587 Bytecount len = set_charptr_emchar (str, ch);
588 return Lstream_write (stream, str, len);
592 Lstream_funget_emchar (Lstream *stream, Emchar ch)
594 Bufbyte str[MAX_EMCHAR_LEN];
595 Bytecount len = set_charptr_emchar (str, ch);
596 Lstream_unread (stream, str, len);
600 /************************************************************************/
602 /************************************************************************/
605 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
607 struct Lisp_Charset *cs = XCHARSET (obj);
609 markobj (cs->short_name);
610 markobj (cs->long_name);
611 markobj (cs->doc_string);
612 markobj (cs->registry);
613 markobj (cs->ccl_program);
615 markobj (cs->decoding_table);
621 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
623 struct Lisp_Charset *cs = XCHARSET (obj);
627 error ("printing unreadable object #<charset %s 0x%x>",
628 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
631 write_c_string ("#<charset ", printcharfun);
632 print_internal (CHARSET_NAME (cs), printcharfun, 0);
633 write_c_string (" ", printcharfun);
634 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
635 write_c_string (" ", printcharfun);
636 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
637 write_c_string (" ", printcharfun);
638 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
639 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
640 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
641 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
642 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
644 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
645 CHARSET_COLUMNS (cs),
646 CHARSET_GRAPHIC (cs),
648 write_c_string (buf, printcharfun);
649 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
650 sprintf (buf, " 0x%x>", cs->header.uid);
651 write_c_string (buf, printcharfun);
654 static const struct lrecord_description charset_description[] = {
655 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
659 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
660 mark_charset, print_charset, 0, 0, 0,
662 struct Lisp_Charset);
663 /* Make a new charset. */
666 make_charset (Charset_ID id, Lisp_Object name,
667 unsigned char type, unsigned char columns, unsigned char graphic,
668 Bufbyte final, unsigned char direction, Lisp_Object short_name,
669 Lisp_Object long_name, Lisp_Object doc,
671 Lisp_Object decoding_table,
672 Emchar ucs_min, Emchar ucs_max,
673 Emchar code_offset, unsigned char byte_offset)
676 struct Lisp_Charset *cs =
677 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
678 XSETCHARSET (obj, cs);
680 CHARSET_ID (cs) = id;
681 CHARSET_NAME (cs) = name;
682 CHARSET_SHORT_NAME (cs) = short_name;
683 CHARSET_LONG_NAME (cs) = long_name;
684 CHARSET_DIRECTION (cs) = direction;
685 CHARSET_TYPE (cs) = type;
686 CHARSET_COLUMNS (cs) = columns;
687 CHARSET_GRAPHIC (cs) = graphic;
688 CHARSET_FINAL (cs) = final;
689 CHARSET_DOC_STRING (cs) = doc;
690 CHARSET_REGISTRY (cs) = reg;
691 CHARSET_CCL_PROGRAM (cs) = Qnil;
692 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
694 CHARSET_DECODING_TABLE(cs) = decoding_table;
695 CHARSET_UCS_MIN(cs) = ucs_min;
696 CHARSET_UCS_MAX(cs) = ucs_max;
697 CHARSET_CODE_OFFSET(cs) = code_offset;
698 CHARSET_BYTE_OFFSET(cs) = byte_offset;
701 switch ( CHARSET_TYPE (cs) )
703 case CHARSET_TYPE_94:
704 CHARSET_DIMENSION (cs) = 1;
705 CHARSET_CHARS (cs) = 94;
707 if (!EQ (decoding_table, Qnil))
710 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
711 for (i = 0; i < 94; i++)
713 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
716 put_byte_from_character_table (XCHAR (c), i + 33,
717 CHARSET_TO_BYTE1_TABLE(cs));
721 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
722 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
725 case CHARSET_TYPE_96:
726 CHARSET_DIMENSION (cs) = 1;
727 CHARSET_CHARS (cs) = 96;
729 if (!EQ (decoding_table, Qnil))
732 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
733 for (i = 0; i < 96; i++)
735 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
738 put_byte_from_character_table (XCHAR (c), i + 32,
739 CHARSET_TO_BYTE1_TABLE(cs));
743 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
744 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
747 case CHARSET_TYPE_94X94:
748 CHARSET_DIMENSION (cs) = 2;
749 CHARSET_CHARS (cs) = 94;
751 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
752 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
755 case CHARSET_TYPE_96X96:
756 CHARSET_DIMENSION (cs) = 2;
757 CHARSET_CHARS (cs) = 96;
759 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
760 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
764 case CHARSET_TYPE_128X128:
765 CHARSET_DIMENSION (cs) = 2;
766 CHARSET_CHARS (cs) = 128;
768 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
769 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
772 case CHARSET_TYPE_256X256:
773 CHARSET_DIMENSION (cs) = 2;
774 CHARSET_CHARS (cs) = 256;
776 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
777 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
784 if (id == LEADING_BYTE_ASCII)
785 CHARSET_REP_BYTES (cs) = 1;
787 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
789 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
794 /* some charsets do not have final characters. This includes
795 ASCII, Control-1, Composite, and the two faux private
798 if (code_offset == 0)
800 assert (NILP (charset_by_attributes[type][final]));
801 charset_by_attributes[type][final] = obj;
804 assert (NILP (charset_by_attributes[type][final][direction]));
805 charset_by_attributes[type][final][direction] = obj;
809 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
810 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
813 /* official leading byte */
814 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
817 /* Some charsets are "faux" and don't have names or really exist at
818 all except in the leading-byte table. */
820 Fputhash (name, obj, Vcharset_hash_table);
825 get_unallocated_leading_byte (int dimension)
831 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
834 lb = next_allocated_1_byte_leading_byte++;
838 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
841 lb = next_allocated_2_byte_leading_byte++;
846 ("No more character sets free for this dimension",
847 make_int (dimension));
854 charset_get_byte1 (Lisp_Object charset, Emchar ch)
856 Emchar_to_byte_table* table;
859 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
860 return get_byte_from_character_table (ch, table);
861 else if ((XCHARSET_UCS_MIN (charset) <= ch)
862 && (ch <= XCHARSET_UCS_MAX (charset)))
863 return (ch - XCHARSET_UCS_MIN (charset)
864 + XCHARSET_CODE_OFFSET (charset))
865 / (XCHARSET_DIMENSION (charset) == 1 ?
868 XCHARSET_DIMENSION (charset) == 2 ?
869 XCHARSET_CHARS (charset)
871 XCHARSET_DIMENSION (charset) == 3 ?
872 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
874 XCHARSET_CHARS (charset)
875 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
876 + XCHARSET_BYTE_OFFSET (charset);
877 else if (XCHARSET_CODE_OFFSET (charset) == 0)
879 if (XCHARSET_DIMENSION (charset) == 1)
881 if (XCHARSET_CHARS (charset) == 94)
883 if (((d = ch - (MIN_CHAR_94
884 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
888 else if (XCHARSET_CHARS (charset) == 96)
890 if (((d = ch - (MIN_CHAR_96
891 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
898 else if (XCHARSET_DIMENSION (charset) == 2)
900 if (XCHARSET_CHARS (charset) == 94)
902 if (((d = ch - (MIN_CHAR_94x94
903 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
906 return (d / 94) + 33;
908 else if (XCHARSET_CHARS (charset) == 96)
910 if (((d = ch - (MIN_CHAR_96x96
911 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
914 return (d / 96) + 32;
922 charset_get_byte2 (Lisp_Object charset, Emchar ch)
924 if (XCHARSET_DIMENSION (charset) == 1)
928 Emchar_to_byte_table* table;
930 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
931 return get_byte_from_character_table (ch, table);
932 else if ((XCHARSET_UCS_MIN (charset) <= ch)
933 && (ch <= XCHARSET_UCS_MAX (charset)))
934 return ((ch - XCHARSET_UCS_MIN (charset)
935 + XCHARSET_CODE_OFFSET (charset))
936 / (XCHARSET_DIMENSION (charset) == 2 ?
939 XCHARSET_DIMENSION (charset) == 3 ?
940 XCHARSET_CHARS (charset)
942 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
943 % XCHARSET_CHARS (charset)
944 + XCHARSET_BYTE_OFFSET (charset);
945 else if (XCHARSET_CHARS (charset) == 94)
946 return (MIN_CHAR_94x94
947 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
948 && (ch < MIN_CHAR_94x94
949 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
950 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
951 else /* if (XCHARSET_CHARS (charset) == 96) */
952 return (MIN_CHAR_96x96
953 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
954 && (ch < MIN_CHAR_96x96
955 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
956 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
960 Lisp_Object Vdefault_coded_charset_priority_list;
964 /************************************************************************/
965 /* Basic charset Lisp functions */
966 /************************************************************************/
968 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
969 Return non-nil if OBJECT is a charset.
973 return CHARSETP (object) ? Qt : Qnil;
976 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
977 Retrieve the charset of the given name.
978 If CHARSET-OR-NAME is a charset object, it is simply returned.
979 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
980 nil is returned. Otherwise the associated charset object is returned.
984 if (CHARSETP (charset_or_name))
985 return charset_or_name;
987 CHECK_SYMBOL (charset_or_name);
988 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
991 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
992 Retrieve the charset of the given name.
993 Same as `find-charset' except an error is signalled if there is no such
994 charset instead of returning nil.
998 Lisp_Object charset = Ffind_charset (name);
1001 signal_simple_error ("No such charset", name);
1005 /* We store the charsets in hash tables with the names as the key and the
1006 actual charset object as the value. Occasionally we need to use them
1007 in a list format. These routines provide us with that. */
1008 struct charset_list_closure
1010 Lisp_Object *charset_list;
1014 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1015 void *charset_list_closure)
1017 /* This function can GC */
1018 struct charset_list_closure *chcl =
1019 (struct charset_list_closure*) charset_list_closure;
1020 Lisp_Object *charset_list = chcl->charset_list;
1022 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1026 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1027 Return a list of the names of all defined charsets.
1031 Lisp_Object charset_list = Qnil;
1032 struct gcpro gcpro1;
1033 struct charset_list_closure charset_list_closure;
1035 GCPRO1 (charset_list);
1036 charset_list_closure.charset_list = &charset_list;
1037 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1038 &charset_list_closure);
1041 return charset_list;
1044 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1045 Return the name of the given charset.
1049 return XCHARSET_NAME (Fget_charset (charset));
1052 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1053 Define a new character set.
1054 This function is for use with Mule support.
1055 NAME is a symbol, the name by which the character set is normally referred.
1056 DOC-STRING is a string describing the character set.
1057 PROPS is a property list, describing the specific nature of the
1058 character set. Recognized properties are:
1060 'short-name Short version of the charset name (ex: Latin-1)
1061 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1062 'registry A regular expression matching the font registry field for
1064 'dimension Number of octets used to index a character in this charset.
1065 Either 1 or 2. Defaults to 1.
1066 'columns Number of columns used to display a character in this charset.
1067 Only used in TTY mode. (Under X, the actual width of a
1068 character can be derived from the font used to display the
1069 characters.) If unspecified, defaults to the dimension
1070 (this is almost always the correct value).
1071 'chars Number of characters in each dimension (94 or 96).
1072 Defaults to 94. Note that if the dimension is 2, the
1073 character set thus described is 94x94 or 96x96.
1074 'final Final byte of ISO 2022 escape sequence. Must be
1075 supplied. Each combination of (DIMENSION, CHARS) defines a
1076 separate namespace for final bytes. Note that ISO
1077 2022 restricts the final byte to the range
1078 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1079 dimension == 2. Note also that final bytes in the range
1080 0x30 - 0x3F are reserved for user-defined (not official)
1082 'graphic 0 (use left half of font on output) or 1 (use right half
1083 of font on output). Defaults to 0. For example, for
1084 a font whose registry is ISO8859-1, the left half
1085 (octets 0x20 - 0x7F) is the `ascii' character set, while
1086 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1087 character set. With 'graphic set to 0, the octets
1088 will have their high bit cleared; with it set to 1,
1089 the octets will have their high bit set.
1090 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1092 'ccl-program A compiled CCL program used to convert a character in
1093 this charset into an index into the font. This is in
1094 addition to the 'graphic property. The CCL program
1095 is passed the octets of the character, with the high
1096 bit cleared and set depending upon whether the value
1097 of the 'graphic property is 0 or 1.
1099 (name, doc_string, props))
1101 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1102 int direction = CHARSET_LEFT_TO_RIGHT;
1104 Lisp_Object registry = Qnil;
1105 Lisp_Object charset;
1106 Lisp_Object rest, keyword, value;
1107 Lisp_Object ccl_program = Qnil;
1108 Lisp_Object short_name = Qnil, long_name = Qnil;
1110 CHECK_SYMBOL (name);
1111 if (!NILP (doc_string))
1112 CHECK_STRING (doc_string);
1114 charset = Ffind_charset (name);
1115 if (!NILP (charset))
1116 signal_simple_error ("Cannot redefine existing charset", name);
1118 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1120 if (EQ (keyword, Qshort_name))
1122 CHECK_STRING (value);
1126 if (EQ (keyword, Qlong_name))
1128 CHECK_STRING (value);
1132 else if (EQ (keyword, Qdimension))
1135 dimension = XINT (value);
1136 if (dimension < 1 || dimension > 2)
1137 signal_simple_error ("Invalid value for 'dimension", value);
1140 else if (EQ (keyword, Qchars))
1143 chars = XINT (value);
1144 if (chars != 94 && chars != 96)
1145 signal_simple_error ("Invalid value for 'chars", value);
1148 else if (EQ (keyword, Qcolumns))
1151 columns = XINT (value);
1152 if (columns != 1 && columns != 2)
1153 signal_simple_error ("Invalid value for 'columns", value);
1156 else if (EQ (keyword, Qgraphic))
1159 graphic = XINT (value);
1160 if (graphic < 0 || graphic > 1)
1161 signal_simple_error ("Invalid value for 'graphic", value);
1164 else if (EQ (keyword, Qregistry))
1166 CHECK_STRING (value);
1170 else if (EQ (keyword, Qdirection))
1172 if (EQ (value, Ql2r))
1173 direction = CHARSET_LEFT_TO_RIGHT;
1174 else if (EQ (value, Qr2l))
1175 direction = CHARSET_RIGHT_TO_LEFT;
1177 signal_simple_error ("Invalid value for 'direction", value);
1180 else if (EQ (keyword, Qfinal))
1182 CHECK_CHAR_COERCE_INT (value);
1183 final = XCHAR (value);
1184 if (final < '0' || final > '~')
1185 signal_simple_error ("Invalid value for 'final", value);
1188 else if (EQ (keyword, Qccl_program))
1190 CHECK_VECTOR (value);
1191 ccl_program = value;
1195 signal_simple_error ("Unrecognized property", keyword);
1199 error ("'final must be specified");
1200 if (dimension == 2 && final > 0x5F)
1202 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1206 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1208 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1210 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1211 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1213 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1220 /* id = CHARSET_ID_OFFSET_94 + final; */
1221 id = get_unallocated_leading_byte (dimension);
1223 else if (chars == 96)
1225 id = get_unallocated_leading_byte (dimension);
1232 else if (dimension == 2)
1236 id = get_unallocated_leading_byte (dimension);
1238 else if (chars == 96)
1240 id = get_unallocated_leading_byte (dimension);
1252 id = get_unallocated_leading_byte (dimension);
1255 if (NILP (doc_string))
1256 doc_string = build_string ("");
1258 if (NILP (registry))
1259 registry = build_string ("");
1261 if (NILP (short_name))
1262 XSETSTRING (short_name, XSYMBOL (name)->name);
1264 if (NILP (long_name))
1265 long_name = doc_string;
1268 columns = dimension;
1269 charset = make_charset (id, name, type, columns, graphic,
1270 final, direction, short_name, long_name,
1271 doc_string, registry,
1273 if (!NILP (ccl_program))
1274 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1278 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1280 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1281 NEW-NAME is the name of the new charset. Return the new charset.
1283 (charset, new_name))
1285 Lisp_Object new_charset = Qnil;
1286 int id, dimension, columns, graphic, final;
1287 int direction, type;
1288 Lisp_Object registry, doc_string, short_name, long_name;
1289 struct Lisp_Charset *cs;
1291 charset = Fget_charset (charset);
1292 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1293 signal_simple_error ("Charset already has reverse-direction charset",
1296 CHECK_SYMBOL (new_name);
1297 if (!NILP (Ffind_charset (new_name)))
1298 signal_simple_error ("Cannot redefine existing charset", new_name);
1300 cs = XCHARSET (charset);
1302 type = CHARSET_TYPE (cs);
1303 columns = CHARSET_COLUMNS (cs);
1304 dimension = CHARSET_DIMENSION (cs);
1305 id = get_unallocated_leading_byte (dimension);
1307 graphic = CHARSET_GRAPHIC (cs);
1308 final = CHARSET_FINAL (cs);
1309 direction = CHARSET_RIGHT_TO_LEFT;
1310 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1311 direction = CHARSET_LEFT_TO_RIGHT;
1312 doc_string = CHARSET_DOC_STRING (cs);
1313 short_name = CHARSET_SHORT_NAME (cs);
1314 long_name = CHARSET_LONG_NAME (cs);
1315 registry = CHARSET_REGISTRY (cs);
1317 new_charset = make_charset (id, new_name, type, columns,
1318 graphic, final, direction, short_name, long_name,
1319 doc_string, registry,
1321 CHARSET_DECODING_TABLE(cs),
1322 CHARSET_UCS_MIN(cs),
1323 CHARSET_UCS_MAX(cs),
1324 CHARSET_CODE_OFFSET(cs),
1325 CHARSET_BYTE_OFFSET(cs)
1331 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1332 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1337 /* #### Reverse direction charsets not yet implemented. */
1339 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1341 Return the reverse-direction charset parallel to CHARSET, if any.
1342 This is the charset with the same properties (in particular, the same
1343 dimension, number of characters per dimension, and final byte) as
1344 CHARSET but whose characters are displayed in the opposite direction.
1348 charset = Fget_charset (charset);
1349 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1353 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1354 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1355 If DIRECTION is omitted, both directions will be checked (left-to-right
1356 will be returned if character sets exist for both directions).
1358 (dimension, chars, final, direction))
1360 int dm, ch, fi, di = -1;
1362 Lisp_Object obj = Qnil;
1364 CHECK_INT (dimension);
1365 dm = XINT (dimension);
1366 if (dm < 1 || dm > 2)
1367 signal_simple_error ("Invalid value for DIMENSION", dimension);
1371 if (ch != 94 && ch != 96)
1372 signal_simple_error ("Invalid value for CHARS", chars);
1374 CHECK_CHAR_COERCE_INT (final);
1376 if (fi < '0' || fi > '~')
1377 signal_simple_error ("Invalid value for FINAL", final);
1379 if (EQ (direction, Ql2r))
1380 di = CHARSET_LEFT_TO_RIGHT;
1381 else if (EQ (direction, Qr2l))
1382 di = CHARSET_RIGHT_TO_LEFT;
1383 else if (!NILP (direction))
1384 signal_simple_error ("Invalid value for DIRECTION", direction);
1386 if (dm == 2 && fi > 0x5F)
1388 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1391 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1393 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1397 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1399 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1402 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1405 return XCHARSET_NAME (obj);
1409 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1410 Return short name of CHARSET.
1414 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1417 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1418 Return long name of CHARSET.
1422 return XCHARSET_LONG_NAME (Fget_charset (charset));
1425 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1426 Return description of CHARSET.
1430 return XCHARSET_DOC_STRING (Fget_charset (charset));
1433 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1434 Return dimension of CHARSET.
1438 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1441 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1442 Return property PROP of CHARSET.
1443 Recognized properties are those listed in `make-charset', as well as
1444 'name and 'doc-string.
1448 struct Lisp_Charset *cs;
1450 charset = Fget_charset (charset);
1451 cs = XCHARSET (charset);
1453 CHECK_SYMBOL (prop);
1454 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1455 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1456 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1457 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1458 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1459 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1460 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1461 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1462 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1463 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1464 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1465 if (EQ (prop, Qdirection))
1466 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1467 if (EQ (prop, Qreverse_direction_charset))
1469 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1473 return XCHARSET_NAME (obj);
1475 signal_simple_error ("Unrecognized charset property name", prop);
1476 return Qnil; /* not reached */
1479 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1480 Return charset identification number of CHARSET.
1484 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1487 /* #### We need to figure out which properties we really want to
1490 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1491 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1493 (charset, ccl_program))
1495 charset = Fget_charset (charset);
1496 CHECK_VECTOR (ccl_program);
1497 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1502 invalidate_charset_font_caches (Lisp_Object charset)
1504 /* Invalidate font cache entries for charset on all devices. */
1505 Lisp_Object devcons, concons, hash_table;
1506 DEVICE_LOOP_NO_BREAK (devcons, concons)
1508 struct device *d = XDEVICE (XCAR (devcons));
1509 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1510 if (!UNBOUNDP (hash_table))
1511 Fclrhash (hash_table);
1515 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1516 Set the 'registry property of CHARSET to REGISTRY.
1518 (charset, registry))
1520 charset = Fget_charset (charset);
1521 CHECK_STRING (registry);
1522 XCHARSET_REGISTRY (charset) = registry;
1523 invalidate_charset_font_caches (charset);
1524 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1529 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1530 Return mapping-table of CHARSET.
1534 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1537 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1538 Set mapping-table of CHARSET to TABLE.
1542 struct Lisp_Charset *cs;
1543 Emchar_to_byte_table* old_byte1_table;
1544 Emchar_to_byte_table* old_byte2_table;
1546 charset = Fget_charset (charset);
1547 CHECK_VECTOR (table);
1549 cs = XCHARSET (charset);
1550 CHARSET_DECODING_TABLE(cs) = table;
1551 old_byte1_table = CHARSET_TO_BYTE1_TABLE(cs);
1552 old_byte2_table = CHARSET_TO_BYTE2_TABLE(cs);
1553 switch (CHARSET_TYPE (cs))
1555 case CHARSET_TYPE_94:
1556 if (!EQ (table, Qnil))
1559 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1560 for (i = 0; i < 94; i++)
1562 Lisp_Object c = XVECTOR_DATA(table)[i];
1565 put_byte_from_character_table (XCHAR (c), i + 33,
1566 CHARSET_TO_BYTE1_TABLE(cs));
1570 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1571 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1573 case CHARSET_TYPE_96:
1574 if (!EQ (table, Qnil))
1577 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1578 for (i = 0; i < 96; i++)
1580 Lisp_Object c = XVECTOR_DATA(table)[i];
1583 put_byte_from_character_table (XCHAR (c), i + 32,
1584 CHARSET_TO_BYTE1_TABLE(cs));
1588 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1589 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1591 case CHARSET_TYPE_94X94:
1592 if (!EQ (table, Qnil))
1596 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1597 CHARSET_TO_BYTE2_TABLE(cs) = make_byte_from_character_table();
1598 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1600 Lisp_Object v = XVECTOR_DATA(table)[i];
1606 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1608 Lisp_Object c = XVECTOR_DATA(v)[j];
1612 put_byte_from_character_table
1613 (XCHAR (c), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1614 put_byte_from_character_table
1615 (XCHAR (c), j + 33, CHARSET_TO_BYTE2_TABLE(cs));
1620 put_byte_from_character_table
1621 (XCHAR (v), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1626 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1627 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1630 case CHARSET_TYPE_96X96:
1631 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1632 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1634 case CHARSET_TYPE_128X128:
1635 CHARSET_DIMENSION (cs) = 2;
1636 CHARSET_CHARS (cs) = 128;
1637 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1638 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1640 case CHARSET_TYPE_256X256:
1641 CHARSET_DIMENSION (cs) = 2;
1642 CHARSET_CHARS (cs) = 256;
1643 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1644 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1647 if (old_byte1_table != NULL)
1648 destroy_byte_from_character_table (old_byte1_table);
1649 if (old_byte2_table != NULL)
1650 destroy_byte_from_character_table (old_byte2_table);
1656 /************************************************************************/
1657 /* Lisp primitives for working with characters */
1658 /************************************************************************/
1660 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1661 Make a character from CHARSET and octets ARG1 and ARG2.
1662 ARG2 is required only for characters from two-dimensional charsets.
1663 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1664 character s with caron.
1666 (charset, arg1, arg2))
1668 struct Lisp_Charset *cs;
1670 int lowlim, highlim;
1672 charset = Fget_charset (charset);
1673 cs = XCHARSET (charset);
1675 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1676 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1678 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1680 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1681 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1684 /* It is useful (and safe, according to Olivier Galibert) to strip
1685 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1686 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1687 Latin 2 code of the character. */
1695 if (a1 < lowlim || a1 > highlim)
1696 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1698 if (CHARSET_DIMENSION (cs) == 1)
1702 ("Charset is of dimension one; second octet must be nil", arg2);
1703 return make_char (MAKE_CHAR (charset, a1, 0));
1712 a2 = XINT (arg2) & 0x7f;
1714 if (a2 < lowlim || a2 > highlim)
1715 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1717 return make_char (MAKE_CHAR (charset, a1, a2));
1720 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1721 Return the character set of char CH.
1725 CHECK_CHAR_COERCE_INT (ch);
1727 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1730 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1731 Return list of charset and one or two position-codes of CHAR.
1735 /* This function can GC */
1736 struct gcpro gcpro1, gcpro2;
1737 Lisp_Object charset = Qnil;
1738 Lisp_Object rc = Qnil;
1741 GCPRO2 (charset, rc);
1742 CHECK_CHAR_COERCE_INT (character);
1744 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1746 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1748 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1752 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1760 #ifdef ENABLE_COMPOSITE_CHARS
1761 /************************************************************************/
1762 /* composite character functions */
1763 /************************************************************************/
1766 lookup_composite_char (Bufbyte *str, int len)
1768 Lisp_Object lispstr = make_string (str, len);
1769 Lisp_Object ch = Fgethash (lispstr,
1770 Vcomposite_char_string2char_hash_table,
1776 if (composite_char_row_next >= 128)
1777 signal_simple_error ("No more composite chars available", lispstr);
1778 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1779 composite_char_col_next);
1780 Fputhash (make_char (emch), lispstr,
1781 Vcomposite_char_char2string_hash_table);
1782 Fputhash (lispstr, make_char (emch),
1783 Vcomposite_char_string2char_hash_table);
1784 composite_char_col_next++;
1785 if (composite_char_col_next >= 128)
1787 composite_char_col_next = 32;
1788 composite_char_row_next++;
1797 composite_char_string (Emchar ch)
1799 Lisp_Object str = Fgethash (make_char (ch),
1800 Vcomposite_char_char2string_hash_table,
1802 assert (!UNBOUNDP (str));
1806 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1807 Convert a string into a single composite character.
1808 The character is the result of overstriking all the characters in
1813 CHECK_STRING (string);
1814 return make_char (lookup_composite_char (XSTRING_DATA (string),
1815 XSTRING_LENGTH (string)));
1818 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1819 Return a string of the characters comprising a composite character.
1827 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1828 signal_simple_error ("Must be composite char", ch);
1829 return composite_char_string (emch);
1831 #endif /* ENABLE_COMPOSITE_CHARS */
1834 /************************************************************************/
1835 /* initialization */
1836 /************************************************************************/
1839 syms_of_mule_charset (void)
1841 DEFSUBR (Fcharsetp);
1842 DEFSUBR (Ffind_charset);
1843 DEFSUBR (Fget_charset);
1844 DEFSUBR (Fcharset_list);
1845 DEFSUBR (Fcharset_name);
1846 DEFSUBR (Fmake_charset);
1847 DEFSUBR (Fmake_reverse_direction_charset);
1848 /* DEFSUBR (Freverse_direction_charset); */
1849 DEFSUBR (Fcharset_from_attributes);
1850 DEFSUBR (Fcharset_short_name);
1851 DEFSUBR (Fcharset_long_name);
1852 DEFSUBR (Fcharset_description);
1853 DEFSUBR (Fcharset_dimension);
1854 DEFSUBR (Fcharset_property);
1855 DEFSUBR (Fcharset_id);
1856 DEFSUBR (Fset_charset_ccl_program);
1857 DEFSUBR (Fset_charset_registry);
1859 DEFSUBR (Fcharset_mapping_table);
1860 DEFSUBR (Fset_charset_mapping_table);
1863 DEFSUBR (Fmake_char);
1864 DEFSUBR (Fchar_charset);
1865 DEFSUBR (Fsplit_char);
1867 #ifdef ENABLE_COMPOSITE_CHARS
1868 DEFSUBR (Fmake_composite_char);
1869 DEFSUBR (Fcomposite_char_string);
1872 defsymbol (&Qcharsetp, "charsetp");
1873 defsymbol (&Qregistry, "registry");
1874 defsymbol (&Qfinal, "final");
1875 defsymbol (&Qgraphic, "graphic");
1876 defsymbol (&Qdirection, "direction");
1877 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1878 defsymbol (&Qshort_name, "short-name");
1879 defsymbol (&Qlong_name, "long-name");
1881 defsymbol (&Ql2r, "l2r");
1882 defsymbol (&Qr2l, "r2l");
1884 /* Charsets, compatible with FSF 20.3
1885 Naming convention is Script-Charset[-Edition] */
1886 defsymbol (&Qascii, "ascii");
1887 defsymbol (&Qcontrol_1, "control-1");
1888 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1889 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1890 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1891 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1892 defsymbol (&Qthai_tis620, "thai-tis620");
1893 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1894 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1895 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1896 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1897 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1898 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1899 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1900 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1901 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1902 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1903 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1904 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1905 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1906 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1908 defsymbol (&Qucs_bmp, "ucs-bmp");
1909 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
1910 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
1911 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
1912 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
1914 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1915 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1917 defsymbol (&Qcomposite, "composite");
1921 vars_of_mule_charset (void)
1928 /* Table of charsets indexed by leading byte. */
1929 for (i = 0; i < countof (charset_by_leading_byte); i++)
1930 charset_by_leading_byte[i] = Qnil;
1933 /* Table of charsets indexed by type/final-byte. */
1934 for (i = 0; i < countof (charset_by_attributes); i++)
1935 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1936 charset_by_attributes[i][j] = Qnil;
1938 /* Table of charsets indexed by type/final-byte/direction. */
1939 for (i = 0; i < countof (charset_by_attributes); i++)
1940 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1941 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1942 charset_by_attributes[i][j][k] = Qnil;
1945 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1947 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1949 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1953 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1954 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1955 Leading-code of private TYPE9N charset of column-width 1.
1957 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1961 Vutf_2000_version = build_string("0.8 (Kami)");
1962 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1963 Version number of UTF-2000.
1966 Vdefault_coded_charset_priority_list = Qnil;
1967 DEFVAR_LISP ("default-coded-charset-priority-list",
1968 &Vdefault_coded_charset_priority_list /*
1969 Default order of preferred coded-character-set.
1975 complex_vars_of_mule_charset (void)
1977 staticpro (&Vcharset_hash_table);
1978 Vcharset_hash_table =
1979 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1981 /* Predefined character sets. We store them into variables for
1986 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
1987 CHARSET_TYPE_256X256, 1, 0, 0,
1988 CHARSET_LEFT_TO_RIGHT,
1989 build_string ("BMP"),
1990 build_string ("BMP"),
1991 build_string ("BMP"),
1993 Qnil, 0, 0xFFFF, 0, 0);
1995 # define MIN_CHAR_THAI 0
1996 # define MAX_CHAR_THAI 0
1997 # define MIN_CHAR_GREEK 0
1998 # define MAX_CHAR_GREEK 0
1999 # define MIN_CHAR_HEBREW 0
2000 # define MAX_CHAR_HEBREW 0
2001 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2002 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2003 # define MIN_CHAR_CYRILLIC 0
2004 # define MAX_CHAR_CYRILLIC 0
2007 make_charset (LEADING_BYTE_ASCII, Qascii,
2008 CHARSET_TYPE_94, 1, 0, 'B',
2009 CHARSET_LEFT_TO_RIGHT,
2010 build_string ("ASCII"),
2011 build_string ("ASCII)"),
2012 build_string ("ASCII (ISO646 IRV)"),
2013 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2014 Qnil, 0, 0x7F, 0, 0);
2015 Vcharset_control_1 =
2016 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2017 CHARSET_TYPE_94, 1, 1, 0,
2018 CHARSET_LEFT_TO_RIGHT,
2019 build_string ("C1"),
2020 build_string ("Control characters"),
2021 build_string ("Control characters 128-191"),
2023 Qnil, 0x80, 0x9F, 0, 0);
2024 Vcharset_latin_iso8859_1 =
2025 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2026 CHARSET_TYPE_96, 1, 1, 'A',
2027 CHARSET_LEFT_TO_RIGHT,
2028 build_string ("Latin-1"),
2029 build_string ("ISO8859-1 (Latin-1)"),
2030 build_string ("ISO8859-1 (Latin-1)"),
2031 build_string ("iso8859-1"),
2032 Qnil, 0xA0, 0xFF, 0, 32);
2033 Vcharset_latin_iso8859_2 =
2034 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2035 CHARSET_TYPE_96, 1, 1, 'B',
2036 CHARSET_LEFT_TO_RIGHT,
2037 build_string ("Latin-2"),
2038 build_string ("ISO8859-2 (Latin-2)"),
2039 build_string ("ISO8859-2 (Latin-2)"),
2040 build_string ("iso8859-2"),
2042 Vcharset_latin_iso8859_3 =
2043 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2044 CHARSET_TYPE_96, 1, 1, 'C',
2045 CHARSET_LEFT_TO_RIGHT,
2046 build_string ("Latin-3"),
2047 build_string ("ISO8859-3 (Latin-3)"),
2048 build_string ("ISO8859-3 (Latin-3)"),
2049 build_string ("iso8859-3"),
2051 Vcharset_latin_iso8859_4 =
2052 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2053 CHARSET_TYPE_96, 1, 1, 'D',
2054 CHARSET_LEFT_TO_RIGHT,
2055 build_string ("Latin-4"),
2056 build_string ("ISO8859-4 (Latin-4)"),
2057 build_string ("ISO8859-4 (Latin-4)"),
2058 build_string ("iso8859-4"),
2060 Vcharset_thai_tis620 =
2061 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2062 CHARSET_TYPE_96, 1, 1, 'T',
2063 CHARSET_LEFT_TO_RIGHT,
2064 build_string ("TIS620"),
2065 build_string ("TIS620 (Thai)"),
2066 build_string ("TIS620.2529 (Thai)"),
2067 build_string ("tis620"),
2068 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2069 Vcharset_greek_iso8859_7 =
2070 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2071 CHARSET_TYPE_96, 1, 1, 'F',
2072 CHARSET_LEFT_TO_RIGHT,
2073 build_string ("ISO8859-7"),
2074 build_string ("ISO8859-7 (Greek)"),
2075 build_string ("ISO8859-7 (Greek)"),
2076 build_string ("iso8859-7"),
2077 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2078 Vcharset_arabic_iso8859_6 =
2079 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2080 CHARSET_TYPE_96, 1, 1, 'G',
2081 CHARSET_RIGHT_TO_LEFT,
2082 build_string ("ISO8859-6"),
2083 build_string ("ISO8859-6 (Arabic)"),
2084 build_string ("ISO8859-6 (Arabic)"),
2085 build_string ("iso8859-6"),
2087 Vcharset_hebrew_iso8859_8 =
2088 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2089 CHARSET_TYPE_96, 1, 1, 'H',
2090 CHARSET_RIGHT_TO_LEFT,
2091 build_string ("ISO8859-8"),
2092 build_string ("ISO8859-8 (Hebrew)"),
2093 build_string ("ISO8859-8 (Hebrew)"),
2094 build_string ("iso8859-8"),
2095 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2096 Vcharset_katakana_jisx0201 =
2097 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2098 CHARSET_TYPE_94, 1, 1, 'I',
2099 CHARSET_LEFT_TO_RIGHT,
2100 build_string ("JISX0201 Kana"),
2101 build_string ("JISX0201.1976 (Japanese Kana)"),
2102 build_string ("JISX0201.1976 Japanese Kana"),
2103 build_string ("jisx0201\\.1976"),
2105 MIN_CHAR_HALFWIDTH_KATAKANA,
2106 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2107 Vcharset_latin_jisx0201 =
2108 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2109 CHARSET_TYPE_94, 1, 0, 'J',
2110 CHARSET_LEFT_TO_RIGHT,
2111 build_string ("JISX0201 Roman"),
2112 build_string ("JISX0201.1976 (Japanese Roman)"),
2113 build_string ("JISX0201.1976 Japanese Roman"),
2114 build_string ("jisx0201\\.1976"),
2116 Vcharset_cyrillic_iso8859_5 =
2117 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2118 CHARSET_TYPE_96, 1, 1, 'L',
2119 CHARSET_LEFT_TO_RIGHT,
2120 build_string ("ISO8859-5"),
2121 build_string ("ISO8859-5 (Cyrillic)"),
2122 build_string ("ISO8859-5 (Cyrillic)"),
2123 build_string ("iso8859-5"),
2124 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2125 Vcharset_latin_iso8859_9 =
2126 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2127 CHARSET_TYPE_96, 1, 1, 'M',
2128 CHARSET_LEFT_TO_RIGHT,
2129 build_string ("Latin-5"),
2130 build_string ("ISO8859-9 (Latin-5)"),
2131 build_string ("ISO8859-9 (Latin-5)"),
2132 build_string ("iso8859-9"),
2134 Vcharset_japanese_jisx0208_1978 =
2135 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2136 CHARSET_TYPE_94X94, 2, 0, '@',
2137 CHARSET_LEFT_TO_RIGHT,
2138 build_string ("JIS X0208:1978"),
2139 build_string ("JIS X0208:1978 (Japanese)"),
2141 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2142 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2144 Vcharset_chinese_gb2312 =
2145 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2146 CHARSET_TYPE_94X94, 2, 0, 'A',
2147 CHARSET_LEFT_TO_RIGHT,
2148 build_string ("GB2312"),
2149 build_string ("GB2312)"),
2150 build_string ("GB2312 Chinese simplified"),
2151 build_string ("gb2312"),
2153 Vcharset_japanese_jisx0208 =
2154 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2155 CHARSET_TYPE_94X94, 2, 0, 'B',
2156 CHARSET_LEFT_TO_RIGHT,
2157 build_string ("JISX0208"),
2158 build_string ("JIS X0208:1983 (Japanese)"),
2159 build_string ("JIS X0208:1983 Japanese Kanji"),
2160 build_string ("jisx0208\\.1983"),
2162 Vcharset_korean_ksc5601 =
2163 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2164 CHARSET_TYPE_94X94, 2, 0, 'C',
2165 CHARSET_LEFT_TO_RIGHT,
2166 build_string ("KSC5601"),
2167 build_string ("KSC5601 (Korean"),
2168 build_string ("KSC5601 Korean Hangul and Hanja"),
2169 build_string ("ksc5601"),
2171 Vcharset_japanese_jisx0212 =
2172 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2173 CHARSET_TYPE_94X94, 2, 0, 'D',
2174 CHARSET_LEFT_TO_RIGHT,
2175 build_string ("JISX0212"),
2176 build_string ("JISX0212 (Japanese)"),
2177 build_string ("JISX0212 Japanese Supplement"),
2178 build_string ("jisx0212"),
2181 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2182 Vcharset_chinese_cns11643_1 =
2183 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2184 CHARSET_TYPE_94X94, 2, 0, 'G',
2185 CHARSET_LEFT_TO_RIGHT,
2186 build_string ("CNS11643-1"),
2187 build_string ("CNS11643-1 (Chinese traditional)"),
2189 ("CNS 11643 Plane 1 Chinese traditional"),
2190 build_string (CHINESE_CNS_PLANE_RE("1")),
2192 Vcharset_chinese_cns11643_2 =
2193 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2194 CHARSET_TYPE_94X94, 2, 0, 'H',
2195 CHARSET_LEFT_TO_RIGHT,
2196 build_string ("CNS11643-2"),
2197 build_string ("CNS11643-2 (Chinese traditional)"),
2199 ("CNS 11643 Plane 2 Chinese traditional"),
2200 build_string (CHINESE_CNS_PLANE_RE("2")),
2203 Vcharset_latin_viscii_lower =
2204 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2205 CHARSET_TYPE_96, 1, 1, '1',
2206 CHARSET_LEFT_TO_RIGHT,
2207 build_string ("VISCII lower"),
2208 build_string ("VISCII lower (Vietnamese)"),
2209 build_string ("VISCII lower (Vietnamese)"),
2210 build_string ("VISCII1\\.1"),
2212 Vcharset_latin_viscii_upper =
2213 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2214 CHARSET_TYPE_96, 1, 1, '2',
2215 CHARSET_LEFT_TO_RIGHT,
2216 build_string ("VISCII upper"),
2217 build_string ("VISCII upper (Vietnamese)"),
2218 build_string ("VISCII upper (Vietnamese)"),
2219 build_string ("VISCII1\\.1"),
2221 Vcharset_hiragana_jisx0208 =
2222 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2223 CHARSET_TYPE_94X94, 2, 0, 'B',
2224 CHARSET_LEFT_TO_RIGHT,
2225 build_string ("Hiragana"),
2226 build_string ("Hiragana of JIS X0208"),
2227 build_string ("Japanese Hiragana of JIS X0208"),
2228 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2229 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2230 (0x24 - 33) * 94 + (0x21 - 33), 33);
2231 Vcharset_katakana_jisx0208 =
2232 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2233 CHARSET_TYPE_94X94, 2, 0, 'B',
2234 CHARSET_LEFT_TO_RIGHT,
2235 build_string ("Katakana"),
2236 build_string ("Katakana of JIS X0208"),
2237 build_string ("Japanese Katakana of JIS X0208"),
2238 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2239 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2240 (0x25 - 33) * 94 + (0x21 - 33), 33);
2242 Vcharset_chinese_big5_1 =
2243 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2244 CHARSET_TYPE_94X94, 2, 0, '0',
2245 CHARSET_LEFT_TO_RIGHT,
2246 build_string ("Big5"),
2247 build_string ("Big5 (Level-1)"),
2249 ("Big5 Level-1 Chinese traditional"),
2250 build_string ("big5"),
2252 Vcharset_chinese_big5_2 =
2253 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2254 CHARSET_TYPE_94X94, 2, 0, '1',
2255 CHARSET_LEFT_TO_RIGHT,
2256 build_string ("Big5"),
2257 build_string ("Big5 (Level-2)"),
2259 ("Big5 Level-2 Chinese traditional"),
2260 build_string ("big5"),
2263 #ifdef ENABLE_COMPOSITE_CHARS
2264 /* #### For simplicity, we put composite chars into a 96x96 charset.
2265 This is going to lead to problems because you can run out of
2266 room, esp. as we don't yet recycle numbers. */
2267 Vcharset_composite =
2268 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2269 CHARSET_TYPE_96X96, 2, 0, 0,
2270 CHARSET_LEFT_TO_RIGHT,
2271 build_string ("Composite"),
2272 build_string ("Composite characters"),
2273 build_string ("Composite characters"),
2276 composite_char_row_next = 32;
2277 composite_char_col_next = 32;
2279 Vcomposite_char_string2char_hash_table =
2280 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2281 Vcomposite_char_char2string_hash_table =
2282 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2283 staticpro (&Vcomposite_char_string2char_hash_table);
2284 staticpro (&Vcomposite_char_char2string_hash_table);
2285 #endif /* ENABLE_COMPOSITE_CHARS */