1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
64 Lisp_Object Vcharset_hiragana_jisx0208;
65 Lisp_Object Vcharset_katakana_jisx0208;
67 Lisp_Object Vcharset_chinese_big5_1;
68 Lisp_Object Vcharset_chinese_big5_2;
70 #ifdef ENABLE_COMPOSITE_CHARS
71 Lisp_Object Vcharset_composite;
73 /* Hash tables for composite chars. One maps string representing
74 composed chars to their equivalent chars; one goes the
76 Lisp_Object Vcomposite_char_char2string_hash_table;
77 Lisp_Object Vcomposite_char_string2char_hash_table;
79 static int composite_char_row_next;
80 static int composite_char_col_next;
82 #endif /* ENABLE_COMPOSITE_CHARS */
84 /* Table of charsets indexed by leading byte. */
85 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
87 /* Table of charsets indexed by type/final-byte/direction. */
89 Lisp_Object charset_by_attributes[4][128];
91 Lisp_Object charset_by_attributes[4][128][2];
95 /* Table of number of bytes in the string representation of a character
96 indexed by the first byte of that representation.
98 rep_bytes_by_first_byte(c) is more efficient than the equivalent
99 canonical computation:
101 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
103 Bytecount rep_bytes_by_first_byte[0xA0] =
104 { /* 0x00 - 0x7f are for straight ASCII */
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 /* 0x80 - 0x8f are for Dimension-1 official charsets */
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 /* 0x90 - 0x9d are for Dimension-2 official charsets */
120 /* 0x9e is for Dimension-1 private charsets */
121 /* 0x9f is for Dimension-2 private charsets */
122 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
127 Emchar_to_byte_table*
128 make_byte_from_character_table ()
130 Emchar_to_byte_table* table
131 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
137 #define destroy_byte_from_character_table(table) xfree(table)
140 put_byte_from_character_table (Emchar ch, unsigned char val,
141 Emchar_to_byte_table* table)
143 if (table->base == NULL)
145 table->base = xmalloc (128);
146 table->offset = ch - (ch % 128);
148 table->base[ch - table->offset] = val;
152 int i = ch - table->offset;
156 size_t new_size = table->size - i;
159 new_size += 128 - (new_size % 128);
160 table->base = xrealloc (table->base, new_size);
161 memmove (table->base + (new_size - table->size), table->base,
163 for (j = 0; j < (new_size - table->size); j++)
165 table->offset -= (new_size - table->size);
166 table->base[ch - table->offset] = val;
167 table->size = new_size;
169 else if (i >= table->size)
171 size_t new_size = i + 1;
174 new_size += 128 - (new_size % 128);
175 table->base = xrealloc (table->base, new_size);
176 for (j = table->size; j < new_size; j++)
178 table->base[i] = val;
179 table->size = new_size;
183 table->base[i] = val;
189 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
191 size_t i = ch - table->offset;
193 return table->base[i];
199 Lisp_Object Vutf_2000_version;
203 int leading_code_private_11;
206 Lisp_Object Qcharsetp;
208 /* Qdoc_string, Qdimension, Qchars defined in general.c */
209 Lisp_Object Qregistry, Qfinal, Qgraphic;
210 Lisp_Object Qdirection;
211 Lisp_Object Qreverse_direction_charset;
212 Lisp_Object Qleading_byte;
213 Lisp_Object Qshort_name, Qlong_name;
229 Qjapanese_jisx0208_1978,
247 Lisp_Object Ql2r, Qr2l;
249 Lisp_Object Vcharset_hash_table;
251 static Charset_ID next_allocated_1_byte_leading_byte;
252 static Charset_ID next_allocated_2_byte_leading_byte;
254 /* Composite characters are characters constructed by overstriking two
255 or more regular characters.
257 1) The old Mule implementation involves storing composite characters
258 in a buffer as a tag followed by all of the actual characters
259 used to make up the composite character. I think this is a bad
260 idea; it greatly complicates code that wants to handle strings
261 one character at a time because it has to deal with the possibility
262 of great big ungainly characters. It's much more reasonable to
263 simply store an index into a table of composite characters.
265 2) The current implementation only allows for 16,384 separate
266 composite characters over the lifetime of the XEmacs process.
267 This could become a potential problem if the user
268 edited lots of different files that use composite characters.
269 Due to FSF bogosity, increasing the number of allowable
270 composite characters under Mule would decrease the number
271 of possible faces that can exist. Mule already has shrunk
272 this to 2048, and further shrinkage would become uncomfortable.
273 No such problems exist in XEmacs.
275 Composite characters could be represented as 0x80 C1 C2 C3,
276 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
277 for slightly under 2^20 (one million) composite characters
278 over the XEmacs process lifetime, and you only need to
279 increase the size of a Mule character from 19 to 21 bits.
280 Or you could use 0x80 C1 C2 C3 C4, allowing for about
281 85 million (slightly over 2^26) composite characters. */
284 /************************************************************************/
285 /* Basic Emchar functions */
286 /************************************************************************/
288 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
289 string in STR. Returns the number of bytes stored.
290 Do not call this directly. Use the macro set_charptr_emchar() instead.
294 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
309 else if ( c <= 0x7ff )
311 *p++ = (c >> 6) | 0xc0;
312 *p++ = (c & 0x3f) | 0x80;
314 else if ( c <= 0xffff )
316 *p++ = (c >> 12) | 0xe0;
317 *p++ = ((c >> 6) & 0x3f) | 0x80;
318 *p++ = (c & 0x3f) | 0x80;
320 else if ( c <= 0x1fffff )
322 *p++ = (c >> 18) | 0xf0;
323 *p++ = ((c >> 12) & 0x3f) | 0x80;
324 *p++ = ((c >> 6) & 0x3f) | 0x80;
325 *p++ = (c & 0x3f) | 0x80;
327 else if ( c <= 0x3ffffff )
329 *p++ = (c >> 24) | 0xf8;
330 *p++ = ((c >> 18) & 0x3f) | 0x80;
331 *p++ = ((c >> 12) & 0x3f) | 0x80;
332 *p++ = ((c >> 6) & 0x3f) | 0x80;
333 *p++ = (c & 0x3f) | 0x80;
337 *p++ = (c >> 30) | 0xfc;
338 *p++ = ((c >> 24) & 0x3f) | 0x80;
339 *p++ = ((c >> 18) & 0x3f) | 0x80;
340 *p++ = ((c >> 12) & 0x3f) | 0x80;
341 *p++ = ((c >> 6) & 0x3f) | 0x80;
342 *p++ = (c & 0x3f) | 0x80;
345 BREAKUP_CHAR (c, charset, c1, c2);
346 lb = CHAR_LEADING_BYTE (c);
347 if (LEADING_BYTE_PRIVATE_P (lb))
348 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
350 if (EQ (charset, Vcharset_control_1))
359 /* Return the first character from a Mule-encoded string in STR,
360 assuming it's non-ASCII. Do not call this directly.
361 Use the macro charptr_emchar() instead. */
364 non_ascii_charptr_emchar (CONST Bufbyte *str)
377 else if ( b >= 0xf8 )
382 else if ( b >= 0xf0 )
387 else if ( b >= 0xe0 )
392 else if ( b >= 0xc0 )
402 for( ; len > 0; len-- )
405 ch = ( ch << 6 ) | ( b & 0x3f );
409 Bufbyte i0 = *str, i1, i2 = 0;
412 if (i0 == LEADING_BYTE_CONTROL_1)
413 return (Emchar) (*++str - 0x20);
415 if (LEADING_BYTE_PREFIX_P (i0))
420 charset = CHARSET_BY_LEADING_BYTE (i0);
421 if (XCHARSET_DIMENSION (charset) == 2)
424 return MAKE_CHAR (charset, i1, i2);
428 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
429 Do not call this directly. Use the macro valid_char_p() instead. */
433 non_ascii_valid_char_p (Emchar ch)
437 /* Must have only lowest 19 bits set */
441 f1 = CHAR_FIELD1 (ch);
442 f2 = CHAR_FIELD2 (ch);
443 f3 = CHAR_FIELD3 (ch);
449 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
450 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
451 f2 > MAX_CHAR_FIELD2_PRIVATE)
456 if (f3 != 0x20 && f3 != 0x7F)
460 NOTE: This takes advantage of the fact that
461 FIELD2_TO_OFFICIAL_LEADING_BYTE and
462 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
464 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
465 return (XCHARSET_CHARS (charset) == 96);
471 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
472 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
473 f1 > MAX_CHAR_FIELD1_PRIVATE)
475 if (f2 < 0x20 || f3 < 0x20)
478 #ifdef ENABLE_COMPOSITE_CHARS
479 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
481 if (UNBOUNDP (Fgethash (make_int (ch),
482 Vcomposite_char_char2string_hash_table,
487 #endif /* ENABLE_COMPOSITE_CHARS */
489 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
492 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
494 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
497 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
499 return (XCHARSET_CHARS (charset) == 96);
505 /************************************************************************/
506 /* Basic string functions */
507 /************************************************************************/
509 /* Copy the character pointed to by PTR into STR, assuming it's
510 non-ASCII. Do not call this directly. Use the macro
511 charptr_copy_char() instead. */
514 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
516 Bufbyte *strptr = str;
518 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
520 /* Notice fallthrough. */
522 case 6: *++strptr = *ptr++;
523 case 5: *++strptr = *ptr++;
525 case 4: *++strptr = *ptr++;
526 case 3: *++strptr = *ptr++;
527 case 2: *++strptr = *ptr;
532 return strptr + 1 - str;
536 /************************************************************************/
537 /* streams of Emchars */
538 /************************************************************************/
540 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
541 The functions below are not meant to be called directly; use
542 the macros in insdel.h. */
545 Lstream_get_emchar_1 (Lstream *stream, int ch)
547 Bufbyte str[MAX_EMCHAR_LEN];
548 Bufbyte *strptr = str;
550 str[0] = (Bufbyte) ch;
551 switch (REP_BYTES_BY_FIRST_BYTE (ch))
553 /* Notice fallthrough. */
556 ch = Lstream_getc (stream);
558 *++strptr = (Bufbyte) ch;
560 ch = Lstream_getc (stream);
562 *++strptr = (Bufbyte) ch;
565 ch = Lstream_getc (stream);
567 *++strptr = (Bufbyte) ch;
569 ch = Lstream_getc (stream);
571 *++strptr = (Bufbyte) ch;
573 ch = Lstream_getc (stream);
575 *++strptr = (Bufbyte) ch;
580 return charptr_emchar (str);
584 Lstream_fput_emchar (Lstream *stream, Emchar ch)
586 Bufbyte str[MAX_EMCHAR_LEN];
587 Bytecount len = set_charptr_emchar (str, ch);
588 return Lstream_write (stream, str, len);
592 Lstream_funget_emchar (Lstream *stream, Emchar ch)
594 Bufbyte str[MAX_EMCHAR_LEN];
595 Bytecount len = set_charptr_emchar (str, ch);
596 Lstream_unread (stream, str, len);
600 /************************************************************************/
602 /************************************************************************/
605 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
607 struct Lisp_Charset *cs = XCHARSET (obj);
609 markobj (cs->short_name);
610 markobj (cs->long_name);
611 markobj (cs->doc_string);
612 markobj (cs->registry);
613 markobj (cs->ccl_program);
614 markobj (cs->decoding_table);
619 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
621 struct Lisp_Charset *cs = XCHARSET (obj);
625 error ("printing unreadable object #<charset %s 0x%x>",
626 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
629 write_c_string ("#<charset ", printcharfun);
630 print_internal (CHARSET_NAME (cs), printcharfun, 0);
631 write_c_string (" ", printcharfun);
632 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
633 write_c_string (" ", printcharfun);
634 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
635 write_c_string (" ", printcharfun);
636 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
637 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
638 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
639 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
640 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
642 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
643 CHARSET_COLUMNS (cs),
644 CHARSET_GRAPHIC (cs),
646 write_c_string (buf, printcharfun);
647 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
648 sprintf (buf, " 0x%x>", cs->header.uid);
649 write_c_string (buf, printcharfun);
652 static const struct lrecord_description charset_description[] = {
653 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
657 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
658 mark_charset, print_charset, 0, 0, 0,
660 struct Lisp_Charset);
661 /* Make a new charset. */
664 make_charset (Charset_ID id, Lisp_Object name,
665 unsigned char type, unsigned char columns, unsigned char graphic,
666 Bufbyte final, unsigned char direction, Lisp_Object short_name,
667 Lisp_Object long_name, Lisp_Object doc,
669 Lisp_Object decoding_table,
670 Emchar ucs_min, Emchar ucs_max,
671 Emchar code_offset, unsigned char byte_offset)
674 struct Lisp_Charset *cs =
675 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
676 XSETCHARSET (obj, cs);
678 CHARSET_ID (cs) = id;
679 CHARSET_NAME (cs) = name;
680 CHARSET_SHORT_NAME (cs) = short_name;
681 CHARSET_LONG_NAME (cs) = long_name;
682 CHARSET_DIRECTION (cs) = direction;
683 CHARSET_TYPE (cs) = type;
684 CHARSET_COLUMNS (cs) = columns;
685 CHARSET_GRAPHIC (cs) = graphic;
686 CHARSET_FINAL (cs) = final;
687 CHARSET_DOC_STRING (cs) = doc;
688 CHARSET_REGISTRY (cs) = reg;
689 CHARSET_CCL_PROGRAM (cs) = Qnil;
690 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
692 CHARSET_DECODING_TABLE(cs) = decoding_table;
693 CHARSET_UCS_MIN(cs) = ucs_min;
694 CHARSET_UCS_MAX(cs) = ucs_max;
695 CHARSET_CODE_OFFSET(cs) = code_offset;
696 CHARSET_BYTE_OFFSET(cs) = byte_offset;
699 switch ( CHARSET_TYPE (cs) )
701 case CHARSET_TYPE_94:
702 CHARSET_DIMENSION (cs) = 1;
703 CHARSET_CHARS (cs) = 94;
705 if (!EQ (decoding_table, Qnil))
708 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
709 for (i = 0; i < 94; i++)
711 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
714 put_byte_from_character_table (XCHAR (c), i + 33,
715 CHARSET_TO_BYTE1_TABLE(cs));
719 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
720 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
723 case CHARSET_TYPE_96:
724 CHARSET_DIMENSION (cs) = 1;
725 CHARSET_CHARS (cs) = 96;
727 if (!EQ (decoding_table, Qnil))
730 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
731 for (i = 0; i < 96; i++)
733 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
736 put_byte_from_character_table (XCHAR (c), i + 32,
737 CHARSET_TO_BYTE1_TABLE(cs));
741 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
742 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
745 case CHARSET_TYPE_94X94:
746 CHARSET_DIMENSION (cs) = 2;
747 CHARSET_CHARS (cs) = 94;
749 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
750 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
753 case CHARSET_TYPE_96X96:
754 CHARSET_DIMENSION (cs) = 2;
755 CHARSET_CHARS (cs) = 96;
757 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
758 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
762 case CHARSET_TYPE_128X128:
763 CHARSET_DIMENSION (cs) = 2;
764 CHARSET_CHARS (cs) = 128;
766 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
767 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
770 case CHARSET_TYPE_256X256:
771 CHARSET_DIMENSION (cs) = 2;
772 CHARSET_CHARS (cs) = 256;
774 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
775 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
782 if (id == LEADING_BYTE_ASCII)
783 CHARSET_REP_BYTES (cs) = 1;
785 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
787 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
792 /* some charsets do not have final characters. This includes
793 ASCII, Control-1, Composite, and the two faux private
796 if (code_offset == 0)
798 assert (NILP (charset_by_attributes[type][final]));
799 charset_by_attributes[type][final] = obj;
802 assert (NILP (charset_by_attributes[type][final][direction]));
803 charset_by_attributes[type][final][direction] = obj;
807 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
808 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
811 /* official leading byte */
812 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
815 /* Some charsets are "faux" and don't have names or really exist at
816 all except in the leading-byte table. */
818 Fputhash (name, obj, Vcharset_hash_table);
823 get_unallocated_leading_byte (int dimension)
829 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
832 lb = next_allocated_1_byte_leading_byte++;
836 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
839 lb = next_allocated_2_byte_leading_byte++;
844 ("No more character sets free for this dimension",
845 make_int (dimension));
852 charset_get_byte1 (Lisp_Object charset, Emchar ch)
854 Emchar_to_byte_table* table;
857 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
858 return get_byte_from_character_table (ch, table);
859 else if ((XCHARSET_UCS_MIN (charset) <= ch)
860 && (ch <= XCHARSET_UCS_MAX (charset)))
861 return (ch - XCHARSET_UCS_MIN (charset)
862 + XCHARSET_CODE_OFFSET (charset))
863 / (XCHARSET_DIMENSION (charset) == 1 ?
866 XCHARSET_DIMENSION (charset) == 2 ?
867 XCHARSET_CHARS (charset)
869 XCHARSET_DIMENSION (charset) == 3 ?
870 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
872 XCHARSET_CHARS (charset)
873 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
874 + XCHARSET_BYTE_OFFSET (charset);
875 else if (XCHARSET_CODE_OFFSET (charset) == 0)
877 if (XCHARSET_DIMENSION (charset) == 1)
879 if (XCHARSET_CHARS (charset) == 94)
881 if (((d = ch - (MIN_CHAR_94
882 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
886 else if (XCHARSET_CHARS (charset) == 96)
888 if (((d = ch - (MIN_CHAR_96
889 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
896 else if (XCHARSET_DIMENSION (charset) == 2)
898 if (XCHARSET_CHARS (charset) == 94)
900 if (((d = ch - (MIN_CHAR_94x94
901 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
904 return (d / 94) + 33;
906 else if (XCHARSET_CHARS (charset) == 96)
908 if (((d = ch - (MIN_CHAR_96x96
909 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
912 return (d / 96) + 32;
920 charset_get_byte2 (Lisp_Object charset, Emchar ch)
922 if (XCHARSET_DIMENSION (charset) == 1)
926 Emchar_to_byte_table* table;
928 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
929 return get_byte_from_character_table (ch, table);
930 else if ((XCHARSET_UCS_MIN (charset) <= ch)
931 && (ch <= XCHARSET_UCS_MAX (charset)))
932 return ((ch - XCHARSET_UCS_MIN (charset)
933 + XCHARSET_CODE_OFFSET (charset))
934 / (XCHARSET_DIMENSION (charset) == 2 ?
937 XCHARSET_DIMENSION (charset) == 3 ?
938 XCHARSET_CHARS (charset)
940 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
941 % XCHARSET_CHARS (charset)
942 + XCHARSET_BYTE_OFFSET (charset);
943 else if (XCHARSET_CHARS (charset) == 94)
944 return (MIN_CHAR_94x94
945 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
946 && (ch < MIN_CHAR_94x94
947 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
948 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
949 else /* if (XCHARSET_CHARS (charset) == 96) */
950 return (MIN_CHAR_96x96
951 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
952 && (ch < MIN_CHAR_96x96
953 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
954 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
958 Lisp_Object Vdefault_coded_charset_priority_list;
962 /************************************************************************/
963 /* Basic charset Lisp functions */
964 /************************************************************************/
966 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
967 Return non-nil if OBJECT is a charset.
971 return CHARSETP (object) ? Qt : Qnil;
974 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
975 Retrieve the charset of the given name.
976 If CHARSET-OR-NAME is a charset object, it is simply returned.
977 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
978 nil is returned. Otherwise the associated charset object is returned.
982 if (CHARSETP (charset_or_name))
983 return charset_or_name;
985 CHECK_SYMBOL (charset_or_name);
986 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
989 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
990 Retrieve the charset of the given name.
991 Same as `find-charset' except an error is signalled if there is no such
992 charset instead of returning nil.
996 Lisp_Object charset = Ffind_charset (name);
999 signal_simple_error ("No such charset", name);
1003 /* We store the charsets in hash tables with the names as the key and the
1004 actual charset object as the value. Occasionally we need to use them
1005 in a list format. These routines provide us with that. */
1006 struct charset_list_closure
1008 Lisp_Object *charset_list;
1012 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1013 void *charset_list_closure)
1015 /* This function can GC */
1016 struct charset_list_closure *chcl =
1017 (struct charset_list_closure*) charset_list_closure;
1018 Lisp_Object *charset_list = chcl->charset_list;
1020 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1024 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1025 Return a list of the names of all defined charsets.
1029 Lisp_Object charset_list = Qnil;
1030 struct gcpro gcpro1;
1031 struct charset_list_closure charset_list_closure;
1033 GCPRO1 (charset_list);
1034 charset_list_closure.charset_list = &charset_list;
1035 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1036 &charset_list_closure);
1039 return charset_list;
1042 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1043 Return the name of the given charset.
1047 return XCHARSET_NAME (Fget_charset (charset));
1050 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1051 Define a new character set.
1052 This function is for use with Mule support.
1053 NAME is a symbol, the name by which the character set is normally referred.
1054 DOC-STRING is a string describing the character set.
1055 PROPS is a property list, describing the specific nature of the
1056 character set. Recognized properties are:
1058 'short-name Short version of the charset name (ex: Latin-1)
1059 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1060 'registry A regular expression matching the font registry field for
1062 'dimension Number of octets used to index a character in this charset.
1063 Either 1 or 2. Defaults to 1.
1064 'columns Number of columns used to display a character in this charset.
1065 Only used in TTY mode. (Under X, the actual width of a
1066 character can be derived from the font used to display the
1067 characters.) If unspecified, defaults to the dimension
1068 (this is almost always the correct value).
1069 'chars Number of characters in each dimension (94 or 96).
1070 Defaults to 94. Note that if the dimension is 2, the
1071 character set thus described is 94x94 or 96x96.
1072 'final Final byte of ISO 2022 escape sequence. Must be
1073 supplied. Each combination of (DIMENSION, CHARS) defines a
1074 separate namespace for final bytes. Note that ISO
1075 2022 restricts the final byte to the range
1076 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1077 dimension == 2. Note also that final bytes in the range
1078 0x30 - 0x3F are reserved for user-defined (not official)
1080 'graphic 0 (use left half of font on output) or 1 (use right half
1081 of font on output). Defaults to 0. For example, for
1082 a font whose registry is ISO8859-1, the left half
1083 (octets 0x20 - 0x7F) is the `ascii' character set, while
1084 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1085 character set. With 'graphic set to 0, the octets
1086 will have their high bit cleared; with it set to 1,
1087 the octets will have their high bit set.
1088 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1090 'ccl-program A compiled CCL program used to convert a character in
1091 this charset into an index into the font. This is in
1092 addition to the 'graphic property. The CCL program
1093 is passed the octets of the character, with the high
1094 bit cleared and set depending upon whether the value
1095 of the 'graphic property is 0 or 1.
1097 (name, doc_string, props))
1099 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1100 int direction = CHARSET_LEFT_TO_RIGHT;
1102 Lisp_Object registry = Qnil;
1103 Lisp_Object charset;
1104 Lisp_Object rest, keyword, value;
1105 Lisp_Object ccl_program = Qnil;
1106 Lisp_Object short_name = Qnil, long_name = Qnil;
1108 CHECK_SYMBOL (name);
1109 if (!NILP (doc_string))
1110 CHECK_STRING (doc_string);
1112 charset = Ffind_charset (name);
1113 if (!NILP (charset))
1114 signal_simple_error ("Cannot redefine existing charset", name);
1116 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1118 if (EQ (keyword, Qshort_name))
1120 CHECK_STRING (value);
1124 if (EQ (keyword, Qlong_name))
1126 CHECK_STRING (value);
1130 else if (EQ (keyword, Qdimension))
1133 dimension = XINT (value);
1134 if (dimension < 1 || dimension > 2)
1135 signal_simple_error ("Invalid value for 'dimension", value);
1138 else if (EQ (keyword, Qchars))
1141 chars = XINT (value);
1142 if (chars != 94 && chars != 96)
1143 signal_simple_error ("Invalid value for 'chars", value);
1146 else if (EQ (keyword, Qcolumns))
1149 columns = XINT (value);
1150 if (columns != 1 && columns != 2)
1151 signal_simple_error ("Invalid value for 'columns", value);
1154 else if (EQ (keyword, Qgraphic))
1157 graphic = XINT (value);
1158 if (graphic < 0 || graphic > 1)
1159 signal_simple_error ("Invalid value for 'graphic", value);
1162 else if (EQ (keyword, Qregistry))
1164 CHECK_STRING (value);
1168 else if (EQ (keyword, Qdirection))
1170 if (EQ (value, Ql2r))
1171 direction = CHARSET_LEFT_TO_RIGHT;
1172 else if (EQ (value, Qr2l))
1173 direction = CHARSET_RIGHT_TO_LEFT;
1175 signal_simple_error ("Invalid value for 'direction", value);
1178 else if (EQ (keyword, Qfinal))
1180 CHECK_CHAR_COERCE_INT (value);
1181 final = XCHAR (value);
1182 if (final < '0' || final > '~')
1183 signal_simple_error ("Invalid value for 'final", value);
1186 else if (EQ (keyword, Qccl_program))
1188 CHECK_VECTOR (value);
1189 ccl_program = value;
1193 signal_simple_error ("Unrecognized property", keyword);
1197 error ("'final must be specified");
1198 if (dimension == 2 && final > 0x5F)
1200 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1204 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1206 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1208 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1209 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1211 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1218 /* id = CHARSET_ID_OFFSET_94 + final; */
1219 id = get_unallocated_leading_byte (dimension);
1221 else if (chars == 96)
1223 id = get_unallocated_leading_byte (dimension);
1230 else if (dimension == 2)
1234 id = get_unallocated_leading_byte (dimension);
1236 else if (chars == 96)
1238 id = get_unallocated_leading_byte (dimension);
1250 id = get_unallocated_leading_byte (dimension);
1253 if (NILP (doc_string))
1254 doc_string = build_string ("");
1256 if (NILP (registry))
1257 registry = build_string ("");
1259 if (NILP (short_name))
1260 XSETSTRING (short_name, XSYMBOL (name)->name);
1262 if (NILP (long_name))
1263 long_name = doc_string;
1266 columns = dimension;
1267 charset = make_charset (id, name, type, columns, graphic,
1268 final, direction, short_name, long_name,
1269 doc_string, registry,
1271 if (!NILP (ccl_program))
1272 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1276 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1278 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1279 NEW-NAME is the name of the new charset. Return the new charset.
1281 (charset, new_name))
1283 Lisp_Object new_charset = Qnil;
1284 int id, dimension, columns, graphic, final;
1285 int direction, type;
1286 Lisp_Object registry, doc_string, short_name, long_name;
1287 struct Lisp_Charset *cs;
1289 charset = Fget_charset (charset);
1290 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1291 signal_simple_error ("Charset already has reverse-direction charset",
1294 CHECK_SYMBOL (new_name);
1295 if (!NILP (Ffind_charset (new_name)))
1296 signal_simple_error ("Cannot redefine existing charset", new_name);
1298 cs = XCHARSET (charset);
1300 type = CHARSET_TYPE (cs);
1301 columns = CHARSET_COLUMNS (cs);
1302 dimension = CHARSET_DIMENSION (cs);
1303 id = get_unallocated_leading_byte (dimension);
1305 graphic = CHARSET_GRAPHIC (cs);
1306 final = CHARSET_FINAL (cs);
1307 direction = CHARSET_RIGHT_TO_LEFT;
1308 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1309 direction = CHARSET_LEFT_TO_RIGHT;
1310 doc_string = CHARSET_DOC_STRING (cs);
1311 short_name = CHARSET_SHORT_NAME (cs);
1312 long_name = CHARSET_LONG_NAME (cs);
1313 registry = CHARSET_REGISTRY (cs);
1315 new_charset = make_charset (id, new_name, type, columns,
1316 graphic, final, direction, short_name, long_name,
1317 doc_string, registry,
1319 CHARSET_DECODING_TABLE(cs),
1320 CHARSET_UCS_MIN(cs),
1321 CHARSET_UCS_MAX(cs),
1322 CHARSET_CODE_OFFSET(cs),
1323 CHARSET_BYTE_OFFSET(cs)
1329 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1330 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1335 /* #### Reverse direction charsets not yet implemented. */
1337 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1339 Return the reverse-direction charset parallel to CHARSET, if any.
1340 This is the charset with the same properties (in particular, the same
1341 dimension, number of characters per dimension, and final byte) as
1342 CHARSET but whose characters are displayed in the opposite direction.
1346 charset = Fget_charset (charset);
1347 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1351 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1352 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1353 If DIRECTION is omitted, both directions will be checked (left-to-right
1354 will be returned if character sets exist for both directions).
1356 (dimension, chars, final, direction))
1358 int dm, ch, fi, di = -1;
1360 Lisp_Object obj = Qnil;
1362 CHECK_INT (dimension);
1363 dm = XINT (dimension);
1364 if (dm < 1 || dm > 2)
1365 signal_simple_error ("Invalid value for DIMENSION", dimension);
1369 if (ch != 94 && ch != 96)
1370 signal_simple_error ("Invalid value for CHARS", chars);
1372 CHECK_CHAR_COERCE_INT (final);
1374 if (fi < '0' || fi > '~')
1375 signal_simple_error ("Invalid value for FINAL", final);
1377 if (EQ (direction, Ql2r))
1378 di = CHARSET_LEFT_TO_RIGHT;
1379 else if (EQ (direction, Qr2l))
1380 di = CHARSET_RIGHT_TO_LEFT;
1381 else if (!NILP (direction))
1382 signal_simple_error ("Invalid value for DIRECTION", direction);
1384 if (dm == 2 && fi > 0x5F)
1386 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1389 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1391 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1395 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1397 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1400 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1403 return XCHARSET_NAME (obj);
1407 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1408 Return short name of CHARSET.
1412 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1415 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1416 Return long name of CHARSET.
1420 return XCHARSET_LONG_NAME (Fget_charset (charset));
1423 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1424 Return description of CHARSET.
1428 return XCHARSET_DOC_STRING (Fget_charset (charset));
1431 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1432 Return dimension of CHARSET.
1436 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1439 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1440 Return property PROP of CHARSET.
1441 Recognized properties are those listed in `make-charset', as well as
1442 'name and 'doc-string.
1446 struct Lisp_Charset *cs;
1448 charset = Fget_charset (charset);
1449 cs = XCHARSET (charset);
1451 CHECK_SYMBOL (prop);
1452 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1453 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1454 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1455 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1456 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1457 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1458 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1459 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1460 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1461 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1462 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1463 if (EQ (prop, Qdirection))
1464 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1465 if (EQ (prop, Qreverse_direction_charset))
1467 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1471 return XCHARSET_NAME (obj);
1473 signal_simple_error ("Unrecognized charset property name", prop);
1474 return Qnil; /* not reached */
1477 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1478 Return charset identification number of CHARSET.
1482 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1485 /* #### We need to figure out which properties we really want to
1488 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1489 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1491 (charset, ccl_program))
1493 charset = Fget_charset (charset);
1494 CHECK_VECTOR (ccl_program);
1495 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1500 invalidate_charset_font_caches (Lisp_Object charset)
1502 /* Invalidate font cache entries for charset on all devices. */
1503 Lisp_Object devcons, concons, hash_table;
1504 DEVICE_LOOP_NO_BREAK (devcons, concons)
1506 struct device *d = XDEVICE (XCAR (devcons));
1507 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1508 if (!UNBOUNDP (hash_table))
1509 Fclrhash (hash_table);
1513 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1514 Set the 'registry property of CHARSET to REGISTRY.
1516 (charset, registry))
1518 charset = Fget_charset (charset);
1519 CHECK_STRING (registry);
1520 XCHARSET_REGISTRY (charset) = registry;
1521 invalidate_charset_font_caches (charset);
1522 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1527 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1528 Return mapping-table of CHARSET.
1532 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1535 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1536 Set mapping-table of CHARSET to TABLE.
1540 struct Lisp_Charset *cs;
1541 Emchar_to_byte_table* old_byte1_table;
1542 Emchar_to_byte_table* old_byte2_table;
1544 charset = Fget_charset (charset);
1545 CHECK_VECTOR (table);
1547 cs = XCHARSET (charset);
1548 CHARSET_DECODING_TABLE(cs) = table;
1549 old_byte1_table = CHARSET_TO_BYTE1_TABLE(cs);
1550 old_byte2_table = CHARSET_TO_BYTE2_TABLE(cs);
1551 switch (CHARSET_TYPE (cs))
1553 case CHARSET_TYPE_94:
1554 if (!EQ (table, Qnil))
1557 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1558 for (i = 0; i < 94; i++)
1560 Lisp_Object c = XVECTOR_DATA(table)[i];
1563 put_byte_from_character_table (XCHAR (c), i + 33,
1564 CHARSET_TO_BYTE1_TABLE(cs));
1568 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1569 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1571 case CHARSET_TYPE_96:
1572 if (!EQ (table, Qnil))
1575 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1576 for (i = 0; i < 96; i++)
1578 Lisp_Object c = XVECTOR_DATA(table)[i];
1581 put_byte_from_character_table (XCHAR (c), i + 32,
1582 CHARSET_TO_BYTE1_TABLE(cs));
1586 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1587 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1589 case CHARSET_TYPE_94X94:
1590 if (!EQ (table, Qnil))
1594 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1595 CHARSET_TO_BYTE2_TABLE(cs) = make_byte_from_character_table();
1596 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1598 Lisp_Object v = XVECTOR_DATA(table)[i];
1604 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1606 Lisp_Object c = XVECTOR_DATA(v)[j];
1610 put_byte_from_character_table
1611 (XCHAR (c), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1612 put_byte_from_character_table
1613 (XCHAR (c), j + 33, CHARSET_TO_BYTE2_TABLE(cs));
1618 put_byte_from_character_table
1619 (XCHAR (v), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1624 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1625 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1628 case CHARSET_TYPE_96X96:
1629 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1630 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1632 case CHARSET_TYPE_128X128:
1633 CHARSET_DIMENSION (cs) = 2;
1634 CHARSET_CHARS (cs) = 128;
1635 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1636 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1638 case CHARSET_TYPE_256X256:
1639 CHARSET_DIMENSION (cs) = 2;
1640 CHARSET_CHARS (cs) = 256;
1641 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1642 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1645 if (old_byte1_table != NULL)
1646 destroy_byte_from_character_table (old_byte1_table);
1647 if (old_byte2_table != NULL)
1648 destroy_byte_from_character_table (old_byte2_table);
1654 /************************************************************************/
1655 /* Lisp primitives for working with characters */
1656 /************************************************************************/
1658 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1659 Make a character from CHARSET and octets ARG1 and ARG2.
1660 ARG2 is required only for characters from two-dimensional charsets.
1661 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1662 character s with caron.
1664 (charset, arg1, arg2))
1666 struct Lisp_Charset *cs;
1668 int lowlim, highlim;
1670 charset = Fget_charset (charset);
1671 cs = XCHARSET (charset);
1673 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1674 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1676 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1678 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1679 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1682 /* It is useful (and safe, according to Olivier Galibert) to strip
1683 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1684 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1685 Latin 2 code of the character. */
1693 if (a1 < lowlim || a1 > highlim)
1694 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1696 if (CHARSET_DIMENSION (cs) == 1)
1700 ("Charset is of dimension one; second octet must be nil", arg2);
1701 return make_char (MAKE_CHAR (charset, a1, 0));
1710 a2 = XINT (arg2) & 0x7f;
1712 if (a2 < lowlim || a2 > highlim)
1713 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1715 return make_char (MAKE_CHAR (charset, a1, a2));
1718 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1719 Return the character set of char CH.
1723 CHECK_CHAR_COERCE_INT (ch);
1725 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1728 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1729 Return list of charset and one or two position-codes of CHAR.
1733 /* This function can GC */
1734 struct gcpro gcpro1, gcpro2;
1735 Lisp_Object charset = Qnil;
1736 Lisp_Object rc = Qnil;
1739 GCPRO2 (charset, rc);
1740 CHECK_CHAR_COERCE_INT (character);
1742 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1744 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1746 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1750 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1758 #ifdef ENABLE_COMPOSITE_CHARS
1759 /************************************************************************/
1760 /* composite character functions */
1761 /************************************************************************/
1764 lookup_composite_char (Bufbyte *str, int len)
1766 Lisp_Object lispstr = make_string (str, len);
1767 Lisp_Object ch = Fgethash (lispstr,
1768 Vcomposite_char_string2char_hash_table,
1774 if (composite_char_row_next >= 128)
1775 signal_simple_error ("No more composite chars available", lispstr);
1776 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1777 composite_char_col_next);
1778 Fputhash (make_char (emch), lispstr,
1779 Vcomposite_char_char2string_hash_table);
1780 Fputhash (lispstr, make_char (emch),
1781 Vcomposite_char_string2char_hash_table);
1782 composite_char_col_next++;
1783 if (composite_char_col_next >= 128)
1785 composite_char_col_next = 32;
1786 composite_char_row_next++;
1795 composite_char_string (Emchar ch)
1797 Lisp_Object str = Fgethash (make_char (ch),
1798 Vcomposite_char_char2string_hash_table,
1800 assert (!UNBOUNDP (str));
1804 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1805 Convert a string into a single composite character.
1806 The character is the result of overstriking all the characters in
1811 CHECK_STRING (string);
1812 return make_char (lookup_composite_char (XSTRING_DATA (string),
1813 XSTRING_LENGTH (string)));
1816 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1817 Return a string of the characters comprising a composite character.
1825 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1826 signal_simple_error ("Must be composite char", ch);
1827 return composite_char_string (emch);
1829 #endif /* ENABLE_COMPOSITE_CHARS */
1832 /************************************************************************/
1833 /* initialization */
1834 /************************************************************************/
1837 syms_of_mule_charset (void)
1839 DEFSUBR (Fcharsetp);
1840 DEFSUBR (Ffind_charset);
1841 DEFSUBR (Fget_charset);
1842 DEFSUBR (Fcharset_list);
1843 DEFSUBR (Fcharset_name);
1844 DEFSUBR (Fmake_charset);
1845 DEFSUBR (Fmake_reverse_direction_charset);
1846 /* DEFSUBR (Freverse_direction_charset); */
1847 DEFSUBR (Fcharset_from_attributes);
1848 DEFSUBR (Fcharset_short_name);
1849 DEFSUBR (Fcharset_long_name);
1850 DEFSUBR (Fcharset_description);
1851 DEFSUBR (Fcharset_dimension);
1852 DEFSUBR (Fcharset_property);
1853 DEFSUBR (Fcharset_id);
1854 DEFSUBR (Fset_charset_ccl_program);
1855 DEFSUBR (Fset_charset_registry);
1857 DEFSUBR (Fcharset_mapping_table);
1858 DEFSUBR (Fset_charset_mapping_table);
1861 DEFSUBR (Fmake_char);
1862 DEFSUBR (Fchar_charset);
1863 DEFSUBR (Fsplit_char);
1865 #ifdef ENABLE_COMPOSITE_CHARS
1866 DEFSUBR (Fmake_composite_char);
1867 DEFSUBR (Fcomposite_char_string);
1870 defsymbol (&Qcharsetp, "charsetp");
1871 defsymbol (&Qregistry, "registry");
1872 defsymbol (&Qfinal, "final");
1873 defsymbol (&Qgraphic, "graphic");
1874 defsymbol (&Qdirection, "direction");
1875 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1876 defsymbol (&Qshort_name, "short-name");
1877 defsymbol (&Qlong_name, "long-name");
1879 defsymbol (&Ql2r, "l2r");
1880 defsymbol (&Qr2l, "r2l");
1882 /* Charsets, compatible with FSF 20.3
1883 Naming convention is Script-Charset[-Edition] */
1884 defsymbol (&Qascii, "ascii");
1885 defsymbol (&Qcontrol_1, "control-1");
1886 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1887 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1888 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1889 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1890 defsymbol (&Qthai_tis620, "thai-tis620");
1891 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1892 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1893 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1894 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1895 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1896 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1897 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1898 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1899 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1900 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1901 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1902 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1903 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1904 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1906 defsymbol (&Qucs_bmp, "ucs-bmp");
1907 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
1908 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
1909 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
1910 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
1912 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1913 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1915 defsymbol (&Qcomposite, "composite");
1919 vars_of_mule_charset (void)
1926 /* Table of charsets indexed by leading byte. */
1927 for (i = 0; i < countof (charset_by_leading_byte); i++)
1928 charset_by_leading_byte[i] = Qnil;
1931 /* Table of charsets indexed by type/final-byte. */
1932 for (i = 0; i < countof (charset_by_attributes); i++)
1933 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1934 charset_by_attributes[i][j] = Qnil;
1936 /* Table of charsets indexed by type/final-byte/direction. */
1937 for (i = 0; i < countof (charset_by_attributes); i++)
1938 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1939 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1940 charset_by_attributes[i][j][k] = Qnil;
1943 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1945 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1947 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1951 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1952 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1953 Leading-code of private TYPE9N charset of column-width 1.
1955 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1959 Vutf_2000_version = build_string("0.8 (Kami)");
1960 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1961 Version number of UTF-2000.
1964 Vdefault_coded_charset_priority_list = Qnil;
1965 DEFVAR_LISP ("default-coded-charset-priority-list",
1966 &Vdefault_coded_charset_priority_list /*
1967 Default order of preferred coded-character-set.
1973 complex_vars_of_mule_charset (void)
1975 staticpro (&Vcharset_hash_table);
1976 Vcharset_hash_table =
1977 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1979 /* Predefined character sets. We store them into variables for
1984 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
1985 CHARSET_TYPE_256X256, 1, 0, 0,
1986 CHARSET_LEFT_TO_RIGHT,
1987 build_string ("BMP"),
1988 build_string ("BMP"),
1989 build_string ("BMP"),
1991 Qnil, 0, 0xFFFF, 0, 0);
1993 # define MIN_CHAR_THAI 0
1994 # define MAX_CHAR_THAI 0
1995 # define MIN_CHAR_GREEK 0
1996 # define MAX_CHAR_GREEK 0
1997 # define MIN_CHAR_HEBREW 0
1998 # define MAX_CHAR_HEBREW 0
1999 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2000 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2001 # define MIN_CHAR_CYRILLIC 0
2002 # define MAX_CHAR_CYRILLIC 0
2005 make_charset (LEADING_BYTE_ASCII, Qascii,
2006 CHARSET_TYPE_94, 1, 0, 'B',
2007 CHARSET_LEFT_TO_RIGHT,
2008 build_string ("ASCII"),
2009 build_string ("ASCII)"),
2010 build_string ("ASCII (ISO646 IRV)"),
2011 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2012 Qnil, 0, 0x7F, 0, 0);
2013 Vcharset_control_1 =
2014 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2015 CHARSET_TYPE_94, 1, 1, 0,
2016 CHARSET_LEFT_TO_RIGHT,
2017 build_string ("C1"),
2018 build_string ("Control characters"),
2019 build_string ("Control characters 128-191"),
2021 Qnil, 0x80, 0x9F, 0, 0);
2022 Vcharset_latin_iso8859_1 =
2023 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2024 CHARSET_TYPE_96, 1, 1, 'A',
2025 CHARSET_LEFT_TO_RIGHT,
2026 build_string ("Latin-1"),
2027 build_string ("ISO8859-1 (Latin-1)"),
2028 build_string ("ISO8859-1 (Latin-1)"),
2029 build_string ("iso8859-1"),
2030 Qnil, 0xA0, 0xFF, 0, 32);
2031 Vcharset_latin_iso8859_2 =
2032 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2033 CHARSET_TYPE_96, 1, 1, 'B',
2034 CHARSET_LEFT_TO_RIGHT,
2035 build_string ("Latin-2"),
2036 build_string ("ISO8859-2 (Latin-2)"),
2037 build_string ("ISO8859-2 (Latin-2)"),
2038 build_string ("iso8859-2"),
2040 Vcharset_latin_iso8859_3 =
2041 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2042 CHARSET_TYPE_96, 1, 1, 'C',
2043 CHARSET_LEFT_TO_RIGHT,
2044 build_string ("Latin-3"),
2045 build_string ("ISO8859-3 (Latin-3)"),
2046 build_string ("ISO8859-3 (Latin-3)"),
2047 build_string ("iso8859-3"),
2049 Vcharset_latin_iso8859_4 =
2050 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2051 CHARSET_TYPE_96, 1, 1, 'D',
2052 CHARSET_LEFT_TO_RIGHT,
2053 build_string ("Latin-4"),
2054 build_string ("ISO8859-4 (Latin-4)"),
2055 build_string ("ISO8859-4 (Latin-4)"),
2056 build_string ("iso8859-4"),
2058 Vcharset_thai_tis620 =
2059 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2060 CHARSET_TYPE_96, 1, 1, 'T',
2061 CHARSET_LEFT_TO_RIGHT,
2062 build_string ("TIS620"),
2063 build_string ("TIS620 (Thai)"),
2064 build_string ("TIS620.2529 (Thai)"),
2065 build_string ("tis620"),
2066 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2067 Vcharset_greek_iso8859_7 =
2068 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2069 CHARSET_TYPE_96, 1, 1, 'F',
2070 CHARSET_LEFT_TO_RIGHT,
2071 build_string ("ISO8859-7"),
2072 build_string ("ISO8859-7 (Greek)"),
2073 build_string ("ISO8859-7 (Greek)"),
2074 build_string ("iso8859-7"),
2075 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2076 Vcharset_arabic_iso8859_6 =
2077 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2078 CHARSET_TYPE_96, 1, 1, 'G',
2079 CHARSET_RIGHT_TO_LEFT,
2080 build_string ("ISO8859-6"),
2081 build_string ("ISO8859-6 (Arabic)"),
2082 build_string ("ISO8859-6 (Arabic)"),
2083 build_string ("iso8859-6"),
2085 Vcharset_hebrew_iso8859_8 =
2086 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2087 CHARSET_TYPE_96, 1, 1, 'H',
2088 CHARSET_RIGHT_TO_LEFT,
2089 build_string ("ISO8859-8"),
2090 build_string ("ISO8859-8 (Hebrew)"),
2091 build_string ("ISO8859-8 (Hebrew)"),
2092 build_string ("iso8859-8"),
2093 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2094 Vcharset_katakana_jisx0201 =
2095 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2096 CHARSET_TYPE_94, 1, 1, 'I',
2097 CHARSET_LEFT_TO_RIGHT,
2098 build_string ("JISX0201 Kana"),
2099 build_string ("JISX0201.1976 (Japanese Kana)"),
2100 build_string ("JISX0201.1976 Japanese Kana"),
2101 build_string ("jisx0201\\.1976"),
2103 MIN_CHAR_HALFWIDTH_KATAKANA,
2104 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2105 Vcharset_latin_jisx0201 =
2106 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2107 CHARSET_TYPE_94, 1, 0, 'J',
2108 CHARSET_LEFT_TO_RIGHT,
2109 build_string ("JISX0201 Roman"),
2110 build_string ("JISX0201.1976 (Japanese Roman)"),
2111 build_string ("JISX0201.1976 Japanese Roman"),
2112 build_string ("jisx0201\\.1976"),
2114 Vcharset_cyrillic_iso8859_5 =
2115 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2116 CHARSET_TYPE_96, 1, 1, 'L',
2117 CHARSET_LEFT_TO_RIGHT,
2118 build_string ("ISO8859-5"),
2119 build_string ("ISO8859-5 (Cyrillic)"),
2120 build_string ("ISO8859-5 (Cyrillic)"),
2121 build_string ("iso8859-5"),
2122 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2123 Vcharset_latin_iso8859_9 =
2124 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2125 CHARSET_TYPE_96, 1, 1, 'M',
2126 CHARSET_LEFT_TO_RIGHT,
2127 build_string ("Latin-5"),
2128 build_string ("ISO8859-9 (Latin-5)"),
2129 build_string ("ISO8859-9 (Latin-5)"),
2130 build_string ("iso8859-9"),
2132 Vcharset_japanese_jisx0208_1978 =
2133 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2134 CHARSET_TYPE_94X94, 2, 0, '@',
2135 CHARSET_LEFT_TO_RIGHT,
2136 build_string ("JIS X0208:1978"),
2137 build_string ("JIS X0208:1978 (Japanese)"),
2139 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2140 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2142 Vcharset_chinese_gb2312 =
2143 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2144 CHARSET_TYPE_94X94, 2, 0, 'A',
2145 CHARSET_LEFT_TO_RIGHT,
2146 build_string ("GB2312"),
2147 build_string ("GB2312)"),
2148 build_string ("GB2312 Chinese simplified"),
2149 build_string ("gb2312"),
2151 Vcharset_japanese_jisx0208 =
2152 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2153 CHARSET_TYPE_94X94, 2, 0, 'B',
2154 CHARSET_LEFT_TO_RIGHT,
2155 build_string ("JISX0208"),
2156 build_string ("JIS X0208:1983 (Japanese)"),
2157 build_string ("JIS X0208:1983 Japanese Kanji"),
2158 build_string ("jisx0208\\.1983"),
2160 Vcharset_korean_ksc5601 =
2161 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2162 CHARSET_TYPE_94X94, 2, 0, 'C',
2163 CHARSET_LEFT_TO_RIGHT,
2164 build_string ("KSC5601"),
2165 build_string ("KSC5601 (Korean"),
2166 build_string ("KSC5601 Korean Hangul and Hanja"),
2167 build_string ("ksc5601"),
2169 Vcharset_japanese_jisx0212 =
2170 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2171 CHARSET_TYPE_94X94, 2, 0, 'D',
2172 CHARSET_LEFT_TO_RIGHT,
2173 build_string ("JISX0212"),
2174 build_string ("JISX0212 (Japanese)"),
2175 build_string ("JISX0212 Japanese Supplement"),
2176 build_string ("jisx0212"),
2179 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2180 Vcharset_chinese_cns11643_1 =
2181 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2182 CHARSET_TYPE_94X94, 2, 0, 'G',
2183 CHARSET_LEFT_TO_RIGHT,
2184 build_string ("CNS11643-1"),
2185 build_string ("CNS11643-1 (Chinese traditional)"),
2187 ("CNS 11643 Plane 1 Chinese traditional"),
2188 build_string (CHINESE_CNS_PLANE_RE("1")),
2190 Vcharset_chinese_cns11643_2 =
2191 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2192 CHARSET_TYPE_94X94, 2, 0, 'H',
2193 CHARSET_LEFT_TO_RIGHT,
2194 build_string ("CNS11643-2"),
2195 build_string ("CNS11643-2 (Chinese traditional)"),
2197 ("CNS 11643 Plane 2 Chinese traditional"),
2198 build_string (CHINESE_CNS_PLANE_RE("2")),
2201 Vcharset_latin_viscii_lower =
2202 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2203 CHARSET_TYPE_96, 1, 1, '1',
2204 CHARSET_LEFT_TO_RIGHT,
2205 build_string ("VISCII lower"),
2206 build_string ("VISCII lower (Vietnamese)"),
2207 build_string ("VISCII lower (Vietnamese)"),
2208 build_string ("VISCII1\\.1"),
2210 Vcharset_latin_viscii_upper =
2211 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2212 CHARSET_TYPE_96, 1, 1, '2',
2213 CHARSET_LEFT_TO_RIGHT,
2214 build_string ("VISCII upper"),
2215 build_string ("VISCII upper (Vietnamese)"),
2216 build_string ("VISCII upper (Vietnamese)"),
2217 build_string ("VISCII1\\.1"),
2219 Vcharset_hiragana_jisx0208 =
2220 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2221 CHARSET_TYPE_94X94, 2, 0, 'B',
2222 CHARSET_LEFT_TO_RIGHT,
2223 build_string ("Hiragana"),
2224 build_string ("Hiragana of JIS X0208"),
2225 build_string ("Japanese Hiragana of JIS X0208"),
2226 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2227 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2228 (0x24 - 33) * 94 + (0x21 - 33), 33);
2229 Vcharset_katakana_jisx0208 =
2230 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2231 CHARSET_TYPE_94X94, 2, 0, 'B',
2232 CHARSET_LEFT_TO_RIGHT,
2233 build_string ("Katakana"),
2234 build_string ("Katakana of JIS X0208"),
2235 build_string ("Japanese Katakana of JIS X0208"),
2236 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2237 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2238 (0x25 - 33) * 94 + (0x21 - 33), 33);
2240 Vcharset_chinese_big5_1 =
2241 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2242 CHARSET_TYPE_94X94, 2, 0, '0',
2243 CHARSET_LEFT_TO_RIGHT,
2244 build_string ("Big5"),
2245 build_string ("Big5 (Level-1)"),
2247 ("Big5 Level-1 Chinese traditional"),
2248 build_string ("big5"),
2250 Vcharset_chinese_big5_2 =
2251 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2252 CHARSET_TYPE_94X94, 2, 0, '1',
2253 CHARSET_LEFT_TO_RIGHT,
2254 build_string ("Big5"),
2255 build_string ("Big5 (Level-2)"),
2257 ("Big5 Level-2 Chinese traditional"),
2258 build_string ("big5"),
2261 #ifdef ENABLE_COMPOSITE_CHARS
2262 /* #### For simplicity, we put composite chars into a 96x96 charset.
2263 This is going to lead to problems because you can run out of
2264 room, esp. as we don't yet recycle numbers. */
2265 Vcharset_composite =
2266 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2267 CHARSET_TYPE_96X96, 2, 0, 0,
2268 CHARSET_LEFT_TO_RIGHT,
2269 build_string ("Composite"),
2270 build_string ("Composite characters"),
2271 build_string ("Composite characters"),
2274 composite_char_row_next = 32;
2275 composite_char_col_next = 32;
2277 Vcomposite_char_string2char_hash_table =
2278 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2279 Vcomposite_char_char2string_hash_table =
2280 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2281 staticpro (&Vcomposite_char_string2char_hash_table);
2282 staticpro (&Vcomposite_char_char2string_hash_table);
2283 #endif /* ENABLE_COMPOSITE_CHARS */