1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
64 Lisp_Object Vcharset_hiragana_jisx0208;
65 Lisp_Object Vcharset_katakana_jisx0208;
67 Lisp_Object Vcharset_chinese_big5_1;
68 Lisp_Object Vcharset_chinese_big5_2;
70 #ifdef ENABLE_COMPOSITE_CHARS
71 Lisp_Object Vcharset_composite;
73 /* Hash tables for composite chars. One maps string representing
74 composed chars to their equivalent chars; one goes the
76 Lisp_Object Vcomposite_char_char2string_hash_table;
77 Lisp_Object Vcomposite_char_string2char_hash_table;
79 static int composite_char_row_next;
80 static int composite_char_col_next;
82 #endif /* ENABLE_COMPOSITE_CHARS */
84 /* Table of charsets indexed by leading byte. */
85 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
87 /* Table of charsets indexed by type/final-byte/direction. */
89 Lisp_Object charset_by_attributes[4][128];
91 Lisp_Object charset_by_attributes[4][128][2];
95 /* Table of number of bytes in the string representation of a character
96 indexed by the first byte of that representation.
98 rep_bytes_by_first_byte(c) is more efficient than the equivalent
99 canonical computation:
101 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
103 Bytecount rep_bytes_by_first_byte[0xA0] =
104 { /* 0x00 - 0x7f are for straight ASCII */
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 /* 0x80 - 0x8f are for Dimension-1 official charsets */
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 /* 0x90 - 0x9d are for Dimension-2 official charsets */
120 /* 0x9e is for Dimension-1 private charsets */
121 /* 0x9f is for Dimension-2 private charsets */
122 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
127 Emchar_to_byte_table*
128 make_byte_from_character_table ()
130 Emchar_to_byte_table* table
131 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
137 #define destroy_byte_from_character_table(table) xfree(table)
140 put_byte_from_character_table (Emchar ch, unsigned char val,
141 Emchar_to_byte_table* table)
143 if (table->base == NULL)
145 table->base = xmalloc (256);
146 table->offset = ch - (ch % 256);
148 table->base[ch - table->offset] = val;
152 int i = ch - table->offset;
156 size_t new_size = table->size - i;
159 new_size += 256 - (new_size % 256);
160 table->base = xrealloc (table->base, new_size);
161 memmove (table->base + (new_size - table->size), table->base,
163 for (j = 0; j < (new_size - table->size); j++)
165 table->offset -= (new_size - table->size);
166 table->base[ch - table->offset] = val;
167 table->size = new_size;
169 else if (i >= table->size)
171 size_t new_size = i + 1;
174 new_size += 256 - (new_size % 256);
175 table->base = xrealloc (table->base, new_size);
176 for (j = table->size; j < new_size; j++)
178 table->base[i] = val;
179 table->size = new_size;
183 table->base[i] = val;
189 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
191 size_t i = ch - table->offset;
193 return table->base[i];
199 Lisp_Object Vutf_2000_version;
203 int leading_code_private_11;
206 Lisp_Object Qcharsetp;
208 /* Qdoc_string, Qdimension, Qchars defined in general.c */
209 Lisp_Object Qregistry, Qfinal, Qgraphic;
210 Lisp_Object Qdirection;
211 Lisp_Object Qreverse_direction_charset;
212 Lisp_Object Qleading_byte;
213 Lisp_Object Qshort_name, Qlong_name;
229 Qjapanese_jisx0208_1978,
240 Qvietnamese_viscii_lower,
241 Qvietnamese_viscii_upper,
249 Lisp_Object Ql2r, Qr2l;
251 Lisp_Object Vcharset_hash_table;
253 static Charset_ID next_allocated_1_byte_leading_byte;
254 static Charset_ID next_allocated_2_byte_leading_byte;
256 /* Composite characters are characters constructed by overstriking two
257 or more regular characters.
259 1) The old Mule implementation involves storing composite characters
260 in a buffer as a tag followed by all of the actual characters
261 used to make up the composite character. I think this is a bad
262 idea; it greatly complicates code that wants to handle strings
263 one character at a time because it has to deal with the possibility
264 of great big ungainly characters. It's much more reasonable to
265 simply store an index into a table of composite characters.
267 2) The current implementation only allows for 16,384 separate
268 composite characters over the lifetime of the XEmacs process.
269 This could become a potential problem if the user
270 edited lots of different files that use composite characters.
271 Due to FSF bogosity, increasing the number of allowable
272 composite characters under Mule would decrease the number
273 of possible faces that can exist. Mule already has shrunk
274 this to 2048, and further shrinkage would become uncomfortable.
275 No such problems exist in XEmacs.
277 Composite characters could be represented as 0x80 C1 C2 C3,
278 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
279 for slightly under 2^20 (one million) composite characters
280 over the XEmacs process lifetime, and you only need to
281 increase the size of a Mule character from 19 to 21 bits.
282 Or you could use 0x80 C1 C2 C3 C4, allowing for about
283 85 million (slightly over 2^26) composite characters. */
286 /************************************************************************/
287 /* Basic Emchar functions */
288 /************************************************************************/
290 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
291 string in STR. Returns the number of bytes stored.
292 Do not call this directly. Use the macro set_charptr_emchar() instead.
296 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
311 else if ( c <= 0x7ff )
313 *p++ = (c >> 6) | 0xc0;
314 *p++ = (c & 0x3f) | 0x80;
316 else if ( c <= 0xffff )
318 *p++ = (c >> 12) | 0xe0;
319 *p++ = ((c >> 6) & 0x3f) | 0x80;
320 *p++ = (c & 0x3f) | 0x80;
322 else if ( c <= 0x1fffff )
324 *p++ = (c >> 18) | 0xf0;
325 *p++ = ((c >> 12) & 0x3f) | 0x80;
326 *p++ = ((c >> 6) & 0x3f) | 0x80;
327 *p++ = (c & 0x3f) | 0x80;
329 else if ( c <= 0x3ffffff )
331 *p++ = (c >> 24) | 0xf8;
332 *p++ = ((c >> 18) & 0x3f) | 0x80;
333 *p++ = ((c >> 12) & 0x3f) | 0x80;
334 *p++ = ((c >> 6) & 0x3f) | 0x80;
335 *p++ = (c & 0x3f) | 0x80;
339 *p++ = (c >> 30) | 0xfc;
340 *p++ = ((c >> 24) & 0x3f) | 0x80;
341 *p++ = ((c >> 18) & 0x3f) | 0x80;
342 *p++ = ((c >> 12) & 0x3f) | 0x80;
343 *p++ = ((c >> 6) & 0x3f) | 0x80;
344 *p++ = (c & 0x3f) | 0x80;
347 BREAKUP_CHAR (c, charset, c1, c2);
348 lb = CHAR_LEADING_BYTE (c);
349 if (LEADING_BYTE_PRIVATE_P (lb))
350 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
352 if (EQ (charset, Vcharset_control_1))
361 /* Return the first character from a Mule-encoded string in STR,
362 assuming it's non-ASCII. Do not call this directly.
363 Use the macro charptr_emchar() instead. */
366 non_ascii_charptr_emchar (CONST Bufbyte *str)
379 else if ( b >= 0xf8 )
384 else if ( b >= 0xf0 )
389 else if ( b >= 0xe0 )
394 else if ( b >= 0xc0 )
404 for( ; len > 0; len-- )
407 ch = ( ch << 6 ) | ( b & 0x3f );
411 Bufbyte i0 = *str, i1, i2 = 0;
414 if (i0 == LEADING_BYTE_CONTROL_1)
415 return (Emchar) (*++str - 0x20);
417 if (LEADING_BYTE_PREFIX_P (i0))
422 charset = CHARSET_BY_LEADING_BYTE (i0);
423 if (XCHARSET_DIMENSION (charset) == 2)
426 return MAKE_CHAR (charset, i1, i2);
430 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
431 Do not call this directly. Use the macro valid_char_p() instead. */
435 non_ascii_valid_char_p (Emchar ch)
439 /* Must have only lowest 19 bits set */
443 f1 = CHAR_FIELD1 (ch);
444 f2 = CHAR_FIELD2 (ch);
445 f3 = CHAR_FIELD3 (ch);
451 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
452 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
453 f2 > MAX_CHAR_FIELD2_PRIVATE)
458 if (f3 != 0x20 && f3 != 0x7F)
462 NOTE: This takes advantage of the fact that
463 FIELD2_TO_OFFICIAL_LEADING_BYTE and
464 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
466 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
467 return (XCHARSET_CHARS (charset) == 96);
473 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
474 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
475 f1 > MAX_CHAR_FIELD1_PRIVATE)
477 if (f2 < 0x20 || f3 < 0x20)
480 #ifdef ENABLE_COMPOSITE_CHARS
481 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
483 if (UNBOUNDP (Fgethash (make_int (ch),
484 Vcomposite_char_char2string_hash_table,
489 #endif /* ENABLE_COMPOSITE_CHARS */
491 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
494 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
496 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
499 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
501 return (XCHARSET_CHARS (charset) == 96);
507 /************************************************************************/
508 /* Basic string functions */
509 /************************************************************************/
511 /* Copy the character pointed to by PTR into STR, assuming it's
512 non-ASCII. Do not call this directly. Use the macro
513 charptr_copy_char() instead. */
516 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
518 Bufbyte *strptr = str;
520 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
522 /* Notice fallthrough. */
524 case 6: *++strptr = *ptr++;
525 case 5: *++strptr = *ptr++;
527 case 4: *++strptr = *ptr++;
528 case 3: *++strptr = *ptr++;
529 case 2: *++strptr = *ptr;
534 return strptr + 1 - str;
538 /************************************************************************/
539 /* streams of Emchars */
540 /************************************************************************/
542 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
543 The functions below are not meant to be called directly; use
544 the macros in insdel.h. */
547 Lstream_get_emchar_1 (Lstream *stream, int ch)
549 Bufbyte str[MAX_EMCHAR_LEN];
550 Bufbyte *strptr = str;
552 str[0] = (Bufbyte) ch;
553 switch (REP_BYTES_BY_FIRST_BYTE (ch))
555 /* Notice fallthrough. */
558 ch = Lstream_getc (stream);
560 *++strptr = (Bufbyte) ch;
562 ch = Lstream_getc (stream);
564 *++strptr = (Bufbyte) ch;
567 ch = Lstream_getc (stream);
569 *++strptr = (Bufbyte) ch;
571 ch = Lstream_getc (stream);
573 *++strptr = (Bufbyte) ch;
575 ch = Lstream_getc (stream);
577 *++strptr = (Bufbyte) ch;
582 return charptr_emchar (str);
586 Lstream_fput_emchar (Lstream *stream, Emchar ch)
588 Bufbyte str[MAX_EMCHAR_LEN];
589 Bytecount len = set_charptr_emchar (str, ch);
590 return Lstream_write (stream, str, len);
594 Lstream_funget_emchar (Lstream *stream, Emchar ch)
596 Bufbyte str[MAX_EMCHAR_LEN];
597 Bytecount len = set_charptr_emchar (str, ch);
598 Lstream_unread (stream, str, len);
602 /************************************************************************/
604 /************************************************************************/
607 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
609 struct Lisp_Charset *cs = XCHARSET (obj);
611 markobj (cs->short_name);
612 markobj (cs->long_name);
613 markobj (cs->doc_string);
614 markobj (cs->registry);
615 markobj (cs->ccl_program);
617 markobj (cs->decoding_table);
623 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
625 struct Lisp_Charset *cs = XCHARSET (obj);
629 error ("printing unreadable object #<charset %s 0x%x>",
630 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
633 write_c_string ("#<charset ", printcharfun);
634 print_internal (CHARSET_NAME (cs), printcharfun, 0);
635 write_c_string (" ", printcharfun);
636 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
637 write_c_string (" ", printcharfun);
638 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
639 write_c_string (" ", printcharfun);
640 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
641 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
642 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
643 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
644 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
646 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
647 CHARSET_COLUMNS (cs),
648 CHARSET_GRAPHIC (cs),
650 write_c_string (buf, printcharfun);
651 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
652 sprintf (buf, " 0x%x>", cs->header.uid);
653 write_c_string (buf, printcharfun);
656 static const struct lrecord_description charset_description[] = {
657 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
661 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
662 mark_charset, print_charset, 0, 0, 0,
664 struct Lisp_Charset);
665 /* Make a new charset. */
668 make_charset (Charset_ID id, Lisp_Object name,
669 unsigned char type, unsigned char columns, unsigned char graphic,
670 Bufbyte final, unsigned char direction, Lisp_Object short_name,
671 Lisp_Object long_name, Lisp_Object doc,
673 Lisp_Object decoding_table,
674 Emchar ucs_min, Emchar ucs_max,
675 Emchar code_offset, unsigned char byte_offset)
678 struct Lisp_Charset *cs =
679 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
680 XSETCHARSET (obj, cs);
682 CHARSET_ID (cs) = id;
683 CHARSET_NAME (cs) = name;
684 CHARSET_SHORT_NAME (cs) = short_name;
685 CHARSET_LONG_NAME (cs) = long_name;
686 CHARSET_DIRECTION (cs) = direction;
687 CHARSET_TYPE (cs) = type;
688 CHARSET_COLUMNS (cs) = columns;
689 CHARSET_GRAPHIC (cs) = graphic;
690 CHARSET_FINAL (cs) = final;
691 CHARSET_DOC_STRING (cs) = doc;
692 CHARSET_REGISTRY (cs) = reg;
693 CHARSET_CCL_PROGRAM (cs) = Qnil;
694 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
696 CHARSET_DECODING_TABLE(cs) = decoding_table;
697 CHARSET_UCS_MIN(cs) = ucs_min;
698 CHARSET_UCS_MAX(cs) = ucs_max;
699 CHARSET_CODE_OFFSET(cs) = code_offset;
700 CHARSET_BYTE_OFFSET(cs) = byte_offset;
703 switch ( CHARSET_TYPE (cs) )
705 case CHARSET_TYPE_94:
706 CHARSET_DIMENSION (cs) = 1;
707 CHARSET_CHARS (cs) = 94;
709 if (!EQ (decoding_table, Qnil))
712 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
713 for (i = 0; i < 94; i++)
715 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
718 put_byte_from_character_table (XCHAR (c), i + 33,
719 CHARSET_TO_BYTE1_TABLE(cs));
723 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
724 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
727 case CHARSET_TYPE_96:
728 CHARSET_DIMENSION (cs) = 1;
729 CHARSET_CHARS (cs) = 96;
731 if (!EQ (decoding_table, Qnil))
734 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
735 for (i = 0; i < 96; i++)
737 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
740 put_byte_from_character_table (XCHAR (c), i + 32,
741 CHARSET_TO_BYTE1_TABLE(cs));
745 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
746 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
749 case CHARSET_TYPE_94X94:
750 CHARSET_DIMENSION (cs) = 2;
751 CHARSET_CHARS (cs) = 94;
753 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
754 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
757 case CHARSET_TYPE_96X96:
758 CHARSET_DIMENSION (cs) = 2;
759 CHARSET_CHARS (cs) = 96;
761 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
762 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
766 case CHARSET_TYPE_128X128:
767 CHARSET_DIMENSION (cs) = 2;
768 CHARSET_CHARS (cs) = 128;
770 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
771 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
774 case CHARSET_TYPE_256X256:
775 CHARSET_DIMENSION (cs) = 2;
776 CHARSET_CHARS (cs) = 256;
778 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
779 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
786 if (id == LEADING_BYTE_ASCII)
787 CHARSET_REP_BYTES (cs) = 1;
789 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
791 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
796 /* some charsets do not have final characters. This includes
797 ASCII, Control-1, Composite, and the two faux private
800 if (code_offset == 0)
802 assert (NILP (charset_by_attributes[type][final]));
803 charset_by_attributes[type][final] = obj;
806 assert (NILP (charset_by_attributes[type][final][direction]));
807 charset_by_attributes[type][final][direction] = obj;
811 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
812 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
815 /* official leading byte */
816 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
819 /* Some charsets are "faux" and don't have names or really exist at
820 all except in the leading-byte table. */
822 Fputhash (name, obj, Vcharset_hash_table);
827 get_unallocated_leading_byte (int dimension)
833 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
836 lb = next_allocated_1_byte_leading_byte++;
840 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
843 lb = next_allocated_2_byte_leading_byte++;
848 ("No more character sets free for this dimension",
849 make_int (dimension));
856 charset_get_byte1 (Lisp_Object charset, Emchar ch)
858 Emchar_to_byte_table* table;
861 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
862 return get_byte_from_character_table (ch, table);
863 else if ((XCHARSET_UCS_MIN (charset) <= ch)
864 && (ch <= XCHARSET_UCS_MAX (charset)))
865 return (ch - XCHARSET_UCS_MIN (charset)
866 + XCHARSET_CODE_OFFSET (charset))
867 / (XCHARSET_DIMENSION (charset) == 1 ?
870 XCHARSET_DIMENSION (charset) == 2 ?
871 XCHARSET_CHARS (charset)
873 XCHARSET_DIMENSION (charset) == 3 ?
874 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
876 XCHARSET_CHARS (charset)
877 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
878 + XCHARSET_BYTE_OFFSET (charset);
879 else if (XCHARSET_CODE_OFFSET (charset) == 0)
881 if (XCHARSET_DIMENSION (charset) == 1)
883 if (XCHARSET_CHARS (charset) == 94)
885 if (((d = ch - (MIN_CHAR_94
886 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
890 else if (XCHARSET_CHARS (charset) == 96)
892 if (((d = ch - (MIN_CHAR_96
893 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
900 else if (XCHARSET_DIMENSION (charset) == 2)
902 if (XCHARSET_CHARS (charset) == 94)
904 if (((d = ch - (MIN_CHAR_94x94
905 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
908 return (d / 94) + 33;
910 else if (XCHARSET_CHARS (charset) == 96)
912 if (((d = ch - (MIN_CHAR_96x96
913 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
916 return (d / 96) + 32;
924 charset_get_byte2 (Lisp_Object charset, Emchar ch)
926 if (XCHARSET_DIMENSION (charset) == 1)
930 Emchar_to_byte_table* table;
932 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
933 return get_byte_from_character_table (ch, table);
934 else if ((XCHARSET_UCS_MIN (charset) <= ch)
935 && (ch <= XCHARSET_UCS_MAX (charset)))
936 return ((ch - XCHARSET_UCS_MIN (charset)
937 + XCHARSET_CODE_OFFSET (charset))
938 / (XCHARSET_DIMENSION (charset) == 2 ?
941 XCHARSET_DIMENSION (charset) == 3 ?
942 XCHARSET_CHARS (charset)
944 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
945 % XCHARSET_CHARS (charset)
946 + XCHARSET_BYTE_OFFSET (charset);
947 else if (XCHARSET_CHARS (charset) == 94)
948 return (MIN_CHAR_94x94
949 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
950 && (ch < MIN_CHAR_94x94
951 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
952 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
953 else /* if (XCHARSET_CHARS (charset) == 96) */
954 return (MIN_CHAR_96x96
955 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
956 && (ch < MIN_CHAR_96x96
957 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
958 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
962 Lisp_Object Vdefault_coded_charset_priority_list;
966 /************************************************************************/
967 /* Basic charset Lisp functions */
968 /************************************************************************/
970 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
971 Return non-nil if OBJECT is a charset.
975 return CHARSETP (object) ? Qt : Qnil;
978 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
979 Retrieve the charset of the given name.
980 If CHARSET-OR-NAME is a charset object, it is simply returned.
981 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
982 nil is returned. Otherwise the associated charset object is returned.
986 if (CHARSETP (charset_or_name))
987 return charset_or_name;
989 CHECK_SYMBOL (charset_or_name);
990 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
993 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
994 Retrieve the charset of the given name.
995 Same as `find-charset' except an error is signalled if there is no such
996 charset instead of returning nil.
1000 Lisp_Object charset = Ffind_charset (name);
1003 signal_simple_error ("No such charset", name);
1007 /* We store the charsets in hash tables with the names as the key and the
1008 actual charset object as the value. Occasionally we need to use them
1009 in a list format. These routines provide us with that. */
1010 struct charset_list_closure
1012 Lisp_Object *charset_list;
1016 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1017 void *charset_list_closure)
1019 /* This function can GC */
1020 struct charset_list_closure *chcl =
1021 (struct charset_list_closure*) charset_list_closure;
1022 Lisp_Object *charset_list = chcl->charset_list;
1024 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1028 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1029 Return a list of the names of all defined charsets.
1033 Lisp_Object charset_list = Qnil;
1034 struct gcpro gcpro1;
1035 struct charset_list_closure charset_list_closure;
1037 GCPRO1 (charset_list);
1038 charset_list_closure.charset_list = &charset_list;
1039 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1040 &charset_list_closure);
1043 return charset_list;
1046 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1047 Return the name of the given charset.
1051 return XCHARSET_NAME (Fget_charset (charset));
1054 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1055 Define a new character set.
1056 This function is for use with Mule support.
1057 NAME is a symbol, the name by which the character set is normally referred.
1058 DOC-STRING is a string describing the character set.
1059 PROPS is a property list, describing the specific nature of the
1060 character set. Recognized properties are:
1062 'short-name Short version of the charset name (ex: Latin-1)
1063 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1064 'registry A regular expression matching the font registry field for
1066 'dimension Number of octets used to index a character in this charset.
1067 Either 1 or 2. Defaults to 1.
1068 'columns Number of columns used to display a character in this charset.
1069 Only used in TTY mode. (Under X, the actual width of a
1070 character can be derived from the font used to display the
1071 characters.) If unspecified, defaults to the dimension
1072 (this is almost always the correct value).
1073 'chars Number of characters in each dimension (94 or 96).
1074 Defaults to 94. Note that if the dimension is 2, the
1075 character set thus described is 94x94 or 96x96.
1076 'final Final byte of ISO 2022 escape sequence. Must be
1077 supplied. Each combination of (DIMENSION, CHARS) defines a
1078 separate namespace for final bytes. Note that ISO
1079 2022 restricts the final byte to the range
1080 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1081 dimension == 2. Note also that final bytes in the range
1082 0x30 - 0x3F are reserved for user-defined (not official)
1084 'graphic 0 (use left half of font on output) or 1 (use right half
1085 of font on output). Defaults to 0. For example, for
1086 a font whose registry is ISO8859-1, the left half
1087 (octets 0x20 - 0x7F) is the `ascii' character set, while
1088 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1089 character set. With 'graphic set to 0, the octets
1090 will have their high bit cleared; with it set to 1,
1091 the octets will have their high bit set.
1092 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1094 'ccl-program A compiled CCL program used to convert a character in
1095 this charset into an index into the font. This is in
1096 addition to the 'graphic property. The CCL program
1097 is passed the octets of the character, with the high
1098 bit cleared and set depending upon whether the value
1099 of the 'graphic property is 0 or 1.
1101 (name, doc_string, props))
1103 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1104 int direction = CHARSET_LEFT_TO_RIGHT;
1106 Lisp_Object registry = Qnil;
1107 Lisp_Object charset;
1108 Lisp_Object rest, keyword, value;
1109 Lisp_Object ccl_program = Qnil;
1110 Lisp_Object short_name = Qnil, long_name = Qnil;
1112 CHECK_SYMBOL (name);
1113 if (!NILP (doc_string))
1114 CHECK_STRING (doc_string);
1116 charset = Ffind_charset (name);
1117 if (!NILP (charset))
1118 signal_simple_error ("Cannot redefine existing charset", name);
1120 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1122 if (EQ (keyword, Qshort_name))
1124 CHECK_STRING (value);
1128 if (EQ (keyword, Qlong_name))
1130 CHECK_STRING (value);
1134 else if (EQ (keyword, Qdimension))
1137 dimension = XINT (value);
1138 if (dimension < 1 || dimension > 2)
1139 signal_simple_error ("Invalid value for 'dimension", value);
1142 else if (EQ (keyword, Qchars))
1145 chars = XINT (value);
1146 if (chars != 94 && chars != 96)
1147 signal_simple_error ("Invalid value for 'chars", value);
1150 else if (EQ (keyword, Qcolumns))
1153 columns = XINT (value);
1154 if (columns != 1 && columns != 2)
1155 signal_simple_error ("Invalid value for 'columns", value);
1158 else if (EQ (keyword, Qgraphic))
1161 graphic = XINT (value);
1162 if (graphic < 0 || graphic > 1)
1163 signal_simple_error ("Invalid value for 'graphic", value);
1166 else if (EQ (keyword, Qregistry))
1168 CHECK_STRING (value);
1172 else if (EQ (keyword, Qdirection))
1174 if (EQ (value, Ql2r))
1175 direction = CHARSET_LEFT_TO_RIGHT;
1176 else if (EQ (value, Qr2l))
1177 direction = CHARSET_RIGHT_TO_LEFT;
1179 signal_simple_error ("Invalid value for 'direction", value);
1182 else if (EQ (keyword, Qfinal))
1184 CHECK_CHAR_COERCE_INT (value);
1185 final = XCHAR (value);
1186 if (final < '0' || final > '~')
1187 signal_simple_error ("Invalid value for 'final", value);
1190 else if (EQ (keyword, Qccl_program))
1192 CHECK_VECTOR (value);
1193 ccl_program = value;
1197 signal_simple_error ("Unrecognized property", keyword);
1201 error ("'final must be specified");
1202 if (dimension == 2 && final > 0x5F)
1204 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1208 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1210 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1212 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1213 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1215 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1222 /* id = CHARSET_ID_OFFSET_94 + final; */
1223 id = get_unallocated_leading_byte (dimension);
1225 else if (chars == 96)
1227 id = get_unallocated_leading_byte (dimension);
1234 else if (dimension == 2)
1238 id = get_unallocated_leading_byte (dimension);
1240 else if (chars == 96)
1242 id = get_unallocated_leading_byte (dimension);
1254 id = get_unallocated_leading_byte (dimension);
1257 if (NILP (doc_string))
1258 doc_string = build_string ("");
1260 if (NILP (registry))
1261 registry = build_string ("");
1263 if (NILP (short_name))
1264 XSETSTRING (short_name, XSYMBOL (name)->name);
1266 if (NILP (long_name))
1267 long_name = doc_string;
1270 columns = dimension;
1271 charset = make_charset (id, name, type, columns, graphic,
1272 final, direction, short_name, long_name,
1273 doc_string, registry,
1275 if (!NILP (ccl_program))
1276 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1280 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1282 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1283 NEW-NAME is the name of the new charset. Return the new charset.
1285 (charset, new_name))
1287 Lisp_Object new_charset = Qnil;
1288 int id, dimension, columns, graphic, final;
1289 int direction, type;
1290 Lisp_Object registry, doc_string, short_name, long_name;
1291 struct Lisp_Charset *cs;
1293 charset = Fget_charset (charset);
1294 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1295 signal_simple_error ("Charset already has reverse-direction charset",
1298 CHECK_SYMBOL (new_name);
1299 if (!NILP (Ffind_charset (new_name)))
1300 signal_simple_error ("Cannot redefine existing charset", new_name);
1302 cs = XCHARSET (charset);
1304 type = CHARSET_TYPE (cs);
1305 columns = CHARSET_COLUMNS (cs);
1306 dimension = CHARSET_DIMENSION (cs);
1307 id = get_unallocated_leading_byte (dimension);
1309 graphic = CHARSET_GRAPHIC (cs);
1310 final = CHARSET_FINAL (cs);
1311 direction = CHARSET_RIGHT_TO_LEFT;
1312 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1313 direction = CHARSET_LEFT_TO_RIGHT;
1314 doc_string = CHARSET_DOC_STRING (cs);
1315 short_name = CHARSET_SHORT_NAME (cs);
1316 long_name = CHARSET_LONG_NAME (cs);
1317 registry = CHARSET_REGISTRY (cs);
1319 new_charset = make_charset (id, new_name, type, columns,
1320 graphic, final, direction, short_name, long_name,
1321 doc_string, registry,
1323 CHARSET_DECODING_TABLE(cs),
1324 CHARSET_UCS_MIN(cs),
1325 CHARSET_UCS_MAX(cs),
1326 CHARSET_CODE_OFFSET(cs),
1327 CHARSET_BYTE_OFFSET(cs)
1333 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1334 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1339 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1340 Define symbol ALIAS as an alias for CHARSET.
1344 CHECK_SYMBOL (alias);
1345 charset = Fget_charset (charset);
1346 return Fputhash (alias, charset, Vcharset_hash_table);
1349 /* #### Reverse direction charsets not yet implemented. */
1351 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1353 Return the reverse-direction charset parallel to CHARSET, if any.
1354 This is the charset with the same properties (in particular, the same
1355 dimension, number of characters per dimension, and final byte) as
1356 CHARSET but whose characters are displayed in the opposite direction.
1360 charset = Fget_charset (charset);
1361 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1365 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1366 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1367 If DIRECTION is omitted, both directions will be checked (left-to-right
1368 will be returned if character sets exist for both directions).
1370 (dimension, chars, final, direction))
1372 int dm, ch, fi, di = -1;
1374 Lisp_Object obj = Qnil;
1376 CHECK_INT (dimension);
1377 dm = XINT (dimension);
1378 if (dm < 1 || dm > 2)
1379 signal_simple_error ("Invalid value for DIMENSION", dimension);
1383 if (ch != 94 && ch != 96)
1384 signal_simple_error ("Invalid value for CHARS", chars);
1386 CHECK_CHAR_COERCE_INT (final);
1388 if (fi < '0' || fi > '~')
1389 signal_simple_error ("Invalid value for FINAL", final);
1391 if (EQ (direction, Ql2r))
1392 di = CHARSET_LEFT_TO_RIGHT;
1393 else if (EQ (direction, Qr2l))
1394 di = CHARSET_RIGHT_TO_LEFT;
1395 else if (!NILP (direction))
1396 signal_simple_error ("Invalid value for DIRECTION", direction);
1398 if (dm == 2 && fi > 0x5F)
1400 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1403 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1405 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1409 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1411 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1414 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1417 return XCHARSET_NAME (obj);
1421 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1422 Return short name of CHARSET.
1426 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1429 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1430 Return long name of CHARSET.
1434 return XCHARSET_LONG_NAME (Fget_charset (charset));
1437 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1438 Return description of CHARSET.
1442 return XCHARSET_DOC_STRING (Fget_charset (charset));
1445 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1446 Return dimension of CHARSET.
1450 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1453 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1454 Return property PROP of CHARSET.
1455 Recognized properties are those listed in `make-charset', as well as
1456 'name and 'doc-string.
1460 struct Lisp_Charset *cs;
1462 charset = Fget_charset (charset);
1463 cs = XCHARSET (charset);
1465 CHECK_SYMBOL (prop);
1466 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1467 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1468 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1469 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1470 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1471 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1472 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1473 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1474 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1475 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1476 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1477 if (EQ (prop, Qdirection))
1478 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1479 if (EQ (prop, Qreverse_direction_charset))
1481 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1485 return XCHARSET_NAME (obj);
1487 signal_simple_error ("Unrecognized charset property name", prop);
1488 return Qnil; /* not reached */
1491 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1492 Return charset identification number of CHARSET.
1496 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1499 /* #### We need to figure out which properties we really want to
1502 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1503 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1505 (charset, ccl_program))
1507 charset = Fget_charset (charset);
1508 CHECK_VECTOR (ccl_program);
1509 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1514 invalidate_charset_font_caches (Lisp_Object charset)
1516 /* Invalidate font cache entries for charset on all devices. */
1517 Lisp_Object devcons, concons, hash_table;
1518 DEVICE_LOOP_NO_BREAK (devcons, concons)
1520 struct device *d = XDEVICE (XCAR (devcons));
1521 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1522 if (!UNBOUNDP (hash_table))
1523 Fclrhash (hash_table);
1527 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1528 Set the 'registry property of CHARSET to REGISTRY.
1530 (charset, registry))
1532 charset = Fget_charset (charset);
1533 CHECK_STRING (registry);
1534 XCHARSET_REGISTRY (charset) = registry;
1535 invalidate_charset_font_caches (charset);
1536 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1541 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1542 Return mapping-table of CHARSET.
1546 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1549 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1550 Set mapping-table of CHARSET to TABLE.
1554 struct Lisp_Charset *cs;
1555 Emchar_to_byte_table* old_byte1_table;
1556 Emchar_to_byte_table* old_byte2_table;
1558 charset = Fget_charset (charset);
1559 CHECK_VECTOR (table);
1561 cs = XCHARSET (charset);
1562 CHARSET_DECODING_TABLE(cs) = table;
1563 old_byte1_table = CHARSET_TO_BYTE1_TABLE(cs);
1564 old_byte2_table = CHARSET_TO_BYTE2_TABLE(cs);
1565 switch (CHARSET_TYPE (cs))
1567 case CHARSET_TYPE_94:
1568 if (!EQ (table, Qnil))
1571 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1572 for (i = 0; i < 94; i++)
1574 Lisp_Object c = XVECTOR_DATA(table)[i];
1577 put_byte_from_character_table (XCHAR (c), i + 33,
1578 CHARSET_TO_BYTE1_TABLE(cs));
1582 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1583 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1585 case CHARSET_TYPE_96:
1586 if (!EQ (table, Qnil))
1589 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1590 for (i = 0; i < 96; i++)
1592 Lisp_Object c = XVECTOR_DATA(table)[i];
1595 put_byte_from_character_table (XCHAR (c), i + 32,
1596 CHARSET_TO_BYTE1_TABLE(cs));
1600 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1601 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1603 case CHARSET_TYPE_94X94:
1604 if (!EQ (table, Qnil))
1608 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1609 CHARSET_TO_BYTE2_TABLE(cs) = make_byte_from_character_table();
1610 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1612 Lisp_Object v = XVECTOR_DATA(table)[i];
1618 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1620 Lisp_Object c = XVECTOR_DATA(v)[j];
1624 put_byte_from_character_table
1625 (XCHAR (c), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1626 put_byte_from_character_table
1627 (XCHAR (c), j + 33, CHARSET_TO_BYTE2_TABLE(cs));
1632 put_byte_from_character_table
1633 (XCHAR (v), i + 33, CHARSET_TO_BYTE1_TABLE(cs));
1638 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1639 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1642 case CHARSET_TYPE_96X96:
1643 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1644 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1646 case CHARSET_TYPE_128X128:
1647 CHARSET_DIMENSION (cs) = 2;
1648 CHARSET_CHARS (cs) = 128;
1649 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1650 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1652 case CHARSET_TYPE_256X256:
1653 CHARSET_DIMENSION (cs) = 2;
1654 CHARSET_CHARS (cs) = 256;
1655 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1656 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1659 if (old_byte1_table != NULL)
1660 destroy_byte_from_character_table (old_byte1_table);
1661 if (old_byte2_table != NULL)
1662 destroy_byte_from_character_table (old_byte2_table);
1668 /************************************************************************/
1669 /* Lisp primitives for working with characters */
1670 /************************************************************************/
1672 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1673 Make a character from CHARSET and octets ARG1 and ARG2.
1674 ARG2 is required only for characters from two-dimensional charsets.
1675 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1676 character s with caron.
1678 (charset, arg1, arg2))
1680 struct Lisp_Charset *cs;
1682 int lowlim, highlim;
1684 charset = Fget_charset (charset);
1685 cs = XCHARSET (charset);
1687 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1688 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1690 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1692 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1693 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1696 /* It is useful (and safe, according to Olivier Galibert) to strip
1697 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1698 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1699 Latin 2 code of the character. */
1707 if (a1 < lowlim || a1 > highlim)
1708 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1710 if (CHARSET_DIMENSION (cs) == 1)
1714 ("Charset is of dimension one; second octet must be nil", arg2);
1715 return make_char (MAKE_CHAR (charset, a1, 0));
1724 a2 = XINT (arg2) & 0x7f;
1726 if (a2 < lowlim || a2 > highlim)
1727 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1729 return make_char (MAKE_CHAR (charset, a1, a2));
1732 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1733 Return the character set of char CH.
1737 CHECK_CHAR_COERCE_INT (ch);
1739 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1742 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1743 Return list of charset and one or two position-codes of CHAR.
1747 /* This function can GC */
1748 struct gcpro gcpro1, gcpro2;
1749 Lisp_Object charset = Qnil;
1750 Lisp_Object rc = Qnil;
1753 GCPRO2 (charset, rc);
1754 CHECK_CHAR_COERCE_INT (character);
1756 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1758 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1760 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1764 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1772 #ifdef ENABLE_COMPOSITE_CHARS
1773 /************************************************************************/
1774 /* composite character functions */
1775 /************************************************************************/
1778 lookup_composite_char (Bufbyte *str, int len)
1780 Lisp_Object lispstr = make_string (str, len);
1781 Lisp_Object ch = Fgethash (lispstr,
1782 Vcomposite_char_string2char_hash_table,
1788 if (composite_char_row_next >= 128)
1789 signal_simple_error ("No more composite chars available", lispstr);
1790 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1791 composite_char_col_next);
1792 Fputhash (make_char (emch), lispstr,
1793 Vcomposite_char_char2string_hash_table);
1794 Fputhash (lispstr, make_char (emch),
1795 Vcomposite_char_string2char_hash_table);
1796 composite_char_col_next++;
1797 if (composite_char_col_next >= 128)
1799 composite_char_col_next = 32;
1800 composite_char_row_next++;
1809 composite_char_string (Emchar ch)
1811 Lisp_Object str = Fgethash (make_char (ch),
1812 Vcomposite_char_char2string_hash_table,
1814 assert (!UNBOUNDP (str));
1818 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1819 Convert a string into a single composite character.
1820 The character is the result of overstriking all the characters in
1825 CHECK_STRING (string);
1826 return make_char (lookup_composite_char (XSTRING_DATA (string),
1827 XSTRING_LENGTH (string)));
1830 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1831 Return a string of the characters comprising a composite character.
1839 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1840 signal_simple_error ("Must be composite char", ch);
1841 return composite_char_string (emch);
1843 #endif /* ENABLE_COMPOSITE_CHARS */
1846 /************************************************************************/
1847 /* initialization */
1848 /************************************************************************/
1851 syms_of_mule_charset (void)
1853 DEFSUBR (Fcharsetp);
1854 DEFSUBR (Ffind_charset);
1855 DEFSUBR (Fget_charset);
1856 DEFSUBR (Fcharset_list);
1857 DEFSUBR (Fcharset_name);
1858 DEFSUBR (Fmake_charset);
1859 DEFSUBR (Fmake_reverse_direction_charset);
1860 /* DEFSUBR (Freverse_direction_charset); */
1861 DEFSUBR (Fdefine_charset_alias);
1862 DEFSUBR (Fcharset_from_attributes);
1863 DEFSUBR (Fcharset_short_name);
1864 DEFSUBR (Fcharset_long_name);
1865 DEFSUBR (Fcharset_description);
1866 DEFSUBR (Fcharset_dimension);
1867 DEFSUBR (Fcharset_property);
1868 DEFSUBR (Fcharset_id);
1869 DEFSUBR (Fset_charset_ccl_program);
1870 DEFSUBR (Fset_charset_registry);
1872 DEFSUBR (Fcharset_mapping_table);
1873 DEFSUBR (Fset_charset_mapping_table);
1876 DEFSUBR (Fmake_char);
1877 DEFSUBR (Fchar_charset);
1878 DEFSUBR (Fsplit_char);
1880 #ifdef ENABLE_COMPOSITE_CHARS
1881 DEFSUBR (Fmake_composite_char);
1882 DEFSUBR (Fcomposite_char_string);
1885 defsymbol (&Qcharsetp, "charsetp");
1886 defsymbol (&Qregistry, "registry");
1887 defsymbol (&Qfinal, "final");
1888 defsymbol (&Qgraphic, "graphic");
1889 defsymbol (&Qdirection, "direction");
1890 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1891 defsymbol (&Qshort_name, "short-name");
1892 defsymbol (&Qlong_name, "long-name");
1894 defsymbol (&Ql2r, "l2r");
1895 defsymbol (&Qr2l, "r2l");
1897 /* Charsets, compatible with FSF 20.3
1898 Naming convention is Script-Charset[-Edition] */
1899 defsymbol (&Qascii, "ascii");
1900 defsymbol (&Qcontrol_1, "control-1");
1901 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1902 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1903 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1904 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1905 defsymbol (&Qthai_tis620, "thai-tis620");
1906 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1907 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1908 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1909 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1910 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1911 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1912 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1913 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1914 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1915 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1916 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1917 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1918 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1919 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1921 defsymbol (&Qucs_bmp, "ucs-bmp");
1922 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
1923 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
1924 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
1925 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
1926 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
1927 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
1929 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1930 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1932 defsymbol (&Qcomposite, "composite");
1936 vars_of_mule_charset (void)
1943 /* Table of charsets indexed by leading byte. */
1944 for (i = 0; i < countof (charset_by_leading_byte); i++)
1945 charset_by_leading_byte[i] = Qnil;
1948 /* Table of charsets indexed by type/final-byte. */
1949 for (i = 0; i < countof (charset_by_attributes); i++)
1950 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1951 charset_by_attributes[i][j] = Qnil;
1953 /* Table of charsets indexed by type/final-byte/direction. */
1954 for (i = 0; i < countof (charset_by_attributes); i++)
1955 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1956 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1957 charset_by_attributes[i][j][k] = Qnil;
1960 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1962 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1964 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1968 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1969 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1970 Leading-code of private TYPE9N charset of column-width 1.
1972 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1976 Vutf_2000_version = build_string("0.8 (Kami)");
1977 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1978 Version number of UTF-2000.
1981 Vdefault_coded_charset_priority_list = Qnil;
1982 DEFVAR_LISP ("default-coded-charset-priority-list",
1983 &Vdefault_coded_charset_priority_list /*
1984 Default order of preferred coded-character-set.
1990 complex_vars_of_mule_charset (void)
1992 staticpro (&Vcharset_hash_table);
1993 Vcharset_hash_table =
1994 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1996 /* Predefined character sets. We store them into variables for
2001 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2002 CHARSET_TYPE_256X256, 1, 0, 0,
2003 CHARSET_LEFT_TO_RIGHT,
2004 build_string ("BMP"),
2005 build_string ("BMP"),
2006 build_string ("BMP"),
2008 Qnil, 0, 0xFFFF, 0, 0);
2010 # define MIN_CHAR_THAI 0
2011 # define MAX_CHAR_THAI 0
2012 # define MIN_CHAR_GREEK 0
2013 # define MAX_CHAR_GREEK 0
2014 # define MIN_CHAR_HEBREW 0
2015 # define MAX_CHAR_HEBREW 0
2016 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2017 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2018 # define MIN_CHAR_CYRILLIC 0
2019 # define MAX_CHAR_CYRILLIC 0
2022 make_charset (LEADING_BYTE_ASCII, Qascii,
2023 CHARSET_TYPE_94, 1, 0, 'B',
2024 CHARSET_LEFT_TO_RIGHT,
2025 build_string ("ASCII"),
2026 build_string ("ASCII)"),
2027 build_string ("ASCII (ISO646 IRV)"),
2028 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2029 Qnil, 0, 0x7F, 0, 0);
2030 Vcharset_control_1 =
2031 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2032 CHARSET_TYPE_94, 1, 1, 0,
2033 CHARSET_LEFT_TO_RIGHT,
2034 build_string ("C1"),
2035 build_string ("Control characters"),
2036 build_string ("Control characters 128-191"),
2038 Qnil, 0x80, 0x9F, 0, 0);
2039 Vcharset_latin_iso8859_1 =
2040 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2041 CHARSET_TYPE_96, 1, 1, 'A',
2042 CHARSET_LEFT_TO_RIGHT,
2043 build_string ("Latin-1"),
2044 build_string ("ISO8859-1 (Latin-1)"),
2045 build_string ("ISO8859-1 (Latin-1)"),
2046 build_string ("iso8859-1"),
2047 Qnil, 0xA0, 0xFF, 0, 32);
2048 Vcharset_latin_iso8859_2 =
2049 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2050 CHARSET_TYPE_96, 1, 1, 'B',
2051 CHARSET_LEFT_TO_RIGHT,
2052 build_string ("Latin-2"),
2053 build_string ("ISO8859-2 (Latin-2)"),
2054 build_string ("ISO8859-2 (Latin-2)"),
2055 build_string ("iso8859-2"),
2057 Vcharset_latin_iso8859_3 =
2058 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2059 CHARSET_TYPE_96, 1, 1, 'C',
2060 CHARSET_LEFT_TO_RIGHT,
2061 build_string ("Latin-3"),
2062 build_string ("ISO8859-3 (Latin-3)"),
2063 build_string ("ISO8859-3 (Latin-3)"),
2064 build_string ("iso8859-3"),
2066 Vcharset_latin_iso8859_4 =
2067 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2068 CHARSET_TYPE_96, 1, 1, 'D',
2069 CHARSET_LEFT_TO_RIGHT,
2070 build_string ("Latin-4"),
2071 build_string ("ISO8859-4 (Latin-4)"),
2072 build_string ("ISO8859-4 (Latin-4)"),
2073 build_string ("iso8859-4"),
2075 Vcharset_thai_tis620 =
2076 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2077 CHARSET_TYPE_96, 1, 1, 'T',
2078 CHARSET_LEFT_TO_RIGHT,
2079 build_string ("TIS620"),
2080 build_string ("TIS620 (Thai)"),
2081 build_string ("TIS620.2529 (Thai)"),
2082 build_string ("tis620"),
2083 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2084 Vcharset_greek_iso8859_7 =
2085 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2086 CHARSET_TYPE_96, 1, 1, 'F',
2087 CHARSET_LEFT_TO_RIGHT,
2088 build_string ("ISO8859-7"),
2089 build_string ("ISO8859-7 (Greek)"),
2090 build_string ("ISO8859-7 (Greek)"),
2091 build_string ("iso8859-7"),
2092 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2093 Vcharset_arabic_iso8859_6 =
2094 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2095 CHARSET_TYPE_96, 1, 1, 'G',
2096 CHARSET_RIGHT_TO_LEFT,
2097 build_string ("ISO8859-6"),
2098 build_string ("ISO8859-6 (Arabic)"),
2099 build_string ("ISO8859-6 (Arabic)"),
2100 build_string ("iso8859-6"),
2102 Vcharset_hebrew_iso8859_8 =
2103 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2104 CHARSET_TYPE_96, 1, 1, 'H',
2105 CHARSET_RIGHT_TO_LEFT,
2106 build_string ("ISO8859-8"),
2107 build_string ("ISO8859-8 (Hebrew)"),
2108 build_string ("ISO8859-8 (Hebrew)"),
2109 build_string ("iso8859-8"),
2110 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2111 Vcharset_katakana_jisx0201 =
2112 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2113 CHARSET_TYPE_94, 1, 1, 'I',
2114 CHARSET_LEFT_TO_RIGHT,
2115 build_string ("JISX0201 Kana"),
2116 build_string ("JISX0201.1976 (Japanese Kana)"),
2117 build_string ("JISX0201.1976 Japanese Kana"),
2118 build_string ("jisx0201\\.1976"),
2120 MIN_CHAR_HALFWIDTH_KATAKANA,
2121 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2122 Vcharset_latin_jisx0201 =
2123 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2124 CHARSET_TYPE_94, 1, 0, 'J',
2125 CHARSET_LEFT_TO_RIGHT,
2126 build_string ("JISX0201 Roman"),
2127 build_string ("JISX0201.1976 (Japanese Roman)"),
2128 build_string ("JISX0201.1976 Japanese Roman"),
2129 build_string ("jisx0201\\.1976"),
2131 Vcharset_cyrillic_iso8859_5 =
2132 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2133 CHARSET_TYPE_96, 1, 1, 'L',
2134 CHARSET_LEFT_TO_RIGHT,
2135 build_string ("ISO8859-5"),
2136 build_string ("ISO8859-5 (Cyrillic)"),
2137 build_string ("ISO8859-5 (Cyrillic)"),
2138 build_string ("iso8859-5"),
2139 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2140 Vcharset_latin_iso8859_9 =
2141 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2142 CHARSET_TYPE_96, 1, 1, 'M',
2143 CHARSET_LEFT_TO_RIGHT,
2144 build_string ("Latin-5"),
2145 build_string ("ISO8859-9 (Latin-5)"),
2146 build_string ("ISO8859-9 (Latin-5)"),
2147 build_string ("iso8859-9"),
2149 Vcharset_japanese_jisx0208_1978 =
2150 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2151 CHARSET_TYPE_94X94, 2, 0, '@',
2152 CHARSET_LEFT_TO_RIGHT,
2153 build_string ("JIS X0208:1978"),
2154 build_string ("JIS X0208:1978 (Japanese)"),
2156 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2157 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2159 Vcharset_chinese_gb2312 =
2160 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2161 CHARSET_TYPE_94X94, 2, 0, 'A',
2162 CHARSET_LEFT_TO_RIGHT,
2163 build_string ("GB2312"),
2164 build_string ("GB2312)"),
2165 build_string ("GB2312 Chinese simplified"),
2166 build_string ("gb2312"),
2168 Vcharset_japanese_jisx0208 =
2169 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2170 CHARSET_TYPE_94X94, 2, 0, 'B',
2171 CHARSET_LEFT_TO_RIGHT,
2172 build_string ("JISX0208"),
2173 build_string ("JIS X0208:1983 (Japanese)"),
2174 build_string ("JIS X0208:1983 Japanese Kanji"),
2175 build_string ("jisx0208\\.1983"),
2177 Vcharset_korean_ksc5601 =
2178 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2179 CHARSET_TYPE_94X94, 2, 0, 'C',
2180 CHARSET_LEFT_TO_RIGHT,
2181 build_string ("KSC5601"),
2182 build_string ("KSC5601 (Korean"),
2183 build_string ("KSC5601 Korean Hangul and Hanja"),
2184 build_string ("ksc5601"),
2186 Vcharset_japanese_jisx0212 =
2187 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2188 CHARSET_TYPE_94X94, 2, 0, 'D',
2189 CHARSET_LEFT_TO_RIGHT,
2190 build_string ("JISX0212"),
2191 build_string ("JISX0212 (Japanese)"),
2192 build_string ("JISX0212 Japanese Supplement"),
2193 build_string ("jisx0212"),
2196 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2197 Vcharset_chinese_cns11643_1 =
2198 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2199 CHARSET_TYPE_94X94, 2, 0, 'G',
2200 CHARSET_LEFT_TO_RIGHT,
2201 build_string ("CNS11643-1"),
2202 build_string ("CNS11643-1 (Chinese traditional)"),
2204 ("CNS 11643 Plane 1 Chinese traditional"),
2205 build_string (CHINESE_CNS_PLANE_RE("1")),
2207 Vcharset_chinese_cns11643_2 =
2208 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2209 CHARSET_TYPE_94X94, 2, 0, 'H',
2210 CHARSET_LEFT_TO_RIGHT,
2211 build_string ("CNS11643-2"),
2212 build_string ("CNS11643-2 (Chinese traditional)"),
2214 ("CNS 11643 Plane 2 Chinese traditional"),
2215 build_string (CHINESE_CNS_PLANE_RE("2")),
2218 Vcharset_latin_viscii_lower =
2219 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2220 CHARSET_TYPE_96, 1, 1, '1',
2221 CHARSET_LEFT_TO_RIGHT,
2222 build_string ("VISCII lower"),
2223 build_string ("VISCII lower (Vietnamese)"),
2224 build_string ("VISCII lower (Vietnamese)"),
2225 build_string ("VISCII1\\.1"),
2227 Vcharset_latin_viscii_upper =
2228 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2229 CHARSET_TYPE_96, 1, 1, '2',
2230 CHARSET_LEFT_TO_RIGHT,
2231 build_string ("VISCII upper"),
2232 build_string ("VISCII upper (Vietnamese)"),
2233 build_string ("VISCII upper (Vietnamese)"),
2234 build_string ("VISCII1\\.1"),
2236 Vcharset_hiragana_jisx0208 =
2237 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2238 CHARSET_TYPE_94X94, 2, 0, 'B',
2239 CHARSET_LEFT_TO_RIGHT,
2240 build_string ("Hiragana"),
2241 build_string ("Hiragana of JIS X0208"),
2242 build_string ("Japanese Hiragana of JIS X0208"),
2243 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2244 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2245 (0x24 - 33) * 94 + (0x21 - 33), 33);
2246 Vcharset_katakana_jisx0208 =
2247 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2248 CHARSET_TYPE_94X94, 2, 0, 'B',
2249 CHARSET_LEFT_TO_RIGHT,
2250 build_string ("Katakana"),
2251 build_string ("Katakana of JIS X0208"),
2252 build_string ("Japanese Katakana of JIS X0208"),
2253 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2254 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2255 (0x25 - 33) * 94 + (0x21 - 33), 33);
2257 Vcharset_chinese_big5_1 =
2258 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2259 CHARSET_TYPE_94X94, 2, 0, '0',
2260 CHARSET_LEFT_TO_RIGHT,
2261 build_string ("Big5"),
2262 build_string ("Big5 (Level-1)"),
2264 ("Big5 Level-1 Chinese traditional"),
2265 build_string ("big5"),
2267 Vcharset_chinese_big5_2 =
2268 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2269 CHARSET_TYPE_94X94, 2, 0, '1',
2270 CHARSET_LEFT_TO_RIGHT,
2271 build_string ("Big5"),
2272 build_string ("Big5 (Level-2)"),
2274 ("Big5 Level-2 Chinese traditional"),
2275 build_string ("big5"),
2278 #ifdef ENABLE_COMPOSITE_CHARS
2279 /* #### For simplicity, we put composite chars into a 96x96 charset.
2280 This is going to lead to problems because you can run out of
2281 room, esp. as we don't yet recycle numbers. */
2282 Vcharset_composite =
2283 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2284 CHARSET_TYPE_96X96, 2, 0, 0,
2285 CHARSET_LEFT_TO_RIGHT,
2286 build_string ("Composite"),
2287 build_string ("Composite characters"),
2288 build_string ("Composite characters"),
2291 composite_char_row_next = 32;
2292 composite_char_col_next = 32;
2294 Vcomposite_char_string2char_hash_table =
2295 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2296 Vcomposite_char_char2string_hash_table =
2297 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2298 staticpro (&Vcomposite_char_string2char_hash_table);
2299 staticpro (&Vcomposite_char_char2string_hash_table);
2300 #endif /* ENABLE_COMPOSITE_CHARS */