1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_cns11643_3;
62 Lisp_Object Vcharset_chinese_cns11643_4;
63 Lisp_Object Vcharset_chinese_cns11643_5;
64 Lisp_Object Vcharset_chinese_cns11643_6;
65 Lisp_Object Vcharset_chinese_cns11643_7;
67 Lisp_Object Vcharset_chinese_big5_1;
68 Lisp_Object Vcharset_chinese_big5_2;
70 #ifdef ENABLE_COMPOSITE_CHARS
71 Lisp_Object Vcharset_composite;
73 /* Hash tables for composite chars. One maps strings representing
74 composed chars to their equivalent chars; one goes the
76 Lisp_Object Vcomposite_char_char2string_hash_table;
77 Lisp_Object Vcomposite_char_string2char_hash_table;
79 static int composite_char_row_next;
80 static int composite_char_col_next;
82 #endif /* ENABLE_COMPOSITE_CHARS */
84 /* Table of charsets indexed by leading byte. */
85 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
87 /* Table of charsets indexed by type/final-byte/direction. */
88 Lisp_Object charset_by_attributes[4][128][2];
91 /* Table of number of bytes in the string representation of a character
92 indexed by the first byte of that representation.
94 rep_bytes_by_first_byte(c) is more efficient than the equivalent
95 canonical computation:
97 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
99 Bytecount rep_bytes_by_first_byte[0xA0] =
100 { /* 0x00 - 0x7f are for straight ASCII */
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 /* 0x80 - 0x8f are for Dimension-1 official charsets */
111 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 /* 0x90 - 0x9d are for Dimension-2 official charsets */
116 /* 0x9e is for Dimension-1 private charsets */
117 /* 0x9f is for Dimension-2 private charsets */
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
122 Lisp_Object Vutf_2000_version;
124 int leading_code_private_11;
126 Lisp_Object Qcharsetp;
128 /* Qdoc_string, Qdimension, Qchars defined in general.c */
129 Lisp_Object Qregistry, Qfinal, Qgraphic;
130 Lisp_Object Qdirection;
131 Lisp_Object Qreverse_direction_charset;
132 Lisp_Object Qleading_byte;
133 Lisp_Object Qshort_name, Qlong_name;
149 Qjapanese_jisx0208_1978,
167 Lisp_Object Ql2r, Qr2l;
169 Lisp_Object Vcharset_hash_table;
171 static Bufbyte next_allocated_1_byte_leading_byte;
172 static Bufbyte next_allocated_2_byte_leading_byte;
174 /* Composite characters are characters constructed by overstriking two
175 or more regular characters.
177 1) The old Mule implementation involves storing composite characters
178 in a buffer as a tag followed by all of the actual characters
179 used to make up the composite character. I think this is a bad
180 idea; it greatly complicates code that wants to handle strings
181 one character at a time because it has to deal with the possibility
182 of great big ungainly characters. It's much more reasonable to
183 simply store an index into a table of composite characters.
185 2) The current implementation only allows for 16,384 separate
186 composite characters over the lifetime of the XEmacs process.
187 This could become a potential problem if the user
188 edited lots of different files that use composite characters.
189 Due to FSF bogosity, increasing the number of allowable
190 composite characters under Mule would decrease the number
191 of possible faces that can exist. Mule already has shrunk
192 this to 2048, and further shrinkage would become uncomfortable.
193 No such problems exist in XEmacs.
195 Composite characters could be represented as 0x80 C1 C2 C3,
196 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
197 for slightly under 2^20 (one million) composite characters
198 over the XEmacs process lifetime, and you only need to
199 increase the size of a Mule character from 19 to 21 bits.
200 Or you could use 0x80 C1 C2 C3 C4, allowing for about
201 85 million (slightly over 2^26) composite characters. */
204 /************************************************************************/
205 /* Basic Emchar functions */
206 /************************************************************************/
208 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
209 string in STR. Returns the number of bytes stored.
210 Do not call this directly. Use the macro set_charptr_emchar() instead.
214 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
229 else if ( c <= 0x7ff )
231 *p++ = (c >> 6) | 0xc0;
232 *p++ = (c & 0x3f) | 0x80;
234 else if ( c <= 0xffff )
236 *p++ = (c >> 12) | 0xe0;
237 *p++ = ((c >> 6) & 0x3f) | 0x80;
238 *p++ = (c & 0x3f) | 0x80;
240 else if ( c <= 0x1fffff )
242 *p++ = (c >> 18) | 0xf0;
243 *p++ = ((c >> 12) & 0x3f) | 0x80;
244 *p++ = ((c >> 6) & 0x3f) | 0x80;
245 *p++ = (c & 0x3f) | 0x80;
247 else if ( c <= 0x3ffffff )
249 *p++ = (c >> 24) | 0xf8;
250 *p++ = ((c >> 18) & 0x3f) | 0x80;
251 *p++ = ((c >> 12) & 0x3f) | 0x80;
252 *p++ = ((c >> 6) & 0x3f) | 0x80;
253 *p++ = (c & 0x3f) | 0x80;
257 *p++ = (c >> 30) | 0xfc;
258 *p++ = ((c >> 24) & 0x3f) | 0x80;
259 *p++ = ((c >> 18) & 0x3f) | 0x80;
260 *p++ = ((c >> 12) & 0x3f) | 0x80;
261 *p++ = ((c >> 6) & 0x3f) | 0x80;
262 *p++ = (c & 0x3f) | 0x80;
265 BREAKUP_CHAR (c, charset, c1, c2);
266 lb = CHAR_LEADING_BYTE (c);
267 if (LEADING_BYTE_PRIVATE_P (lb))
268 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
270 if (EQ (charset, Vcharset_control_1))
279 /* Return the first character from a Mule-encoded string in STR,
280 assuming it's non-ASCII. Do not call this directly.
281 Use the macro charptr_emchar() instead. */
284 non_ascii_charptr_emchar (CONST Bufbyte *str)
297 else if ( b >= 0xf8 )
302 else if ( b >= 0xf0 )
307 else if ( b >= 0xe0 )
312 else if ( b >= 0xc0 )
322 for( ; len > 0; len-- )
325 ch = ( ch << 6 ) | ( b & 0x3f );
329 Bufbyte i0 = *str, i1, i2 = 0;
332 if (i0 == LEADING_BYTE_CONTROL_1)
333 return (Emchar) (*++str - 0x20);
335 if (LEADING_BYTE_PREFIX_P (i0))
340 charset = CHARSET_BY_LEADING_BYTE (i0);
341 if (XCHARSET_DIMENSION (charset) == 2)
344 return MAKE_CHAR (charset, i1, i2);
348 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
349 Do not call this directly. Use the macro valid_char_p() instead. */
353 non_ascii_valid_char_p (Emchar ch)
357 /* Must have only lowest 19 bits set */
361 f1 = CHAR_FIELD1 (ch);
362 f2 = CHAR_FIELD2 (ch);
363 f3 = CHAR_FIELD3 (ch);
369 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
370 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
371 f2 > MAX_CHAR_FIELD2_PRIVATE)
376 if (f3 != 0x20 && f3 != 0x7F)
380 NOTE: This takes advantage of the fact that
381 FIELD2_TO_OFFICIAL_LEADING_BYTE and
382 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
384 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
385 return (XCHARSET_CHARS (charset) == 96);
391 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
392 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
393 f1 > MAX_CHAR_FIELD1_PRIVATE)
395 if (f2 < 0x20 || f3 < 0x20)
398 #ifdef ENABLE_COMPOSITE_CHARS
399 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
401 if (UNBOUNDP (Fgethash (make_int (ch),
402 Vcomposite_char_char2string_hash_table,
407 #endif /* ENABLE_COMPOSITE_CHARS */
409 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
412 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
414 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
417 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
419 return (XCHARSET_CHARS (charset) == 96);
425 /************************************************************************/
426 /* Basic string functions */
427 /************************************************************************/
429 /* Copy the character pointed to by PTR into STR, assuming it's
430 non-ASCII. Do not call this directly. Use the macro
431 charptr_copy_char() instead. */
434 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
436 Bufbyte *strptr = str;
438 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
440 /* Notice fallthrough. */
442 case 6: *++strptr = *ptr++;
443 case 5: *++strptr = *ptr++;
445 case 4: *++strptr = *ptr++;
446 case 3: *++strptr = *ptr++;
447 case 2: *++strptr = *ptr;
452 return strptr + 1 - str;
456 /************************************************************************/
457 /* streams of Emchars */
458 /************************************************************************/
460 /* Treat a stream as a stream of Emchars rather than a stream of bytes.
461 The functions below are not meant to be called directly; use
462 the macros in insdel.h. */
465 Lstream_get_emchar_1 (Lstream *stream, int ch)
467 Bufbyte str[MAX_EMCHAR_LEN];
468 Bufbyte *strptr = str;
470 str[0] = (Bufbyte) ch;
471 switch (REP_BYTES_BY_FIRST_BYTE (ch))
473 /* Notice fallthrough. */
476 ch = Lstream_getc (stream);
478 *++strptr = (Bufbyte) ch;
480 ch = Lstream_getc (stream);
482 *++strptr = (Bufbyte) ch;
485 ch = Lstream_getc (stream);
487 *++strptr = (Bufbyte) ch;
489 ch = Lstream_getc (stream);
491 *++strptr = (Bufbyte) ch;
493 ch = Lstream_getc (stream);
495 *++strptr = (Bufbyte) ch;
500 return charptr_emchar (str);
504 Lstream_fput_emchar (Lstream *stream, Emchar ch)
506 Bufbyte str[MAX_EMCHAR_LEN];
507 Bytecount len = set_charptr_emchar (str, ch);
508 return Lstream_write (stream, str, len);
512 Lstream_funget_emchar (Lstream *stream, Emchar ch)
514 Bufbyte str[MAX_EMCHAR_LEN];
515 Bytecount len = set_charptr_emchar (str, ch);
516 Lstream_unread (stream, str, len);
520 /************************************************************************/
522 /************************************************************************/
525 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
527 struct Lisp_Charset *cs = XCHARSET (obj);
529 markobj (cs->short_name);
530 markobj (cs->long_name);
531 markobj (cs->doc_string);
532 markobj (cs->registry);
533 markobj (cs->ccl_program);
538 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
540 struct Lisp_Charset *cs = XCHARSET (obj);
544 error ("printing unreadable object #<charset %s 0x%x>",
545 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
548 write_c_string ("#<charset ", printcharfun);
549 print_internal (CHARSET_NAME (cs), printcharfun, 0);
550 write_c_string (" ", printcharfun);
551 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
552 write_c_string (" ", printcharfun);
553 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
554 write_c_string (" ", printcharfun);
555 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
556 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
557 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
558 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
559 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
561 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
562 CHARSET_COLUMNS (cs),
563 CHARSET_GRAPHIC (cs),
565 write_c_string (buf, printcharfun);
566 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
567 sprintf (buf, " 0x%x>", cs->header.uid);
568 write_c_string (buf, printcharfun);
571 static const struct lrecord_description charset_description[] = {
572 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
576 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
577 mark_charset, print_charset, 0, 0, 0, charset_description,
578 struct Lisp_Charset);
579 /* Make a new charset. */
582 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
583 unsigned char type, unsigned char columns, unsigned char graphic,
584 Bufbyte final, unsigned char direction, Lisp_Object short_name,
585 Lisp_Object long_name, Lisp_Object doc,
589 struct Lisp_Charset *cs =
590 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
591 XSETCHARSET (obj, cs);
593 CHARSET_ID (cs) = id;
594 CHARSET_NAME (cs) = name;
595 CHARSET_SHORT_NAME (cs) = short_name;
596 CHARSET_LONG_NAME (cs) = long_name;
597 CHARSET_REP_BYTES (cs) = rep_bytes;
598 CHARSET_DIRECTION (cs) = direction;
599 CHARSET_TYPE (cs) = type;
600 CHARSET_COLUMNS (cs) = columns;
601 CHARSET_GRAPHIC (cs) = graphic;
602 CHARSET_FINAL (cs) = final;
603 CHARSET_DOC_STRING (cs) = doc;
604 CHARSET_REGISTRY (cs) = reg;
605 CHARSET_CCL_PROGRAM (cs) = Qnil;
606 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
608 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
609 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
610 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
611 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
615 /* Some charsets do not have final characters. This includes
616 ASCII, Control-1, Composite, and the two faux private
618 assert (NILP (charset_by_attributes[type][final][direction]));
619 charset_by_attributes[type][final][direction] = obj;
622 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
623 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
626 /* official leading byte */
627 rep_bytes_by_first_byte[id] = rep_bytes;
630 /* Some charsets are "faux" and don't have names or really exist at
631 all except in the leading-byte table. */
633 Fputhash (name, obj, Vcharset_hash_table);
638 get_unallocated_leading_byte (int dimension)
644 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
647 lb = next_allocated_1_byte_leading_byte++;
651 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
654 lb = next_allocated_2_byte_leading_byte++;
659 ("No more character sets free for this dimension",
660 make_int (dimension));
666 /************************************************************************/
667 /* Basic charset Lisp functions */
668 /************************************************************************/
670 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
671 Return non-nil if OBJECT is a charset.
675 return CHARSETP (object) ? Qt : Qnil;
678 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
679 Retrieve the charset of the given name.
680 If CHARSET-OR-NAME is a charset object, it is simply returned.
681 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
682 nil is returned. Otherwise the associated charset object is returned.
686 if (CHARSETP (charset_or_name))
687 return charset_or_name;
689 CHECK_SYMBOL (charset_or_name);
690 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
693 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
694 Retrieve the charset of the given name.
695 Same as `find-charset' except an error is signalled if there is no such
696 charset instead of returning nil.
700 Lisp_Object charset = Ffind_charset (name);
703 signal_simple_error ("No such charset", name);
707 /* We store the charsets in hash tables with the names as the key and the
708 actual charset object as the value. Occasionally we need to use them
709 in a list format. These routines provide us with that. */
710 struct charset_list_closure
712 Lisp_Object *charset_list;
716 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
717 void *charset_list_closure)
719 /* This function can GC */
720 struct charset_list_closure *chcl =
721 (struct charset_list_closure*) charset_list_closure;
722 Lisp_Object *charset_list = chcl->charset_list;
724 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
728 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
729 Return a list of the names of all defined charsets.
733 Lisp_Object charset_list = Qnil;
735 struct charset_list_closure charset_list_closure;
737 GCPRO1 (charset_list);
738 charset_list_closure.charset_list = &charset_list;
739 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
740 &charset_list_closure);
746 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
747 Return the name of the given charset.
751 return XCHARSET_NAME (Fget_charset (charset));
754 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
755 Define a new character set.
756 This function is for use with Mule support.
757 NAME is a symbol, the name by which the character set is normally referred.
758 DOC-STRING is a string describing the character set.
759 PROPS is a property list, describing the specific nature of the
760 character set. Recognized properties are:
762 'short-name Short version of the charset name (ex: Latin-1)
763 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
764 'registry A regular expression matching the font registry field for
766 'dimension Number of octets used to index a character in this charset.
767 Either 1 or 2. Defaults to 1.
768 'columns Number of columns used to display a character in this charset.
769 Only used in TTY mode. (Under X, the actual width of a
770 character can be derived from the font used to display the
771 characters.) If unspecified, defaults to the dimension
772 (this is almost always the correct value).
773 'chars Number of characters in each dimension (94 or 96).
774 Defaults to 94. Note that if the dimension is 2, the
775 character set thus described is 94x94 or 96x96.
776 'final Final byte of ISO 2022 escape sequence. Must be
777 supplied. Each combination of (DIMENSION, CHARS) defines a
778 separate namespace for final bytes. Note that ISO
779 2022 restricts the final byte to the range
780 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
781 dimension == 2. Note also that final bytes in the range
782 0x30 - 0x3F are reserved for user-defined (not official)
784 'graphic 0 (use left half of font on output) or 1 (use right half
785 of font on output). Defaults to 0. For example, for
786 a font whose registry is ISO8859-1, the left half
787 (octets 0x20 - 0x7F) is the `ascii' character set, while
788 the right half (octets 0xA0 - 0xFF) is the `latin-1'
789 character set. With 'graphic set to 0, the octets
790 will have their high bit cleared; with it set to 1,
791 the octets will have their high bit set.
792 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
794 'ccl-program A compiled CCL program used to convert a character in
795 this charset into an index into the font. This is in
796 addition to the 'graphic property. The CCL program
797 is passed the octets of the character, with the high
798 bit cleared and set depending upon whether the value
799 of the 'graphic property is 0 or 1.
801 (name, doc_string, props))
803 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
804 int direction = CHARSET_LEFT_TO_RIGHT;
806 Lisp_Object registry = Qnil;
808 Lisp_Object rest, keyword, value;
809 Lisp_Object ccl_program = Qnil;
810 Lisp_Object short_name = Qnil, long_name = Qnil;
813 if (!NILP (doc_string))
814 CHECK_STRING (doc_string);
816 charset = Ffind_charset (name);
818 signal_simple_error ("Cannot redefine existing charset", name);
820 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
822 if (EQ (keyword, Qshort_name))
824 CHECK_STRING (value);
828 if (EQ (keyword, Qlong_name))
830 CHECK_STRING (value);
834 else if (EQ (keyword, Qdimension))
837 dimension = XINT (value);
838 if (dimension < 1 || dimension > 2)
839 signal_simple_error ("Invalid value for 'dimension", value);
842 else if (EQ (keyword, Qchars))
845 chars = XINT (value);
846 if (chars != 94 && chars != 96)
847 signal_simple_error ("Invalid value for 'chars", value);
850 else if (EQ (keyword, Qcolumns))
853 columns = XINT (value);
854 if (columns != 1 && columns != 2)
855 signal_simple_error ("Invalid value for 'columns", value);
858 else if (EQ (keyword, Qgraphic))
861 graphic = XINT (value);
862 if (graphic < 0 || graphic > 1)
863 signal_simple_error ("Invalid value for 'graphic", value);
866 else if (EQ (keyword, Qregistry))
868 CHECK_STRING (value);
872 else if (EQ (keyword, Qdirection))
874 if (EQ (value, Ql2r))
875 direction = CHARSET_LEFT_TO_RIGHT;
876 else if (EQ (value, Qr2l))
877 direction = CHARSET_RIGHT_TO_LEFT;
879 signal_simple_error ("Invalid value for 'direction", value);
882 else if (EQ (keyword, Qfinal))
884 CHECK_CHAR_COERCE_INT (value);
885 final = XCHAR (value);
886 if (final < '0' || final > '~')
887 signal_simple_error ("Invalid value for 'final", value);
890 else if (EQ (keyword, Qccl_program))
892 CHECK_VECTOR (value);
897 signal_simple_error ("Unrecognized property", keyword);
901 error ("'final must be specified");
902 if (dimension == 2 && final > 0x5F)
904 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
908 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
910 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
912 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
913 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
915 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
917 id = get_unallocated_leading_byte (dimension);
919 if (NILP (doc_string))
920 doc_string = build_string ("");
923 registry = build_string ("");
925 if (NILP (short_name))
926 XSETSTRING (short_name, XSYMBOL (name)->name);
928 if (NILP (long_name))
929 long_name = doc_string;
933 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
934 final, direction, short_name, long_name, doc_string, registry);
935 if (!NILP (ccl_program))
936 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
940 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
942 Make a charset equivalent to CHARSET but which goes in the opposite direction.
943 NEW-NAME is the name of the new charset. Return the new charset.
947 Lisp_Object new_charset = Qnil;
948 int id, dimension, columns, graphic, final;
950 Lisp_Object registry, doc_string, short_name, long_name;
951 struct Lisp_Charset *cs;
953 charset = Fget_charset (charset);
954 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
955 signal_simple_error ("Charset already has reverse-direction charset",
958 CHECK_SYMBOL (new_name);
959 if (!NILP (Ffind_charset (new_name)))
960 signal_simple_error ("Cannot redefine existing charset", new_name);
962 cs = XCHARSET (charset);
964 type = CHARSET_TYPE (cs);
965 columns = CHARSET_COLUMNS (cs);
966 dimension = CHARSET_DIMENSION (cs);
967 id = get_unallocated_leading_byte (dimension);
969 graphic = CHARSET_GRAPHIC (cs);
970 final = CHARSET_FINAL (cs);
971 direction = CHARSET_RIGHT_TO_LEFT;
972 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
973 direction = CHARSET_LEFT_TO_RIGHT;
974 doc_string = CHARSET_DOC_STRING (cs);
975 short_name = CHARSET_SHORT_NAME (cs);
976 long_name = CHARSET_LONG_NAME (cs);
977 registry = CHARSET_REGISTRY (cs);
979 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
980 graphic, final, direction, short_name, long_name,
981 doc_string, registry);
983 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
984 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
989 /* #### Reverse direction charsets not yet implemented. */
991 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
993 Return the reverse-direction charset parallel to CHARSET, if any.
994 This is the charset with the same properties (in particular, the same
995 dimension, number of characters per dimension, and final byte) as
996 CHARSET but whose characters are displayed in the opposite direction.
1000 charset = Fget_charset (charset);
1001 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1005 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1006 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1007 If DIRECTION is omitted, both directions will be checked (left-to-right
1008 will be returned if character sets exist for both directions).
1010 (dimension, chars, final, direction))
1012 int dm, ch, fi, di = -1;
1014 Lisp_Object obj = Qnil;
1016 CHECK_INT (dimension);
1017 dm = XINT (dimension);
1018 if (dm < 1 || dm > 2)
1019 signal_simple_error ("Invalid value for DIMENSION", dimension);
1023 if (ch != 94 && ch != 96)
1024 signal_simple_error ("Invalid value for CHARS", chars);
1026 CHECK_CHAR_COERCE_INT (final);
1028 if (fi < '0' || fi > '~')
1029 signal_simple_error ("Invalid value for FINAL", final);
1031 if (EQ (direction, Ql2r))
1032 di = CHARSET_LEFT_TO_RIGHT;
1033 else if (EQ (direction, Qr2l))
1034 di = CHARSET_RIGHT_TO_LEFT;
1035 else if (!NILP (direction))
1036 signal_simple_error ("Invalid value for DIRECTION", direction);
1038 if (dm == 2 && fi > 0x5F)
1040 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1043 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1045 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1049 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1051 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1054 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1057 return XCHARSET_NAME (obj);
1061 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1062 Return short name of CHARSET.
1066 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1069 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1070 Return long name of CHARSET.
1074 return XCHARSET_LONG_NAME (Fget_charset (charset));
1077 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1078 Return description of CHARSET.
1082 return XCHARSET_DOC_STRING (Fget_charset (charset));
1085 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1086 Return dimension of CHARSET.
1090 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1093 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1094 Return property PROP of CHARSET.
1095 Recognized properties are those listed in `make-charset', as well as
1096 'name and 'doc-string.
1100 struct Lisp_Charset *cs;
1102 charset = Fget_charset (charset);
1103 cs = XCHARSET (charset);
1105 CHECK_SYMBOL (prop);
1106 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1107 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1108 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1109 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1110 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1111 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1112 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1113 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1114 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1115 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1116 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1117 if (EQ (prop, Qdirection))
1118 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1119 if (EQ (prop, Qreverse_direction_charset))
1121 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1125 return XCHARSET_NAME (obj);
1127 signal_simple_error ("Unrecognized charset property name", prop);
1128 return Qnil; /* not reached */
1131 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1132 Return charset identification number of CHARSET.
1136 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1139 /* #### We need to figure out which properties we really want to
1142 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1143 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1145 (charset, ccl_program))
1147 charset = Fget_charset (charset);
1148 CHECK_VECTOR (ccl_program);
1149 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1154 invalidate_charset_font_caches (Lisp_Object charset)
1156 /* Invalidate font cache entries for charset on all devices. */
1157 Lisp_Object devcons, concons, hash_table;
1158 DEVICE_LOOP_NO_BREAK (devcons, concons)
1160 struct device *d = XDEVICE (XCAR (devcons));
1161 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1162 if (!UNBOUNDP (hash_table))
1163 Fclrhash (hash_table);
1167 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1168 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1169 Set the 'registry property of CHARSET to REGISTRY.
1171 (charset, registry))
1173 charset = Fget_charset (charset);
1174 CHECK_STRING (registry);
1175 XCHARSET_REGISTRY (charset) = registry;
1176 invalidate_charset_font_caches (charset);
1177 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1182 /************************************************************************/
1183 /* Lisp primitives for working with characters */
1184 /************************************************************************/
1186 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1187 Make a character from CHARSET and octets ARG1 and ARG2.
1188 ARG2 is required only for characters from two-dimensional charsets.
1189 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1190 character s with caron.
1192 (charset, arg1, arg2))
1194 struct Lisp_Charset *cs;
1196 int lowlim, highlim;
1198 charset = Fget_charset (charset);
1199 cs = XCHARSET (charset);
1201 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1202 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1203 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1204 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1207 /* It is useful (and safe, according to Olivier Galibert) to strip
1208 the 8th bit off ARG1 and ARG2 because it allows programmers to
1209 write (make-char 'latin-iso8859-2 CODE) where CODE is the actual
1210 Latin 2 code of the character. */
1211 a1 = XINT (arg1) & 0x7f;
1212 if (a1 < lowlim || a1 > highlim)
1213 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1215 if (CHARSET_DIMENSION (cs) == 1)
1219 ("Charset is of dimension one; second octet must be nil", arg2);
1220 return make_char (MAKE_CHAR (charset, a1, 0));
1224 a2 = XINT (arg2) & 0x7f;
1225 if (a2 < lowlim || a2 > highlim)
1226 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1228 return make_char (MAKE_CHAR (charset, a1, a2));
1231 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1232 Return the character set of char CH.
1236 CHECK_CHAR_COERCE_INT (ch);
1238 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1239 (CHAR_LEADING_BYTE (XCHAR (ch))));
1242 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1243 Return list of charset and one or two position-codes of CHAR.
1247 /* This function can GC */
1248 struct gcpro gcpro1, gcpro2;
1249 Lisp_Object charset = Qnil;
1250 Lisp_Object rc = Qnil;
1253 GCPRO2 (charset, rc);
1254 CHECK_CHAR_COERCE_INT (character);
1256 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1258 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1260 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1264 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1272 #ifdef ENABLE_COMPOSITE_CHARS
1273 /************************************************************************/
1274 /* composite character functions */
1275 /************************************************************************/
1278 lookup_composite_char (Bufbyte *str, int len)
1280 Lisp_Object lispstr = make_string (str, len);
1281 Lisp_Object ch = Fgethash (lispstr,
1282 Vcomposite_char_string2char_hash_table,
1288 if (composite_char_row_next >= 128)
1289 signal_simple_error ("No more composite chars available", lispstr);
1290 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1291 composite_char_col_next);
1292 Fputhash (make_char (emch), lispstr,
1293 Vcomposite_char_char2string_hash_table);
1294 Fputhash (lispstr, make_char (emch),
1295 Vcomposite_char_string2char_hash_table);
1296 composite_char_col_next++;
1297 if (composite_char_col_next >= 128)
1299 composite_char_col_next = 32;
1300 composite_char_row_next++;
1309 composite_char_string (Emchar ch)
1311 Lisp_Object str = Fgethash (make_char (ch),
1312 Vcomposite_char_char2string_hash_table,
1314 assert (!UNBOUNDP (str));
1318 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1319 Convert a string into a single composite character.
1320 The character is the result of overstriking all the characters in
1325 CHECK_STRING (string);
1326 return make_char (lookup_composite_char (XSTRING_DATA (string),
1327 XSTRING_LENGTH (string)));
1330 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1331 Return a string of the characters comprising a composite character.
1339 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1340 signal_simple_error ("Must be composite char", ch);
1341 return composite_char_string (emch);
1343 #endif /* ENABLE_COMPOSITE_CHARS */
1346 /************************************************************************/
1347 /* initialization */
1348 /************************************************************************/
1351 syms_of_mule_charset (void)
1353 DEFSUBR (Fcharsetp);
1354 DEFSUBR (Ffind_charset);
1355 DEFSUBR (Fget_charset);
1356 DEFSUBR (Fcharset_list);
1357 DEFSUBR (Fcharset_name);
1358 DEFSUBR (Fmake_charset);
1359 DEFSUBR (Fmake_reverse_direction_charset);
1360 /* DEFSUBR (Freverse_direction_charset); */
1361 DEFSUBR (Fcharset_from_attributes);
1362 DEFSUBR (Fcharset_short_name);
1363 DEFSUBR (Fcharset_long_name);
1364 DEFSUBR (Fcharset_description);
1365 DEFSUBR (Fcharset_dimension);
1366 DEFSUBR (Fcharset_property);
1367 DEFSUBR (Fcharset_id);
1368 DEFSUBR (Fset_charset_ccl_program);
1369 DEFSUBR (Fset_charset_registry);
1371 DEFSUBR (Fmake_char);
1372 DEFSUBR (Fchar_charset);
1373 DEFSUBR (Fsplit_char);
1375 #ifdef ENABLE_COMPOSITE_CHARS
1376 DEFSUBR (Fmake_composite_char);
1377 DEFSUBR (Fcomposite_char_string);
1380 defsymbol (&Qcharsetp, "charsetp");
1381 defsymbol (&Qregistry, "registry");
1382 defsymbol (&Qfinal, "final");
1383 defsymbol (&Qgraphic, "graphic");
1384 defsymbol (&Qdirection, "direction");
1385 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1386 defsymbol (&Qshort_name, "short-name");
1387 defsymbol (&Qlong_name, "long-name");
1389 defsymbol (&Ql2r, "l2r");
1390 defsymbol (&Qr2l, "r2l");
1392 /* Charsets, compatible with FSF 20.3
1393 Naming convention is Script-Charset[-Edition] */
1394 defsymbol (&Qascii, "ascii");
1395 defsymbol (&Qcontrol_1, "control-1");
1396 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1397 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1398 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1399 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1400 defsymbol (&Qthai_tis620, "thai-tis620");
1401 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1402 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1403 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1404 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1405 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1406 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1407 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1408 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1409 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1410 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1411 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1412 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1413 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1414 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1416 defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
1417 defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
1418 defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
1419 defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
1420 defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
1422 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1423 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1425 defsymbol (&Qcomposite, "composite");
1428 Vutf_2000_version = build_string("0.4 (Shin-Imamiya)");
1429 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1430 Version number of UTF-2000.
1436 vars_of_mule_charset (void)
1440 /* Table of charsets indexed by leading byte. */
1441 for (i = 0; i < countof (charset_by_leading_byte); i++)
1442 charset_by_leading_byte[i] = Qnil;
1444 /* Table of charsets indexed by type/final-byte/direction. */
1445 for (i = 0; i < countof (charset_by_attributes); i++)
1446 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1447 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1448 charset_by_attributes[i][j][k] = Qnil;
1450 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1452 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1454 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1457 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1458 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1459 Leading-code of private TYPE9N charset of column-width 1.
1461 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1465 complex_vars_of_mule_charset (void)
1467 staticpro (&Vcharset_hash_table);
1468 Vcharset_hash_table =
1469 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1471 /* Predefined character sets. We store them into variables for
1475 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1476 CHARSET_TYPE_94, 1, 0, 'B',
1477 CHARSET_LEFT_TO_RIGHT,
1478 build_string ("ASCII"),
1479 build_string ("ASCII)"),
1480 build_string ("ASCII (ISO646 IRV)"),
1481 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1482 Vcharset_control_1 =
1483 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1484 CHARSET_TYPE_94, 1, 1, 0,
1485 CHARSET_LEFT_TO_RIGHT,
1486 build_string ("C1"),
1487 build_string ("Control characters"),
1488 build_string ("Control characters 128-191"),
1490 Vcharset_latin_iso8859_1 =
1491 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1492 CHARSET_TYPE_96, 1, 1, 'A',
1493 CHARSET_LEFT_TO_RIGHT,
1494 build_string ("Latin-1"),
1495 build_string ("ISO8859-1 (Latin-1)"),
1496 build_string ("ISO8859-1 (Latin-1)"),
1497 build_string ("iso8859-1"));
1498 Vcharset_latin_iso8859_2 =
1499 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1500 CHARSET_TYPE_96, 1, 1, 'B',
1501 CHARSET_LEFT_TO_RIGHT,
1502 build_string ("Latin-2"),
1503 build_string ("ISO8859-2 (Latin-2)"),
1504 build_string ("ISO8859-2 (Latin-2)"),
1505 build_string ("iso8859-2"));
1506 Vcharset_latin_iso8859_3 =
1507 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1508 CHARSET_TYPE_96, 1, 1, 'C',
1509 CHARSET_LEFT_TO_RIGHT,
1510 build_string ("Latin-3"),
1511 build_string ("ISO8859-3 (Latin-3)"),
1512 build_string ("ISO8859-3 (Latin-3)"),
1513 build_string ("iso8859-3"));
1514 Vcharset_latin_iso8859_4 =
1515 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1516 CHARSET_TYPE_96, 1, 1, 'D',
1517 CHARSET_LEFT_TO_RIGHT,
1518 build_string ("Latin-4"),
1519 build_string ("ISO8859-4 (Latin-4)"),
1520 build_string ("ISO8859-4 (Latin-4)"),
1521 build_string ("iso8859-4"));
1522 Vcharset_thai_tis620 =
1523 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1524 CHARSET_TYPE_96, 1, 1, 'T',
1525 CHARSET_LEFT_TO_RIGHT,
1526 build_string ("TIS620"),
1527 build_string ("TIS620 (Thai)"),
1528 build_string ("TIS620.2529 (Thai)"),
1529 build_string ("tis620"));
1530 Vcharset_greek_iso8859_7 =
1531 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1532 CHARSET_TYPE_96, 1, 1, 'F',
1533 CHARSET_LEFT_TO_RIGHT,
1534 build_string ("ISO8859-7"),
1535 build_string ("ISO8859-7 (Greek)"),
1536 build_string ("ISO8859-7 (Greek)"),
1537 build_string ("iso8859-7"));
1538 Vcharset_arabic_iso8859_6 =
1539 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1540 CHARSET_TYPE_96, 1, 1, 'G',
1541 CHARSET_RIGHT_TO_LEFT,
1542 build_string ("ISO8859-6"),
1543 build_string ("ISO8859-6 (Arabic)"),
1544 build_string ("ISO8859-6 (Arabic)"),
1545 build_string ("iso8859-6"));
1546 Vcharset_hebrew_iso8859_8 =
1547 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1548 CHARSET_TYPE_96, 1, 1, 'H',
1549 CHARSET_RIGHT_TO_LEFT,
1550 build_string ("ISO8859-8"),
1551 build_string ("ISO8859-8 (Hebrew)"),
1552 build_string ("ISO8859-8 (Hebrew)"),
1553 build_string ("iso8859-8"));
1554 Vcharset_katakana_jisx0201 =
1555 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1556 CHARSET_TYPE_94, 1, 1, 'I',
1557 CHARSET_LEFT_TO_RIGHT,
1558 build_string ("JISX0201 Kana"),
1559 build_string ("JISX0201.1976 (Japanese Kana)"),
1560 build_string ("JISX0201.1976 Japanese Kana"),
1561 build_string ("jisx0201.1976"));
1562 Vcharset_latin_jisx0201 =
1563 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1564 CHARSET_TYPE_94, 1, 0, 'J',
1565 CHARSET_LEFT_TO_RIGHT,
1566 build_string ("JISX0201 Roman"),
1567 build_string ("JISX0201.1976 (Japanese Roman)"),
1568 build_string ("JISX0201.1976 Japanese Roman"),
1569 build_string ("jisx0201.1976"));
1570 Vcharset_cyrillic_iso8859_5 =
1571 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1572 CHARSET_TYPE_96, 1, 1, 'L',
1573 CHARSET_LEFT_TO_RIGHT,
1574 build_string ("ISO8859-5"),
1575 build_string ("ISO8859-5 (Cyrillic)"),
1576 build_string ("ISO8859-5 (Cyrillic)"),
1577 build_string ("iso8859-5"));
1578 Vcharset_latin_iso8859_9 =
1579 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1580 CHARSET_TYPE_96, 1, 1, 'M',
1581 CHARSET_LEFT_TO_RIGHT,
1582 build_string ("Latin-5"),
1583 build_string ("ISO8859-9 (Latin-5)"),
1584 build_string ("ISO8859-9 (Latin-5)"),
1585 build_string ("iso8859-9"));
1586 Vcharset_japanese_jisx0208_1978 =
1587 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1588 CHARSET_TYPE_94X94, 2, 0, '@',
1589 CHARSET_LEFT_TO_RIGHT,
1590 build_string ("JISX0208.1978"),
1591 build_string ("JISX0208.1978 (Japanese)"),
1593 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1594 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1595 Vcharset_chinese_gb2312 =
1596 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1597 CHARSET_TYPE_94X94, 2, 0, 'A',
1598 CHARSET_LEFT_TO_RIGHT,
1599 build_string ("GB2312"),
1600 build_string ("GB2312)"),
1601 build_string ("GB2312 Chinese simplified"),
1602 build_string ("gb2312"));
1603 Vcharset_japanese_jisx0208 =
1604 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1605 CHARSET_TYPE_94X94, 2, 0, 'B',
1606 CHARSET_LEFT_TO_RIGHT,
1607 build_string ("JISX0208"),
1608 build_string ("JISX0208.1983/1990 (Japanese)"),
1609 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1610 build_string ("jisx0208.19\\(83\\|90\\)"));
1611 Vcharset_korean_ksc5601 =
1612 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1613 CHARSET_TYPE_94X94, 2, 0, 'C',
1614 CHARSET_LEFT_TO_RIGHT,
1615 build_string ("KSC5601"),
1616 build_string ("KSC5601 (Korean"),
1617 build_string ("KSC5601 Korean Hangul and Hanja"),
1618 build_string ("ksc5601"));
1619 Vcharset_japanese_jisx0212 =
1620 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1621 CHARSET_TYPE_94X94, 2, 0, 'D',
1622 CHARSET_LEFT_TO_RIGHT,
1623 build_string ("JISX0212"),
1624 build_string ("JISX0212 (Japanese)"),
1625 build_string ("JISX0212 Japanese Supplement"),
1626 build_string ("jisx0212"));
1628 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1629 Vcharset_chinese_cns11643_1 =
1630 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1631 CHARSET_TYPE_94X94, 2, 0, 'G',
1632 CHARSET_LEFT_TO_RIGHT,
1633 build_string ("CNS11643-1"),
1634 build_string ("CNS11643-1 (Chinese traditional)"),
1636 ("CNS 11643 Plane 1 Chinese traditional"),
1637 build_string (CHINESE_CNS_PLANE_RE("1")));
1638 Vcharset_chinese_cns11643_2 =
1639 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1640 CHARSET_TYPE_94X94, 2, 0, 'H',
1641 CHARSET_LEFT_TO_RIGHT,
1642 build_string ("CNS11643-2"),
1643 build_string ("CNS11643-2 (Chinese traditional)"),
1645 ("CNS 11643 Plane 2 Chinese traditional"),
1646 build_string (CHINESE_CNS_PLANE_RE("2")));
1648 Vcharset_chinese_cns11643_3 =
1649 make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
1650 CHARSET_TYPE_94X94, 2, 0, 'I',
1651 CHARSET_LEFT_TO_RIGHT,
1652 build_string ("CNS11643-3"),
1653 build_string ("CNS11643-3 (Chinese traditional)"),
1655 ("CNS 11643 Plane 3 Chinese traditional"),
1656 build_string (CHINESE_CNS_PLANE_RE("3")));
1657 Vcharset_chinese_cns11643_4 =
1658 make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
1659 CHARSET_TYPE_94X94, 2, 0, 'J',
1660 CHARSET_LEFT_TO_RIGHT,
1661 build_string ("CNS11643-4"),
1662 build_string ("CNS11643-4 (Chinese traditional)"),
1664 ("CNS 11643 Plane 4 Chinese traditional"),
1665 build_string (CHINESE_CNS_PLANE_RE("4")));
1666 Vcharset_chinese_cns11643_5 =
1667 make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
1668 CHARSET_TYPE_94X94, 2, 0, 'K',
1669 CHARSET_LEFT_TO_RIGHT,
1670 build_string ("CNS11643-5"),
1671 build_string ("CNS11643-5 (Chinese traditional)"),
1673 ("CNS 11643 Plane 5 Chinese traditional"),
1674 build_string (CHINESE_CNS_PLANE_RE("5")));
1675 Vcharset_chinese_cns11643_6 =
1676 make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
1677 CHARSET_TYPE_94X94, 2, 0, 'L',
1678 CHARSET_LEFT_TO_RIGHT,
1679 build_string ("CNS11643-6"),
1680 build_string ("CNS11643-6 (Chinese traditional)"),
1682 ("CNS 11643 Plane 6 Chinese traditional"),
1683 build_string (CHINESE_CNS_PLANE_RE("6")));
1684 Vcharset_chinese_cns11643_7 =
1685 make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
1686 CHARSET_TYPE_94X94, 2, 0, 'M',
1687 CHARSET_LEFT_TO_RIGHT,
1688 build_string ("CNS11643-7"),
1689 build_string ("CNS11643-7 (Chinese traditional)"),
1691 ("CNS 11643 Plane 7 Chinese traditional"),
1692 build_string (CHINESE_CNS_PLANE_RE("7")));
1694 Vcharset_chinese_big5_1 =
1695 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1696 CHARSET_TYPE_94X94, 2, 0, '0',
1697 CHARSET_LEFT_TO_RIGHT,
1698 build_string ("Big5"),
1699 build_string ("Big5 (Level-1)"),
1701 ("Big5 Level-1 Chinese traditional"),
1702 build_string ("big5"));
1703 Vcharset_chinese_big5_2 =
1704 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1705 CHARSET_TYPE_94X94, 2, 0, '1',
1706 CHARSET_LEFT_TO_RIGHT,
1707 build_string ("Big5"),
1708 build_string ("Big5 (Level-2)"),
1710 ("Big5 Level-2 Chinese traditional"),
1711 build_string ("big5"));
1714 #ifdef ENABLE_COMPOSITE_CHARS
1715 /* #### For simplicity, we put composite chars into a 96x96 charset.
1716 This is going to lead to problems because you can run out of
1717 room, esp. as we don't yet recycle numbers. */
1718 Vcharset_composite =
1719 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1720 CHARSET_TYPE_96X96, 2, 0, 0,
1721 CHARSET_LEFT_TO_RIGHT,
1722 build_string ("Composite"),
1723 build_string ("Composite characters"),
1724 build_string ("Composite characters"),
1727 composite_char_row_next = 32;
1728 composite_char_col_next = 32;
1730 Vcomposite_char_string2char_hash_table =
1731 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1732 Vcomposite_char_char2string_hash_table =
1733 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1734 staticpro (&Vcomposite_char_string2char_hash_table);
1735 staticpro (&Vcomposite_char_char2string_hash_table);
1736 #endif /* ENABLE_COMPOSITE_CHARS */