1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 /* Table of charsets indexed by leading byte. */
78 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
80 /* Table of charsets indexed by type/final-byte/direction. */
81 Lisp_Object charset_by_attributes[4][128][2];
84 /* Table of number of bytes in the string representation of a character
85 indexed by the first byte of that representation.
87 rep_bytes_by_first_byte(c) is more efficient than the equivalent
88 canonical computation:
90 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
92 Bytecount rep_bytes_by_first_byte[0xA0] =
93 { /* 0x00 - 0x7f are for straight ASCII */
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 /* 0x80 - 0x8f are for Dimension-1 official charsets */
104 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 /* 0x90 - 0x9d are for Dimension-2 official charsets */
109 /* 0x9e is for Dimension-1 private charsets */
110 /* 0x9f is for Dimension-2 private charsets */
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Vutf_2000_version;
117 int leading_code_private_11;
119 Lisp_Object Qcharsetp;
121 /* Qdoc_string, Qdimension, Qchars defined in general.c */
122 Lisp_Object Qregistry, Qfinal, Qgraphic;
123 Lisp_Object Qdirection;
124 Lisp_Object Qreverse_direction_charset;
125 Lisp_Object Qleading_byte;
126 Lisp_Object Qshort_name, Qlong_name;
142 Qjapanese_jisx0208_1978,
153 Lisp_Object Ql2r, Qr2l;
155 Lisp_Object Vcharset_hash_table;
157 static Bufbyte next_allocated_1_byte_leading_byte;
158 static Bufbyte next_allocated_2_byte_leading_byte;
160 /* Composite characters are characters constructed by overstriking two
161 or more regular characters.
163 1) The old Mule implementation involves storing composite characters
164 in a buffer as a tag followed by all of the actual characters
165 used to make up the composite character. I think this is a bad
166 idea; it greatly complicates code that wants to handle strings
167 one character at a time because it has to deal with the possibility
168 of great big ungainly characters. It's much more reasonable to
169 simply store an index into a table of composite characters.
171 2) The current implementation only allows for 16,384 separate
172 composite characters over the lifetime of the XEmacs process.
173 This could become a potential problem if the user
174 edited lots of different files that use composite characters.
175 Due to FSF bogosity, increasing the number of allowable
176 composite characters under Mule would decrease the number
177 of possible faces that can exist. Mule already has shrunk
178 this to 2048, and further shrinkage would become uncomfortable.
179 No such problems exist in XEmacs.
181 Composite characters could be represented as 0x80 C1 C2 C3,
182 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
183 for slightly under 2^20 (one million) composite characters
184 over the XEmacs process lifetime, and you only need to
185 increase the size of a Mule character from 19 to 21 bits.
186 Or you could use 0x80 C1 C2 C3 C4, allowing for about
187 85 million (slightly over 2^26) composite characters. */
190 /************************************************************************/
191 /* Basic Emchar functions */
192 /************************************************************************/
194 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
195 string in STR. Returns the number of bytes stored.
196 Do not call this directly. Use the macro set_charptr_emchar() instead.
200 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
215 else if ( c <= 0x7ff )
217 *p++ = (c >> 6) | 0xc0;
218 *p++ = (c & 0x3f) | 0x80;
220 else if ( c <= 0xffff )
222 *p++ = (c >> 12) | 0xe0;
223 *p++ = ((c >> 6) & 0x3f) | 0x80;
224 *p++ = (c & 0x3f) | 0x80;
226 else if ( c <= 0x1fffff )
228 *p++ = (c >> 18) | 0xf0;
229 *p++ = ((c >> 12) & 0x3f) | 0x80;
230 *p++ = ((c >> 6) & 0x3f) | 0x80;
231 *p++ = (c & 0x3f) | 0x80;
233 else if ( c <= 0x3ffffff )
235 *p++ = (c >> 24) | 0xf8;
236 *p++ = ((c >> 18) & 0x3f) | 0x80;
237 *p++ = ((c >> 12) & 0x3f) | 0x80;
238 *p++ = ((c >> 6) & 0x3f) | 0x80;
239 *p++ = (c & 0x3f) | 0x80;
243 *p++ = (c >> 30) | 0xfc;
244 *p++ = ((c >> 24) & 0x3f) | 0x80;
245 *p++ = ((c >> 18) & 0x3f) | 0x80;
246 *p++ = ((c >> 12) & 0x3f) | 0x80;
247 *p++ = ((c >> 6) & 0x3f) | 0x80;
248 *p++ = (c & 0x3f) | 0x80;
251 BREAKUP_CHAR (c, charset, c1, c2);
252 lb = CHAR_LEADING_BYTE (c);
253 if (LEADING_BYTE_PRIVATE_P (lb))
254 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
256 if (EQ (charset, Vcharset_control_1))
265 /* Return the first character from a Mule-encoded string in STR,
266 assuming it's non-ASCII. Do not call this directly.
267 Use the macro charptr_emchar() instead. */
270 non_ascii_charptr_emchar (CONST Bufbyte *str)
283 else if ( b >= 0xf8 )
288 else if ( b >= 0xf0 )
293 else if ( b >= 0xe0 )
298 else if ( b >= 0xc0 )
308 for( ; len > 0; len-- )
311 ch = ( ch << 6 ) | ( b & 0x3f );
315 Bufbyte i0 = *str, i1, i2 = 0;
318 if (i0 == LEADING_BYTE_CONTROL_1)
319 return (Emchar) (*++str - 0x20);
321 if (LEADING_BYTE_PREFIX_P (i0))
326 charset = CHARSET_BY_LEADING_BYTE (i0);
327 if (XCHARSET_DIMENSION (charset) == 2)
330 return MAKE_CHAR (charset, i1, i2);
334 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
335 Do not call this directly. Use the macro valid_char_p() instead. */
339 non_ascii_valid_char_p (Emchar ch)
343 /* Must have only lowest 19 bits set */
347 f1 = CHAR_FIELD1 (ch);
348 f2 = CHAR_FIELD2 (ch);
349 f3 = CHAR_FIELD3 (ch);
355 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
356 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
357 f2 > MAX_CHAR_FIELD2_PRIVATE)
362 if (f3 != 0x20 && f3 != 0x7F)
366 NOTE: This takes advantage of the fact that
367 FIELD2_TO_OFFICIAL_LEADING_BYTE and
368 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
370 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
371 return (XCHARSET_CHARS (charset) == 96);
377 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
378 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
379 f1 > MAX_CHAR_FIELD1_PRIVATE)
381 if (f2 < 0x20 || f3 < 0x20)
384 #ifdef ENABLE_COMPOSITE_CHARS
385 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
387 if (UNBOUNDP (Fgethash (make_int (ch),
388 Vcomposite_char_char2string_hash_table,
393 #endif /* ENABLE_COMPOSITE_CHARS */
395 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
398 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
400 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
403 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
405 return (XCHARSET_CHARS (charset) == 96);
411 /************************************************************************/
412 /* Basic string functions */
413 /************************************************************************/
415 /* Copy the character pointed to by PTR into STR, assuming it's
416 non-ASCII. Do not call this directly. Use the macro
417 charptr_copy_char() instead. */
420 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
422 Bufbyte *strptr = str;
424 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
426 /* Notice fallthrough. */
428 case 6: *++strptr = *ptr++;
429 case 5: *++strptr = *ptr++;
431 case 4: *++strptr = *ptr++;
432 case 3: *++strptr = *ptr++;
433 case 2: *++strptr = *ptr;
438 return strptr + 1 - str;
442 /************************************************************************/
443 /* streams of Emchars */
444 /************************************************************************/
446 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
447 The functions below are not meant to be called directly; use
448 the macros in insdel.h. */
451 Lstream_get_emchar_1 (Lstream *stream, int ch)
453 Bufbyte str[MAX_EMCHAR_LEN];
454 Bufbyte *strptr = str;
456 str[0] = (Bufbyte) ch;
457 switch (REP_BYTES_BY_FIRST_BYTE (ch))
459 /* Notice fallthrough. */
462 ch = Lstream_getc (stream);
464 *++strptr = (Bufbyte) ch;
466 ch = Lstream_getc (stream);
468 *++strptr = (Bufbyte) ch;
471 ch = Lstream_getc (stream);
473 *++strptr = (Bufbyte) ch;
475 ch = Lstream_getc (stream);
477 *++strptr = (Bufbyte) ch;
479 ch = Lstream_getc (stream);
481 *++strptr = (Bufbyte) ch;
486 return charptr_emchar (str);
490 Lstream_fput_emchar (Lstream *stream, Emchar ch)
492 Bufbyte str[MAX_EMCHAR_LEN];
493 Bytecount len = set_charptr_emchar (str, ch);
494 return Lstream_write (stream, str, len);
498 Lstream_funget_emchar (Lstream *stream, Emchar ch)
500 Bufbyte str[MAX_EMCHAR_LEN];
501 Bytecount len = set_charptr_emchar (str, ch);
502 Lstream_unread (stream, str, len);
506 /************************************************************************/
508 /************************************************************************/
511 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
513 struct Lisp_Charset *cs = XCHARSET (obj);
515 markobj (cs->short_name);
516 markobj (cs->long_name);
517 markobj (cs->doc_string);
518 markobj (cs->registry);
519 markobj (cs->ccl_program);
524 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
526 struct Lisp_Charset *cs = XCHARSET (obj);
530 error ("printing unreadable object #<charset %s 0x%x>",
531 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
534 write_c_string ("#<charset ", printcharfun);
535 print_internal (CHARSET_NAME (cs), printcharfun, 0);
536 write_c_string (" ", printcharfun);
537 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
538 write_c_string (" ", printcharfun);
539 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
540 write_c_string (" ", printcharfun);
541 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
542 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
543 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
544 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
545 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
547 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
548 CHARSET_COLUMNS (cs),
549 CHARSET_GRAPHIC (cs),
551 write_c_string (buf, printcharfun);
552 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
553 sprintf (buf, " 0x%x>", cs->header.uid);
554 write_c_string (buf, printcharfun);
557 static const struct lrecord_description charset_description[] = {
558 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
562 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
563 mark_charset, print_charset, 0, 0, 0, charset_description,
564 struct Lisp_Charset);
565 /* Make a new charset. */
568 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
569 unsigned char type, unsigned char columns, unsigned char graphic,
570 Bufbyte final, unsigned char direction, Lisp_Object short_name,
571 Lisp_Object long_name, Lisp_Object doc,
575 struct Lisp_Charset *cs =
576 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
577 XSETCHARSET (obj, cs);
579 CHARSET_ID (cs) = id;
580 CHARSET_NAME (cs) = name;
581 CHARSET_SHORT_NAME (cs) = short_name;
582 CHARSET_LONG_NAME (cs) = long_name;
583 CHARSET_REP_BYTES (cs) = rep_bytes;
584 CHARSET_DIRECTION (cs) = direction;
585 CHARSET_TYPE (cs) = type;
586 CHARSET_COLUMNS (cs) = columns;
587 CHARSET_GRAPHIC (cs) = graphic;
588 CHARSET_FINAL (cs) = final;
589 CHARSET_DOC_STRING (cs) = doc;
590 CHARSET_REGISTRY (cs) = reg;
591 CHARSET_CCL_PROGRAM (cs) = Qnil;
592 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
594 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
595 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
596 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
597 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
601 /* some charsets do not have final characters. This includes
602 ASCII, Control-1, Composite, and the two faux private
604 assert (NILP (charset_by_attributes[type][final][direction]));
605 charset_by_attributes[type][final][direction] = obj;
608 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
609 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
612 /* official leading byte */
613 rep_bytes_by_first_byte[id] = rep_bytes;
616 /* Some charsets are "faux" and don't have names or really exist at
617 all except in the leading-byte table. */
619 Fputhash (name, obj, Vcharset_hash_table);
624 get_unallocated_leading_byte (int dimension)
630 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
633 lb = next_allocated_1_byte_leading_byte++;
637 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
640 lb = next_allocated_2_byte_leading_byte++;
645 ("No more character sets free for this dimension",
646 make_int (dimension));
652 /************************************************************************/
653 /* Basic charset Lisp functions */
654 /************************************************************************/
656 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
657 Return non-nil if OBJECT is a charset.
661 return CHARSETP (object) ? Qt : Qnil;
664 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
665 Retrieve the charset of the given name.
666 If CHARSET-OR-NAME is a charset object, it is simply returned.
667 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
668 nil is returned. Otherwise the associated charset object is returned.
672 if (CHARSETP (charset_or_name))
673 return charset_or_name;
675 CHECK_SYMBOL (charset_or_name);
676 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
679 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
680 Retrieve the charset of the given name.
681 Same as `find-charset' except an error is signalled if there is no such
682 charset instead of returning nil.
686 Lisp_Object charset = Ffind_charset (name);
689 signal_simple_error ("No such charset", name);
693 /* We store the charsets in hash tables with the names as the key and the
694 actual charset object as the value. Occasionally we need to use them
695 in a list format. These routines provide us with that. */
696 struct charset_list_closure
698 Lisp_Object *charset_list;
702 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
703 void *charset_list_closure)
705 /* This function can GC */
706 struct charset_list_closure *chcl =
707 (struct charset_list_closure*) charset_list_closure;
708 Lisp_Object *charset_list = chcl->charset_list;
710 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
714 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
715 Return a list of the names of all defined charsets.
719 Lisp_Object charset_list = Qnil;
721 struct charset_list_closure charset_list_closure;
723 GCPRO1 (charset_list);
724 charset_list_closure.charset_list = &charset_list;
725 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
726 &charset_list_closure);
732 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
733 Return the name of the given charset.
737 return XCHARSET_NAME (Fget_charset (charset));
740 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
741 Define a new character set.
742 This function is for use with Mule support.
743 NAME is a symbol, the name by which the character set is normally referred.
744 DOC-STRING is a string describing the character set.
745 PROPS is a property list, describing the specific nature of the
746 character set. Recognized properties are:
748 'short-name Short version of the charset name (ex: Latin-1)
749 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
750 'registry A regular expression matching the font registry field for
752 'dimension Number of octets used to index a character in this charset.
753 Either 1 or 2. Defaults to 1.
754 'columns Number of columns used to display a character in this charset.
755 Only used in TTY mode. (Under X, the actual width of a
756 character can be derived from the font used to display the
757 characters.) If unspecified, defaults to the dimension
758 (this is almost always the correct value).
759 'chars Number of characters in each dimension (94 or 96).
760 Defaults to 94. Note that if the dimension is 2, the
761 character set thus described is 94x94 or 96x96.
762 'final Final byte of ISO 2022 escape sequence. Must be
763 supplied. Each combination of (DIMENSION, CHARS) defines a
764 separate namespace for final bytes. Note that ISO
765 2022 restricts the final byte to the range
766 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
767 dimension == 2. Note also that final bytes in the range
768 0x30 - 0x3F are reserved for user-defined (not official)
770 'graphic 0 (use left half of font on output) or 1 (use right half
771 of font on output). Defaults to 0. For example, for
772 a font whose registry is ISO8859-1, the left half
773 (octets 0x20 - 0x7F) is the `ascii' character set, while
774 the right half (octets 0xA0 - 0xFF) is the `latin-1'
775 character set. With 'graphic set to 0, the octets
776 will have their high bit cleared; with it set to 1,
777 the octets will have their high bit set.
778 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
780 'ccl-program A compiled CCL program used to convert a character in
781 this charset into an index into the font. This is in
782 addition to the 'graphic property. The CCL program
783 is passed the octets of the character, with the high
784 bit cleared and set depending upon whether the value
785 of the 'graphic property is 0 or 1.
787 (name, doc_string, props))
789 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
790 int direction = CHARSET_LEFT_TO_RIGHT;
792 Lisp_Object registry = Qnil;
794 Lisp_Object rest, keyword, value;
795 Lisp_Object ccl_program = Qnil;
796 Lisp_Object short_name = Qnil, long_name = Qnil;
799 if (!NILP (doc_string))
800 CHECK_STRING (doc_string);
802 charset = Ffind_charset (name);
804 signal_simple_error ("Cannot redefine existing charset", name);
806 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
808 if (EQ (keyword, Qshort_name))
810 CHECK_STRING (value);
814 if (EQ (keyword, Qlong_name))
816 CHECK_STRING (value);
820 else if (EQ (keyword, Qdimension))
823 dimension = XINT (value);
824 if (dimension < 1 || dimension > 2)
825 signal_simple_error ("Invalid value for 'dimension", value);
828 else if (EQ (keyword, Qchars))
831 chars = XINT (value);
832 if (chars != 94 && chars != 96)
833 signal_simple_error ("Invalid value for 'chars", value);
836 else if (EQ (keyword, Qcolumns))
839 columns = XINT (value);
840 if (columns != 1 && columns != 2)
841 signal_simple_error ("Invalid value for 'columns", value);
844 else if (EQ (keyword, Qgraphic))
847 graphic = XINT (value);
848 if (graphic < 0 || graphic > 1)
849 signal_simple_error ("Invalid value for 'graphic", value);
852 else if (EQ (keyword, Qregistry))
854 CHECK_STRING (value);
858 else if (EQ (keyword, Qdirection))
860 if (EQ (value, Ql2r))
861 direction = CHARSET_LEFT_TO_RIGHT;
862 else if (EQ (value, Qr2l))
863 direction = CHARSET_RIGHT_TO_LEFT;
865 signal_simple_error ("Invalid value for 'direction", value);
868 else if (EQ (keyword, Qfinal))
870 CHECK_CHAR_COERCE_INT (value);
871 final = XCHAR (value);
872 if (final < '0' || final > '~')
873 signal_simple_error ("Invalid value for 'final", value);
876 else if (EQ (keyword, Qccl_program))
878 CHECK_VECTOR (value);
883 signal_simple_error ("Unrecognized property", keyword);
887 error ("'final must be specified");
888 if (dimension == 2 && final > 0x5F)
890 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
894 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
896 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
898 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
899 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
901 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
903 id = get_unallocated_leading_byte (dimension);
905 if (NILP (doc_string))
906 doc_string = build_string ("");
909 registry = build_string ("");
911 if (NILP (short_name))
912 XSETSTRING (short_name, XSYMBOL (name)->name);
914 if (NILP (long_name))
915 long_name = doc_string;
919 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
920 final, direction, short_name, long_name, doc_string, registry);
921 if (!NILP (ccl_program))
922 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
926 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
928 Make a charset equivalent to CHARSET but which goes in the opposite direction.
929 NEW-NAME is the name of the new charset. Return the new charset.
933 Lisp_Object new_charset = Qnil;
934 int id, dimension, columns, graphic, final;
936 Lisp_Object registry, doc_string, short_name, long_name;
937 struct Lisp_Charset *cs;
939 charset = Fget_charset (charset);
940 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
941 signal_simple_error ("Charset already has reverse-direction charset",
944 CHECK_SYMBOL (new_name);
945 if (!NILP (Ffind_charset (new_name)))
946 signal_simple_error ("Cannot redefine existing charset", new_name);
948 cs = XCHARSET (charset);
950 type = CHARSET_TYPE (cs);
951 columns = CHARSET_COLUMNS (cs);
952 dimension = CHARSET_DIMENSION (cs);
953 id = get_unallocated_leading_byte (dimension);
955 graphic = CHARSET_GRAPHIC (cs);
956 final = CHARSET_FINAL (cs);
957 direction = CHARSET_RIGHT_TO_LEFT;
958 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
959 direction = CHARSET_LEFT_TO_RIGHT;
960 doc_string = CHARSET_DOC_STRING (cs);
961 short_name = CHARSET_SHORT_NAME (cs);
962 long_name = CHARSET_LONG_NAME (cs);
963 registry = CHARSET_REGISTRY (cs);
965 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
966 graphic, final, direction, short_name, long_name,
967 doc_string, registry);
969 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
970 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
975 /* #### Reverse direction charsets not yet implemented. */
977 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
979 Return the reverse-direction charset parallel to CHARSET, if any.
980 This is the charset with the same properties (in particular, the same
981 dimension, number of characters per dimension, and final byte) as
982 CHARSET but whose characters are displayed in the opposite direction.
986 charset = Fget_charset (charset);
987 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
991 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
992 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
993 If DIRECTION is omitted, both directions will be checked (left-to-right
994 will be returned if character sets exist for both directions).
996 (dimension, chars, final, direction))
998 int dm, ch, fi, di = -1;
1000 Lisp_Object obj = Qnil;
1002 CHECK_INT (dimension);
1003 dm = XINT (dimension);
1004 if (dm < 1 || dm > 2)
1005 signal_simple_error ("Invalid value for DIMENSION", dimension);
1009 if (ch != 94 && ch != 96)
1010 signal_simple_error ("Invalid value for CHARS", chars);
1012 CHECK_CHAR_COERCE_INT (final);
1014 if (fi < '0' || fi > '~')
1015 signal_simple_error ("Invalid value for FINAL", final);
1017 if (EQ (direction, Ql2r))
1018 di = CHARSET_LEFT_TO_RIGHT;
1019 else if (EQ (direction, Qr2l))
1020 di = CHARSET_RIGHT_TO_LEFT;
1021 else if (!NILP (direction))
1022 signal_simple_error ("Invalid value for DIRECTION", direction);
1024 if (dm == 2 && fi > 0x5F)
1026 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1029 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1031 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1035 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1037 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1040 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1043 return XCHARSET_NAME (obj);
1047 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1048 Return short name of CHARSET.
1052 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1055 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1056 Return long name of CHARSET.
1060 return XCHARSET_LONG_NAME (Fget_charset (charset));
1063 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1064 Return description of CHARSET.
1068 return XCHARSET_DOC_STRING (Fget_charset (charset));
1071 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1072 Return dimension of CHARSET.
1076 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1079 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1080 Return property PROP of CHARSET.
1081 Recognized properties are those listed in `make-charset', as well as
1082 'name and 'doc-string.
1086 struct Lisp_Charset *cs;
1088 charset = Fget_charset (charset);
1089 cs = XCHARSET (charset);
1091 CHECK_SYMBOL (prop);
1092 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1093 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1094 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1095 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1096 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1097 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1098 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1099 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1100 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1101 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1102 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1103 if (EQ (prop, Qdirection))
1104 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1105 if (EQ (prop, Qreverse_direction_charset))
1107 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1111 return XCHARSET_NAME (obj);
1113 signal_simple_error ("Unrecognized charset property name", prop);
1114 return Qnil; /* not reached */
1117 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1118 Return charset identification number of CHARSET.
1122 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1125 /* #### We need to figure out which properties we really want to
1128 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1129 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1131 (charset, ccl_program))
1133 charset = Fget_charset (charset);
1134 CHECK_VECTOR (ccl_program);
1135 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1140 invalidate_charset_font_caches (Lisp_Object charset)
1142 /* Invalidate font cache entries for charset on all devices. */
1143 Lisp_Object devcons, concons, hash_table;
1144 DEVICE_LOOP_NO_BREAK (devcons, concons)
1146 struct device *d = XDEVICE (XCAR (devcons));
1147 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1148 if (!UNBOUNDP (hash_table))
1149 Fclrhash (hash_table);
1153 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1154 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1155 Set the 'registry property of CHARSET to REGISTRY.
1157 (charset, registry))
1159 charset = Fget_charset (charset);
1160 CHECK_STRING (registry);
1161 XCHARSET_REGISTRY (charset) = registry;
1162 invalidate_charset_font_caches (charset);
1163 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1168 /************************************************************************/
1169 /* Lisp primitives for working with characters */
1170 /************************************************************************/
1172 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1173 Make a character from CHARSET and octets ARG1 and ARG2.
1174 ARG2 is required only for characters from two-dimensional charsets.
1175 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1176 character s with caron.
1178 (charset, arg1, arg2))
1180 struct Lisp_Charset *cs;
1182 int lowlim, highlim;
1184 charset = Fget_charset (charset);
1185 cs = XCHARSET (charset);
1187 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1188 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1189 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1190 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1193 /* It is useful (and safe, according to Olivier Galibert) to strip
1194 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1195 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1196 Latin 2 code of the character. */
1197 a1 = XINT (arg1) & 0x7f;
1198 if (a1 < lowlim || a1 > highlim)
1199 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1201 if (CHARSET_DIMENSION (cs) == 1)
1205 ("Charset is of dimension one; second octet must be nil", arg2);
1206 return make_char (MAKE_CHAR (charset, a1, 0));
1210 a2 = XINT (arg2) & 0x7f;
1211 if (a2 < lowlim || a2 > highlim)
1212 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1214 return make_char (MAKE_CHAR (charset, a1, a2));
1217 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1218 Return the character set of char CH.
1222 CHECK_CHAR_COERCE_INT (ch);
1224 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1225 (CHAR_LEADING_BYTE (XCHAR (ch))));
1228 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1229 Return list of charset and one or two position-codes of CHAR.
1233 /* This function can GC */
1234 struct gcpro gcpro1, gcpro2;
1235 Lisp_Object charset = Qnil;
1236 Lisp_Object rc = Qnil;
1239 GCPRO2 (charset, rc);
1240 CHECK_CHAR_COERCE_INT (character);
1242 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1244 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1246 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1250 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1258 #ifdef ENABLE_COMPOSITE_CHARS
1259 /************************************************************************/
1260 /* composite character functions */
1261 /************************************************************************/
1264 lookup_composite_char (Bufbyte *str, int len)
1266 Lisp_Object lispstr = make_string (str, len);
1267 Lisp_Object ch = Fgethash (lispstr,
1268 Vcomposite_char_string2char_hash_table,
1274 if (composite_char_row_next >= 128)
1275 signal_simple_error ("No more composite chars available", lispstr);
1276 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1277 composite_char_col_next);
1278 Fputhash (make_char (emch), lispstr,
1279 Vcomposite_char_char2string_hash_table);
1280 Fputhash (lispstr, make_char (emch),
1281 Vcomposite_char_string2char_hash_table);
1282 composite_char_col_next++;
1283 if (composite_char_col_next >= 128)
1285 composite_char_col_next = 32;
1286 composite_char_row_next++;
1295 composite_char_string (Emchar ch)
1297 Lisp_Object str = Fgethash (make_char (ch),
1298 Vcomposite_char_char2string_hash_table,
1300 assert (!UNBOUNDP (str));
1304 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1305 Convert a string into a single composite character.
1306 The character is the result of overstriking all the characters in
1311 CHECK_STRING (string);
1312 return make_char (lookup_composite_char (XSTRING_DATA (string),
1313 XSTRING_LENGTH (string)));
1316 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1317 Return a string of the characters comprising a composite character.
1325 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1326 signal_simple_error ("Must be composite char", ch);
1327 return composite_char_string (emch);
1329 #endif /* ENABLE_COMPOSITE_CHARS */
1332 /************************************************************************/
1333 /* initialization */
1334 /************************************************************************/
1337 syms_of_mule_charset (void)
1339 DEFSUBR (Fcharsetp);
1340 DEFSUBR (Ffind_charset);
1341 DEFSUBR (Fget_charset);
1342 DEFSUBR (Fcharset_list);
1343 DEFSUBR (Fcharset_name);
1344 DEFSUBR (Fmake_charset);
1345 DEFSUBR (Fmake_reverse_direction_charset);
1346 /* DEFSUBR (Freverse_direction_charset); */
1347 DEFSUBR (Fcharset_from_attributes);
1348 DEFSUBR (Fcharset_short_name);
1349 DEFSUBR (Fcharset_long_name);
1350 DEFSUBR (Fcharset_description);
1351 DEFSUBR (Fcharset_dimension);
1352 DEFSUBR (Fcharset_property);
1353 DEFSUBR (Fcharset_id);
1354 DEFSUBR (Fset_charset_ccl_program);
1355 DEFSUBR (Fset_charset_registry);
1357 DEFSUBR (Fmake_char);
1358 DEFSUBR (Fchar_charset);
1359 DEFSUBR (Fsplit_char);
1361 #ifdef ENABLE_COMPOSITE_CHARS
1362 DEFSUBR (Fmake_composite_char);
1363 DEFSUBR (Fcomposite_char_string);
1366 defsymbol (&Qcharsetp, "charsetp");
1367 defsymbol (&Qregistry, "registry");
1368 defsymbol (&Qfinal, "final");
1369 defsymbol (&Qgraphic, "graphic");
1370 defsymbol (&Qdirection, "direction");
1371 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1372 defsymbol (&Qshort_name, "short-name");
1373 defsymbol (&Qlong_name, "long-name");
1375 defsymbol (&Ql2r, "l2r");
1376 defsymbol (&Qr2l, "r2l");
1378 /* Charsets, compatible with FSF 20.3
1379 Naming convention is Script-Charset[-Edition] */
1380 defsymbol (&Qascii, "ascii");
1381 defsymbol (&Qcontrol_1, "control-1");
1382 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1383 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1384 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1385 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1386 defsymbol (&Qthai_tis620, "thai-tis620");
1387 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1388 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1389 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1390 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1391 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1392 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1393 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1394 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1395 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1396 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1397 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1398 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1399 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1400 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1401 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1402 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1404 defsymbol (&Qcomposite, "composite");
1407 Vutf_2000_version = build_string("0.4 (Shin-Imamiya)");
1408 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1409 Version number of UTF-2000.
1415 vars_of_mule_charset (void)
1419 /* Table of charsets indexed by leading byte. */
1420 for (i = 0; i < countof (charset_by_leading_byte); i++)
1421 charset_by_leading_byte[i] = Qnil;
1423 /* Table of charsets indexed by type/final-byte/direction. */
1424 for (i = 0; i < countof (charset_by_attributes); i++)
1425 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1426 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1427 charset_by_attributes[i][j][k] = Qnil;
1429 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1430 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1432 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1433 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1434 Leading-code of private TYPE9N charset of column-width 1.
1436 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1440 complex_vars_of_mule_charset (void)
1442 staticpro (&Vcharset_hash_table);
1443 Vcharset_hash_table =
1444 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1446 /* Predefined character sets. We store them into variables for
1450 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1451 CHARSET_TYPE_94, 1, 0, 'B',
1452 CHARSET_LEFT_TO_RIGHT,
1453 build_string ("ASCII"),
1454 build_string ("ASCII)"),
1455 build_string ("ASCII (ISO646 IRV)"),
1456 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1457 Vcharset_control_1 =
1458 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1459 CHARSET_TYPE_94, 1, 1, 0,
1460 CHARSET_LEFT_TO_RIGHT,
1461 build_string ("C1"),
1462 build_string ("Control characters"),
1463 build_string ("Control characters 128-191"),
1465 Vcharset_latin_iso8859_1 =
1466 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1467 CHARSET_TYPE_96, 1, 1, 'A',
1468 CHARSET_LEFT_TO_RIGHT,
1469 build_string ("Latin-1"),
1470 build_string ("ISO8859-1 (Latin-1)"),
1471 build_string ("ISO8859-1 (Latin-1)"),
1472 build_string ("iso8859-1"));
1473 Vcharset_latin_iso8859_2 =
1474 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1475 CHARSET_TYPE_96, 1, 1, 'B',
1476 CHARSET_LEFT_TO_RIGHT,
1477 build_string ("Latin-2"),
1478 build_string ("ISO8859-2 (Latin-2)"),
1479 build_string ("ISO8859-2 (Latin-2)"),
1480 build_string ("iso8859-2"));
1481 Vcharset_latin_iso8859_3 =
1482 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1483 CHARSET_TYPE_96, 1, 1, 'C',
1484 CHARSET_LEFT_TO_RIGHT,
1485 build_string ("Latin-3"),
1486 build_string ("ISO8859-3 (Latin-3)"),
1487 build_string ("ISO8859-3 (Latin-3)"),
1488 build_string ("iso8859-3"));
1489 Vcharset_latin_iso8859_4 =
1490 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1491 CHARSET_TYPE_96, 1, 1, 'D',
1492 CHARSET_LEFT_TO_RIGHT,
1493 build_string ("Latin-4"),
1494 build_string ("ISO8859-4 (Latin-4)"),
1495 build_string ("ISO8859-4 (Latin-4)"),
1496 build_string ("iso8859-4"));
1497 Vcharset_thai_tis620 =
1498 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1499 CHARSET_TYPE_96, 1, 1, 'T',
1500 CHARSET_LEFT_TO_RIGHT,
1501 build_string ("TIS620"),
1502 build_string ("TIS620 (Thai)"),
1503 build_string ("TIS620.2529 (Thai)"),
1504 build_string ("tis620"));
1505 Vcharset_greek_iso8859_7 =
1506 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1507 CHARSET_TYPE_96, 1, 1, 'F',
1508 CHARSET_LEFT_TO_RIGHT,
1509 build_string ("ISO8859-7"),
1510 build_string ("ISO8859-7 (Greek)"),
1511 build_string ("ISO8859-7 (Greek)"),
1512 build_string ("iso8859-7"));
1513 Vcharset_arabic_iso8859_6 =
1514 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1515 CHARSET_TYPE_96, 1, 1, 'G',
1516 CHARSET_RIGHT_TO_LEFT,
1517 build_string ("ISO8859-6"),
1518 build_string ("ISO8859-6 (Arabic)"),
1519 build_string ("ISO8859-6 (Arabic)"),
1520 build_string ("iso8859-6"));
1521 Vcharset_hebrew_iso8859_8 =
1522 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1523 CHARSET_TYPE_96, 1, 1, 'H',
1524 CHARSET_RIGHT_TO_LEFT,
1525 build_string ("ISO8859-8"),
1526 build_string ("ISO8859-8 (Hebrew)"),
1527 build_string ("ISO8859-8 (Hebrew)"),
1528 build_string ("iso8859-8"));
1529 Vcharset_katakana_jisx0201 =
1530 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1531 CHARSET_TYPE_94, 1, 1, 'I',
1532 CHARSET_LEFT_TO_RIGHT,
1533 build_string ("JISX0201 Kana"),
1534 build_string ("JISX0201.1976 (Japanese Kana)"),
1535 build_string ("JISX0201.1976 Japanese Kana"),
1536 build_string ("jisx0201.1976"));
1537 Vcharset_latin_jisx0201 =
1538 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1539 CHARSET_TYPE_94, 1, 0, 'J',
1540 CHARSET_LEFT_TO_RIGHT,
1541 build_string ("JISX0201 Roman"),
1542 build_string ("JISX0201.1976 (Japanese Roman)"),
1543 build_string ("JISX0201.1976 Japanese Roman"),
1544 build_string ("jisx0201.1976"));
1545 Vcharset_cyrillic_iso8859_5 =
1546 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1547 CHARSET_TYPE_96, 1, 1, 'L',
1548 CHARSET_LEFT_TO_RIGHT,
1549 build_string ("ISO8859-5"),
1550 build_string ("ISO8859-5 (Cyrillic)"),
1551 build_string ("ISO8859-5 (Cyrillic)"),
1552 build_string ("iso8859-5"));
1553 Vcharset_latin_iso8859_9 =
1554 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1555 CHARSET_TYPE_96, 1, 1, 'M',
1556 CHARSET_LEFT_TO_RIGHT,
1557 build_string ("Latin-5"),
1558 build_string ("ISO8859-9 (Latin-5)"),
1559 build_string ("ISO8859-9 (Latin-5)"),
1560 build_string ("iso8859-9"));
1561 Vcharset_japanese_jisx0208_1978 =
1562 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1563 CHARSET_TYPE_94X94, 2, 0, '@',
1564 CHARSET_LEFT_TO_RIGHT,
1565 build_string ("JISX0208.1978"),
1566 build_string ("JISX0208.1978 (Japanese)"),
1568 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1569 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1570 Vcharset_chinese_gb2312 =
1571 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1572 CHARSET_TYPE_94X94, 2, 0, 'A',
1573 CHARSET_LEFT_TO_RIGHT,
1574 build_string ("GB2312"),
1575 build_string ("GB2312)"),
1576 build_string ("GB2312 Chinese simplified"),
1577 build_string ("gb2312"));
1578 Vcharset_japanese_jisx0208 =
1579 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1580 CHARSET_TYPE_94X94, 2, 0, 'B',
1581 CHARSET_LEFT_TO_RIGHT,
1582 build_string ("JISX0208"),
1583 build_string ("JISX0208.1983/1990 (Japanese)"),
1584 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1585 build_string ("jisx0208.19\\(83\\|90\\)"));
1586 Vcharset_korean_ksc5601 =
1587 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1588 CHARSET_TYPE_94X94, 2, 0, 'C',
1589 CHARSET_LEFT_TO_RIGHT,
1590 build_string ("KSC5601"),
1591 build_string ("KSC5601 (Korean"),
1592 build_string ("KSC5601 Korean Hangul and Hanja"),
1593 build_string ("ksc5601"));
1594 Vcharset_japanese_jisx0212 =
1595 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1596 CHARSET_TYPE_94X94, 2, 0, 'D',
1597 CHARSET_LEFT_TO_RIGHT,
1598 build_string ("JISX0212"),
1599 build_string ("JISX0212 (Japanese)"),
1600 build_string ("JISX0212 Japanese Supplement"),
1601 build_string ("jisx0212"));
1603 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1604 Vcharset_chinese_cns11643_1 =
1605 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1606 CHARSET_TYPE_94X94, 2, 0, 'G',
1607 CHARSET_LEFT_TO_RIGHT,
1608 build_string ("CNS11643-1"),
1609 build_string ("CNS11643-1 (Chinese traditional)"),
1611 ("CNS 11643 Plane 1 Chinese traditional"),
1612 build_string (CHINESE_CNS_PLANE_RE("1")));
1613 Vcharset_chinese_cns11643_2 =
1614 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1615 CHARSET_TYPE_94X94, 2, 0, 'H',
1616 CHARSET_LEFT_TO_RIGHT,
1617 build_string ("CNS11643-2"),
1618 build_string ("CNS11643-2 (Chinese traditional)"),
1620 ("CNS 11643 Plane 2 Chinese traditional"),
1621 build_string (CHINESE_CNS_PLANE_RE("2")));
1622 Vcharset_chinese_big5_1 =
1623 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1624 CHARSET_TYPE_94X94, 2, 0, '0',
1625 CHARSET_LEFT_TO_RIGHT,
1626 build_string ("Big5"),
1627 build_string ("Big5 (Level-1)"),
1629 ("Big5 Level-1 Chinese traditional"),
1630 build_string ("big5"));
1631 Vcharset_chinese_big5_2 =
1632 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1633 CHARSET_TYPE_94X94, 2, 0, '1',
1634 CHARSET_LEFT_TO_RIGHT,
1635 build_string ("Big5"),
1636 build_string ("Big5 (Level-2)"),
1638 ("Big5 Level-2 Chinese traditional"),
1639 build_string ("big5"));
1642 #ifdef ENABLE_COMPOSITE_CHARS
1643 /* #### For simplicity, we put composite chars into a 96x96 charset.
1644 This is going to lead to problems because you can run out of
1645 room, esp. as we don't yet recycle numbers. */
1646 Vcharset_composite =
1647 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1648 CHARSET_TYPE_96X96, 2, 0, 0,
1649 CHARSET_LEFT_TO_RIGHT,
1650 build_string ("Composite"),
1651 build_string ("Composite characters"),
1652 build_string ("Composite characters"),
1655 composite_char_row_next = 32;
1656 composite_char_col_next = 32;
1658 Vcomposite_char_string2char_hash_table =
1659 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1660 Vcomposite_char_char2string_hash_table =
1661 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1662 staticpro (&Vcomposite_char_string2char_hash_table);
1663 staticpro (&Vcomposite_char_char2string_hash_table);
1664 #endif /* ENABLE_COMPOSITE_CHARS */