1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
102 /* 0x90 - 0x9d are for Dimension-2 official charsets */
103 /* 0x9e is for Dimension-1 private charsets */
104 /* 0x9f is for Dimension-2 private charsets */
105 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
108 Lisp_Object Vutf_2000_version;
110 int leading_code_private_11;
112 Lisp_Object Qcharsetp;
114 /* Qdoc_string, Qdimension, Qchars defined in general.c */
115 Lisp_Object Qregistry, Qfinal, Qgraphic;
116 Lisp_Object Qdirection;
117 Lisp_Object Qreverse_direction_charset;
118 Lisp_Object Qccl_program;
119 Lisp_Object Qleading_byte;
120 Lisp_Object Qshort_name, Qlong_name;
136 Qjapanese_jisx0208_1978,
147 Lisp_Object Ql2r, Qr2l;
149 Lisp_Object Vcharset_hash_table;
151 static Bufbyte next_allocated_1_byte_leading_byte;
152 static Bufbyte next_allocated_2_byte_leading_byte;
154 /* Composite characters are characters constructed by overstriking two
155 or more regular characters.
157 1) The old Mule implementation involves storing composite characters
158 in a buffer as a tag followed by all of the actual characters
159 used to make up the composite character. I think this is a bad
160 idea; it greatly complicates code that wants to handle strings
161 one character at a time because it has to deal with the possibility
162 of great big ungainly characters. It's much more reasonable to
163 simply store an index into a table of composite characters.
165 2) The current implementation only allows for 16,384 separate
166 composite characters over the lifetime of the XEmacs process.
167 This could become a potential problem if the user
168 edited lots of different files that use composite characters.
169 Due to FSF bogosity, increasing the number of allowable
170 composite characters under Mule would decrease the number
171 of possible faces that can exist. Mule already has shrunk
172 this to 2048, and further shrinkage would become uncomfortable.
173 No such problems exist in XEmacs.
175 Composite characters could be represented as 0x80 C1 C2 C3,
176 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
177 for slightly under 2^20 (one million) composite characters
178 over the XEmacs process lifetime, and you only need to
179 increase the size of a Mule character from 19 to 21 bits.
180 Or you could use 0x80 C1 C2 C3 C4, allowing for about
181 85 million (slightly over 2^26) composite characters. */
184 /************************************************************************/
185 /* Basic Emchar functions */
186 /************************************************************************/
188 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
189 string in STR. Returns the number of bytes stored.
190 Do not call this directly. Use the macro set_charptr_emchar() instead.
194 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
209 else if ( c <= 0x7ff )
211 *p++ = (c >> 6) | 0xc0;
212 *p++ = (c & 0x3f) | 0x80;
214 else if ( c <= 0xffff )
216 *p++ = (c >> 12) | 0xe0;
217 *p++ = ((c >> 6) & 0x3f) | 0x80;
218 *p++ = (c & 0x3f) | 0x80;
220 else if ( c <= 0x1fffff )
222 *p++ = (c >> 18) | 0xf0;
223 *p++ = ((c >> 12) & 0x3f) | 0x80;
224 *p++ = ((c >> 6) & 0x3f) | 0x80;
225 *p++ = (c & 0x3f) | 0x80;
227 else if ( c <= 0x3ffffff )
229 *p++ = (c >> 24) | 0xf8;
230 *p++ = ((c >> 18) & 0x3f) | 0x80;
231 *p++ = ((c >> 12) & 0x3f) | 0x80;
232 *p++ = ((c >> 6) & 0x3f) | 0x80;
233 *p++ = (c & 0x3f) | 0x80;
237 *p++ = (c >> 30) | 0xfc;
238 *p++ = ((c >> 24) & 0x3f) | 0x80;
239 *p++ = ((c >> 18) & 0x3f) | 0x80;
240 *p++ = ((c >> 12) & 0x3f) | 0x80;
241 *p++ = ((c >> 6) & 0x3f) | 0x80;
242 *p++ = (c & 0x3f) | 0x80;
245 BREAKUP_CHAR (c, charset, c1, c2);
246 lb = CHAR_LEADING_BYTE (c);
247 if (LEADING_BYTE_PRIVATE_P (lb))
248 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
250 if (EQ (charset, Vcharset_control_1))
259 /* Return the first character from a Mule-encoded string in STR,
260 assuming it's non-ASCII. Do not call this directly.
261 Use the macro charptr_emchar() instead. */
264 non_ascii_charptr_emchar (CONST Bufbyte *str)
277 else if ( b >= 0xf8 )
282 else if ( b >= 0xf0 )
287 else if ( b >= 0xe0 )
292 else if ( b >= 0xc0 )
302 for( ; len > 0; len-- )
305 ch = ( ch << 6 ) | ( b & 0x3f );
309 Bufbyte i0 = *str, i1, i2 = 0;
312 if (i0 == LEADING_BYTE_CONTROL_1)
313 return (Emchar) (*++str - 0x20);
315 if (LEADING_BYTE_PREFIX_P (i0))
320 charset = CHARSET_BY_LEADING_BYTE (i0);
321 if (XCHARSET_DIMENSION (charset) == 2)
324 return MAKE_CHAR (charset, i1, i2);
328 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
329 Do not call this directly. Use the macro valid_char_p() instead. */
333 non_ascii_valid_char_p (Emchar ch)
337 /* Must have only lowest 19 bits set */
341 f1 = CHAR_FIELD1 (ch);
342 f2 = CHAR_FIELD2 (ch);
343 f3 = CHAR_FIELD3 (ch);
349 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
350 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
351 f2 > MAX_CHAR_FIELD2_PRIVATE)
356 if (f3 != 0x20 && f3 != 0x7F)
360 NOTE: This takes advantage of the fact that
361 FIELD2_TO_OFFICIAL_LEADING_BYTE and
362 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
364 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
365 return (XCHARSET_CHARS (charset) == 96);
371 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
372 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
373 f1 > MAX_CHAR_FIELD1_PRIVATE)
375 if (f2 < 0x20 || f3 < 0x20)
378 #ifdef ENABLE_COMPOSITE_CHARS
379 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
381 if (UNBOUNDP (Fgethash (make_int (ch),
382 Vcomposite_char_char2string_hash_table,
387 #endif /* ENABLE_COMPOSITE_CHARS */
389 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
392 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
394 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
397 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
399 return (XCHARSET_CHARS (charset) == 96);
405 /************************************************************************/
406 /* Basic string functions */
407 /************************************************************************/
409 /* Copy the character pointed to by PTR into STR, assuming it's
410 non-ASCII. Do not call this directly. Use the macro
411 charptr_copy_char() instead. */
414 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
416 Bufbyte *strptr = str;
418 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
420 /* Notice fallthrough. */
422 case 6: *++strptr = *ptr++;
423 case 5: *++strptr = *ptr++;
425 case 4: *++strptr = *ptr++;
426 case 3: *++strptr = *ptr++;
427 case 2: *++strptr = *ptr;
432 return strptr + 1 - str;
436 /************************************************************************/
437 /* streams of Emchars */
438 /************************************************************************/
440 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
441 The functions below are not meant to be called directly; use
442 the macros in insdel.h. */
445 Lstream_get_emchar_1 (Lstream *stream, int ch)
447 Bufbyte str[MAX_EMCHAR_LEN];
448 Bufbyte *strptr = str;
450 str[0] = (Bufbyte) ch;
451 switch (REP_BYTES_BY_FIRST_BYTE (ch))
453 /* Notice fallthrough. */
456 ch = Lstream_getc (stream);
458 *++strptr = (Bufbyte) ch;
460 ch = Lstream_getc (stream);
462 *++strptr = (Bufbyte) ch;
465 ch = Lstream_getc (stream);
467 *++strptr = (Bufbyte) ch;
469 ch = Lstream_getc (stream);
471 *++strptr = (Bufbyte) ch;
473 ch = Lstream_getc (stream);
475 *++strptr = (Bufbyte) ch;
480 return charptr_emchar (str);
484 Lstream_fput_emchar (Lstream *stream, Emchar ch)
486 Bufbyte str[MAX_EMCHAR_LEN];
487 Bytecount len = set_charptr_emchar (str, ch);
488 return Lstream_write (stream, str, len);
492 Lstream_funget_emchar (Lstream *stream, Emchar ch)
494 Bufbyte str[MAX_EMCHAR_LEN];
495 Bytecount len = set_charptr_emchar (str, ch);
496 Lstream_unread (stream, str, len);
500 /************************************************************************/
502 /************************************************************************/
505 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
507 struct Lisp_Charset *cs = XCHARSET (obj);
509 markobj (cs->short_name);
510 markobj (cs->long_name);
511 markobj (cs->doc_string);
512 markobj (cs->registry);
513 markobj (cs->ccl_program);
518 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
520 struct Lisp_Charset *cs = XCHARSET (obj);
524 error ("printing unreadable object #<charset %s 0x%x>",
525 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
528 write_c_string ("#<charset ", printcharfun);
529 print_internal (CHARSET_NAME (cs), printcharfun, 0);
530 write_c_string (" ", printcharfun);
531 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
532 write_c_string (" ", printcharfun);
533 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
534 write_c_string (" ", printcharfun);
535 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
536 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
537 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
538 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
539 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
541 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
542 CHARSET_COLUMNS (cs),
543 CHARSET_GRAPHIC (cs),
545 write_c_string (buf, printcharfun);
546 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
547 sprintf (buf, " 0x%x>", cs->header.uid);
548 write_c_string (buf, printcharfun);
551 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
552 mark_charset, print_charset, 0, 0, 0,
553 struct Lisp_Charset);
554 /* Make a new charset. */
557 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
558 unsigned char type, unsigned char columns, unsigned char graphic,
559 Bufbyte final, unsigned char direction, Lisp_Object short_name,
560 Lisp_Object long_name, Lisp_Object doc,
564 struct Lisp_Charset *cs =
565 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
566 XSETCHARSET (obj, cs);
568 CHARSET_ID (cs) = id;
569 CHARSET_NAME (cs) = name;
570 CHARSET_SHORT_NAME (cs) = short_name;
571 CHARSET_LONG_NAME (cs) = long_name;
572 CHARSET_REP_BYTES (cs) = rep_bytes;
573 CHARSET_DIRECTION (cs) = direction;
574 CHARSET_TYPE (cs) = type;
575 CHARSET_COLUMNS (cs) = columns;
576 CHARSET_GRAPHIC (cs) = graphic;
577 CHARSET_FINAL (cs) = final;
578 CHARSET_DOC_STRING (cs) = doc;
579 CHARSET_REGISTRY (cs) = reg;
580 CHARSET_CCL_PROGRAM (cs) = Qnil;
581 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
583 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
584 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
585 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
586 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
590 /* some charsets do not have final characters. This includes
591 ASCII, Control-1, Composite, and the two faux private
593 assert (NILP (charset_by_attributes[type][final][direction]));
594 charset_by_attributes[type][final][direction] = obj;
597 assert (NILP (charset_by_leading_byte[id - 128]));
598 charset_by_leading_byte[id - 128] = obj;
600 /* official leading byte */
601 rep_bytes_by_first_byte[id] = rep_bytes;
603 /* Some charsets are "faux" and don't have names or really exist at
604 all except in the leading-byte table. */
606 Fputhash (name, obj, Vcharset_hash_table);
611 get_unallocated_leading_byte (int dimension)
617 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
620 lb = next_allocated_1_byte_leading_byte++;
624 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
627 lb = next_allocated_2_byte_leading_byte++;
632 ("No more character sets free for this dimension",
633 make_int (dimension));
639 /************************************************************************/
640 /* Basic charset Lisp functions */
641 /************************************************************************/
643 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
644 Return non-nil if OBJECT is a charset.
648 return CHARSETP (object) ? Qt : Qnil;
651 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
652 Retrieve the charset of the given name.
653 If CHARSET-OR-NAME is a charset object, it is simply returned.
654 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
655 nil is returned. Otherwise the associated charset object is returned.
659 if (CHARSETP (charset_or_name))
660 return charset_or_name;
662 CHECK_SYMBOL (charset_or_name);
663 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
666 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
667 Retrieve the charset of the given name.
668 Same as `find-charset' except an error is signalled if there is no such
669 charset instead of returning nil.
673 Lisp_Object charset = Ffind_charset (name);
676 signal_simple_error ("No such charset", name);
680 /* We store the charsets in hash tables with the names as the key and the
681 actual charset object as the value. Occasionally we need to use them
682 in a list format. These routines provide us with that. */
683 struct charset_list_closure
685 Lisp_Object *charset_list;
689 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
690 void *charset_list_closure)
692 /* This function can GC */
693 struct charset_list_closure *chcl =
694 (struct charset_list_closure*) charset_list_closure;
695 Lisp_Object *charset_list = chcl->charset_list;
697 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
701 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
702 Return a list of the names of all defined charsets.
706 Lisp_Object charset_list = Qnil;
708 struct charset_list_closure charset_list_closure;
710 GCPRO1 (charset_list);
711 charset_list_closure.charset_list = &charset_list;
712 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
713 &charset_list_closure);
719 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
720 Return the name of the given charset.
724 return XCHARSET_NAME (Fget_charset (charset));
727 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
728 Define a new character set.
729 This function is for use with Mule support.
730 NAME is a symbol, the name by which the character set is normally referred.
731 DOC-STRING is a string describing the character set.
732 PROPS is a property list, describing the specific nature of the
733 character set. Recognized properties are:
735 'short-name Short version of the charset name (ex: Latin-1)
736 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
737 'registry A regular expression matching the font registry field for
739 'dimension Number of octets used to index a character in this charset.
740 Either 1 or 2. Defaults to 1.
741 'columns Number of columns used to display a character in this charset.
742 Only used in TTY mode. (Under X, the actual width of a
743 character can be derived from the font used to display the
744 characters.) If unspecified, defaults to the dimension
745 (this is almost always the correct value).
746 'chars Number of characters in each dimension (94 or 96).
747 Defaults to 94. Note that if the dimension is 2, the
748 character set thus described is 94x94 or 96x96.
749 'final Final byte of ISO 2022 escape sequence. Must be
750 supplied. Each combination of (DIMENSION, CHARS) defines a
751 separate namespace for final bytes. Note that ISO
752 2022 restricts the final byte to the range
753 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
754 dimension == 2. Note also that final bytes in the range
755 0x30 - 0x3F are reserved for user-defined (not official)
757 'graphic 0 (use left half of font on output) or 1 (use right half
758 of font on output). Defaults to 0. For example, for
759 a font whose registry is ISO8859-1, the left half
760 (octets 0x20 - 0x7F) is the `ascii' character set, while
761 the right half (octets 0xA0 - 0xFF) is the `latin-1'
762 character set. With 'graphic set to 0, the octets
763 will have their high bit cleared; with it set to 1,
764 the octets will have their high bit set.
765 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
767 'ccl-program A compiled CCL program used to convert a character in
768 this charset into an index into the font. This is in
769 addition to the 'graphic property. The CCL program
770 is passed the octets of the character, with the high
771 bit cleared and set depending upon whether the value
772 of the 'graphic property is 0 or 1.
774 (name, doc_string, props))
776 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
777 int direction = CHARSET_LEFT_TO_RIGHT;
779 Lisp_Object registry = Qnil;
781 Lisp_Object rest, keyword, value;
782 Lisp_Object ccl_program = Qnil;
783 Lisp_Object short_name = Qnil, long_name = Qnil;
786 if (!NILP (doc_string))
787 CHECK_STRING (doc_string);
789 charset = Ffind_charset (name);
791 signal_simple_error ("Cannot redefine existing charset", name);
793 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
795 if (EQ (keyword, Qshort_name))
797 CHECK_STRING (value);
801 if (EQ (keyword, Qlong_name))
803 CHECK_STRING (value);
807 else if (EQ (keyword, Qdimension))
810 dimension = XINT (value);
811 if (dimension < 1 || dimension > 2)
812 signal_simple_error ("Invalid value for 'dimension", value);
815 else if (EQ (keyword, Qchars))
818 chars = XINT (value);
819 if (chars != 94 && chars != 96)
820 signal_simple_error ("Invalid value for 'chars", value);
823 else if (EQ (keyword, Qcolumns))
826 columns = XINT (value);
827 if (columns != 1 && columns != 2)
828 signal_simple_error ("Invalid value for 'columns", value);
831 else if (EQ (keyword, Qgraphic))
834 graphic = XINT (value);
835 if (graphic < 0 || graphic > 1)
836 signal_simple_error ("Invalid value for 'graphic", value);
839 else if (EQ (keyword, Qregistry))
841 CHECK_STRING (value);
845 else if (EQ (keyword, Qdirection))
847 if (EQ (value, Ql2r))
848 direction = CHARSET_LEFT_TO_RIGHT;
849 else if (EQ (value, Qr2l))
850 direction = CHARSET_RIGHT_TO_LEFT;
852 signal_simple_error ("Invalid value for 'direction", value);
855 else if (EQ (keyword, Qfinal))
857 CHECK_CHAR_COERCE_INT (value);
858 final = XCHAR (value);
859 if (final < '0' || final > '~')
860 signal_simple_error ("Invalid value for 'final", value);
863 else if (EQ (keyword, Qccl_program))
865 CHECK_VECTOR (value);
870 signal_simple_error ("Unrecognized property", keyword);
874 error ("'final must be specified");
875 if (dimension == 2 && final > 0x5F)
877 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
881 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
883 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
885 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
886 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
888 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
890 id = get_unallocated_leading_byte (dimension);
892 if (NILP (doc_string))
893 doc_string = build_string ("");
896 registry = build_string ("");
898 if (NILP (short_name))
899 XSETSTRING (short_name, XSYMBOL (name)->name);
901 if (NILP (long_name))
902 long_name = doc_string;
906 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
907 final, direction, short_name, long_name, doc_string, registry);
908 if (!NILP (ccl_program))
909 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
913 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
915 Make a charset equivalent to CHARSET but which goes in the opposite direction.
916 NEW-NAME is the name of the new charset. Return the new charset.
920 Lisp_Object new_charset = Qnil;
921 int id, dimension, columns, graphic, final;
923 Lisp_Object registry, doc_string, short_name, long_name;
924 struct Lisp_Charset *cs;
926 charset = Fget_charset (charset);
927 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
928 signal_simple_error ("Charset already has reverse-direction charset",
931 CHECK_SYMBOL (new_name);
932 if (!NILP (Ffind_charset (new_name)))
933 signal_simple_error ("Cannot redefine existing charset", new_name);
935 cs = XCHARSET (charset);
937 type = CHARSET_TYPE (cs);
938 columns = CHARSET_COLUMNS (cs);
939 dimension = CHARSET_DIMENSION (cs);
940 id = get_unallocated_leading_byte (dimension);
942 graphic = CHARSET_GRAPHIC (cs);
943 final = CHARSET_FINAL (cs);
944 direction = CHARSET_RIGHT_TO_LEFT;
945 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
946 direction = CHARSET_LEFT_TO_RIGHT;
947 doc_string = CHARSET_DOC_STRING (cs);
948 short_name = CHARSET_SHORT_NAME (cs);
949 long_name = CHARSET_LONG_NAME (cs);
950 registry = CHARSET_REGISTRY (cs);
952 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
953 graphic, final, direction, short_name, long_name,
954 doc_string, registry);
956 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
957 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
962 /* #### Reverse direction charsets not yet implemented. */
964 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
966 Return the reverse-direction charset parallel to CHARSET, if any.
967 This is the charset with the same properties (in particular, the same
968 dimension, number of characters per dimension, and final byte) as
969 CHARSET but whose characters are displayed in the opposite direction.
973 charset = Fget_charset (charset);
974 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
978 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
979 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
980 If DIRECTION is omitted, both directions will be checked (left-to-right
981 will be returned if character sets exist for both directions).
983 (dimension, chars, final, direction))
985 int dm, ch, fi, di = -1;
987 Lisp_Object obj = Qnil;
989 CHECK_INT (dimension);
990 dm = XINT (dimension);
991 if (dm < 1 || dm > 2)
992 signal_simple_error ("Invalid value for DIMENSION", dimension);
996 if (ch != 94 && ch != 96)
997 signal_simple_error ("Invalid value for CHARS", chars);
999 CHECK_CHAR_COERCE_INT (final);
1001 if (fi < '0' || fi > '~')
1002 signal_simple_error ("Invalid value for FINAL", final);
1004 if (EQ (direction, Ql2r))
1005 di = CHARSET_LEFT_TO_RIGHT;
1006 else if (EQ (direction, Qr2l))
1007 di = CHARSET_RIGHT_TO_LEFT;
1008 else if (!NILP (direction))
1009 signal_simple_error ("Invalid value for DIRECTION", direction);
1011 if (dm == 2 && fi > 0x5F)
1013 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1016 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1018 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1022 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1024 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1027 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1030 return XCHARSET_NAME (obj);
1034 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1035 Return short name of CHARSET.
1039 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1042 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1043 Return long name of CHARSET.
1047 return XCHARSET_LONG_NAME (Fget_charset (charset));
1050 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1051 Return description of CHARSET.
1055 return XCHARSET_DOC_STRING (Fget_charset (charset));
1058 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1059 Return dimension of CHARSET.
1063 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1066 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1067 Return property PROP of CHARSET.
1068 Recognized properties are those listed in `make-charset', as well as
1069 'name and 'doc-string.
1073 struct Lisp_Charset *cs;
1075 charset = Fget_charset (charset);
1076 cs = XCHARSET (charset);
1078 CHECK_SYMBOL (prop);
1079 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1080 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1081 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1082 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1083 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1084 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1085 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1086 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1087 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1088 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1089 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1090 if (EQ (prop, Qdirection))
1091 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1092 if (EQ (prop, Qreverse_direction_charset))
1094 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1098 return XCHARSET_NAME (obj);
1100 signal_simple_error ("Unrecognized charset property name", prop);
1101 return Qnil; /* not reached */
1104 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1105 Return charset identification number of CHARSET.
1109 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1112 /* #### We need to figure out which properties we really want to
1115 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1116 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1118 (charset, ccl_program))
1120 charset = Fget_charset (charset);
1121 CHECK_VECTOR (ccl_program);
1122 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1127 invalidate_charset_font_caches (Lisp_Object charset)
1129 /* Invalidate font cache entries for charset on all devices. */
1130 Lisp_Object devcons, concons, hash_table;
1131 DEVICE_LOOP_NO_BREAK (devcons, concons)
1133 struct device *d = XDEVICE (XCAR (devcons));
1134 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1135 if (!UNBOUNDP (hash_table))
1136 Fclrhash (hash_table);
1140 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1141 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1142 Set the 'registry property of CHARSET to REGISTRY.
1144 (charset, registry))
1146 charset = Fget_charset (charset);
1147 CHECK_STRING (registry);
1148 XCHARSET_REGISTRY (charset) = registry;
1149 invalidate_charset_font_caches (charset);
1150 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1155 /************************************************************************/
1156 /* Lisp primitives for working with characters */
1157 /************************************************************************/
1159 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1160 Make a character from CHARSET and octets ARG1 and ARG2.
1161 ARG2 is required only for characters from two-dimensional charsets.
1162 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1163 character s with caron.
1165 (charset, arg1, arg2))
1167 struct Lisp_Charset *cs;
1169 int lowlim, highlim;
1171 charset = Fget_charset (charset);
1172 cs = XCHARSET (charset);
1174 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1175 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1176 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1177 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1180 /* It is useful (and safe, according to Olivier Galibert) to strip
1181 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1182 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1183 Latin 2 code of the character. */
1184 a1 = XINT (arg1) & 0x7f;
1185 if (a1 < lowlim || a1 > highlim)
1186 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1188 if (CHARSET_DIMENSION (cs) == 1)
1192 ("Charset is of dimension one; second octet must be nil", arg2);
1193 return make_char (MAKE_CHAR (charset, a1, 0));
1197 a2 = XINT (arg2) & 0x7f;
1198 if (a2 < lowlim || a2 > highlim)
1199 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1201 return make_char (MAKE_CHAR (charset, a1, a2));
1204 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1205 Return the character set of char CH.
1209 CHECK_CHAR_COERCE_INT (ch);
1211 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1212 (CHAR_LEADING_BYTE (XCHAR (ch))));
1215 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1216 Return the octet numbered N (should be 0 or 1) of char CH.
1217 N defaults to 0 if omitted.
1221 Lisp_Object charset;
1224 CHECK_CHAR_COERCE_INT (ch);
1231 if (int_n != 0 && int_n != 1)
1232 signal_simple_error ("Octet number must be 0 or 1", n);
1234 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1235 return make_int (int_n == 0 ? c1 : c2);
1239 #ifdef ENABLE_COMPOSITE_CHARS
1240 /************************************************************************/
1241 /* composite character functions */
1242 /************************************************************************/
1245 lookup_composite_char (Bufbyte *str, int len)
1247 Lisp_Object lispstr = make_string (str, len);
1248 Lisp_Object ch = Fgethash (lispstr,
1249 Vcomposite_char_string2char_hash_table,
1255 if (composite_char_row_next >= 128)
1256 signal_simple_error ("No more composite chars available", lispstr);
1257 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1258 composite_char_col_next);
1259 Fputhash (make_char (emch), lispstr,
1260 Vcomposite_char_char2string_hash_table);
1261 Fputhash (lispstr, make_char (emch),
1262 Vcomposite_char_string2char_hash_table);
1263 composite_char_col_next++;
1264 if (composite_char_col_next >= 128)
1266 composite_char_col_next = 32;
1267 composite_char_row_next++;
1276 composite_char_string (Emchar ch)
1278 Lisp_Object str = Fgethash (make_char (ch),
1279 Vcomposite_char_char2string_hash_table,
1281 assert (!UNBOUNDP (str));
1285 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1286 Convert a string into a single composite character.
1287 The character is the result of overstriking all the characters in
1292 CHECK_STRING (string);
1293 return make_char (lookup_composite_char (XSTRING_DATA (string),
1294 XSTRING_LENGTH (string)));
1297 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1298 Return a string of the characters comprising a composite character.
1306 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1307 signal_simple_error ("Must be composite char", ch);
1308 return composite_char_string (emch);
1310 #endif /* ENABLE_COMPOSITE_CHARS */
1313 /************************************************************************/
1314 /* initialization */
1315 /************************************************************************/
1318 syms_of_mule_charset (void)
1320 DEFSUBR (Fcharsetp);
1321 DEFSUBR (Ffind_charset);
1322 DEFSUBR (Fget_charset);
1323 DEFSUBR (Fcharset_list);
1324 DEFSUBR (Fcharset_name);
1325 DEFSUBR (Fmake_charset);
1326 DEFSUBR (Fmake_reverse_direction_charset);
1327 /* DEFSUBR (Freverse_direction_charset); */
1328 DEFSUBR (Fcharset_from_attributes);
1329 DEFSUBR (Fcharset_short_name);
1330 DEFSUBR (Fcharset_long_name);
1331 DEFSUBR (Fcharset_description);
1332 DEFSUBR (Fcharset_dimension);
1333 DEFSUBR (Fcharset_property);
1334 DEFSUBR (Fcharset_id);
1335 DEFSUBR (Fset_charset_ccl_program);
1336 DEFSUBR (Fset_charset_registry);
1338 DEFSUBR (Fmake_char);
1339 DEFSUBR (Fchar_charset);
1340 DEFSUBR (Fchar_octet);
1342 #ifdef ENABLE_COMPOSITE_CHARS
1343 DEFSUBR (Fmake_composite_char);
1344 DEFSUBR (Fcomposite_char_string);
1347 defsymbol (&Qcharsetp, "charsetp");
1348 defsymbol (&Qregistry, "registry");
1349 defsymbol (&Qfinal, "final");
1350 defsymbol (&Qgraphic, "graphic");
1351 defsymbol (&Qdirection, "direction");
1352 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1353 defsymbol (&Qccl_program, "ccl-program");
1354 defsymbol (&Qshort_name, "short-name");
1355 defsymbol (&Qlong_name, "long-name");
1357 defsymbol (&Ql2r, "l2r");
1358 defsymbol (&Qr2l, "r2l");
1360 /* Charsets, compatible with FSF 20.3
1361 Naming convention is Script-Charset[-Edition] */
1362 defsymbol (&Qascii, "ascii");
1363 defsymbol (&Qcontrol_1, "control-1");
1364 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1365 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1366 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1367 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1368 defsymbol (&Qthai_tis620, "thai-tis620");
1369 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1370 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1371 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1372 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1373 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1374 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1375 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1376 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1377 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1378 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1379 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1380 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1381 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1382 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1383 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1384 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1386 defsymbol (&Qcomposite, "composite");
1389 Vutf_2000_version = build_string("0.2 (JR NAMBA)");
1390 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1391 Version number of UTF-2000.
1395 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1396 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1397 Leading-code of private TYPE9N charset of column-width 1.
1399 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1403 vars_of_mule_charset (void)
1407 /* Table of charsets indexed by leading byte. */
1408 for (i = 0; i < countof (charset_by_leading_byte); i++)
1409 charset_by_leading_byte[i] = Qnil;
1411 /* Table of charsets indexed by type/final-byte/direction. */
1412 for (i = 0; i < countof (charset_by_attributes); i++)
1413 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1414 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1415 charset_by_attributes[i][j][k] = Qnil;
1417 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1418 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1422 complex_vars_of_mule_charset (void)
1424 staticpro (&Vcharset_hash_table);
1425 Vcharset_hash_table =
1426 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1428 /* Predefined character sets. We store them into variables for
1432 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1433 CHARSET_TYPE_94, 1, 0, 'B',
1434 CHARSET_LEFT_TO_RIGHT,
1435 build_string ("ASCII"),
1436 build_string ("ASCII)"),
1437 build_string ("ASCII (ISO646 IRV)"),
1438 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1439 Vcharset_control_1 =
1440 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1441 CHARSET_TYPE_94, 1, 1, 0,
1442 CHARSET_LEFT_TO_RIGHT,
1443 build_string ("C1"),
1444 build_string ("Control characters"),
1445 build_string ("Control characters 128-191"),
1447 Vcharset_latin_iso8859_1 =
1448 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1449 CHARSET_TYPE_96, 1, 1, 'A',
1450 CHARSET_LEFT_TO_RIGHT,
1451 build_string ("Latin-1"),
1452 build_string ("ISO8859-1 (Latin-1)"),
1453 build_string ("ISO8859-1 (Latin-1)"),
1454 build_string ("iso8859-1"));
1455 Vcharset_latin_iso8859_2 =
1456 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1457 CHARSET_TYPE_96, 1, 1, 'B',
1458 CHARSET_LEFT_TO_RIGHT,
1459 build_string ("Latin-2"),
1460 build_string ("ISO8859-2 (Latin-2)"),
1461 build_string ("ISO8859-2 (Latin-2)"),
1462 build_string ("iso8859-2"));
1463 Vcharset_latin_iso8859_3 =
1464 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1465 CHARSET_TYPE_96, 1, 1, 'C',
1466 CHARSET_LEFT_TO_RIGHT,
1467 build_string ("Latin-3"),
1468 build_string ("ISO8859-3 (Latin-3)"),
1469 build_string ("ISO8859-3 (Latin-3)"),
1470 build_string ("iso8859-3"));
1471 Vcharset_latin_iso8859_4 =
1472 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1473 CHARSET_TYPE_96, 1, 1, 'D',
1474 CHARSET_LEFT_TO_RIGHT,
1475 build_string ("Latin-4"),
1476 build_string ("ISO8859-4 (Latin-4)"),
1477 build_string ("ISO8859-4 (Latin-4)"),
1478 build_string ("iso8859-4"));
1479 Vcharset_thai_tis620 =
1480 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1481 CHARSET_TYPE_96, 1, 1, 'T',
1482 CHARSET_LEFT_TO_RIGHT,
1483 build_string ("TIS620"),
1484 build_string ("TIS620 (Thai)"),
1485 build_string ("TIS620.2529 (Thai)"),
1486 build_string ("tis620"));
1487 Vcharset_greek_iso8859_7 =
1488 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1489 CHARSET_TYPE_96, 1, 1, 'F',
1490 CHARSET_LEFT_TO_RIGHT,
1491 build_string ("ISO8859-7"),
1492 build_string ("ISO8859-7 (Greek)"),
1493 build_string ("ISO8859-7 (Greek)"),
1494 build_string ("iso8859-7"));
1495 Vcharset_arabic_iso8859_6 =
1496 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1497 CHARSET_TYPE_96, 1, 1, 'G',
1498 CHARSET_RIGHT_TO_LEFT,
1499 build_string ("ISO8859-6"),
1500 build_string ("ISO8859-6 (Arabic)"),
1501 build_string ("ISO8859-6 (Arabic)"),
1502 build_string ("iso8859-6"));
1503 Vcharset_hebrew_iso8859_8 =
1504 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1505 CHARSET_TYPE_96, 1, 1, 'H',
1506 CHARSET_RIGHT_TO_LEFT,
1507 build_string ("ISO8859-8"),
1508 build_string ("ISO8859-8 (Hebrew)"),
1509 build_string ("ISO8859-8 (Hebrew)"),
1510 build_string ("iso8859-8"));
1511 Vcharset_katakana_jisx0201 =
1512 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1513 CHARSET_TYPE_94, 1, 1, 'I',
1514 CHARSET_LEFT_TO_RIGHT,
1515 build_string ("JISX0201 Kana"),
1516 build_string ("JISX0201.1976 (Japanese Kana)"),
1517 build_string ("JISX0201.1976 Japanese Kana"),
1518 build_string ("jisx0201.1976"));
1519 Vcharset_latin_jisx0201 =
1520 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1521 CHARSET_TYPE_94, 1, 0, 'J',
1522 CHARSET_LEFT_TO_RIGHT,
1523 build_string ("JISX0201 Roman"),
1524 build_string ("JISX0201.1976 (Japanese Roman)"),
1525 build_string ("JISX0201.1976 Japanese Roman"),
1526 build_string ("jisx0201.1976"));
1527 Vcharset_cyrillic_iso8859_5 =
1528 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1529 CHARSET_TYPE_96, 1, 1, 'L',
1530 CHARSET_LEFT_TO_RIGHT,
1531 build_string ("ISO8859-5"),
1532 build_string ("ISO8859-5 (Cyrillic)"),
1533 build_string ("ISO8859-5 (Cyrillic)"),
1534 build_string ("iso8859-5"));
1535 Vcharset_latin_iso8859_9 =
1536 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1537 CHARSET_TYPE_96, 1, 1, 'M',
1538 CHARSET_LEFT_TO_RIGHT,
1539 build_string ("Latin-5"),
1540 build_string ("ISO8859-9 (Latin-5)"),
1541 build_string ("ISO8859-9 (Latin-5)"),
1542 build_string ("iso8859-9"));
1543 Vcharset_japanese_jisx0208_1978 =
1544 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1545 CHARSET_TYPE_94X94, 2, 0, '@',
1546 CHARSET_LEFT_TO_RIGHT,
1547 build_string ("JISX0208.1978"),
1548 build_string ("JISX0208.1978 (Japanese)"),
1550 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1551 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1552 Vcharset_chinese_gb2312 =
1553 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1554 CHARSET_TYPE_94X94, 2, 0, 'A',
1555 CHARSET_LEFT_TO_RIGHT,
1556 build_string ("GB2312"),
1557 build_string ("GB2312)"),
1558 build_string ("GB2312 Chinese simplified"),
1559 build_string ("gb2312"));
1560 Vcharset_japanese_jisx0208 =
1561 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1562 CHARSET_TYPE_94X94, 2, 0, 'B',
1563 CHARSET_LEFT_TO_RIGHT,
1564 build_string ("JISX0208"),
1565 build_string ("JISX0208.1983/1990 (Japanese)"),
1566 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1567 build_string ("jisx0208.19\\(83\\|90\\)"));
1568 Vcharset_korean_ksc5601 =
1569 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1570 CHARSET_TYPE_94X94, 2, 0, 'C',
1571 CHARSET_LEFT_TO_RIGHT,
1572 build_string ("KSC5601"),
1573 build_string ("KSC5601 (Korean"),
1574 build_string ("KSC5601 Korean Hangul and Hanja"),
1575 build_string ("ksc5601"));
1576 Vcharset_japanese_jisx0212 =
1577 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1578 CHARSET_TYPE_94X94, 2, 0, 'D',
1579 CHARSET_LEFT_TO_RIGHT,
1580 build_string ("JISX0212"),
1581 build_string ("JISX0212 (Japanese)"),
1582 build_string ("JISX0212 Japanese Supplement"),
1583 build_string ("jisx0212"));
1585 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1586 Vcharset_chinese_cns11643_1 =
1587 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1588 CHARSET_TYPE_94X94, 2, 0, 'G',
1589 CHARSET_LEFT_TO_RIGHT,
1590 build_string ("CNS11643-1"),
1591 build_string ("CNS11643-1 (Chinese traditional)"),
1593 ("CNS 11643 Plane 1 Chinese traditional"),
1594 build_string (CHINESE_CNS_PLANE_RE("1")));
1595 Vcharset_chinese_cns11643_2 =
1596 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1597 CHARSET_TYPE_94X94, 2, 0, 'H',
1598 CHARSET_LEFT_TO_RIGHT,
1599 build_string ("CNS11643-2"),
1600 build_string ("CNS11643-2 (Chinese traditional)"),
1602 ("CNS 11643 Plane 2 Chinese traditional"),
1603 build_string (CHINESE_CNS_PLANE_RE("2")));
1604 Vcharset_chinese_big5_1 =
1605 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1606 CHARSET_TYPE_94X94, 2, 0, '0',
1607 CHARSET_LEFT_TO_RIGHT,
1608 build_string ("Big5"),
1609 build_string ("Big5 (Level-1)"),
1611 ("Big5 Level-1 Chinese traditional"),
1612 build_string ("big5"));
1613 Vcharset_chinese_big5_2 =
1614 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1615 CHARSET_TYPE_94X94, 2, 0, '1',
1616 CHARSET_LEFT_TO_RIGHT,
1617 build_string ("Big5"),
1618 build_string ("Big5 (Level-2)"),
1620 ("Big5 Level-2 Chinese traditional"),
1621 build_string ("big5"));
1624 #ifdef ENABLE_COMPOSITE_CHARS
1625 /* #### For simplicity, we put composite chars into a 96x96 charset.
1626 This is going to lead to problems because you can run out of
1627 room, esp. as we don't yet recycle numbers. */
1628 Vcharset_composite =
1629 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1630 CHARSET_TYPE_96X96, 2, 0, 0,
1631 CHARSET_LEFT_TO_RIGHT,
1632 build_string ("Composite"),
1633 build_string ("Composite characters"),
1634 build_string ("Composite characters"),
1637 composite_char_row_next = 32;
1638 composite_char_col_next = 32;
1640 Vcomposite_char_string2char_hash_table =
1641 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1642 Vcomposite_char_char2string_hash_table =
1643 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1644 staticpro (&Vcomposite_char_string2char_hash_table);
1645 staticpro (&Vcomposite_char_char2string_hash_table);
1646 #endif /* ENABLE_COMPOSITE_CHARS */