1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
102 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
104 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 /* 0x90 - 0x9d are for Dimension-2 official charsets */
107 /* 0x9e is for Dimension-1 private charsets */
108 /* 0x9f is for Dimension-2 private charsets */
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
112 Lisp_Object Vutf_2000_version;
114 int leading_code_private_11;
116 Lisp_Object Qcharsetp;
118 /* Qdoc_string, Qdimension, Qchars defined in general.c */
119 Lisp_Object Qregistry, Qfinal, Qgraphic;
120 Lisp_Object Qdirection;
121 Lisp_Object Qreverse_direction_charset;
122 Lisp_Object Qccl_program;
123 Lisp_Object Qleading_byte;
124 Lisp_Object Qshort_name, Qlong_name;
140 Qjapanese_jisx0208_1978,
151 Lisp_Object Ql2r, Qr2l;
153 Lisp_Object Vcharset_hash_table;
155 static Bufbyte next_allocated_1_byte_leading_byte;
156 static Bufbyte next_allocated_2_byte_leading_byte;
158 /* Composite characters are characters constructed by overstriking two
159 or more regular characters.
161 1) The old Mule implementation involves storing composite characters
162 in a buffer as a tag followed by all of the actual characters
163 used to make up the composite character. I think this is a bad
164 idea; it greatly complicates code that wants to handle strings
165 one character at a time because it has to deal with the possibility
166 of great big ungainly characters. It's much more reasonable to
167 simply store an index into a table of composite characters.
169 2) The current implementation only allows for 16,384 separate
170 composite characters over the lifetime of the XEmacs process.
171 This could become a potential problem if the user
172 edited lots of different files that use composite characters.
173 Due to FSF bogosity, increasing the number of allowable
174 composite characters under Mule would decrease the number
175 of possible faces that can exist. Mule already has shrunk
176 this to 2048, and further shrinkage would become uncomfortable.
177 No such problems exist in XEmacs.
179 Composite characters could be represented as 0x80 C1 C2 C3,
180 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
181 for slightly under 2^20 (one million) composite characters
182 over the XEmacs process lifetime, and you only need to
183 increase the size of a Mule character from 19 to 21 bits.
184 Or you could use 0x80 C1 C2 C3 C4, allowing for about
185 85 million (slightly over 2^26) composite characters. */
188 /************************************************************************/
189 /* Basic Emchar functions */
190 /************************************************************************/
192 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
193 string in STR. Returns the number of bytes stored.
194 Do not call this directly. Use the macro set_charptr_emchar() instead.
198 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
213 else if ( c <= 0x7ff )
215 *p++ = (c >> 6) | 0xc0;
216 *p++ = (c & 0x3f) | 0x80;
218 else if ( c <= 0xffff )
220 *p++ = (c >> 12) | 0xe0;
221 *p++ = ((c >> 6) & 0x3f) | 0x80;
222 *p++ = (c & 0x3f) | 0x80;
224 else if ( c <= 0x1fffff )
226 *p++ = (c >> 18) | 0xf0;
227 *p++ = ((c >> 12) & 0x3f) | 0x80;
228 *p++ = ((c >> 6) & 0x3f) | 0x80;
229 *p++ = (c & 0x3f) | 0x80;
231 else if ( c <= 0x3ffffff )
233 *p++ = (c >> 24) | 0xf8;
234 *p++ = ((c >> 18) & 0x3f) | 0x80;
235 *p++ = ((c >> 12) & 0x3f) | 0x80;
236 *p++ = ((c >> 6) & 0x3f) | 0x80;
237 *p++ = (c & 0x3f) | 0x80;
241 *p++ = (c >> 30) | 0xfc;
242 *p++ = ((c >> 24) & 0x3f) | 0x80;
243 *p++ = ((c >> 18) & 0x3f) | 0x80;
244 *p++ = ((c >> 12) & 0x3f) | 0x80;
245 *p++ = ((c >> 6) & 0x3f) | 0x80;
246 *p++ = (c & 0x3f) | 0x80;
249 BREAKUP_CHAR (c, charset, c1, c2);
250 lb = CHAR_LEADING_BYTE (c);
251 if (LEADING_BYTE_PRIVATE_P (lb))
252 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
254 if (EQ (charset, Vcharset_control_1))
263 /* Return the first character from a Mule-encoded string in STR,
264 assuming it's non-ASCII. Do not call this directly.
265 Use the macro charptr_emchar() instead. */
268 non_ascii_charptr_emchar (CONST Bufbyte *str)
281 else if ( b >= 0xf8 )
286 else if ( b >= 0xf0 )
291 else if ( b >= 0xe0 )
296 else if ( b >= 0xc0 )
306 for( ; len > 0; len-- )
309 ch = ( ch << 6 ) | ( b & 0x3f );
313 Bufbyte i0 = *str, i1, i2 = 0;
316 if (i0 == LEADING_BYTE_CONTROL_1)
317 return (Emchar) (*++str - 0x20);
319 if (LEADING_BYTE_PREFIX_P (i0))
324 charset = CHARSET_BY_LEADING_BYTE (i0);
325 if (XCHARSET_DIMENSION (charset) == 2)
328 return MAKE_CHAR (charset, i1, i2);
332 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
333 Do not call this directly. Use the macro valid_char_p() instead. */
337 non_ascii_valid_char_p (Emchar ch)
341 /* Must have only lowest 19 bits set */
345 f1 = CHAR_FIELD1 (ch);
346 f2 = CHAR_FIELD2 (ch);
347 f3 = CHAR_FIELD3 (ch);
353 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
354 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
355 f2 > MAX_CHAR_FIELD2_PRIVATE)
360 if (f3 != 0x20 && f3 != 0x7F)
364 NOTE: This takes advantage of the fact that
365 FIELD2_TO_OFFICIAL_LEADING_BYTE and
366 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
368 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
369 return (XCHARSET_CHARS (charset) == 96);
375 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
376 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
377 f1 > MAX_CHAR_FIELD1_PRIVATE)
379 if (f2 < 0x20 || f3 < 0x20)
382 #ifdef ENABLE_COMPOSITE_CHARS
383 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
385 if (UNBOUNDP (Fgethash (make_int (ch),
386 Vcomposite_char_char2string_hash_table,
391 #endif /* ENABLE_COMPOSITE_CHARS */
393 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
396 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
398 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
401 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
403 return (XCHARSET_CHARS (charset) == 96);
409 /************************************************************************/
410 /* Basic string functions */
411 /************************************************************************/
413 /* Copy the character pointed to by PTR into STR, assuming it's
414 non-ASCII. Do not call this directly. Use the macro
415 charptr_copy_char() instead. */
418 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
420 Bufbyte *strptr = str;
422 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
424 /* Notice fallthrough. */
426 case 6: *++strptr = *ptr++;
427 case 5: *++strptr = *ptr++;
429 case 4: *++strptr = *ptr++;
430 case 3: *++strptr = *ptr++;
431 case 2: *++strptr = *ptr;
436 return strptr + 1 - str;
440 /************************************************************************/
441 /* streams of Emchars */
442 /************************************************************************/
444 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
445 The functions below are not meant to be called directly; use
446 the macros in insdel.h. */
449 Lstream_get_emchar_1 (Lstream *stream, int ch)
451 Bufbyte str[MAX_EMCHAR_LEN];
452 Bufbyte *strptr = str;
454 str[0] = (Bufbyte) ch;
455 switch (REP_BYTES_BY_FIRST_BYTE (ch))
457 /* Notice fallthrough. */
460 ch = Lstream_getc (stream);
462 *++strptr = (Bufbyte) ch;
464 ch = Lstream_getc (stream);
466 *++strptr = (Bufbyte) ch;
469 ch = Lstream_getc (stream);
471 *++strptr = (Bufbyte) ch;
473 ch = Lstream_getc (stream);
475 *++strptr = (Bufbyte) ch;
477 ch = Lstream_getc (stream);
479 *++strptr = (Bufbyte) ch;
484 return charptr_emchar (str);
488 Lstream_fput_emchar (Lstream *stream, Emchar ch)
490 Bufbyte str[MAX_EMCHAR_LEN];
491 Bytecount len = set_charptr_emchar (str, ch);
492 return Lstream_write (stream, str, len);
496 Lstream_funget_emchar (Lstream *stream, Emchar ch)
498 Bufbyte str[MAX_EMCHAR_LEN];
499 Bytecount len = set_charptr_emchar (str, ch);
500 Lstream_unread (stream, str, len);
504 /************************************************************************/
506 /************************************************************************/
509 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
511 struct Lisp_Charset *cs = XCHARSET (obj);
513 markobj (cs->short_name);
514 markobj (cs->long_name);
515 markobj (cs->doc_string);
516 markobj (cs->registry);
517 markobj (cs->ccl_program);
522 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
524 struct Lisp_Charset *cs = XCHARSET (obj);
528 error ("printing unreadable object #<charset %s 0x%x>",
529 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
532 write_c_string ("#<charset ", printcharfun);
533 print_internal (CHARSET_NAME (cs), printcharfun, 0);
534 write_c_string (" ", printcharfun);
535 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
536 write_c_string (" ", printcharfun);
537 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
538 write_c_string (" ", printcharfun);
539 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
540 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
541 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
542 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
543 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
545 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
546 CHARSET_COLUMNS (cs),
547 CHARSET_GRAPHIC (cs),
549 write_c_string (buf, printcharfun);
550 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
551 sprintf (buf, " 0x%x>", cs->header.uid);
552 write_c_string (buf, printcharfun);
555 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
556 mark_charset, print_charset, 0, 0, 0,
557 struct Lisp_Charset);
558 /* Make a new charset. */
561 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
562 unsigned char type, unsigned char columns, unsigned char graphic,
563 Bufbyte final, unsigned char direction, Lisp_Object short_name,
564 Lisp_Object long_name, Lisp_Object doc,
568 struct Lisp_Charset *cs =
569 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
570 XSETCHARSET (obj, cs);
572 CHARSET_ID (cs) = id;
573 CHARSET_NAME (cs) = name;
574 CHARSET_SHORT_NAME (cs) = short_name;
575 CHARSET_LONG_NAME (cs) = long_name;
576 CHARSET_REP_BYTES (cs) = rep_bytes;
577 CHARSET_DIRECTION (cs) = direction;
578 CHARSET_TYPE (cs) = type;
579 CHARSET_COLUMNS (cs) = columns;
580 CHARSET_GRAPHIC (cs) = graphic;
581 CHARSET_FINAL (cs) = final;
582 CHARSET_DOC_STRING (cs) = doc;
583 CHARSET_REGISTRY (cs) = reg;
584 CHARSET_CCL_PROGRAM (cs) = Qnil;
585 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
587 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
588 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
589 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
590 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
594 /* some charsets do not have final characters. This includes
595 ASCII, Control-1, Composite, and the two faux private
597 assert (NILP (charset_by_attributes[type][final][direction]));
598 charset_by_attributes[type][final][direction] = obj;
601 assert (NILP (charset_by_leading_byte[id - 128]));
602 charset_by_leading_byte[id - 128] = obj;
604 /* official leading byte */
605 rep_bytes_by_first_byte[id] = rep_bytes;
607 /* Some charsets are "faux" and don't have names or really exist at
608 all except in the leading-byte table. */
610 Fputhash (name, obj, Vcharset_hash_table);
615 get_unallocated_leading_byte (int dimension)
621 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
624 lb = next_allocated_1_byte_leading_byte++;
628 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
631 lb = next_allocated_2_byte_leading_byte++;
636 ("No more character sets free for this dimension",
637 make_int (dimension));
643 /************************************************************************/
644 /* Basic charset Lisp functions */
645 /************************************************************************/
647 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
648 Return non-nil if OBJECT is a charset.
652 return CHARSETP (object) ? Qt : Qnil;
655 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
656 Retrieve the charset of the given name.
657 If CHARSET-OR-NAME is a charset object, it is simply returned.
658 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
659 nil is returned. Otherwise the associated charset object is returned.
663 if (CHARSETP (charset_or_name))
664 return charset_or_name;
666 CHECK_SYMBOL (charset_or_name);
667 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
670 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
671 Retrieve the charset of the given name.
672 Same as `find-charset' except an error is signalled if there is no such
673 charset instead of returning nil.
677 Lisp_Object charset = Ffind_charset (name);
680 signal_simple_error ("No such charset", name);
684 /* We store the charsets in hash tables with the names as the key and the
685 actual charset object as the value. Occasionally we need to use them
686 in a list format. These routines provide us with that. */
687 struct charset_list_closure
689 Lisp_Object *charset_list;
693 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
694 void *charset_list_closure)
696 /* This function can GC */
697 struct charset_list_closure *chcl =
698 (struct charset_list_closure*) charset_list_closure;
699 Lisp_Object *charset_list = chcl->charset_list;
701 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
705 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
706 Return a list of the names of all defined charsets.
710 Lisp_Object charset_list = Qnil;
712 struct charset_list_closure charset_list_closure;
714 GCPRO1 (charset_list);
715 charset_list_closure.charset_list = &charset_list;
716 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
717 &charset_list_closure);
723 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
724 Return the name of the given charset.
728 return XCHARSET_NAME (Fget_charset (charset));
731 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
732 Define a new character set.
733 This function is for use with Mule support.
734 NAME is a symbol, the name by which the character set is normally referred.
735 DOC-STRING is a string describing the character set.
736 PROPS is a property list, describing the specific nature of the
737 character set. Recognized properties are:
739 'short-name Short version of the charset name (ex: Latin-1)
740 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
741 'registry A regular expression matching the font registry field for
743 'dimension Number of octets used to index a character in this charset.
744 Either 1 or 2. Defaults to 1.
745 'columns Number of columns used to display a character in this charset.
746 Only used in TTY mode. (Under X, the actual width of a
747 character can be derived from the font used to display the
748 characters.) If unspecified, defaults to the dimension
749 (this is almost always the correct value).
750 'chars Number of characters in each dimension (94 or 96).
751 Defaults to 94. Note that if the dimension is 2, the
752 character set thus described is 94x94 or 96x96.
753 'final Final byte of ISO 2022 escape sequence. Must be
754 supplied. Each combination of (DIMENSION, CHARS) defines a
755 separate namespace for final bytes. Note that ISO
756 2022 restricts the final byte to the range
757 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
758 dimension == 2. Note also that final bytes in the range
759 0x30 - 0x3F are reserved for user-defined (not official)
761 'graphic 0 (use left half of font on output) or 1 (use right half
762 of font on output). Defaults to 0. For example, for
763 a font whose registry is ISO8859-1, the left half
764 (octets 0x20 - 0x7F) is the `ascii' character set, while
765 the right half (octets 0xA0 - 0xFF) is the `latin-1'
766 character set. With 'graphic set to 0, the octets
767 will have their high bit cleared; with it set to 1,
768 the octets will have their high bit set.
769 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
771 'ccl-program A compiled CCL program used to convert a character in
772 this charset into an index into the font. This is in
773 addition to the 'graphic property. The CCL program
774 is passed the octets of the character, with the high
775 bit cleared and set depending upon whether the value
776 of the 'graphic property is 0 or 1.
778 (name, doc_string, props))
780 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
781 int direction = CHARSET_LEFT_TO_RIGHT;
783 Lisp_Object registry = Qnil;
785 Lisp_Object rest, keyword, value;
786 Lisp_Object ccl_program = Qnil;
787 Lisp_Object short_name = Qnil, long_name = Qnil;
790 if (!NILP (doc_string))
791 CHECK_STRING (doc_string);
793 charset = Ffind_charset (name);
795 signal_simple_error ("Cannot redefine existing charset", name);
797 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
799 if (EQ (keyword, Qshort_name))
801 CHECK_STRING (value);
805 if (EQ (keyword, Qlong_name))
807 CHECK_STRING (value);
811 else if (EQ (keyword, Qdimension))
814 dimension = XINT (value);
815 if (dimension < 1 || dimension > 2)
816 signal_simple_error ("Invalid value for 'dimension", value);
819 else if (EQ (keyword, Qchars))
822 chars = XINT (value);
823 if (chars != 94 && chars != 96)
824 signal_simple_error ("Invalid value for 'chars", value);
827 else if (EQ (keyword, Qcolumns))
830 columns = XINT (value);
831 if (columns != 1 && columns != 2)
832 signal_simple_error ("Invalid value for 'columns", value);
835 else if (EQ (keyword, Qgraphic))
838 graphic = XINT (value);
839 if (graphic < 0 || graphic > 1)
840 signal_simple_error ("Invalid value for 'graphic", value);
843 else if (EQ (keyword, Qregistry))
845 CHECK_STRING (value);
849 else if (EQ (keyword, Qdirection))
851 if (EQ (value, Ql2r))
852 direction = CHARSET_LEFT_TO_RIGHT;
853 else if (EQ (value, Qr2l))
854 direction = CHARSET_RIGHT_TO_LEFT;
856 signal_simple_error ("Invalid value for 'direction", value);
859 else if (EQ (keyword, Qfinal))
861 CHECK_CHAR_COERCE_INT (value);
862 final = XCHAR (value);
863 if (final < '0' || final > '~')
864 signal_simple_error ("Invalid value for 'final", value);
867 else if (EQ (keyword, Qccl_program))
869 CHECK_VECTOR (value);
874 signal_simple_error ("Unrecognized property", keyword);
878 error ("'final must be specified");
879 if (dimension == 2 && final > 0x5F)
881 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
885 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
887 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
889 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
890 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
892 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
894 id = get_unallocated_leading_byte (dimension);
896 if (NILP (doc_string))
897 doc_string = build_string ("");
900 registry = build_string ("");
902 if (NILP (short_name))
903 XSETSTRING (short_name, XSYMBOL (name)->name);
905 if (NILP (long_name))
906 long_name = doc_string;
910 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
911 final, direction, short_name, long_name, doc_string, registry);
912 if (!NILP (ccl_program))
913 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
917 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
919 Make a charset equivalent to CHARSET but which goes in the opposite direction.
920 NEW-NAME is the name of the new charset. Return the new charset.
924 Lisp_Object new_charset = Qnil;
925 int id, dimension, columns, graphic, final;
927 Lisp_Object registry, doc_string, short_name, long_name;
928 struct Lisp_Charset *cs;
930 charset = Fget_charset (charset);
931 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
932 signal_simple_error ("Charset already has reverse-direction charset",
935 CHECK_SYMBOL (new_name);
936 if (!NILP (Ffind_charset (new_name)))
937 signal_simple_error ("Cannot redefine existing charset", new_name);
939 cs = XCHARSET (charset);
941 type = CHARSET_TYPE (cs);
942 columns = CHARSET_COLUMNS (cs);
943 dimension = CHARSET_DIMENSION (cs);
944 id = get_unallocated_leading_byte (dimension);
946 graphic = CHARSET_GRAPHIC (cs);
947 final = CHARSET_FINAL (cs);
948 direction = CHARSET_RIGHT_TO_LEFT;
949 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
950 direction = CHARSET_LEFT_TO_RIGHT;
951 doc_string = CHARSET_DOC_STRING (cs);
952 short_name = CHARSET_SHORT_NAME (cs);
953 long_name = CHARSET_LONG_NAME (cs);
954 registry = CHARSET_REGISTRY (cs);
956 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
957 graphic, final, direction, short_name, long_name,
958 doc_string, registry);
960 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
961 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
966 /* #### Reverse direction charsets not yet implemented. */
968 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
970 Return the reverse-direction charset parallel to CHARSET, if any.
971 This is the charset with the same properties (in particular, the same
972 dimension, number of characters per dimension, and final byte) as
973 CHARSET but whose characters are displayed in the opposite direction.
977 charset = Fget_charset (charset);
978 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
982 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
983 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
984 If DIRECTION is omitted, both directions will be checked (left-to-right
985 will be returned if character sets exist for both directions).
987 (dimension, chars, final, direction))
989 int dm, ch, fi, di = -1;
991 Lisp_Object obj = Qnil;
993 CHECK_INT (dimension);
994 dm = XINT (dimension);
995 if (dm < 1 || dm > 2)
996 signal_simple_error ("Invalid value for DIMENSION", dimension);
1000 if (ch != 94 && ch != 96)
1001 signal_simple_error ("Invalid value for CHARS", chars);
1003 CHECK_CHAR_COERCE_INT (final);
1005 if (fi < '0' || fi > '~')
1006 signal_simple_error ("Invalid value for FINAL", final);
1008 if (EQ (direction, Ql2r))
1009 di = CHARSET_LEFT_TO_RIGHT;
1010 else if (EQ (direction, Qr2l))
1011 di = CHARSET_RIGHT_TO_LEFT;
1012 else if (!NILP (direction))
1013 signal_simple_error ("Invalid value for DIRECTION", direction);
1015 if (dm == 2 && fi > 0x5F)
1017 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1020 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1022 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1026 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1028 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1031 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1034 return XCHARSET_NAME (obj);
1038 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1039 Return short name of CHARSET.
1043 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1046 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1047 Return long name of CHARSET.
1051 return XCHARSET_LONG_NAME (Fget_charset (charset));
1054 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1055 Return description of CHARSET.
1059 return XCHARSET_DOC_STRING (Fget_charset (charset));
1062 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1063 Return dimension of CHARSET.
1067 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1070 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1071 Return property PROP of CHARSET.
1072 Recognized properties are those listed in `make-charset', as well as
1073 'name and 'doc-string.
1077 struct Lisp_Charset *cs;
1079 charset = Fget_charset (charset);
1080 cs = XCHARSET (charset);
1082 CHECK_SYMBOL (prop);
1083 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1084 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1085 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1086 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1087 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1088 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1089 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1090 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1091 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1092 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1093 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1094 if (EQ (prop, Qdirection))
1095 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1096 if (EQ (prop, Qreverse_direction_charset))
1098 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1102 return XCHARSET_NAME (obj);
1104 signal_simple_error ("Unrecognized charset property name", prop);
1105 return Qnil; /* not reached */
1108 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1109 Return charset identification number of CHARSET.
1113 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1116 /* #### We need to figure out which properties we really want to
1119 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1120 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1122 (charset, ccl_program))
1124 charset = Fget_charset (charset);
1125 CHECK_VECTOR (ccl_program);
1126 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1131 invalidate_charset_font_caches (Lisp_Object charset)
1133 /* Invalidate font cache entries for charset on all devices. */
1134 Lisp_Object devcons, concons, hash_table;
1135 DEVICE_LOOP_NO_BREAK (devcons, concons)
1137 struct device *d = XDEVICE (XCAR (devcons));
1138 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1139 if (!UNBOUNDP (hash_table))
1140 Fclrhash (hash_table);
1144 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1145 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1146 Set the 'registry property of CHARSET to REGISTRY.
1148 (charset, registry))
1150 charset = Fget_charset (charset);
1151 CHECK_STRING (registry);
1152 XCHARSET_REGISTRY (charset) = registry;
1153 invalidate_charset_font_caches (charset);
1154 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1159 /************************************************************************/
1160 /* Lisp primitives for working with characters */
1161 /************************************************************************/
1163 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1164 Make a character from CHARSET and octets ARG1 and ARG2.
1165 ARG2 is required only for characters from two-dimensional charsets.
1166 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1167 character s with caron.
1169 (charset, arg1, arg2))
1171 struct Lisp_Charset *cs;
1173 int lowlim, highlim;
1175 charset = Fget_charset (charset);
1176 cs = XCHARSET (charset);
1178 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1179 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1180 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1181 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1184 /* It is useful (and safe, according to Olivier Galibert) to strip
1185 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1186 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1187 Latin 2 code of the character. */
1188 a1 = XINT (arg1) & 0x7f;
1189 if (a1 < lowlim || a1 > highlim)
1190 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1192 if (CHARSET_DIMENSION (cs) == 1)
1196 ("Charset is of dimension one; second octet must be nil", arg2);
1197 return make_char (MAKE_CHAR (charset, a1, 0));
1201 a2 = XINT (arg2) & 0x7f;
1202 if (a2 < lowlim || a2 > highlim)
1203 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1205 return make_char (MAKE_CHAR (charset, a1, a2));
1208 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1209 Return the character set of char CH.
1213 CHECK_CHAR_COERCE_INT (ch);
1215 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1216 (CHAR_LEADING_BYTE (XCHAR (ch))));
1219 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1220 Return the octet numbered N (should be 0 or 1) of char CH.
1221 N defaults to 0 if omitted.
1225 Lisp_Object charset;
1228 CHECK_CHAR_COERCE_INT (ch);
1235 if (int_n != 0 && int_n != 1)
1236 signal_simple_error ("Octet number must be 0 or 1", n);
1238 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1239 return make_int (int_n == 0 ? c1 : c2);
1243 #ifdef ENABLE_COMPOSITE_CHARS
1244 /************************************************************************/
1245 /* composite character functions */
1246 /************************************************************************/
1249 lookup_composite_char (Bufbyte *str, int len)
1251 Lisp_Object lispstr = make_string (str, len);
1252 Lisp_Object ch = Fgethash (lispstr,
1253 Vcomposite_char_string2char_hash_table,
1259 if (composite_char_row_next >= 128)
1260 signal_simple_error ("No more composite chars available", lispstr);
1261 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1262 composite_char_col_next);
1263 Fputhash (make_char (emch), lispstr,
1264 Vcomposite_char_char2string_hash_table);
1265 Fputhash (lispstr, make_char (emch),
1266 Vcomposite_char_string2char_hash_table);
1267 composite_char_col_next++;
1268 if (composite_char_col_next >= 128)
1270 composite_char_col_next = 32;
1271 composite_char_row_next++;
1280 composite_char_string (Emchar ch)
1282 Lisp_Object str = Fgethash (make_char (ch),
1283 Vcomposite_char_char2string_hash_table,
1285 assert (!UNBOUNDP (str));
1289 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1290 Convert a string into a single composite character.
1291 The character is the result of overstriking all the characters in
1296 CHECK_STRING (string);
1297 return make_char (lookup_composite_char (XSTRING_DATA (string),
1298 XSTRING_LENGTH (string)));
1301 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1302 Return a string of the characters comprising a composite character.
1310 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1311 signal_simple_error ("Must be composite char", ch);
1312 return composite_char_string (emch);
1314 #endif /* ENABLE_COMPOSITE_CHARS */
1317 /************************************************************************/
1318 /* initialization */
1319 /************************************************************************/
1322 syms_of_mule_charset (void)
1324 DEFSUBR (Fcharsetp);
1325 DEFSUBR (Ffind_charset);
1326 DEFSUBR (Fget_charset);
1327 DEFSUBR (Fcharset_list);
1328 DEFSUBR (Fcharset_name);
1329 DEFSUBR (Fmake_charset);
1330 DEFSUBR (Fmake_reverse_direction_charset);
1331 /* DEFSUBR (Freverse_direction_charset); */
1332 DEFSUBR (Fcharset_from_attributes);
1333 DEFSUBR (Fcharset_short_name);
1334 DEFSUBR (Fcharset_long_name);
1335 DEFSUBR (Fcharset_description);
1336 DEFSUBR (Fcharset_dimension);
1337 DEFSUBR (Fcharset_property);
1338 DEFSUBR (Fcharset_id);
1339 DEFSUBR (Fset_charset_ccl_program);
1340 DEFSUBR (Fset_charset_registry);
1342 DEFSUBR (Fmake_char);
1343 DEFSUBR (Fchar_charset);
1344 DEFSUBR (Fchar_octet);
1346 #ifdef ENABLE_COMPOSITE_CHARS
1347 DEFSUBR (Fmake_composite_char);
1348 DEFSUBR (Fcomposite_char_string);
1351 defsymbol (&Qcharsetp, "charsetp");
1352 defsymbol (&Qregistry, "registry");
1353 defsymbol (&Qfinal, "final");
1354 defsymbol (&Qgraphic, "graphic");
1355 defsymbol (&Qdirection, "direction");
1356 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1357 defsymbol (&Qccl_program, "ccl-program");
1358 defsymbol (&Qshort_name, "short-name");
1359 defsymbol (&Qlong_name, "long-name");
1361 defsymbol (&Ql2r, "l2r");
1362 defsymbol (&Qr2l, "r2l");
1364 /* Charsets, compatible with FSF 20.3
1365 Naming convention is Script-Charset[-Edition] */
1366 defsymbol (&Qascii, "ascii");
1367 defsymbol (&Qcontrol_1, "control-1");
1368 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1369 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1370 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1371 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1372 defsymbol (&Qthai_tis620, "thai-tis620");
1373 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1374 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1375 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1376 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1377 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1378 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1379 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1380 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1381 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1382 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1383 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1384 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1385 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1386 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1387 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1388 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1390 defsymbol (&Qcomposite, "composite");
1393 Vutf_2000_version = build_string("0.3 (Imamiya)");
1394 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1395 Version number of UTF-2000.
1399 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1400 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1401 Leading-code of private TYPE9N charset of column-width 1.
1403 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1407 vars_of_mule_charset (void)
1411 /* Table of charsets indexed by leading byte. */
1412 for (i = 0; i < countof (charset_by_leading_byte); i++)
1413 charset_by_leading_byte[i] = Qnil;
1415 /* Table of charsets indexed by type/final-byte/direction. */
1416 for (i = 0; i < countof (charset_by_attributes); i++)
1417 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1418 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1419 charset_by_attributes[i][j][k] = Qnil;
1421 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1422 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1426 complex_vars_of_mule_charset (void)
1428 staticpro (&Vcharset_hash_table);
1429 Vcharset_hash_table =
1430 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1432 /* Predefined character sets. We store them into variables for
1436 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1437 CHARSET_TYPE_94, 1, 0, 'B',
1438 CHARSET_LEFT_TO_RIGHT,
1439 build_string ("ASCII"),
1440 build_string ("ASCII)"),
1441 build_string ("ASCII (ISO646 IRV)"),
1442 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1443 Vcharset_control_1 =
1444 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1445 CHARSET_TYPE_94, 1, 1, 0,
1446 CHARSET_LEFT_TO_RIGHT,
1447 build_string ("C1"),
1448 build_string ("Control characters"),
1449 build_string ("Control characters 128-191"),
1451 Vcharset_latin_iso8859_1 =
1452 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1453 CHARSET_TYPE_96, 1, 1, 'A',
1454 CHARSET_LEFT_TO_RIGHT,
1455 build_string ("Latin-1"),
1456 build_string ("ISO8859-1 (Latin-1)"),
1457 build_string ("ISO8859-1 (Latin-1)"),
1458 build_string ("iso8859-1"));
1459 Vcharset_latin_iso8859_2 =
1460 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1461 CHARSET_TYPE_96, 1, 1, 'B',
1462 CHARSET_LEFT_TO_RIGHT,
1463 build_string ("Latin-2"),
1464 build_string ("ISO8859-2 (Latin-2)"),
1465 build_string ("ISO8859-2 (Latin-2)"),
1466 build_string ("iso8859-2"));
1467 Vcharset_latin_iso8859_3 =
1468 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1469 CHARSET_TYPE_96, 1, 1, 'C',
1470 CHARSET_LEFT_TO_RIGHT,
1471 build_string ("Latin-3"),
1472 build_string ("ISO8859-3 (Latin-3)"),
1473 build_string ("ISO8859-3 (Latin-3)"),
1474 build_string ("iso8859-3"));
1475 Vcharset_latin_iso8859_4 =
1476 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1477 CHARSET_TYPE_96, 1, 1, 'D',
1478 CHARSET_LEFT_TO_RIGHT,
1479 build_string ("Latin-4"),
1480 build_string ("ISO8859-4 (Latin-4)"),
1481 build_string ("ISO8859-4 (Latin-4)"),
1482 build_string ("iso8859-4"));
1483 Vcharset_thai_tis620 =
1484 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1485 CHARSET_TYPE_96, 1, 1, 'T',
1486 CHARSET_LEFT_TO_RIGHT,
1487 build_string ("TIS620"),
1488 build_string ("TIS620 (Thai)"),
1489 build_string ("TIS620.2529 (Thai)"),
1490 build_string ("tis620"));
1491 Vcharset_greek_iso8859_7 =
1492 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1493 CHARSET_TYPE_96, 1, 1, 'F',
1494 CHARSET_LEFT_TO_RIGHT,
1495 build_string ("ISO8859-7"),
1496 build_string ("ISO8859-7 (Greek)"),
1497 build_string ("ISO8859-7 (Greek)"),
1498 build_string ("iso8859-7"));
1499 Vcharset_arabic_iso8859_6 =
1500 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1501 CHARSET_TYPE_96, 1, 1, 'G',
1502 CHARSET_RIGHT_TO_LEFT,
1503 build_string ("ISO8859-6"),
1504 build_string ("ISO8859-6 (Arabic)"),
1505 build_string ("ISO8859-6 (Arabic)"),
1506 build_string ("iso8859-6"));
1507 Vcharset_hebrew_iso8859_8 =
1508 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1509 CHARSET_TYPE_96, 1, 1, 'H',
1510 CHARSET_RIGHT_TO_LEFT,
1511 build_string ("ISO8859-8"),
1512 build_string ("ISO8859-8 (Hebrew)"),
1513 build_string ("ISO8859-8 (Hebrew)"),
1514 build_string ("iso8859-8"));
1515 Vcharset_katakana_jisx0201 =
1516 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1517 CHARSET_TYPE_94, 1, 1, 'I',
1518 CHARSET_LEFT_TO_RIGHT,
1519 build_string ("JISX0201 Kana"),
1520 build_string ("JISX0201.1976 (Japanese Kana)"),
1521 build_string ("JISX0201.1976 Japanese Kana"),
1522 build_string ("jisx0201.1976"));
1523 Vcharset_latin_jisx0201 =
1524 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1525 CHARSET_TYPE_94, 1, 0, 'J',
1526 CHARSET_LEFT_TO_RIGHT,
1527 build_string ("JISX0201 Roman"),
1528 build_string ("JISX0201.1976 (Japanese Roman)"),
1529 build_string ("JISX0201.1976 Japanese Roman"),
1530 build_string ("jisx0201.1976"));
1531 Vcharset_cyrillic_iso8859_5 =
1532 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1533 CHARSET_TYPE_96, 1, 1, 'L',
1534 CHARSET_LEFT_TO_RIGHT,
1535 build_string ("ISO8859-5"),
1536 build_string ("ISO8859-5 (Cyrillic)"),
1537 build_string ("ISO8859-5 (Cyrillic)"),
1538 build_string ("iso8859-5"));
1539 Vcharset_latin_iso8859_9 =
1540 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1541 CHARSET_TYPE_96, 1, 1, 'M',
1542 CHARSET_LEFT_TO_RIGHT,
1543 build_string ("Latin-5"),
1544 build_string ("ISO8859-9 (Latin-5)"),
1545 build_string ("ISO8859-9 (Latin-5)"),
1546 build_string ("iso8859-9"));
1547 Vcharset_japanese_jisx0208_1978 =
1548 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1549 CHARSET_TYPE_94X94, 2, 0, '@',
1550 CHARSET_LEFT_TO_RIGHT,
1551 build_string ("JISX0208.1978"),
1552 build_string ("JISX0208.1978 (Japanese)"),
1554 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1555 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1556 Vcharset_chinese_gb2312 =
1557 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1558 CHARSET_TYPE_94X94, 2, 0, 'A',
1559 CHARSET_LEFT_TO_RIGHT,
1560 build_string ("GB2312"),
1561 build_string ("GB2312)"),
1562 build_string ("GB2312 Chinese simplified"),
1563 build_string ("gb2312"));
1564 Vcharset_japanese_jisx0208 =
1565 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1566 CHARSET_TYPE_94X94, 2, 0, 'B',
1567 CHARSET_LEFT_TO_RIGHT,
1568 build_string ("JISX0208"),
1569 build_string ("JISX0208.1983/1990 (Japanese)"),
1570 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1571 build_string ("jisx0208.19\\(83\\|90\\)"));
1572 Vcharset_korean_ksc5601 =
1573 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1574 CHARSET_TYPE_94X94, 2, 0, 'C',
1575 CHARSET_LEFT_TO_RIGHT,
1576 build_string ("KSC5601"),
1577 build_string ("KSC5601 (Korean"),
1578 build_string ("KSC5601 Korean Hangul and Hanja"),
1579 build_string ("ksc5601"));
1580 Vcharset_japanese_jisx0212 =
1581 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1582 CHARSET_TYPE_94X94, 2, 0, 'D',
1583 CHARSET_LEFT_TO_RIGHT,
1584 build_string ("JISX0212"),
1585 build_string ("JISX0212 (Japanese)"),
1586 build_string ("JISX0212 Japanese Supplement"),
1587 build_string ("jisx0212"));
1589 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1590 Vcharset_chinese_cns11643_1 =
1591 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1592 CHARSET_TYPE_94X94, 2, 0, 'G',
1593 CHARSET_LEFT_TO_RIGHT,
1594 build_string ("CNS11643-1"),
1595 build_string ("CNS11643-1 (Chinese traditional)"),
1597 ("CNS 11643 Plane 1 Chinese traditional"),
1598 build_string (CHINESE_CNS_PLANE_RE("1")));
1599 Vcharset_chinese_cns11643_2 =
1600 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1601 CHARSET_TYPE_94X94, 2, 0, 'H',
1602 CHARSET_LEFT_TO_RIGHT,
1603 build_string ("CNS11643-2"),
1604 build_string ("CNS11643-2 (Chinese traditional)"),
1606 ("CNS 11643 Plane 2 Chinese traditional"),
1607 build_string (CHINESE_CNS_PLANE_RE("2")));
1608 Vcharset_chinese_big5_1 =
1609 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1610 CHARSET_TYPE_94X94, 2, 0, '0',
1611 CHARSET_LEFT_TO_RIGHT,
1612 build_string ("Big5"),
1613 build_string ("Big5 (Level-1)"),
1615 ("Big5 Level-1 Chinese traditional"),
1616 build_string ("big5"));
1617 Vcharset_chinese_big5_2 =
1618 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1619 CHARSET_TYPE_94X94, 2, 0, '1',
1620 CHARSET_LEFT_TO_RIGHT,
1621 build_string ("Big5"),
1622 build_string ("Big5 (Level-2)"),
1624 ("Big5 Level-2 Chinese traditional"),
1625 build_string ("big5"));
1628 #ifdef ENABLE_COMPOSITE_CHARS
1629 /* #### For simplicity, we put composite chars into a 96x96 charset.
1630 This is going to lead to problems because you can run out of
1631 room, esp. as we don't yet recycle numbers. */
1632 Vcharset_composite =
1633 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1634 CHARSET_TYPE_96X96, 2, 0, 0,
1635 CHARSET_LEFT_TO_RIGHT,
1636 build_string ("Composite"),
1637 build_string ("Composite characters"),
1638 build_string ("Composite characters"),
1641 composite_char_row_next = 32;
1642 composite_char_col_next = 32;
1644 Vcomposite_char_string2char_hash_table =
1645 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1646 Vcomposite_char_char2string_hash_table =
1647 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1648 staticpro (&Vcomposite_char_string2char_hash_table);
1649 staticpro (&Vcomposite_char_char2string_hash_table);
1650 #endif /* ENABLE_COMPOSITE_CHARS */