1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof (struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97 const Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 /* Composite characters are characters constructed by overstriking two
154 or more regular characters.
156 1) The old Mule implementation involves storing composite characters
157 in a buffer as a tag followed by all of the actual characters
158 used to make up the composite character. I think this is a bad
159 idea; it greatly complicates code that wants to handle strings
160 one character at a time because it has to deal with the possibility
161 of great big ungainly characters. It's much more reasonable to
162 simply store an index into a table of composite characters.
164 2) The current implementation only allows for 16,384 separate
165 composite characters over the lifetime of the XEmacs process.
166 This could become a potential problem if the user
167 edited lots of different files that use composite characters.
168 Due to FSF bogosity, increasing the number of allowable
169 composite characters under Mule would decrease the number
170 of possible faces that can exist. Mule already has shrunk
171 this to 2048, and further shrinkage would become uncomfortable.
172 No such problems exist in XEmacs.
174 Composite characters could be represented as 0x80 C1 C2 C3,
175 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
176 for slightly under 2^20 (one million) composite characters
177 over the XEmacs process lifetime, and you only need to
178 increase the size of a Mule character from 19 to 21 bits.
179 Or you could use 0x80 C1 C2 C3 C4, allowing for about
180 85 million (slightly over 2^26) composite characters. */
183 /************************************************************************/
184 /* Basic Emchar functions */
185 /************************************************************************/
187 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
188 string in STR. Returns the number of bytes stored.
189 Do not call this directly. Use the macro set_charptr_emchar() instead.
193 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
201 BREAKUP_CHAR (c, charset, c1, c2);
202 lb = CHAR_LEADING_BYTE (c);
203 if (LEADING_BYTE_PRIVATE_P (lb))
204 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
206 if (EQ (charset, Vcharset_control_1))
215 /* Return the first character from a Mule-encoded string in STR,
216 assuming it's non-ASCII. Do not call this directly.
217 Use the macro charptr_emchar() instead. */
220 non_ascii_charptr_emchar (const Bufbyte *str)
222 Bufbyte i0 = *str, i1, i2 = 0;
225 if (i0 == LEADING_BYTE_CONTROL_1)
226 return (Emchar) (*++str - 0x20);
228 if (LEADING_BYTE_PREFIX_P (i0))
233 charset = CHARSET_BY_LEADING_BYTE (i0);
234 if (XCHARSET_DIMENSION (charset) == 2)
237 return MAKE_CHAR (charset, i1, i2);
240 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
241 Do not call this directly. Use the macro valid_char_p() instead. */
244 non_ascii_valid_char_p (Emchar ch)
248 /* Must have only lowest 19 bits set */
252 f1 = CHAR_FIELD1 (ch);
253 f2 = CHAR_FIELD2 (ch);
254 f3 = CHAR_FIELD3 (ch);
260 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
261 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
262 f2 > MAX_CHAR_FIELD2_PRIVATE)
267 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
268 f2 <= MAX_CHAR_FIELD2_PRIVATE))
272 NOTE: This takes advantage of the fact that
273 FIELD2_TO_OFFICIAL_LEADING_BYTE and
274 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
276 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
277 if (EQ (charset, Qnil))
279 return (XCHARSET_CHARS (charset) == 96);
285 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
286 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
287 f1 > MAX_CHAR_FIELD1_PRIVATE)
289 if (f2 < 0x20 || f3 < 0x20)
292 #ifdef ENABLE_COMPOSITE_CHARS
293 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
295 if (UNBOUNDP (Fgethash (make_int (ch),
296 Vcomposite_char_char2string_hash_table,
301 #endif /* ENABLE_COMPOSITE_CHARS */
303 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
304 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
307 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
309 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
314 if (EQ (charset, Qnil))
316 return (XCHARSET_CHARS (charset) == 96);
321 /************************************************************************/
322 /* Basic string functions */
323 /************************************************************************/
325 /* Copy the character pointed to by SRC into DST. Do not call this
326 directly. Use the macro charptr_copy_char() instead.
327 Return the number of bytes copied. */
330 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
332 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
334 for (i = bytes; i; i--, dst++, src++)
340 /************************************************************************/
341 /* streams of Emchars */
342 /************************************************************************/
344 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
345 The functions below are not meant to be called directly; use
346 the macros in insdel.h. */
349 Lstream_get_emchar_1 (Lstream *stream, int ch)
351 Bufbyte str[MAX_EMCHAR_LEN];
352 Bufbyte *strptr = str;
355 str[0] = (Bufbyte) ch;
357 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
359 int c = Lstream_getc (stream);
360 bufpos_checking_assert (c >= 0);
361 *++strptr = (Bufbyte) c;
363 return charptr_emchar (str);
367 Lstream_fput_emchar (Lstream *stream, Emchar ch)
369 Bufbyte str[MAX_EMCHAR_LEN];
370 Bytecount len = set_charptr_emchar (str, ch);
371 return Lstream_write (stream, str, len);
375 Lstream_funget_emchar (Lstream *stream, Emchar ch)
377 Bufbyte str[MAX_EMCHAR_LEN];
378 Bytecount len = set_charptr_emchar (str, ch);
379 Lstream_unread (stream, str, len);
383 /************************************************************************/
385 /************************************************************************/
388 mark_charset (Lisp_Object obj)
390 Lisp_Charset *cs = XCHARSET (obj);
392 mark_object (cs->short_name);
393 mark_object (cs->long_name);
394 mark_object (cs->doc_string);
395 mark_object (cs->registry);
396 mark_object (cs->ccl_program);
401 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
403 Lisp_Charset *cs = XCHARSET (obj);
407 error ("printing unreadable object #<charset %s 0x%x>",
408 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
411 write_c_string ("#<charset ", printcharfun);
412 print_internal (CHARSET_NAME (cs), printcharfun, 0);
413 write_c_string (" ", printcharfun);
414 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
415 write_c_string (" ", printcharfun);
416 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
417 write_c_string (" ", printcharfun);
418 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
419 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
420 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
421 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
422 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
424 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
425 CHARSET_COLUMNS (cs),
426 CHARSET_GRAPHIC (cs),
428 write_c_string (buf, printcharfun);
429 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
430 sprintf (buf, " 0x%x>", cs->header.uid);
431 write_c_string (buf, printcharfun);
434 static const struct lrecord_description charset_description[] = {
435 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
436 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
437 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
438 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
439 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
440 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
441 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
445 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
446 mark_charset, print_charset, 0, 0, 0, charset_description,
449 /* Make a new charset. */
450 /* #### SJT Should generic properties be allowed? */
452 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
453 unsigned char type, unsigned char columns, unsigned char graphic,
454 Bufbyte final, unsigned char direction, Lisp_Object short_name,
455 Lisp_Object long_name, Lisp_Object doc,
459 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
463 XSETCHARSET (obj, cs);
465 CHARSET_ID (cs) = id;
466 CHARSET_NAME (cs) = name;
467 CHARSET_SHORT_NAME (cs) = short_name;
468 CHARSET_LONG_NAME (cs) = long_name;
469 CHARSET_REP_BYTES (cs) = rep_bytes;
470 CHARSET_DIRECTION (cs) = direction;
471 CHARSET_TYPE (cs) = type;
472 CHARSET_COLUMNS (cs) = columns;
473 CHARSET_GRAPHIC (cs) = graphic;
474 CHARSET_FINAL (cs) = final;
475 CHARSET_DOC_STRING (cs) = doc;
476 CHARSET_REGISTRY (cs) = reg;
477 CHARSET_CCL_PROGRAM (cs) = Qnil;
478 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
480 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
481 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
482 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
483 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
487 /* some charsets do not have final characters. This includes
488 ASCII, Control-1, Composite, and the two faux private
490 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
491 chlook->charset_by_attributes[type][final][direction] = obj;
494 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
495 chlook->charset_by_leading_byte[id - 128] = obj;
497 /* Some charsets are "faux" and don't have names or really exist at
498 all except in the leading-byte table. */
500 Fputhash (name, obj, Vcharset_hash_table);
505 get_unallocated_leading_byte (int dimension)
511 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
514 lb = chlook->next_allocated_1_byte_leading_byte++;
518 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
521 lb = chlook->next_allocated_2_byte_leading_byte++;
526 ("No more character sets free for this dimension",
527 make_int (dimension));
533 /************************************************************************/
534 /* Basic charset Lisp functions */
535 /************************************************************************/
537 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
538 Return non-nil if OBJECT is a charset.
542 return CHARSETP (object) ? Qt : Qnil;
545 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
546 Retrieve the charset of the given name.
547 If CHARSET-OR-NAME is a charset object, it is simply returned.
548 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
549 nil is returned. Otherwise the associated charset object is returned.
553 if (CHARSETP (charset_or_name))
554 return charset_or_name;
556 CHECK_SYMBOL (charset_or_name);
557 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
560 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
561 Retrieve the charset of the given name.
562 Same as `find-charset' except an error is signalled if there is no such
563 charset instead of returning nil.
567 Lisp_Object charset = Ffind_charset (name);
570 signal_simple_error ("No such charset", name);
574 /* We store the charsets in hash tables with the names as the key and the
575 actual charset object as the value. Occasionally we need to use them
576 in a list format. These routines provide us with that. */
577 struct charset_list_closure
579 Lisp_Object *charset_list;
583 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
584 void *charset_list_closure)
586 /* This function can GC */
587 struct charset_list_closure *chcl =
588 (struct charset_list_closure*) charset_list_closure;
589 Lisp_Object *charset_list = chcl->charset_list;
591 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
595 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
596 Return a list of the names of all defined charsets.
600 Lisp_Object charset_list = Qnil;
602 struct charset_list_closure charset_list_closure;
604 GCPRO1 (charset_list);
605 charset_list_closure.charset_list = &charset_list;
606 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
607 &charset_list_closure);
613 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
614 Return the name of charset CHARSET.
618 return XCHARSET_NAME (Fget_charset (charset));
621 /* #### SJT Should generic properties be allowed? */
622 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
623 Define a new character set.
624 This function is for use with Mule support.
625 NAME is a symbol, the name by which the character set is normally referred.
626 DOC-STRING is a string describing the character set.
627 PROPS is a property list, describing the specific nature of the
628 character set. Recognized properties are:
630 'short-name Short version of the charset name (ex: Latin-1)
631 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
632 'registry A regular expression matching the font registry field for
634 'dimension Number of octets used to index a character in this charset.
635 Either 1 or 2. Defaults to 1.
636 'columns Number of columns used to display a character in this charset.
637 Only used in TTY mode. (Under X, the actual width of a
638 character can be derived from the font used to display the
639 characters.) If unspecified, defaults to the dimension
640 (this is almost always the correct value).
641 'chars Number of characters in each dimension (94 or 96).
642 Defaults to 94. Note that if the dimension is 2, the
643 character set thus described is 94x94 or 96x96.
644 'final Final byte of ISO 2022 escape sequence. Must be
645 supplied. Each combination of (DIMENSION, CHARS) defines a
646 separate namespace for final bytes. Note that ISO
647 2022 restricts the final byte to the range
648 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
649 dimension == 2. Note also that final bytes in the range
650 0x30 - 0x3F are reserved for user-defined (not official)
652 'graphic 0 (use left half of font on output) or 1 (use right half
653 of font on output). Defaults to 0. For example, for
654 a font whose registry is ISO8859-1, the left half
655 (octets 0x20 - 0x7F) is the `ascii' character set, while
656 the right half (octets 0xA0 - 0xFF) is the `latin-1'
657 character set. With 'graphic set to 0, the octets
658 will have their high bit cleared; with it set to 1,
659 the octets will have their high bit set.
660 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
662 'ccl-program A compiled CCL program used to convert a character in
663 this charset into an index into the font. This is in
664 addition to the 'graphic property. The CCL program
665 is passed the octets of the character, with the high
666 bit cleared and set depending upon whether the value
667 of the 'graphic property is 0 or 1.
669 (name, doc_string, props))
671 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
672 int direction = CHARSET_LEFT_TO_RIGHT;
674 Lisp_Object registry = Qnil;
676 Lisp_Object ccl_program = Qnil;
677 Lisp_Object short_name = Qnil, long_name = Qnil;
680 if (!NILP (doc_string))
681 CHECK_STRING (doc_string);
683 charset = Ffind_charset (name);
685 signal_simple_error ("Cannot redefine existing charset", name);
688 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
690 if (EQ (keyword, Qshort_name))
692 CHECK_STRING (value);
696 if (EQ (keyword, Qlong_name))
698 CHECK_STRING (value);
702 else if (EQ (keyword, Qdimension))
705 dimension = XINT (value);
706 if (dimension < 1 || dimension > 2)
707 signal_simple_error ("Invalid value for 'dimension", value);
710 else if (EQ (keyword, Qchars))
713 chars = XINT (value);
714 if (chars != 94 && chars != 96)
715 signal_simple_error ("Invalid value for 'chars", value);
718 else if (EQ (keyword, Qcolumns))
721 columns = XINT (value);
722 if (columns != 1 && columns != 2)
723 signal_simple_error ("Invalid value for 'columns", value);
726 else if (EQ (keyword, Qgraphic))
729 graphic = XINT (value);
730 if (graphic < 0 || graphic > 1)
731 signal_simple_error ("Invalid value for 'graphic", value);
734 else if (EQ (keyword, Qregistry))
736 CHECK_STRING (value);
740 else if (EQ (keyword, Qdirection))
742 if (EQ (value, Ql2r))
743 direction = CHARSET_LEFT_TO_RIGHT;
744 else if (EQ (value, Qr2l))
745 direction = CHARSET_RIGHT_TO_LEFT;
747 signal_simple_error ("Invalid value for 'direction", value);
750 else if (EQ (keyword, Qfinal))
752 CHECK_CHAR_COERCE_INT (value);
753 final = XCHAR (value);
754 if (final < '0' || final > '~')
755 signal_simple_error ("Invalid value for 'final", value);
758 else if (EQ (keyword, Qccl_program))
760 struct ccl_program test_ccl;
762 if (setup_ccl_program (&test_ccl, value) < 0)
763 signal_simple_error ("Invalid value for 'ccl-program", value);
768 signal_simple_error ("Unrecognized property", keyword);
773 error ("'final must be specified");
774 if (dimension == 2 && final > 0x5F)
776 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
780 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
782 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
784 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
785 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
787 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
789 id = get_unallocated_leading_byte (dimension);
791 if (NILP (doc_string))
792 doc_string = build_string ("");
795 registry = build_string ("");
797 if (NILP (short_name))
798 XSETSTRING (short_name, XSYMBOL (name)->name);
800 if (NILP (long_name))
801 long_name = doc_string;
805 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
806 final, direction, short_name, long_name, doc_string, registry);
807 if (!NILP (ccl_program))
808 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
812 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
814 Make a charset equivalent to CHARSET but which goes in the opposite direction.
815 NEW-NAME is the name of the new charset. Return the new charset.
819 Lisp_Object new_charset = Qnil;
820 int id, dimension, columns, graphic, final;
822 Lisp_Object registry, doc_string, short_name, long_name;
825 charset = Fget_charset (charset);
826 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
827 signal_simple_error ("Charset already has reverse-direction charset",
830 CHECK_SYMBOL (new_name);
831 if (!NILP (Ffind_charset (new_name)))
832 signal_simple_error ("Cannot redefine existing charset", new_name);
834 cs = XCHARSET (charset);
836 type = CHARSET_TYPE (cs);
837 columns = CHARSET_COLUMNS (cs);
838 dimension = CHARSET_DIMENSION (cs);
839 id = get_unallocated_leading_byte (dimension);
841 graphic = CHARSET_GRAPHIC (cs);
842 final = CHARSET_FINAL (cs);
843 direction = CHARSET_RIGHT_TO_LEFT;
844 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
845 direction = CHARSET_LEFT_TO_RIGHT;
846 doc_string = CHARSET_DOC_STRING (cs);
847 short_name = CHARSET_SHORT_NAME (cs);
848 long_name = CHARSET_LONG_NAME (cs);
849 registry = CHARSET_REGISTRY (cs);
851 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
852 graphic, final, direction, short_name, long_name,
853 doc_string, registry);
855 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
856 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
861 /* #### Reverse direction charsets not yet implemented. */
863 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
865 Return the reverse-direction charset parallel to CHARSET, if any.
866 This is the charset with the same properties (in particular, the same
867 dimension, number of characters per dimension, and final byte) as
868 CHARSET but whose characters are displayed in the opposite direction.
872 charset = Fget_charset (charset);
873 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
877 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
878 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
879 If DIRECTION is omitted, both directions will be checked (left-to-right
880 will be returned if character sets exist for both directions).
882 (dimension, chars, final, direction))
884 int dm, ch, fi, di = -1;
886 Lisp_Object obj = Qnil;
888 CHECK_INT (dimension);
889 dm = XINT (dimension);
890 if (dm < 1 || dm > 2)
891 signal_simple_error ("Invalid value for DIMENSION", dimension);
895 if (ch != 94 && ch != 96)
896 signal_simple_error ("Invalid value for CHARS", chars);
898 CHECK_CHAR_COERCE_INT (final);
900 if (fi < '0' || fi > '~')
901 signal_simple_error ("Invalid value for FINAL", final);
903 if (EQ (direction, Ql2r))
904 di = CHARSET_LEFT_TO_RIGHT;
905 else if (EQ (direction, Qr2l))
906 di = CHARSET_RIGHT_TO_LEFT;
907 else if (!NILP (direction))
908 signal_simple_error ("Invalid value for DIRECTION", direction);
910 if (dm == 2 && fi > 0x5F)
912 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
915 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
917 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
921 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
923 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
926 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
929 return XCHARSET_NAME (obj);
933 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
934 Return short name of CHARSET.
938 return XCHARSET_SHORT_NAME (Fget_charset (charset));
941 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
942 Return long name of CHARSET.
946 return XCHARSET_LONG_NAME (Fget_charset (charset));
949 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
950 Return description of CHARSET.
954 return XCHARSET_DOC_STRING (Fget_charset (charset));
957 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
958 Return dimension of CHARSET.
962 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
965 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
966 Return property PROP of CHARSET, a charset object or symbol naming a charset.
967 Recognized properties are those listed in `make-charset', as well as
968 'name and 'doc-string.
974 charset = Fget_charset (charset);
975 cs = XCHARSET (charset);
978 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
979 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
980 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
981 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
982 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
983 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
984 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
985 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
986 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
987 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
988 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
989 if (EQ (prop, Qdirection))
990 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
991 if (EQ (prop, Qreverse_direction_charset))
993 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
994 /* #### Is this translation OK? If so, error checking sufficient? */
995 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
997 signal_simple_error ("Unrecognized charset property name", prop);
998 return Qnil; /* not reached */
1001 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1002 Return charset identification number of CHARSET.
1006 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1009 /* #### We need to figure out which properties we really want to
1012 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1013 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1015 (charset, ccl_program))
1017 struct ccl_program test_ccl;
1019 charset = Fget_charset (charset);
1020 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1021 signal_simple_error ("Invalid ccl-program", ccl_program);
1022 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1027 invalidate_charset_font_caches (Lisp_Object charset)
1029 /* Invalidate font cache entries for charset on all devices. */
1030 Lisp_Object devcons, concons, hash_table;
1031 DEVICE_LOOP_NO_BREAK (devcons, concons)
1033 struct device *d = XDEVICE (XCAR (devcons));
1034 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1035 if (!UNBOUNDP (hash_table))
1036 Fclrhash (hash_table);
1040 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1041 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1042 Set the 'registry property of CHARSET to REGISTRY.
1044 (charset, registry))
1046 charset = Fget_charset (charset);
1047 CHECK_STRING (registry);
1048 XCHARSET_REGISTRY (charset) = registry;
1049 invalidate_charset_font_caches (charset);
1050 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1055 /************************************************************************/
1056 /* Lisp primitives for working with characters */
1057 /************************************************************************/
1059 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1060 Make a character from CHARSET and octets ARG1 and ARG2.
1061 ARG2 is required only for characters from two-dimensional charsets.
1062 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1063 character s with caron.
1065 (charset, arg1, arg2))
1069 int lowlim, highlim;
1071 charset = Fget_charset (charset);
1072 cs = XCHARSET (charset);
1074 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1075 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1076 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1077 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1080 /* It is useful (and safe, according to Olivier Galibert) to strip
1081 the 8th bit off ARG1 and ARG2 because it allows programmers to
1082 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1083 Latin 2 code of the character. */
1084 a1 = XINT (arg1) & 0x7f;
1085 if (a1 < lowlim || a1 > highlim)
1086 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1088 if (CHARSET_DIMENSION (cs) == 1)
1092 ("Charset is of dimension one; second octet must be nil", arg2);
1093 return make_char (MAKE_CHAR (charset, a1, 0));
1097 a2 = XINT (arg2) & 0x7f;
1098 if (a2 < lowlim || a2 > highlim)
1099 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1101 return make_char (MAKE_CHAR (charset, a1, a2));
1104 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1105 Return the character set of CHARACTER.
1109 CHECK_CHAR_COERCE_INT (character);
1111 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1112 (CHAR_LEADING_BYTE (XCHAR (character))));
1115 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1116 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1117 N defaults to 0 if omitted.
1121 Lisp_Object charset;
1124 CHECK_CHAR_COERCE_INT (character);
1126 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
1128 if (NILP (n) || EQ (n, Qzero))
1129 return make_int (octet0);
1130 else if (EQ (n, make_int (1)))
1131 return make_int (octet1);
1133 signal_simple_error ("Octet number must be 0 or 1", n);
1136 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1137 Return list of charset and one or two position-codes of CHARACTER.
1141 /* This function can GC */
1142 struct gcpro gcpro1, gcpro2;
1143 Lisp_Object charset = Qnil;
1144 Lisp_Object rc = Qnil;
1147 GCPRO2 (charset, rc);
1148 CHECK_CHAR_COERCE_INT (character);
1150 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1152 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1154 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1158 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1166 #ifdef ENABLE_COMPOSITE_CHARS
1167 /************************************************************************/
1168 /* composite character functions */
1169 /************************************************************************/
1172 lookup_composite_char (Bufbyte *str, int len)
1174 Lisp_Object lispstr = make_string (str, len);
1175 Lisp_Object ch = Fgethash (lispstr,
1176 Vcomposite_char_string2char_hash_table,
1182 if (composite_char_row_next >= 128)
1183 signal_simple_error ("No more composite chars available", lispstr);
1184 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1185 composite_char_col_next);
1186 Fputhash (make_char (emch), lispstr,
1187 Vcomposite_char_char2string_hash_table);
1188 Fputhash (lispstr, make_char (emch),
1189 Vcomposite_char_string2char_hash_table);
1190 composite_char_col_next++;
1191 if (composite_char_col_next >= 128)
1193 composite_char_col_next = 32;
1194 composite_char_row_next++;
1203 composite_char_string (Emchar ch)
1205 Lisp_Object str = Fgethash (make_char (ch),
1206 Vcomposite_char_char2string_hash_table,
1208 assert (!UNBOUNDP (str));
1212 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1213 Convert a string into a single composite character.
1214 The character is the result of overstriking all the characters in
1219 CHECK_STRING (string);
1220 return make_char (lookup_composite_char (XSTRING_DATA (string),
1221 XSTRING_LENGTH (string)));
1224 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1225 Return a string of the characters comprising a composite character.
1233 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1234 signal_simple_error ("Must be composite char", ch);
1235 return composite_char_string (emch);
1237 #endif /* ENABLE_COMPOSITE_CHARS */
1240 /************************************************************************/
1241 /* initialization */
1242 /************************************************************************/
1245 syms_of_mule_charset (void)
1247 INIT_LRECORD_IMPLEMENTATION (charset);
1249 DEFSUBR (Fcharsetp);
1250 DEFSUBR (Ffind_charset);
1251 DEFSUBR (Fget_charset);
1252 DEFSUBR (Fcharset_list);
1253 DEFSUBR (Fcharset_name);
1254 DEFSUBR (Fmake_charset);
1255 DEFSUBR (Fmake_reverse_direction_charset);
1256 /* DEFSUBR (Freverse_direction_charset); */
1257 DEFSUBR (Fcharset_from_attributes);
1258 DEFSUBR (Fcharset_short_name);
1259 DEFSUBR (Fcharset_long_name);
1260 DEFSUBR (Fcharset_description);
1261 DEFSUBR (Fcharset_dimension);
1262 DEFSUBR (Fcharset_property);
1263 DEFSUBR (Fcharset_id);
1264 DEFSUBR (Fset_charset_ccl_program);
1265 DEFSUBR (Fset_charset_registry);
1267 DEFSUBR (Fmake_char);
1268 DEFSUBR (Fchar_charset);
1269 DEFSUBR (Fchar_octet);
1270 DEFSUBR (Fsplit_char);
1272 #ifdef ENABLE_COMPOSITE_CHARS
1273 DEFSUBR (Fmake_composite_char);
1274 DEFSUBR (Fcomposite_char_string);
1277 defsymbol (&Qcharsetp, "charsetp");
1278 defsymbol (&Qregistry, "registry");
1279 defsymbol (&Qfinal, "final");
1280 defsymbol (&Qgraphic, "graphic");
1281 defsymbol (&Qdirection, "direction");
1282 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1283 defsymbol (&Qshort_name, "short-name");
1284 defsymbol (&Qlong_name, "long-name");
1286 defsymbol (&Ql2r, "l2r");
1287 defsymbol (&Qr2l, "r2l");
1289 /* Charsets, compatible with FSF 20.3
1290 Naming convention is Script-Charset[-Edition] */
1291 defsymbol (&Qascii, "ascii");
1292 defsymbol (&Qcontrol_1, "control-1");
1293 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1294 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1295 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1296 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1297 defsymbol (&Qthai_tis620, "thai-tis620");
1298 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1299 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1300 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1301 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1302 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1303 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1304 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1305 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1306 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1307 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1308 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1309 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1310 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1311 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1312 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1313 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1315 defsymbol (&Qcomposite, "composite");
1319 vars_of_mule_charset (void)
1323 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
1324 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
1326 /* Table of charsets indexed by leading byte. */
1327 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1328 chlook->charset_by_leading_byte[i] = Qnil;
1330 /* Table of charsets indexed by type/final-byte/direction. */
1331 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1332 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1333 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1334 chlook->charset_by_attributes[i][j][k] = Qnil;
1336 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1337 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1341 complex_vars_of_mule_charset (void)
1343 staticpro (&Vcharset_hash_table);
1344 Vcharset_hash_table =
1345 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1347 /* Predefined character sets. We store them into variables for
1350 staticpro (&Vcharset_ascii);
1352 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1353 CHARSET_TYPE_94, 1, 0, 'B',
1354 CHARSET_LEFT_TO_RIGHT,
1355 build_string ("ASCII"),
1356 build_string ("ASCII)"),
1357 build_string ("ASCII (ISO646 IRV)"),
1358 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1359 staticpro (&Vcharset_control_1);
1360 Vcharset_control_1 =
1361 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1362 CHARSET_TYPE_94, 1, 1, 0,
1363 CHARSET_LEFT_TO_RIGHT,
1364 build_string ("C1"),
1365 build_string ("Control characters"),
1366 build_string ("Control characters 128-191"),
1368 staticpro (&Vcharset_latin_iso8859_1);
1369 Vcharset_latin_iso8859_1 =
1370 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1371 CHARSET_TYPE_96, 1, 1, 'A',
1372 CHARSET_LEFT_TO_RIGHT,
1373 build_string ("Latin-1"),
1374 build_string ("ISO8859-1 (Latin-1)"),
1375 build_string ("ISO8859-1 (Latin-1)"),
1376 build_string ("iso8859-1"));
1377 staticpro (&Vcharset_latin_iso8859_2);
1378 Vcharset_latin_iso8859_2 =
1379 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1380 CHARSET_TYPE_96, 1, 1, 'B',
1381 CHARSET_LEFT_TO_RIGHT,
1382 build_string ("Latin-2"),
1383 build_string ("ISO8859-2 (Latin-2)"),
1384 build_string ("ISO8859-2 (Latin-2)"),
1385 build_string ("iso8859-2"));
1386 staticpro (&Vcharset_latin_iso8859_3);
1387 Vcharset_latin_iso8859_3 =
1388 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1389 CHARSET_TYPE_96, 1, 1, 'C',
1390 CHARSET_LEFT_TO_RIGHT,
1391 build_string ("Latin-3"),
1392 build_string ("ISO8859-3 (Latin-3)"),
1393 build_string ("ISO8859-3 (Latin-3)"),
1394 build_string ("iso8859-3"));
1395 staticpro (&Vcharset_latin_iso8859_4);
1396 Vcharset_latin_iso8859_4 =
1397 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1398 CHARSET_TYPE_96, 1, 1, 'D',
1399 CHARSET_LEFT_TO_RIGHT,
1400 build_string ("Latin-4"),
1401 build_string ("ISO8859-4 (Latin-4)"),
1402 build_string ("ISO8859-4 (Latin-4)"),
1403 build_string ("iso8859-4"));
1404 staticpro (&Vcharset_thai_tis620);
1405 Vcharset_thai_tis620 =
1406 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1407 CHARSET_TYPE_96, 1, 1, 'T',
1408 CHARSET_LEFT_TO_RIGHT,
1409 build_string ("TIS620"),
1410 build_string ("TIS620 (Thai)"),
1411 build_string ("TIS620.2529 (Thai)"),
1412 build_string ("tis620"));
1413 staticpro (&Vcharset_greek_iso8859_7);
1414 Vcharset_greek_iso8859_7 =
1415 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1416 CHARSET_TYPE_96, 1, 1, 'F',
1417 CHARSET_LEFT_TO_RIGHT,
1418 build_string ("ISO8859-7"),
1419 build_string ("ISO8859-7 (Greek)"),
1420 build_string ("ISO8859-7 (Greek)"),
1421 build_string ("iso8859-7"));
1422 staticpro (&Vcharset_arabic_iso8859_6);
1423 Vcharset_arabic_iso8859_6 =
1424 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1425 CHARSET_TYPE_96, 1, 1, 'G',
1426 CHARSET_RIGHT_TO_LEFT,
1427 build_string ("ISO8859-6"),
1428 build_string ("ISO8859-6 (Arabic)"),
1429 build_string ("ISO8859-6 (Arabic)"),
1430 build_string ("iso8859-6"));
1431 staticpro (&Vcharset_hebrew_iso8859_8);
1432 Vcharset_hebrew_iso8859_8 =
1433 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1434 CHARSET_TYPE_96, 1, 1, 'H',
1435 CHARSET_RIGHT_TO_LEFT,
1436 build_string ("ISO8859-8"),
1437 build_string ("ISO8859-8 (Hebrew)"),
1438 build_string ("ISO8859-8 (Hebrew)"),
1439 build_string ("iso8859-8"));
1440 staticpro (&Vcharset_katakana_jisx0201);
1441 Vcharset_katakana_jisx0201 =
1442 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1443 CHARSET_TYPE_94, 1, 1, 'I',
1444 CHARSET_LEFT_TO_RIGHT,
1445 build_string ("JISX0201 Kana"),
1446 build_string ("JISX0201.1976 (Japanese Kana)"),
1447 build_string ("JISX0201.1976 Japanese Kana"),
1448 build_string ("jisx0201.1976"));
1449 staticpro (&Vcharset_latin_jisx0201);
1450 Vcharset_latin_jisx0201 =
1451 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1452 CHARSET_TYPE_94, 1, 0, 'J',
1453 CHARSET_LEFT_TO_RIGHT,
1454 build_string ("JISX0201 Roman"),
1455 build_string ("JISX0201.1976 (Japanese Roman)"),
1456 build_string ("JISX0201.1976 Japanese Roman"),
1457 build_string ("jisx0201.1976"));
1458 staticpro (&Vcharset_cyrillic_iso8859_5);
1459 Vcharset_cyrillic_iso8859_5 =
1460 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1461 CHARSET_TYPE_96, 1, 1, 'L',
1462 CHARSET_LEFT_TO_RIGHT,
1463 build_string ("ISO8859-5"),
1464 build_string ("ISO8859-5 (Cyrillic)"),
1465 build_string ("ISO8859-5 (Cyrillic)"),
1466 build_string ("iso8859-5"));
1467 staticpro (&Vcharset_latin_iso8859_9);
1468 Vcharset_latin_iso8859_9 =
1469 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1470 CHARSET_TYPE_96, 1, 1, 'M',
1471 CHARSET_LEFT_TO_RIGHT,
1472 build_string ("Latin-5"),
1473 build_string ("ISO8859-9 (Latin-5)"),
1474 build_string ("ISO8859-9 (Latin-5)"),
1475 build_string ("iso8859-9"));
1476 staticpro (&Vcharset_japanese_jisx0208_1978);
1477 Vcharset_japanese_jisx0208_1978 =
1478 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1479 CHARSET_TYPE_94X94, 2, 0, '@',
1480 CHARSET_LEFT_TO_RIGHT,
1481 build_string ("JISX0208.1978"),
1482 build_string ("JISX0208.1978 (Japanese)"),
1484 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1485 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1486 staticpro (&Vcharset_chinese_gb2312);
1487 Vcharset_chinese_gb2312 =
1488 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1489 CHARSET_TYPE_94X94, 2, 0, 'A',
1490 CHARSET_LEFT_TO_RIGHT,
1491 build_string ("GB2312"),
1492 build_string ("GB2312)"),
1493 build_string ("GB2312 Chinese simplified"),
1494 build_string ("gb2312"));
1495 staticpro (&Vcharset_japanese_jisx0208);
1496 Vcharset_japanese_jisx0208 =
1497 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1498 CHARSET_TYPE_94X94, 2, 0, 'B',
1499 CHARSET_LEFT_TO_RIGHT,
1500 build_string ("JISX0208"),
1501 build_string ("JISX0208.1983/1990 (Japanese)"),
1502 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1503 build_string ("jisx0208.19\\(83\\|90\\)"));
1504 staticpro (&Vcharset_korean_ksc5601);
1505 Vcharset_korean_ksc5601 =
1506 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1507 CHARSET_TYPE_94X94, 2, 0, 'C',
1508 CHARSET_LEFT_TO_RIGHT,
1509 build_string ("KSC5601"),
1510 build_string ("KSC5601 (Korean"),
1511 build_string ("KSC5601 Korean Hangul and Hanja"),
1512 build_string ("ksc5601"));
1513 staticpro (&Vcharset_japanese_jisx0212);
1514 Vcharset_japanese_jisx0212 =
1515 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1516 CHARSET_TYPE_94X94, 2, 0, 'D',
1517 CHARSET_LEFT_TO_RIGHT,
1518 build_string ("JISX0212"),
1519 build_string ("JISX0212 (Japanese)"),
1520 build_string ("JISX0212 Japanese Supplement"),
1521 build_string ("jisx0212"));
1523 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1524 staticpro (&Vcharset_chinese_cns11643_1);
1525 Vcharset_chinese_cns11643_1 =
1526 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1527 CHARSET_TYPE_94X94, 2, 0, 'G',
1528 CHARSET_LEFT_TO_RIGHT,
1529 build_string ("CNS11643-1"),
1530 build_string ("CNS11643-1 (Chinese traditional)"),
1532 ("CNS 11643 Plane 1 Chinese traditional"),
1533 build_string (CHINESE_CNS_PLANE_RE("1")));
1534 staticpro (&Vcharset_chinese_cns11643_2);
1535 Vcharset_chinese_cns11643_2 =
1536 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1537 CHARSET_TYPE_94X94, 2, 0, 'H',
1538 CHARSET_LEFT_TO_RIGHT,
1539 build_string ("CNS11643-2"),
1540 build_string ("CNS11643-2 (Chinese traditional)"),
1542 ("CNS 11643 Plane 2 Chinese traditional"),
1543 build_string (CHINESE_CNS_PLANE_RE("2")));
1544 staticpro (&Vcharset_chinese_big5_1);
1545 Vcharset_chinese_big5_1 =
1546 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1547 CHARSET_TYPE_94X94, 2, 0, '0',
1548 CHARSET_LEFT_TO_RIGHT,
1549 build_string ("Big5"),
1550 build_string ("Big5 (Level-1)"),
1552 ("Big5 Level-1 Chinese traditional"),
1553 build_string ("big5"));
1554 staticpro (&Vcharset_chinese_big5_2);
1555 Vcharset_chinese_big5_2 =
1556 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1557 CHARSET_TYPE_94X94, 2, 0, '1',
1558 CHARSET_LEFT_TO_RIGHT,
1559 build_string ("Big5"),
1560 build_string ("Big5 (Level-2)"),
1562 ("Big5 Level-2 Chinese traditional"),
1563 build_string ("big5"));
1566 #ifdef ENABLE_COMPOSITE_CHARS
1567 /* #### For simplicity, we put composite chars into a 96x96 charset.
1568 This is going to lead to problems because you can run out of
1569 room, esp. as we don't yet recycle numbers. */
1570 staticpro (&Vcharset_composite);
1571 Vcharset_composite =
1572 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1573 CHARSET_TYPE_96X96, 2, 0, 0,
1574 CHARSET_LEFT_TO_RIGHT,
1575 build_string ("Composite"),
1576 build_string ("Composite characters"),
1577 build_string ("Composite characters"),
1580 /* #### not dumped properly */
1581 composite_char_row_next = 32;
1582 composite_char_col_next = 32;
1584 Vcomposite_char_string2char_hash_table =
1585 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1586 Vcomposite_char_char2string_hash_table =
1587 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1588 staticpro (&Vcomposite_char_string2char_hash_table);
1589 staticpro (&Vcomposite_char_char2string_hash_table);
1590 #endif /* ENABLE_COMPOSITE_CHARS */