1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof (struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97 const Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 /* Composite characters are characters constructed by overstriking two
154 or more regular characters.
156 1) The old Mule implementation involves storing composite characters
157 in a buffer as a tag followed by all of the actual characters
158 used to make up the composite character. I think this is a bad
159 idea; it greatly complicates code that wants to handle strings
160 one character at a time because it has to deal with the possibility
161 of great big ungainly characters. It's much more reasonable to
162 simply store an index into a table of composite characters.
164 2) The current implementation only allows for 16,384 separate
165 composite characters over the lifetime of the XEmacs process.
166 This could become a potential problem if the user
167 edited lots of different files that use composite characters.
168 Due to FSF bogosity, increasing the number of allowable
169 composite characters under Mule would decrease the number
170 of possible faces that can exist. Mule already has shrunk
171 this to 2048, and further shrinkage would become uncomfortable.
172 No such problems exist in XEmacs.
174 Composite characters could be represented as 0x80 C1 C2 C3,
175 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
176 for slightly under 2^20 (one million) composite characters
177 over the XEmacs process lifetime, and you only need to
178 increase the size of a Mule character from 19 to 21 bits.
179 Or you could use 0x80 C1 C2 C3 C4, allowing for about
180 85 million (slightly over 2^26) composite characters. */
183 /************************************************************************/
184 /* Basic Emchar functions */
185 /************************************************************************/
187 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
188 string in STR. Returns the number of bytes stored.
189 Do not call this directly. Use the macro set_charptr_emchar() instead.
193 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
201 BREAKUP_CHAR (c, charset, c1, c2);
202 lb = CHAR_LEADING_BYTE (c);
203 if (LEADING_BYTE_PRIVATE_P (lb))
204 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
206 if (EQ (charset, Vcharset_control_1))
215 /* Return the first character from a Mule-encoded string in STR,
216 assuming it's non-ASCII. Do not call this directly.
217 Use the macro charptr_emchar() instead. */
220 non_ascii_charptr_emchar (const Bufbyte *str)
222 Bufbyte i0 = *str, i1, i2 = 0;
225 if (i0 == LEADING_BYTE_CONTROL_1)
226 return (Emchar) (*++str - 0x20);
228 if (LEADING_BYTE_PREFIX_P (i0))
233 charset = CHARSET_BY_LEADING_BYTE (i0);
234 if (XCHARSET_DIMENSION (charset) == 2)
237 return MAKE_CHAR (charset, i1, i2);
240 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
241 Do not call this directly. Use the macro valid_char_p() instead. */
244 non_ascii_valid_char_p (Emchar ch)
248 /* Must have only lowest 19 bits set */
252 f1 = CHAR_FIELD1 (ch);
253 f2 = CHAR_FIELD2 (ch);
254 f3 = CHAR_FIELD3 (ch);
260 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
261 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
262 f2 > MAX_CHAR_FIELD2_PRIVATE)
267 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
268 f2 <= MAX_CHAR_FIELD2_PRIVATE))
272 NOTE: This takes advantage of the fact that
273 FIELD2_TO_OFFICIAL_LEADING_BYTE and
274 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
276 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
277 if (EQ (charset, Qnil))
279 return (XCHARSET_CHARS (charset) == 96);
285 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
286 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
287 f1 > MAX_CHAR_FIELD1_PRIVATE)
289 if (f2 < 0x20 || f3 < 0x20)
292 #ifdef ENABLE_COMPOSITE_CHARS
293 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
295 if (UNBOUNDP (Fgethash (make_int (ch),
296 Vcomposite_char_char2string_hash_table,
301 #endif /* ENABLE_COMPOSITE_CHARS */
303 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
304 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
307 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
309 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
314 if (EQ (charset, Qnil))
316 return (XCHARSET_CHARS (charset) == 96);
321 /************************************************************************/
322 /* Basic string functions */
323 /************************************************************************/
325 /* Copy the character pointed to by PTR into STR, assuming it's
326 non-ASCII. Do not call this directly. Use the macro
327 charptr_copy_char() instead. */
330 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
332 Bufbyte *strptr = str;
334 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
336 /* Notice fallthrough. */
337 case 4: *++strptr = *ptr++;
338 case 3: *++strptr = *ptr++;
339 case 2: *++strptr = *ptr;
344 return strptr + 1 - str;
348 /************************************************************************/
349 /* streams of Emchars */
350 /************************************************************************/
352 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
353 The functions below are not meant to be called directly; use
354 the macros in insdel.h. */
357 Lstream_get_emchar_1 (Lstream *stream, int ch)
359 Bufbyte str[MAX_EMCHAR_LEN];
360 Bufbyte *strptr = str;
362 str[0] = (Bufbyte) ch;
363 switch (REP_BYTES_BY_FIRST_BYTE (ch))
365 /* Notice fallthrough. */
367 ch = Lstream_getc (stream);
369 *++strptr = (Bufbyte) ch;
371 ch = Lstream_getc (stream);
373 *++strptr = (Bufbyte) ch;
375 ch = Lstream_getc (stream);
377 *++strptr = (Bufbyte) ch;
382 return charptr_emchar (str);
386 Lstream_fput_emchar (Lstream *stream, Emchar ch)
388 Bufbyte str[MAX_EMCHAR_LEN];
389 Bytecount len = set_charptr_emchar (str, ch);
390 return Lstream_write (stream, str, len);
394 Lstream_funget_emchar (Lstream *stream, Emchar ch)
396 Bufbyte str[MAX_EMCHAR_LEN];
397 Bytecount len = set_charptr_emchar (str, ch);
398 Lstream_unread (stream, str, len);
402 /************************************************************************/
404 /************************************************************************/
407 mark_charset (Lisp_Object obj)
409 Lisp_Charset *cs = XCHARSET (obj);
411 mark_object (cs->short_name);
412 mark_object (cs->long_name);
413 mark_object (cs->doc_string);
414 mark_object (cs->registry);
415 mark_object (cs->ccl_program);
420 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
422 Lisp_Charset *cs = XCHARSET (obj);
426 error ("printing unreadable object #<charset %s 0x%x>",
427 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
430 write_c_string ("#<charset ", printcharfun);
431 print_internal (CHARSET_NAME (cs), printcharfun, 0);
432 write_c_string (" ", printcharfun);
433 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
434 write_c_string (" ", printcharfun);
435 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
436 write_c_string (" ", printcharfun);
437 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
438 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
439 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
440 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
441 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
443 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
444 CHARSET_COLUMNS (cs),
445 CHARSET_GRAPHIC (cs),
447 write_c_string (buf, printcharfun);
448 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
449 sprintf (buf, " 0x%x>", cs->header.uid);
450 write_c_string (buf, printcharfun);
453 static const struct lrecord_description charset_description[] = {
454 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
455 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
456 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
457 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
458 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
459 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
460 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
464 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
465 mark_charset, print_charset, 0, 0, 0, charset_description,
467 /* Make a new charset. */
470 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
471 unsigned char type, unsigned char columns, unsigned char graphic,
472 Bufbyte final, unsigned char direction, Lisp_Object short_name,
473 Lisp_Object long_name, Lisp_Object doc,
477 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
481 XSETCHARSET (obj, cs);
483 CHARSET_ID (cs) = id;
484 CHARSET_NAME (cs) = name;
485 CHARSET_SHORT_NAME (cs) = short_name;
486 CHARSET_LONG_NAME (cs) = long_name;
487 CHARSET_REP_BYTES (cs) = rep_bytes;
488 CHARSET_DIRECTION (cs) = direction;
489 CHARSET_TYPE (cs) = type;
490 CHARSET_COLUMNS (cs) = columns;
491 CHARSET_GRAPHIC (cs) = graphic;
492 CHARSET_FINAL (cs) = final;
493 CHARSET_DOC_STRING (cs) = doc;
494 CHARSET_REGISTRY (cs) = reg;
495 CHARSET_CCL_PROGRAM (cs) = Qnil;
496 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
498 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
499 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
500 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
501 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
505 /* some charsets do not have final characters. This includes
506 ASCII, Control-1, Composite, and the two faux private
508 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
509 chlook->charset_by_attributes[type][final][direction] = obj;
512 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
513 chlook->charset_by_leading_byte[id - 128] = obj;
515 /* Some charsets are "faux" and don't have names or really exist at
516 all except in the leading-byte table. */
518 Fputhash (name, obj, Vcharset_hash_table);
523 get_unallocated_leading_byte (int dimension)
529 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
532 lb = chlook->next_allocated_1_byte_leading_byte++;
536 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
539 lb = chlook->next_allocated_2_byte_leading_byte++;
544 ("No more character sets free for this dimension",
545 make_int (dimension));
551 /************************************************************************/
552 /* Basic charset Lisp functions */
553 /************************************************************************/
555 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
556 Return non-nil if OBJECT is a charset.
560 return CHARSETP (object) ? Qt : Qnil;
563 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
564 Retrieve the charset of the given name.
565 If CHARSET-OR-NAME is a charset object, it is simply returned.
566 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
567 nil is returned. Otherwise the associated charset object is returned.
571 if (CHARSETP (charset_or_name))
572 return charset_or_name;
574 CHECK_SYMBOL (charset_or_name);
575 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
578 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
579 Retrieve the charset of the given name.
580 Same as `find-charset' except an error is signalled if there is no such
581 charset instead of returning nil.
585 Lisp_Object charset = Ffind_charset (name);
588 signal_simple_error ("No such charset", name);
592 /* We store the charsets in hash tables with the names as the key and the
593 actual charset object as the value. Occasionally we need to use them
594 in a list format. These routines provide us with that. */
595 struct charset_list_closure
597 Lisp_Object *charset_list;
601 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
602 void *charset_list_closure)
604 /* This function can GC */
605 struct charset_list_closure *chcl =
606 (struct charset_list_closure*) charset_list_closure;
607 Lisp_Object *charset_list = chcl->charset_list;
609 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
613 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
614 Return a list of the names of all defined charsets.
618 Lisp_Object charset_list = Qnil;
620 struct charset_list_closure charset_list_closure;
622 GCPRO1 (charset_list);
623 charset_list_closure.charset_list = &charset_list;
624 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
625 &charset_list_closure);
631 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
632 Return the name of the given charset.
636 return XCHARSET_NAME (Fget_charset (charset));
639 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
640 Define a new character set.
641 This function is for use with Mule support.
642 NAME is a symbol, the name by which the character set is normally referred.
643 DOC-STRING is a string describing the character set.
644 PROPS is a property list, describing the specific nature of the
645 character set. Recognized properties are:
647 'short-name Short version of the charset name (ex: Latin-1)
648 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
649 'registry A regular expression matching the font registry field for
651 'dimension Number of octets used to index a character in this charset.
652 Either 1 or 2. Defaults to 1.
653 'columns Number of columns used to display a character in this charset.
654 Only used in TTY mode. (Under X, the actual width of a
655 character can be derived from the font used to display the
656 characters.) If unspecified, defaults to the dimension
657 (this is almost always the correct value).
658 'chars Number of characters in each dimension (94 or 96).
659 Defaults to 94. Note that if the dimension is 2, the
660 character set thus described is 94x94 or 96x96.
661 'final Final byte of ISO 2022 escape sequence. Must be
662 supplied. Each combination of (DIMENSION, CHARS) defines a
663 separate namespace for final bytes. Note that ISO
664 2022 restricts the final byte to the range
665 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
666 dimension == 2. Note also that final bytes in the range
667 0x30 - 0x3F are reserved for user-defined (not official)
669 'graphic 0 (use left half of font on output) or 1 (use right half
670 of font on output). Defaults to 0. For example, for
671 a font whose registry is ISO8859-1, the left half
672 (octets 0x20 - 0x7F) is the `ascii' character set, while
673 the right half (octets 0xA0 - 0xFF) is the `latin-1'
674 character set. With 'graphic set to 0, the octets
675 will have their high bit cleared; with it set to 1,
676 the octets will have their high bit set.
677 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
679 'ccl-program A compiled CCL program used to convert a character in
680 this charset into an index into the font. This is in
681 addition to the 'graphic property. The CCL program
682 is passed the octets of the character, with the high
683 bit cleared and set depending upon whether the value
684 of the 'graphic property is 0 or 1.
686 (name, doc_string, props))
688 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
689 int direction = CHARSET_LEFT_TO_RIGHT;
691 Lisp_Object registry = Qnil;
693 Lisp_Object rest, keyword, value;
694 Lisp_Object ccl_program = Qnil;
695 Lisp_Object short_name = Qnil, long_name = Qnil;
698 if (!NILP (doc_string))
699 CHECK_STRING (doc_string);
701 charset = Ffind_charset (name);
703 signal_simple_error ("Cannot redefine existing charset", name);
705 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
707 if (EQ (keyword, Qshort_name))
709 CHECK_STRING (value);
713 if (EQ (keyword, Qlong_name))
715 CHECK_STRING (value);
719 else if (EQ (keyword, Qdimension))
722 dimension = XINT (value);
723 if (dimension < 1 || dimension > 2)
724 signal_simple_error ("Invalid value for 'dimension", value);
727 else if (EQ (keyword, Qchars))
730 chars = XINT (value);
731 if (chars != 94 && chars != 96)
732 signal_simple_error ("Invalid value for 'chars", value);
735 else if (EQ (keyword, Qcolumns))
738 columns = XINT (value);
739 if (columns != 1 && columns != 2)
740 signal_simple_error ("Invalid value for 'columns", value);
743 else if (EQ (keyword, Qgraphic))
746 graphic = XINT (value);
747 if (graphic < 0 || graphic > 1)
748 signal_simple_error ("Invalid value for 'graphic", value);
751 else if (EQ (keyword, Qregistry))
753 CHECK_STRING (value);
757 else if (EQ (keyword, Qdirection))
759 if (EQ (value, Ql2r))
760 direction = CHARSET_LEFT_TO_RIGHT;
761 else if (EQ (value, Qr2l))
762 direction = CHARSET_RIGHT_TO_LEFT;
764 signal_simple_error ("Invalid value for 'direction", value);
767 else if (EQ (keyword, Qfinal))
769 CHECK_CHAR_COERCE_INT (value);
770 final = XCHAR (value);
771 if (final < '0' || final > '~')
772 signal_simple_error ("Invalid value for 'final", value);
775 else if (EQ (keyword, Qccl_program))
777 CHECK_VECTOR (value);
782 signal_simple_error ("Unrecognized property", keyword);
786 error ("'final must be specified");
787 if (dimension == 2 && final > 0x5F)
789 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
793 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
795 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
797 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
798 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
800 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
802 id = get_unallocated_leading_byte (dimension);
804 if (NILP (doc_string))
805 doc_string = build_string ("");
808 registry = build_string ("");
810 if (NILP (short_name))
811 XSETSTRING (short_name, XSYMBOL (name)->name);
813 if (NILP (long_name))
814 long_name = doc_string;
818 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
819 final, direction, short_name, long_name, doc_string, registry);
820 if (!NILP (ccl_program))
821 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
825 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
827 Make a charset equivalent to CHARSET but which goes in the opposite direction.
828 NEW-NAME is the name of the new charset. Return the new charset.
832 Lisp_Object new_charset = Qnil;
833 int id, dimension, columns, graphic, final;
835 Lisp_Object registry, doc_string, short_name, long_name;
838 charset = Fget_charset (charset);
839 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
840 signal_simple_error ("Charset already has reverse-direction charset",
843 CHECK_SYMBOL (new_name);
844 if (!NILP (Ffind_charset (new_name)))
845 signal_simple_error ("Cannot redefine existing charset", new_name);
847 cs = XCHARSET (charset);
849 type = CHARSET_TYPE (cs);
850 columns = CHARSET_COLUMNS (cs);
851 dimension = CHARSET_DIMENSION (cs);
852 id = get_unallocated_leading_byte (dimension);
854 graphic = CHARSET_GRAPHIC (cs);
855 final = CHARSET_FINAL (cs);
856 direction = CHARSET_RIGHT_TO_LEFT;
857 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
858 direction = CHARSET_LEFT_TO_RIGHT;
859 doc_string = CHARSET_DOC_STRING (cs);
860 short_name = CHARSET_SHORT_NAME (cs);
861 long_name = CHARSET_LONG_NAME (cs);
862 registry = CHARSET_REGISTRY (cs);
864 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
865 graphic, final, direction, short_name, long_name,
866 doc_string, registry);
868 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
869 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
874 /* #### Reverse direction charsets not yet implemented. */
876 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
878 Return the reverse-direction charset parallel to CHARSET, if any.
879 This is the charset with the same properties (in particular, the same
880 dimension, number of characters per dimension, and final byte) as
881 CHARSET but whose characters are displayed in the opposite direction.
885 charset = Fget_charset (charset);
886 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
890 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
891 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
892 If DIRECTION is omitted, both directions will be checked (left-to-right
893 will be returned if character sets exist for both directions).
895 (dimension, chars, final, direction))
897 int dm, ch, fi, di = -1;
899 Lisp_Object obj = Qnil;
901 CHECK_INT (dimension);
902 dm = XINT (dimension);
903 if (dm < 1 || dm > 2)
904 signal_simple_error ("Invalid value for DIMENSION", dimension);
908 if (ch != 94 && ch != 96)
909 signal_simple_error ("Invalid value for CHARS", chars);
911 CHECK_CHAR_COERCE_INT (final);
913 if (fi < '0' || fi > '~')
914 signal_simple_error ("Invalid value for FINAL", final);
916 if (EQ (direction, Ql2r))
917 di = CHARSET_LEFT_TO_RIGHT;
918 else if (EQ (direction, Qr2l))
919 di = CHARSET_RIGHT_TO_LEFT;
920 else if (!NILP (direction))
921 signal_simple_error ("Invalid value for DIRECTION", direction);
923 if (dm == 2 && fi > 0x5F)
925 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
928 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
930 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
934 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
936 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
939 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
942 return XCHARSET_NAME (obj);
946 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
947 Return short name of CHARSET.
951 return XCHARSET_SHORT_NAME (Fget_charset (charset));
954 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
955 Return long name of CHARSET.
959 return XCHARSET_LONG_NAME (Fget_charset (charset));
962 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
963 Return description of CHARSET.
967 return XCHARSET_DOC_STRING (Fget_charset (charset));
970 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
971 Return dimension of CHARSET.
975 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
978 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
979 Return property PROP of CHARSET.
980 Recognized properties are those listed in `make-charset', as well as
981 'name and 'doc-string.
987 charset = Fget_charset (charset);
988 cs = XCHARSET (charset);
991 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
992 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
993 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
994 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
995 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
996 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
997 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
998 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
999 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1000 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1001 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1002 if (EQ (prop, Qdirection))
1003 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1004 if (EQ (prop, Qreverse_direction_charset))
1006 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1010 return XCHARSET_NAME (obj);
1012 signal_simple_error ("Unrecognized charset property name", prop);
1013 return Qnil; /* not reached */
1016 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1017 Return charset identification number of CHARSET.
1021 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1024 /* #### We need to figure out which properties we really want to
1027 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1028 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1030 (charset, ccl_program))
1032 charset = Fget_charset (charset);
1033 CHECK_VECTOR (ccl_program);
1034 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1039 invalidate_charset_font_caches (Lisp_Object charset)
1041 /* Invalidate font cache entries for charset on all devices. */
1042 Lisp_Object devcons, concons, hash_table;
1043 DEVICE_LOOP_NO_BREAK (devcons, concons)
1045 struct device *d = XDEVICE (XCAR (devcons));
1046 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1047 if (!UNBOUNDP (hash_table))
1048 Fclrhash (hash_table);
1052 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1053 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1054 Set the 'registry property of CHARSET to REGISTRY.
1056 (charset, registry))
1058 charset = Fget_charset (charset);
1059 CHECK_STRING (registry);
1060 XCHARSET_REGISTRY (charset) = registry;
1061 invalidate_charset_font_caches (charset);
1062 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1067 /************************************************************************/
1068 /* Lisp primitives for working with characters */
1069 /************************************************************************/
1071 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1072 Make a character from CHARSET and octets ARG1 and ARG2.
1073 ARG2 is required only for characters from two-dimensional charsets.
1074 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1075 character s with caron.
1077 (charset, arg1, arg2))
1081 int lowlim, highlim;
1083 charset = Fget_charset (charset);
1084 cs = XCHARSET (charset);
1086 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1087 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1088 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1089 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1092 /* It is useful (and safe, according to Olivier Galibert) to strip
1093 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1094 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1095 Latin 2 code of the character. */
1096 a1 = XINT (arg1) & 0x7f;
1097 if (a1 < lowlim || a1 > highlim)
1098 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1100 if (CHARSET_DIMENSION (cs) == 1)
1104 ("Charset is of dimension one; second octet must be nil", arg2);
1105 return make_char (MAKE_CHAR (charset, a1, 0));
1109 a2 = XINT (arg2) & 0x7f;
1110 if (a2 < lowlim || a2 > highlim)
1111 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1113 return make_char (MAKE_CHAR (charset, a1, a2));
1116 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1117 Return the character set of char CH.
1121 CHECK_CHAR_COERCE_INT (ch);
1123 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1124 (CHAR_LEADING_BYTE (XCHAR (ch))));
1127 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1128 Return the octet numbered N (should be 0 or 1) of char CH.
1129 N defaults to 0 if omitted.
1133 Lisp_Object charset;
1136 CHECK_CHAR_COERCE_INT (ch);
1138 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
1140 if (NILP (n) || EQ (n, Qzero))
1141 return make_int (octet0);
1142 else if (EQ (n, make_int (1)))
1143 return make_int (octet1);
1145 signal_simple_error ("Octet number must be 0 or 1", n);
1148 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1149 Return list of charset and one or two position-codes of CHAR.
1153 /* This function can GC */
1154 struct gcpro gcpro1, gcpro2;
1155 Lisp_Object charset = Qnil;
1156 Lisp_Object rc = Qnil;
1159 GCPRO2 (charset, rc);
1160 CHECK_CHAR_COERCE_INT (character);
1162 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1164 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1166 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1170 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1178 #ifdef ENABLE_COMPOSITE_CHARS
1179 /************************************************************************/
1180 /* composite character functions */
1181 /************************************************************************/
1184 lookup_composite_char (Bufbyte *str, int len)
1186 Lisp_Object lispstr = make_string (str, len);
1187 Lisp_Object ch = Fgethash (lispstr,
1188 Vcomposite_char_string2char_hash_table,
1194 if (composite_char_row_next >= 128)
1195 signal_simple_error ("No more composite chars available", lispstr);
1196 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1197 composite_char_col_next);
1198 Fputhash (make_char (emch), lispstr,
1199 Vcomposite_char_char2string_hash_table);
1200 Fputhash (lispstr, make_char (emch),
1201 Vcomposite_char_string2char_hash_table);
1202 composite_char_col_next++;
1203 if (composite_char_col_next >= 128)
1205 composite_char_col_next = 32;
1206 composite_char_row_next++;
1215 composite_char_string (Emchar ch)
1217 Lisp_Object str = Fgethash (make_char (ch),
1218 Vcomposite_char_char2string_hash_table,
1220 assert (!UNBOUNDP (str));
1224 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1225 Convert a string into a single composite character.
1226 The character is the result of overstriking all the characters in
1231 CHECK_STRING (string);
1232 return make_char (lookup_composite_char (XSTRING_DATA (string),
1233 XSTRING_LENGTH (string)));
1236 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1237 Return a string of the characters comprising a composite character.
1245 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1246 signal_simple_error ("Must be composite char", ch);
1247 return composite_char_string (emch);
1249 #endif /* ENABLE_COMPOSITE_CHARS */
1252 /************************************************************************/
1253 /* initialization */
1254 /************************************************************************/
1257 syms_of_mule_charset (void)
1259 INIT_LRECORD_IMPLEMENTATION (charset);
1261 DEFSUBR (Fcharsetp);
1262 DEFSUBR (Ffind_charset);
1263 DEFSUBR (Fget_charset);
1264 DEFSUBR (Fcharset_list);
1265 DEFSUBR (Fcharset_name);
1266 DEFSUBR (Fmake_charset);
1267 DEFSUBR (Fmake_reverse_direction_charset);
1268 /* DEFSUBR (Freverse_direction_charset); */
1269 DEFSUBR (Fcharset_from_attributes);
1270 DEFSUBR (Fcharset_short_name);
1271 DEFSUBR (Fcharset_long_name);
1272 DEFSUBR (Fcharset_description);
1273 DEFSUBR (Fcharset_dimension);
1274 DEFSUBR (Fcharset_property);
1275 DEFSUBR (Fcharset_id);
1276 DEFSUBR (Fset_charset_ccl_program);
1277 DEFSUBR (Fset_charset_registry);
1279 DEFSUBR (Fmake_char);
1280 DEFSUBR (Fchar_charset);
1281 DEFSUBR (Fchar_octet);
1282 DEFSUBR (Fsplit_char);
1284 #ifdef ENABLE_COMPOSITE_CHARS
1285 DEFSUBR (Fmake_composite_char);
1286 DEFSUBR (Fcomposite_char_string);
1289 defsymbol (&Qcharsetp, "charsetp");
1290 defsymbol (&Qregistry, "registry");
1291 defsymbol (&Qfinal, "final");
1292 defsymbol (&Qgraphic, "graphic");
1293 defsymbol (&Qdirection, "direction");
1294 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1295 defsymbol (&Qshort_name, "short-name");
1296 defsymbol (&Qlong_name, "long-name");
1298 defsymbol (&Ql2r, "l2r");
1299 defsymbol (&Qr2l, "r2l");
1301 /* Charsets, compatible with FSF 20.3
1302 Naming convention is Script-Charset[-Edition] */
1303 defsymbol (&Qascii, "ascii");
1304 defsymbol (&Qcontrol_1, "control-1");
1305 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1306 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1307 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1308 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1309 defsymbol (&Qthai_tis620, "thai-tis620");
1310 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1311 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1312 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1313 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1314 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1315 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1316 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1317 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1318 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1319 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1320 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1321 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1322 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1323 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1324 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1325 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1327 defsymbol (&Qcomposite, "composite");
1331 vars_of_mule_charset (void)
1335 chlook = xnew (struct charset_lookup);
1336 dumpstruct (&chlook, &charset_lookup_description);
1338 /* Table of charsets indexed by leading byte. */
1339 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1340 chlook->charset_by_leading_byte[i] = Qnil;
1342 /* Table of charsets indexed by type/final-byte/direction. */
1343 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1344 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1345 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1346 chlook->charset_by_attributes[i][j][k] = Qnil;
1348 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1349 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1353 complex_vars_of_mule_charset (void)
1355 staticpro (&Vcharset_hash_table);
1356 Vcharset_hash_table =
1357 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1359 /* Predefined character sets. We store them into variables for
1362 staticpro (&Vcharset_ascii);
1364 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1365 CHARSET_TYPE_94, 1, 0, 'B',
1366 CHARSET_LEFT_TO_RIGHT,
1367 build_string ("ASCII"),
1368 build_string ("ASCII)"),
1369 build_string ("ASCII (ISO646 IRV)"),
1370 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1371 staticpro (&Vcharset_control_1);
1372 Vcharset_control_1 =
1373 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1374 CHARSET_TYPE_94, 1, 1, 0,
1375 CHARSET_LEFT_TO_RIGHT,
1376 build_string ("C1"),
1377 build_string ("Control characters"),
1378 build_string ("Control characters 128-191"),
1380 staticpro (&Vcharset_latin_iso8859_1);
1381 Vcharset_latin_iso8859_1 =
1382 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1383 CHARSET_TYPE_96, 1, 1, 'A',
1384 CHARSET_LEFT_TO_RIGHT,
1385 build_string ("Latin-1"),
1386 build_string ("ISO8859-1 (Latin-1)"),
1387 build_string ("ISO8859-1 (Latin-1)"),
1388 build_string ("iso8859-1"));
1389 staticpro (&Vcharset_latin_iso8859_2);
1390 Vcharset_latin_iso8859_2 =
1391 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1392 CHARSET_TYPE_96, 1, 1, 'B',
1393 CHARSET_LEFT_TO_RIGHT,
1394 build_string ("Latin-2"),
1395 build_string ("ISO8859-2 (Latin-2)"),
1396 build_string ("ISO8859-2 (Latin-2)"),
1397 build_string ("iso8859-2"));
1398 staticpro (&Vcharset_latin_iso8859_3);
1399 Vcharset_latin_iso8859_3 =
1400 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1401 CHARSET_TYPE_96, 1, 1, 'C',
1402 CHARSET_LEFT_TO_RIGHT,
1403 build_string ("Latin-3"),
1404 build_string ("ISO8859-3 (Latin-3)"),
1405 build_string ("ISO8859-3 (Latin-3)"),
1406 build_string ("iso8859-3"));
1407 staticpro (&Vcharset_latin_iso8859_4);
1408 Vcharset_latin_iso8859_4 =
1409 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1410 CHARSET_TYPE_96, 1, 1, 'D',
1411 CHARSET_LEFT_TO_RIGHT,
1412 build_string ("Latin-4"),
1413 build_string ("ISO8859-4 (Latin-4)"),
1414 build_string ("ISO8859-4 (Latin-4)"),
1415 build_string ("iso8859-4"));
1416 staticpro (&Vcharset_thai_tis620);
1417 Vcharset_thai_tis620 =
1418 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1419 CHARSET_TYPE_96, 1, 1, 'T',
1420 CHARSET_LEFT_TO_RIGHT,
1421 build_string ("TIS620"),
1422 build_string ("TIS620 (Thai)"),
1423 build_string ("TIS620.2529 (Thai)"),
1424 build_string ("tis620"));
1425 staticpro (&Vcharset_greek_iso8859_7);
1426 Vcharset_greek_iso8859_7 =
1427 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1428 CHARSET_TYPE_96, 1, 1, 'F',
1429 CHARSET_LEFT_TO_RIGHT,
1430 build_string ("ISO8859-7"),
1431 build_string ("ISO8859-7 (Greek)"),
1432 build_string ("ISO8859-7 (Greek)"),
1433 build_string ("iso8859-7"));
1434 staticpro (&Vcharset_arabic_iso8859_6);
1435 Vcharset_arabic_iso8859_6 =
1436 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1437 CHARSET_TYPE_96, 1, 1, 'G',
1438 CHARSET_RIGHT_TO_LEFT,
1439 build_string ("ISO8859-6"),
1440 build_string ("ISO8859-6 (Arabic)"),
1441 build_string ("ISO8859-6 (Arabic)"),
1442 build_string ("iso8859-6"));
1443 staticpro (&Vcharset_hebrew_iso8859_8);
1444 Vcharset_hebrew_iso8859_8 =
1445 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1446 CHARSET_TYPE_96, 1, 1, 'H',
1447 CHARSET_RIGHT_TO_LEFT,
1448 build_string ("ISO8859-8"),
1449 build_string ("ISO8859-8 (Hebrew)"),
1450 build_string ("ISO8859-8 (Hebrew)"),
1451 build_string ("iso8859-8"));
1452 staticpro (&Vcharset_katakana_jisx0201);
1453 Vcharset_katakana_jisx0201 =
1454 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1455 CHARSET_TYPE_94, 1, 1, 'I',
1456 CHARSET_LEFT_TO_RIGHT,
1457 build_string ("JISX0201 Kana"),
1458 build_string ("JISX0201.1976 (Japanese Kana)"),
1459 build_string ("JISX0201.1976 Japanese Kana"),
1460 build_string ("jisx0201.1976"));
1461 staticpro (&Vcharset_latin_jisx0201);
1462 Vcharset_latin_jisx0201 =
1463 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1464 CHARSET_TYPE_94, 1, 0, 'J',
1465 CHARSET_LEFT_TO_RIGHT,
1466 build_string ("JISX0201 Roman"),
1467 build_string ("JISX0201.1976 (Japanese Roman)"),
1468 build_string ("JISX0201.1976 Japanese Roman"),
1469 build_string ("jisx0201.1976"));
1470 staticpro (&Vcharset_cyrillic_iso8859_5);
1471 Vcharset_cyrillic_iso8859_5 =
1472 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1473 CHARSET_TYPE_96, 1, 1, 'L',
1474 CHARSET_LEFT_TO_RIGHT,
1475 build_string ("ISO8859-5"),
1476 build_string ("ISO8859-5 (Cyrillic)"),
1477 build_string ("ISO8859-5 (Cyrillic)"),
1478 build_string ("iso8859-5"));
1479 staticpro (&Vcharset_latin_iso8859_9);
1480 Vcharset_latin_iso8859_9 =
1481 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1482 CHARSET_TYPE_96, 1, 1, 'M',
1483 CHARSET_LEFT_TO_RIGHT,
1484 build_string ("Latin-5"),
1485 build_string ("ISO8859-9 (Latin-5)"),
1486 build_string ("ISO8859-9 (Latin-5)"),
1487 build_string ("iso8859-9"));
1488 staticpro (&Vcharset_japanese_jisx0208_1978);
1489 Vcharset_japanese_jisx0208_1978 =
1490 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1491 CHARSET_TYPE_94X94, 2, 0, '@',
1492 CHARSET_LEFT_TO_RIGHT,
1493 build_string ("JISX0208.1978"),
1494 build_string ("JISX0208.1978 (Japanese)"),
1496 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1497 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1498 staticpro (&Vcharset_chinese_gb2312);
1499 Vcharset_chinese_gb2312 =
1500 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1501 CHARSET_TYPE_94X94, 2, 0, 'A',
1502 CHARSET_LEFT_TO_RIGHT,
1503 build_string ("GB2312"),
1504 build_string ("GB2312)"),
1505 build_string ("GB2312 Chinese simplified"),
1506 build_string ("gb2312"));
1507 staticpro (&Vcharset_japanese_jisx0208);
1508 Vcharset_japanese_jisx0208 =
1509 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1510 CHARSET_TYPE_94X94, 2, 0, 'B',
1511 CHARSET_LEFT_TO_RIGHT,
1512 build_string ("JISX0208"),
1513 build_string ("JISX0208.1983/1990 (Japanese)"),
1514 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1515 build_string ("jisx0208.19\\(83\\|90\\)"));
1516 staticpro (&Vcharset_korean_ksc5601);
1517 Vcharset_korean_ksc5601 =
1518 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1519 CHARSET_TYPE_94X94, 2, 0, 'C',
1520 CHARSET_LEFT_TO_RIGHT,
1521 build_string ("KSC5601"),
1522 build_string ("KSC5601 (Korean"),
1523 build_string ("KSC5601 Korean Hangul and Hanja"),
1524 build_string ("ksc5601"));
1525 staticpro (&Vcharset_japanese_jisx0212);
1526 Vcharset_japanese_jisx0212 =
1527 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1528 CHARSET_TYPE_94X94, 2, 0, 'D',
1529 CHARSET_LEFT_TO_RIGHT,
1530 build_string ("JISX0212"),
1531 build_string ("JISX0212 (Japanese)"),
1532 build_string ("JISX0212 Japanese Supplement"),
1533 build_string ("jisx0212"));
1535 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1536 staticpro (&Vcharset_chinese_cns11643_1);
1537 Vcharset_chinese_cns11643_1 =
1538 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1539 CHARSET_TYPE_94X94, 2, 0, 'G',
1540 CHARSET_LEFT_TO_RIGHT,
1541 build_string ("CNS11643-1"),
1542 build_string ("CNS11643-1 (Chinese traditional)"),
1544 ("CNS 11643 Plane 1 Chinese traditional"),
1545 build_string (CHINESE_CNS_PLANE_RE("1")));
1546 staticpro (&Vcharset_chinese_cns11643_2);
1547 Vcharset_chinese_cns11643_2 =
1548 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1549 CHARSET_TYPE_94X94, 2, 0, 'H',
1550 CHARSET_LEFT_TO_RIGHT,
1551 build_string ("CNS11643-2"),
1552 build_string ("CNS11643-2 (Chinese traditional)"),
1554 ("CNS 11643 Plane 2 Chinese traditional"),
1555 build_string (CHINESE_CNS_PLANE_RE("2")));
1556 staticpro (&Vcharset_chinese_big5_1);
1557 Vcharset_chinese_big5_1 =
1558 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1559 CHARSET_TYPE_94X94, 2, 0, '0',
1560 CHARSET_LEFT_TO_RIGHT,
1561 build_string ("Big5"),
1562 build_string ("Big5 (Level-1)"),
1564 ("Big5 Level-1 Chinese traditional"),
1565 build_string ("big5"));
1566 staticpro (&Vcharset_chinese_big5_2);
1567 Vcharset_chinese_big5_2 =
1568 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1569 CHARSET_TYPE_94X94, 2, 0, '1',
1570 CHARSET_LEFT_TO_RIGHT,
1571 build_string ("Big5"),
1572 build_string ("Big5 (Level-2)"),
1574 ("Big5 Level-2 Chinese traditional"),
1575 build_string ("big5"));
1578 #ifdef ENABLE_COMPOSITE_CHARS
1579 /* #### For simplicity, we put composite chars into a 96x96 charset.
1580 This is going to lead to problems because you can run out of
1581 room, esp. as we don't yet recycle numbers. */
1582 staticpro (&Vcharset_composite);
1583 Vcharset_composite =
1584 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1585 CHARSET_TYPE_96X96, 2, 0, 0,
1586 CHARSET_LEFT_TO_RIGHT,
1587 build_string ("Composite"),
1588 build_string ("Composite characters"),
1589 build_string ("Composite characters"),
1592 /* #### not dumped properly */
1593 composite_char_row_next = 32;
1594 composite_char_col_next = 32;
1596 Vcomposite_char_string2char_hash_table =
1597 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1598 Vcomposite_char_char2string_hash_table =
1599 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1600 staticpro (&Vcomposite_char_string2char_hash_table);
1601 staticpro (&Vcomposite_char_char2string_hash_table);
1602 #endif /* ENABLE_COMPOSITE_CHARS */