1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof (struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97 const Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 static Bufbyte next_allocated_1_byte_leading_byte;
154 static Bufbyte next_allocated_2_byte_leading_byte;
156 /* Composite characters are characters constructed by overstriking two
157 or more regular characters.
159 1) The old Mule implementation involves storing composite characters
160 in a buffer as a tag followed by all of the actual characters
161 used to make up the composite character. I think this is a bad
162 idea; it greatly complicates code that wants to handle strings
163 one character at a time because it has to deal with the possibility
164 of great big ungainly characters. It's much more reasonable to
165 simply store an index into a table of composite characters.
167 2) The current implementation only allows for 16,384 separate
168 composite characters over the lifetime of the XEmacs process.
169 This could become a potential problem if the user
170 edited lots of different files that use composite characters.
171 Due to FSF bogosity, increasing the number of allowable
172 composite characters under Mule would decrease the number
173 of possible faces that can exist. Mule already has shrunk
174 this to 2048, and further shrinkage would become uncomfortable.
175 No such problems exist in XEmacs.
177 Composite characters could be represented as 0x80 C1 C2 C3,
178 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
179 for slightly under 2^20 (one million) composite characters
180 over the XEmacs process lifetime, and you only need to
181 increase the size of a Mule character from 19 to 21 bits.
182 Or you could use 0x80 C1 C2 C3 C4, allowing for about
183 85 million (slightly over 2^26) composite characters. */
186 /************************************************************************/
187 /* Basic Emchar functions */
188 /************************************************************************/
190 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
191 string in STR. Returns the number of bytes stored.
192 Do not call this directly. Use the macro set_charptr_emchar() instead.
196 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
204 BREAKUP_CHAR (c, charset, c1, c2);
205 lb = CHAR_LEADING_BYTE (c);
206 if (LEADING_BYTE_PRIVATE_P (lb))
207 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
209 if (EQ (charset, Vcharset_control_1))
218 /* Return the first character from a Mule-encoded string in STR,
219 assuming it's non-ASCII. Do not call this directly.
220 Use the macro charptr_emchar() instead. */
223 non_ascii_charptr_emchar (const Bufbyte *str)
225 Bufbyte i0 = *str, i1, i2 = 0;
228 if (i0 == LEADING_BYTE_CONTROL_1)
229 return (Emchar) (*++str - 0x20);
231 if (LEADING_BYTE_PREFIX_P (i0))
236 charset = CHARSET_BY_LEADING_BYTE (i0);
237 if (XCHARSET_DIMENSION (charset) == 2)
240 return MAKE_CHAR (charset, i1, i2);
243 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
244 Do not call this directly. Use the macro valid_char_p() instead. */
247 non_ascii_valid_char_p (Emchar ch)
251 /* Must have only lowest 19 bits set */
255 f1 = CHAR_FIELD1 (ch);
256 f2 = CHAR_FIELD2 (ch);
257 f3 = CHAR_FIELD3 (ch);
263 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
264 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
265 f2 > MAX_CHAR_FIELD2_PRIVATE)
270 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
271 f2 <= MAX_CHAR_FIELD2_PRIVATE))
275 NOTE: This takes advantage of the fact that
276 FIELD2_TO_OFFICIAL_LEADING_BYTE and
277 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
279 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
280 if (EQ (charset, Qnil))
282 return (XCHARSET_CHARS (charset) == 96);
288 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
289 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
290 f1 > MAX_CHAR_FIELD1_PRIVATE)
292 if (f2 < 0x20 || f3 < 0x20)
295 #ifdef ENABLE_COMPOSITE_CHARS
296 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
298 if (UNBOUNDP (Fgethash (make_int (ch),
299 Vcomposite_char_char2string_hash_table,
304 #endif /* ENABLE_COMPOSITE_CHARS */
306 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
307 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
310 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
315 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
317 if (EQ (charset, Qnil))
319 return (XCHARSET_CHARS (charset) == 96);
324 /************************************************************************/
325 /* Basic string functions */
326 /************************************************************************/
328 /* Copy the character pointed to by PTR into STR, assuming it's
329 non-ASCII. Do not call this directly. Use the macro
330 charptr_copy_char() instead. */
333 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
335 Bufbyte *strptr = str;
337 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
339 /* Notice fallthrough. */
340 case 4: *++strptr = *ptr++;
341 case 3: *++strptr = *ptr++;
342 case 2: *++strptr = *ptr;
347 return strptr + 1 - str;
351 /************************************************************************/
352 /* streams of Emchars */
353 /************************************************************************/
355 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
356 The functions below are not meant to be called directly; use
357 the macros in insdel.h. */
360 Lstream_get_emchar_1 (Lstream *stream, int ch)
362 Bufbyte str[MAX_EMCHAR_LEN];
363 Bufbyte *strptr = str;
365 str[0] = (Bufbyte) ch;
366 switch (REP_BYTES_BY_FIRST_BYTE (ch))
368 /* Notice fallthrough. */
370 ch = Lstream_getc (stream);
372 *++strptr = (Bufbyte) ch;
374 ch = Lstream_getc (stream);
376 *++strptr = (Bufbyte) ch;
378 ch = Lstream_getc (stream);
380 *++strptr = (Bufbyte) ch;
385 return charptr_emchar (str);
389 Lstream_fput_emchar (Lstream *stream, Emchar ch)
391 Bufbyte str[MAX_EMCHAR_LEN];
392 Bytecount len = set_charptr_emchar (str, ch);
393 return Lstream_write (stream, str, len);
397 Lstream_funget_emchar (Lstream *stream, Emchar ch)
399 Bufbyte str[MAX_EMCHAR_LEN];
400 Bytecount len = set_charptr_emchar (str, ch);
401 Lstream_unread (stream, str, len);
405 /************************************************************************/
407 /************************************************************************/
410 mark_charset (Lisp_Object obj)
412 Lisp_Charset *cs = XCHARSET (obj);
414 mark_object (cs->short_name);
415 mark_object (cs->long_name);
416 mark_object (cs->doc_string);
417 mark_object (cs->registry);
418 mark_object (cs->ccl_program);
423 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
425 Lisp_Charset *cs = XCHARSET (obj);
429 error ("printing unreadable object #<charset %s 0x%x>",
430 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
433 write_c_string ("#<charset ", printcharfun);
434 print_internal (CHARSET_NAME (cs), printcharfun, 0);
435 write_c_string (" ", printcharfun);
436 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
437 write_c_string (" ", printcharfun);
438 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
439 write_c_string (" ", printcharfun);
440 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
441 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
442 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
443 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
444 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
446 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
447 CHARSET_COLUMNS (cs),
448 CHARSET_GRAPHIC (cs),
450 write_c_string (buf, printcharfun);
451 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
452 sprintf (buf, " 0x%x>", cs->header.uid);
453 write_c_string (buf, printcharfun);
456 static const struct lrecord_description charset_description[] = {
457 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
458 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
459 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
460 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
461 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
462 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
463 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
467 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
468 mark_charset, print_charset, 0, 0, 0, charset_description,
470 /* Make a new charset. */
473 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
474 unsigned char type, unsigned char columns, unsigned char graphic,
475 Bufbyte final, unsigned char direction, Lisp_Object short_name,
476 Lisp_Object long_name, Lisp_Object doc,
480 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
484 XSETCHARSET (obj, cs);
486 CHARSET_ID (cs) = id;
487 CHARSET_NAME (cs) = name;
488 CHARSET_SHORT_NAME (cs) = short_name;
489 CHARSET_LONG_NAME (cs) = long_name;
490 CHARSET_REP_BYTES (cs) = rep_bytes;
491 CHARSET_DIRECTION (cs) = direction;
492 CHARSET_TYPE (cs) = type;
493 CHARSET_COLUMNS (cs) = columns;
494 CHARSET_GRAPHIC (cs) = graphic;
495 CHARSET_FINAL (cs) = final;
496 CHARSET_DOC_STRING (cs) = doc;
497 CHARSET_REGISTRY (cs) = reg;
498 CHARSET_CCL_PROGRAM (cs) = Qnil;
499 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
501 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
502 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
503 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
504 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
508 /* some charsets do not have final characters. This includes
509 ASCII, Control-1, Composite, and the two faux private
511 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
512 chlook->charset_by_attributes[type][final][direction] = obj;
515 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
516 chlook->charset_by_leading_byte[id - 128] = obj;
518 /* Some charsets are "faux" and don't have names or really exist at
519 all except in the leading-byte table. */
521 Fputhash (name, obj, Vcharset_hash_table);
526 get_unallocated_leading_byte (int dimension)
532 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
535 lb = next_allocated_1_byte_leading_byte++;
539 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
542 lb = next_allocated_2_byte_leading_byte++;
547 ("No more character sets free for this dimension",
548 make_int (dimension));
554 /************************************************************************/
555 /* Basic charset Lisp functions */
556 /************************************************************************/
558 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
559 Return non-nil if OBJECT is a charset.
563 return CHARSETP (object) ? Qt : Qnil;
566 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
567 Retrieve the charset of the given name.
568 If CHARSET-OR-NAME is a charset object, it is simply returned.
569 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
570 nil is returned. Otherwise the associated charset object is returned.
574 if (CHARSETP (charset_or_name))
575 return charset_or_name;
577 CHECK_SYMBOL (charset_or_name);
578 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
581 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
582 Retrieve the charset of the given name.
583 Same as `find-charset' except an error is signalled if there is no such
584 charset instead of returning nil.
588 Lisp_Object charset = Ffind_charset (name);
591 signal_simple_error ("No such charset", name);
595 /* We store the charsets in hash tables with the names as the key and the
596 actual charset object as the value. Occasionally we need to use them
597 in a list format. These routines provide us with that. */
598 struct charset_list_closure
600 Lisp_Object *charset_list;
604 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
605 void *charset_list_closure)
607 /* This function can GC */
608 struct charset_list_closure *chcl =
609 (struct charset_list_closure*) charset_list_closure;
610 Lisp_Object *charset_list = chcl->charset_list;
612 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
616 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
617 Return a list of the names of all defined charsets.
621 Lisp_Object charset_list = Qnil;
623 struct charset_list_closure charset_list_closure;
625 GCPRO1 (charset_list);
626 charset_list_closure.charset_list = &charset_list;
627 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
628 &charset_list_closure);
634 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
635 Return the name of the given charset.
639 return XCHARSET_NAME (Fget_charset (charset));
642 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
643 Define a new character set.
644 This function is for use with Mule support.
645 NAME is a symbol, the name by which the character set is normally referred.
646 DOC-STRING is a string describing the character set.
647 PROPS is a property list, describing the specific nature of the
648 character set. Recognized properties are:
650 'short-name Short version of the charset name (ex: Latin-1)
651 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
652 'registry A regular expression matching the font registry field for
654 'dimension Number of octets used to index a character in this charset.
655 Either 1 or 2. Defaults to 1.
656 'columns Number of columns used to display a character in this charset.
657 Only used in TTY mode. (Under X, the actual width of a
658 character can be derived from the font used to display the
659 characters.) If unspecified, defaults to the dimension
660 (this is almost always the correct value).
661 'chars Number of characters in each dimension (94 or 96).
662 Defaults to 94. Note that if the dimension is 2, the
663 character set thus described is 94x94 or 96x96.
664 'final Final byte of ISO 2022 escape sequence. Must be
665 supplied. Each combination of (DIMENSION, CHARS) defines a
666 separate namespace for final bytes. Note that ISO
667 2022 restricts the final byte to the range
668 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
669 dimension == 2. Note also that final bytes in the range
670 0x30 - 0x3F are reserved for user-defined (not official)
672 'graphic 0 (use left half of font on output) or 1 (use right half
673 of font on output). Defaults to 0. For example, for
674 a font whose registry is ISO8859-1, the left half
675 (octets 0x20 - 0x7F) is the `ascii' character set, while
676 the right half (octets 0xA0 - 0xFF) is the `latin-1'
677 character set. With 'graphic set to 0, the octets
678 will have their high bit cleared; with it set to 1,
679 the octets will have their high bit set.
680 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
682 'ccl-program A compiled CCL program used to convert a character in
683 this charset into an index into the font. This is in
684 addition to the 'graphic property. The CCL program
685 is passed the octets of the character, with the high
686 bit cleared and set depending upon whether the value
687 of the 'graphic property is 0 or 1.
689 (name, doc_string, props))
691 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
692 int direction = CHARSET_LEFT_TO_RIGHT;
694 Lisp_Object registry = Qnil;
696 Lisp_Object rest, keyword, value;
697 Lisp_Object ccl_program = Qnil;
698 Lisp_Object short_name = Qnil, long_name = Qnil;
701 if (!NILP (doc_string))
702 CHECK_STRING (doc_string);
704 charset = Ffind_charset (name);
706 signal_simple_error ("Cannot redefine existing charset", name);
708 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
710 if (EQ (keyword, Qshort_name))
712 CHECK_STRING (value);
716 if (EQ (keyword, Qlong_name))
718 CHECK_STRING (value);
722 else if (EQ (keyword, Qdimension))
725 dimension = XINT (value);
726 if (dimension < 1 || dimension > 2)
727 signal_simple_error ("Invalid value for 'dimension", value);
730 else if (EQ (keyword, Qchars))
733 chars = XINT (value);
734 if (chars != 94 && chars != 96)
735 signal_simple_error ("Invalid value for 'chars", value);
738 else if (EQ (keyword, Qcolumns))
741 columns = XINT (value);
742 if (columns != 1 && columns != 2)
743 signal_simple_error ("Invalid value for 'columns", value);
746 else if (EQ (keyword, Qgraphic))
749 graphic = XINT (value);
750 if (graphic < 0 || graphic > 1)
751 signal_simple_error ("Invalid value for 'graphic", value);
754 else if (EQ (keyword, Qregistry))
756 CHECK_STRING (value);
760 else if (EQ (keyword, Qdirection))
762 if (EQ (value, Ql2r))
763 direction = CHARSET_LEFT_TO_RIGHT;
764 else if (EQ (value, Qr2l))
765 direction = CHARSET_RIGHT_TO_LEFT;
767 signal_simple_error ("Invalid value for 'direction", value);
770 else if (EQ (keyword, Qfinal))
772 CHECK_CHAR_COERCE_INT (value);
773 final = XCHAR (value);
774 if (final < '0' || final > '~')
775 signal_simple_error ("Invalid value for 'final", value);
778 else if (EQ (keyword, Qccl_program))
780 CHECK_VECTOR (value);
785 signal_simple_error ("Unrecognized property", keyword);
789 error ("'final must be specified");
790 if (dimension == 2 && final > 0x5F)
792 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
796 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
798 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
800 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
801 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
803 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
805 id = get_unallocated_leading_byte (dimension);
807 if (NILP (doc_string))
808 doc_string = build_string ("");
811 registry = build_string ("");
813 if (NILP (short_name))
814 XSETSTRING (short_name, XSYMBOL (name)->name);
816 if (NILP (long_name))
817 long_name = doc_string;
821 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
822 final, direction, short_name, long_name, doc_string, registry);
823 if (!NILP (ccl_program))
824 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
828 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
830 Make a charset equivalent to CHARSET but which goes in the opposite direction.
831 NEW-NAME is the name of the new charset. Return the new charset.
835 Lisp_Object new_charset = Qnil;
836 int id, dimension, columns, graphic, final;
838 Lisp_Object registry, doc_string, short_name, long_name;
841 charset = Fget_charset (charset);
842 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
843 signal_simple_error ("Charset already has reverse-direction charset",
846 CHECK_SYMBOL (new_name);
847 if (!NILP (Ffind_charset (new_name)))
848 signal_simple_error ("Cannot redefine existing charset", new_name);
850 cs = XCHARSET (charset);
852 type = CHARSET_TYPE (cs);
853 columns = CHARSET_COLUMNS (cs);
854 dimension = CHARSET_DIMENSION (cs);
855 id = get_unallocated_leading_byte (dimension);
857 graphic = CHARSET_GRAPHIC (cs);
858 final = CHARSET_FINAL (cs);
859 direction = CHARSET_RIGHT_TO_LEFT;
860 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
861 direction = CHARSET_LEFT_TO_RIGHT;
862 doc_string = CHARSET_DOC_STRING (cs);
863 short_name = CHARSET_SHORT_NAME (cs);
864 long_name = CHARSET_LONG_NAME (cs);
865 registry = CHARSET_REGISTRY (cs);
867 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
868 graphic, final, direction, short_name, long_name,
869 doc_string, registry);
871 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
872 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
877 /* #### Reverse direction charsets not yet implemented. */
879 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
881 Return the reverse-direction charset parallel to CHARSET, if any.
882 This is the charset with the same properties (in particular, the same
883 dimension, number of characters per dimension, and final byte) as
884 CHARSET but whose characters are displayed in the opposite direction.
888 charset = Fget_charset (charset);
889 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
893 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
894 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
895 If DIRECTION is omitted, both directions will be checked (left-to-right
896 will be returned if character sets exist for both directions).
898 (dimension, chars, final, direction))
900 int dm, ch, fi, di = -1;
902 Lisp_Object obj = Qnil;
904 CHECK_INT (dimension);
905 dm = XINT (dimension);
906 if (dm < 1 || dm > 2)
907 signal_simple_error ("Invalid value for DIMENSION", dimension);
911 if (ch != 94 && ch != 96)
912 signal_simple_error ("Invalid value for CHARS", chars);
914 CHECK_CHAR_COERCE_INT (final);
916 if (fi < '0' || fi > '~')
917 signal_simple_error ("Invalid value for FINAL", final);
919 if (EQ (direction, Ql2r))
920 di = CHARSET_LEFT_TO_RIGHT;
921 else if (EQ (direction, Qr2l))
922 di = CHARSET_RIGHT_TO_LEFT;
923 else if (!NILP (direction))
924 signal_simple_error ("Invalid value for DIRECTION", direction);
926 if (dm == 2 && fi > 0x5F)
928 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
931 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
933 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
937 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
939 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
942 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
945 return XCHARSET_NAME (obj);
949 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
950 Return short name of CHARSET.
954 return XCHARSET_SHORT_NAME (Fget_charset (charset));
957 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
958 Return long name of CHARSET.
962 return XCHARSET_LONG_NAME (Fget_charset (charset));
965 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
966 Return description of CHARSET.
970 return XCHARSET_DOC_STRING (Fget_charset (charset));
973 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
974 Return dimension of CHARSET.
978 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
981 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
982 Return property PROP of CHARSET.
983 Recognized properties are those listed in `make-charset', as well as
984 'name and 'doc-string.
990 charset = Fget_charset (charset);
991 cs = XCHARSET (charset);
994 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
995 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
996 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
997 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
998 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
999 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1000 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1001 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1002 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1003 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1004 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1005 if (EQ (prop, Qdirection))
1006 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1007 if (EQ (prop, Qreverse_direction_charset))
1009 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1013 return XCHARSET_NAME (obj);
1015 signal_simple_error ("Unrecognized charset property name", prop);
1016 return Qnil; /* not reached */
1019 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1020 Return charset identification number of CHARSET.
1024 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1027 /* #### We need to figure out which properties we really want to
1030 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1031 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1033 (charset, ccl_program))
1035 charset = Fget_charset (charset);
1036 CHECK_VECTOR (ccl_program);
1037 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1042 invalidate_charset_font_caches (Lisp_Object charset)
1044 /* Invalidate font cache entries for charset on all devices. */
1045 Lisp_Object devcons, concons, hash_table;
1046 DEVICE_LOOP_NO_BREAK (devcons, concons)
1048 struct device *d = XDEVICE (XCAR (devcons));
1049 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1050 if (!UNBOUNDP (hash_table))
1051 Fclrhash (hash_table);
1055 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1056 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1057 Set the 'registry property of CHARSET to REGISTRY.
1059 (charset, registry))
1061 charset = Fget_charset (charset);
1062 CHECK_STRING (registry);
1063 XCHARSET_REGISTRY (charset) = registry;
1064 invalidate_charset_font_caches (charset);
1065 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1070 /************************************************************************/
1071 /* Lisp primitives for working with characters */
1072 /************************************************************************/
1074 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1075 Make a character from CHARSET and octets ARG1 and ARG2.
1076 ARG2 is required only for characters from two-dimensional charsets.
1077 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1078 character s with caron.
1080 (charset, arg1, arg2))
1084 int lowlim, highlim;
1086 charset = Fget_charset (charset);
1087 cs = XCHARSET (charset);
1089 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1090 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1091 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1092 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1095 /* It is useful (and safe, according to Olivier Galibert) to strip
1096 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1097 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1098 Latin 2 code of the character. */
1099 a1 = XINT (arg1) & 0x7f;
1100 if (a1 < lowlim || a1 > highlim)
1101 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1103 if (CHARSET_DIMENSION (cs) == 1)
1107 ("Charset is of dimension one; second octet must be nil", arg2);
1108 return make_char (MAKE_CHAR (charset, a1, 0));
1112 a2 = XINT (arg2) & 0x7f;
1113 if (a2 < lowlim || a2 > highlim)
1114 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1116 return make_char (MAKE_CHAR (charset, a1, a2));
1119 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1120 Return the character set of char CH.
1124 CHECK_CHAR_COERCE_INT (ch);
1126 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1127 (CHAR_LEADING_BYTE (XCHAR (ch))));
1130 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1131 Return the octet numbered N (should be 0 or 1) of char CH.
1132 N defaults to 0 if omitted.
1136 Lisp_Object charset;
1139 CHECK_CHAR_COERCE_INT (ch);
1141 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
1143 if (NILP (n) || EQ (n, Qzero))
1144 return make_int (octet0);
1145 else if (EQ (n, make_int (1)))
1146 return make_int (octet1);
1148 signal_simple_error ("Octet number must be 0 or 1", n);
1151 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1152 Return list of charset and one or two position-codes of CHAR.
1156 /* This function can GC */
1157 struct gcpro gcpro1, gcpro2;
1158 Lisp_Object charset = Qnil;
1159 Lisp_Object rc = Qnil;
1162 GCPRO2 (charset, rc);
1163 CHECK_CHAR_COERCE_INT (character);
1165 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1167 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1169 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1173 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1181 #ifdef ENABLE_COMPOSITE_CHARS
1182 /************************************************************************/
1183 /* composite character functions */
1184 /************************************************************************/
1187 lookup_composite_char (Bufbyte *str, int len)
1189 Lisp_Object lispstr = make_string (str, len);
1190 Lisp_Object ch = Fgethash (lispstr,
1191 Vcomposite_char_string2char_hash_table,
1197 if (composite_char_row_next >= 128)
1198 signal_simple_error ("No more composite chars available", lispstr);
1199 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1200 composite_char_col_next);
1201 Fputhash (make_char (emch), lispstr,
1202 Vcomposite_char_char2string_hash_table);
1203 Fputhash (lispstr, make_char (emch),
1204 Vcomposite_char_string2char_hash_table);
1205 composite_char_col_next++;
1206 if (composite_char_col_next >= 128)
1208 composite_char_col_next = 32;
1209 composite_char_row_next++;
1218 composite_char_string (Emchar ch)
1220 Lisp_Object str = Fgethash (make_char (ch),
1221 Vcomposite_char_char2string_hash_table,
1223 assert (!UNBOUNDP (str));
1227 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1228 Convert a string into a single composite character.
1229 The character is the result of overstriking all the characters in
1234 CHECK_STRING (string);
1235 return make_char (lookup_composite_char (XSTRING_DATA (string),
1236 XSTRING_LENGTH (string)));
1239 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1240 Return a string of the characters comprising a composite character.
1248 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1249 signal_simple_error ("Must be composite char", ch);
1250 return composite_char_string (emch);
1252 #endif /* ENABLE_COMPOSITE_CHARS */
1255 /************************************************************************/
1256 /* initialization */
1257 /************************************************************************/
1260 syms_of_mule_charset (void)
1262 DEFSUBR (Fcharsetp);
1263 DEFSUBR (Ffind_charset);
1264 DEFSUBR (Fget_charset);
1265 DEFSUBR (Fcharset_list);
1266 DEFSUBR (Fcharset_name);
1267 DEFSUBR (Fmake_charset);
1268 DEFSUBR (Fmake_reverse_direction_charset);
1269 /* DEFSUBR (Freverse_direction_charset); */
1270 DEFSUBR (Fcharset_from_attributes);
1271 DEFSUBR (Fcharset_short_name);
1272 DEFSUBR (Fcharset_long_name);
1273 DEFSUBR (Fcharset_description);
1274 DEFSUBR (Fcharset_dimension);
1275 DEFSUBR (Fcharset_property);
1276 DEFSUBR (Fcharset_id);
1277 DEFSUBR (Fset_charset_ccl_program);
1278 DEFSUBR (Fset_charset_registry);
1280 DEFSUBR (Fmake_char);
1281 DEFSUBR (Fchar_charset);
1282 DEFSUBR (Fchar_octet);
1283 DEFSUBR (Fsplit_char);
1285 #ifdef ENABLE_COMPOSITE_CHARS
1286 DEFSUBR (Fmake_composite_char);
1287 DEFSUBR (Fcomposite_char_string);
1290 defsymbol (&Qcharsetp, "charsetp");
1291 defsymbol (&Qregistry, "registry");
1292 defsymbol (&Qfinal, "final");
1293 defsymbol (&Qgraphic, "graphic");
1294 defsymbol (&Qdirection, "direction");
1295 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1296 defsymbol (&Qshort_name, "short-name");
1297 defsymbol (&Qlong_name, "long-name");
1299 defsymbol (&Ql2r, "l2r");
1300 defsymbol (&Qr2l, "r2l");
1302 /* Charsets, compatible with FSF 20.3
1303 Naming convention is Script-Charset[-Edition] */
1304 defsymbol (&Qascii, "ascii");
1305 defsymbol (&Qcontrol_1, "control-1");
1306 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1307 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1308 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1309 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1310 defsymbol (&Qthai_tis620, "thai-tis620");
1311 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1312 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1313 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1314 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1315 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1316 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1317 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1318 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1319 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1320 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1321 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1322 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1323 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1324 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1325 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1326 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1328 defsymbol (&Qcomposite, "composite");
1332 vars_of_mule_charset (void)
1336 chlook = xnew (struct charset_lookup);
1337 dumpstruct (&chlook, &charset_lookup_description);
1339 /* Table of charsets indexed by leading byte. */
1340 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1341 chlook->charset_by_leading_byte[i] = Qnil;
1343 /* Table of charsets indexed by type/final-byte/direction. */
1344 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1345 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1346 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1347 chlook->charset_by_attributes[i][j][k] = Qnil;
1349 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1350 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1354 complex_vars_of_mule_charset (void)
1356 staticpro (&Vcharset_hash_table);
1357 Vcharset_hash_table =
1358 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1360 /* Predefined character sets. We store them into variables for
1363 staticpro (&Vcharset_ascii);
1365 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1366 CHARSET_TYPE_94, 1, 0, 'B',
1367 CHARSET_LEFT_TO_RIGHT,
1368 build_string ("ASCII"),
1369 build_string ("ASCII)"),
1370 build_string ("ASCII (ISO646 IRV)"),
1371 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1372 staticpro (&Vcharset_control_1);
1373 Vcharset_control_1 =
1374 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1375 CHARSET_TYPE_94, 1, 1, 0,
1376 CHARSET_LEFT_TO_RIGHT,
1377 build_string ("C1"),
1378 build_string ("Control characters"),
1379 build_string ("Control characters 128-191"),
1381 staticpro (&Vcharset_latin_iso8859_1);
1382 Vcharset_latin_iso8859_1 =
1383 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1384 CHARSET_TYPE_96, 1, 1, 'A',
1385 CHARSET_LEFT_TO_RIGHT,
1386 build_string ("Latin-1"),
1387 build_string ("ISO8859-1 (Latin-1)"),
1388 build_string ("ISO8859-1 (Latin-1)"),
1389 build_string ("iso8859-1"));
1390 staticpro (&Vcharset_latin_iso8859_2);
1391 Vcharset_latin_iso8859_2 =
1392 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1393 CHARSET_TYPE_96, 1, 1, 'B',
1394 CHARSET_LEFT_TO_RIGHT,
1395 build_string ("Latin-2"),
1396 build_string ("ISO8859-2 (Latin-2)"),
1397 build_string ("ISO8859-2 (Latin-2)"),
1398 build_string ("iso8859-2"));
1399 staticpro (&Vcharset_latin_iso8859_3);
1400 Vcharset_latin_iso8859_3 =
1401 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1402 CHARSET_TYPE_96, 1, 1, 'C',
1403 CHARSET_LEFT_TO_RIGHT,
1404 build_string ("Latin-3"),
1405 build_string ("ISO8859-3 (Latin-3)"),
1406 build_string ("ISO8859-3 (Latin-3)"),
1407 build_string ("iso8859-3"));
1408 staticpro (&Vcharset_latin_iso8859_4);
1409 Vcharset_latin_iso8859_4 =
1410 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1411 CHARSET_TYPE_96, 1, 1, 'D',
1412 CHARSET_LEFT_TO_RIGHT,
1413 build_string ("Latin-4"),
1414 build_string ("ISO8859-4 (Latin-4)"),
1415 build_string ("ISO8859-4 (Latin-4)"),
1416 build_string ("iso8859-4"));
1417 staticpro (&Vcharset_thai_tis620);
1418 Vcharset_thai_tis620 =
1419 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1420 CHARSET_TYPE_96, 1, 1, 'T',
1421 CHARSET_LEFT_TO_RIGHT,
1422 build_string ("TIS620"),
1423 build_string ("TIS620 (Thai)"),
1424 build_string ("TIS620.2529 (Thai)"),
1425 build_string ("tis620"));
1426 staticpro (&Vcharset_greek_iso8859_7);
1427 Vcharset_greek_iso8859_7 =
1428 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1429 CHARSET_TYPE_96, 1, 1, 'F',
1430 CHARSET_LEFT_TO_RIGHT,
1431 build_string ("ISO8859-7"),
1432 build_string ("ISO8859-7 (Greek)"),
1433 build_string ("ISO8859-7 (Greek)"),
1434 build_string ("iso8859-7"));
1435 staticpro (&Vcharset_arabic_iso8859_6);
1436 Vcharset_arabic_iso8859_6 =
1437 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1438 CHARSET_TYPE_96, 1, 1, 'G',
1439 CHARSET_RIGHT_TO_LEFT,
1440 build_string ("ISO8859-6"),
1441 build_string ("ISO8859-6 (Arabic)"),
1442 build_string ("ISO8859-6 (Arabic)"),
1443 build_string ("iso8859-6"));
1444 staticpro (&Vcharset_hebrew_iso8859_8);
1445 Vcharset_hebrew_iso8859_8 =
1446 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1447 CHARSET_TYPE_96, 1, 1, 'H',
1448 CHARSET_RIGHT_TO_LEFT,
1449 build_string ("ISO8859-8"),
1450 build_string ("ISO8859-8 (Hebrew)"),
1451 build_string ("ISO8859-8 (Hebrew)"),
1452 build_string ("iso8859-8"));
1453 staticpro (&Vcharset_katakana_jisx0201);
1454 Vcharset_katakana_jisx0201 =
1455 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1456 CHARSET_TYPE_94, 1, 1, 'I',
1457 CHARSET_LEFT_TO_RIGHT,
1458 build_string ("JISX0201 Kana"),
1459 build_string ("JISX0201.1976 (Japanese Kana)"),
1460 build_string ("JISX0201.1976 Japanese Kana"),
1461 build_string ("jisx0201.1976"));
1462 staticpro (&Vcharset_latin_jisx0201);
1463 Vcharset_latin_jisx0201 =
1464 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1465 CHARSET_TYPE_94, 1, 0, 'J',
1466 CHARSET_LEFT_TO_RIGHT,
1467 build_string ("JISX0201 Roman"),
1468 build_string ("JISX0201.1976 (Japanese Roman)"),
1469 build_string ("JISX0201.1976 Japanese Roman"),
1470 build_string ("jisx0201.1976"));
1471 staticpro (&Vcharset_cyrillic_iso8859_5);
1472 Vcharset_cyrillic_iso8859_5 =
1473 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1474 CHARSET_TYPE_96, 1, 1, 'L',
1475 CHARSET_LEFT_TO_RIGHT,
1476 build_string ("ISO8859-5"),
1477 build_string ("ISO8859-5 (Cyrillic)"),
1478 build_string ("ISO8859-5 (Cyrillic)"),
1479 build_string ("iso8859-5"));
1480 staticpro (&Vcharset_latin_iso8859_9);
1481 Vcharset_latin_iso8859_9 =
1482 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1483 CHARSET_TYPE_96, 1, 1, 'M',
1484 CHARSET_LEFT_TO_RIGHT,
1485 build_string ("Latin-5"),
1486 build_string ("ISO8859-9 (Latin-5)"),
1487 build_string ("ISO8859-9 (Latin-5)"),
1488 build_string ("iso8859-9"));
1489 staticpro (&Vcharset_japanese_jisx0208_1978);
1490 Vcharset_japanese_jisx0208_1978 =
1491 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1492 CHARSET_TYPE_94X94, 2, 0, '@',
1493 CHARSET_LEFT_TO_RIGHT,
1494 build_string ("JISX0208.1978"),
1495 build_string ("JISX0208.1978 (Japanese)"),
1497 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1498 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1499 staticpro (&Vcharset_chinese_gb2312);
1500 Vcharset_chinese_gb2312 =
1501 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1502 CHARSET_TYPE_94X94, 2, 0, 'A',
1503 CHARSET_LEFT_TO_RIGHT,
1504 build_string ("GB2312"),
1505 build_string ("GB2312)"),
1506 build_string ("GB2312 Chinese simplified"),
1507 build_string ("gb2312"));
1508 staticpro (&Vcharset_japanese_jisx0208);
1509 Vcharset_japanese_jisx0208 =
1510 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1511 CHARSET_TYPE_94X94, 2, 0, 'B',
1512 CHARSET_LEFT_TO_RIGHT,
1513 build_string ("JISX0208"),
1514 build_string ("JISX0208.1983/1990 (Japanese)"),
1515 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1516 build_string ("jisx0208.19\\(83\\|90\\)"));
1517 staticpro (&Vcharset_korean_ksc5601);
1518 Vcharset_korean_ksc5601 =
1519 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1520 CHARSET_TYPE_94X94, 2, 0, 'C',
1521 CHARSET_LEFT_TO_RIGHT,
1522 build_string ("KSC5601"),
1523 build_string ("KSC5601 (Korean"),
1524 build_string ("KSC5601 Korean Hangul and Hanja"),
1525 build_string ("ksc5601"));
1526 staticpro (&Vcharset_japanese_jisx0212);
1527 Vcharset_japanese_jisx0212 =
1528 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1529 CHARSET_TYPE_94X94, 2, 0, 'D',
1530 CHARSET_LEFT_TO_RIGHT,
1531 build_string ("JISX0212"),
1532 build_string ("JISX0212 (Japanese)"),
1533 build_string ("JISX0212 Japanese Supplement"),
1534 build_string ("jisx0212"));
1536 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1537 staticpro (&Vcharset_chinese_cns11643_1);
1538 Vcharset_chinese_cns11643_1 =
1539 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1540 CHARSET_TYPE_94X94, 2, 0, 'G',
1541 CHARSET_LEFT_TO_RIGHT,
1542 build_string ("CNS11643-1"),
1543 build_string ("CNS11643-1 (Chinese traditional)"),
1545 ("CNS 11643 Plane 1 Chinese traditional"),
1546 build_string (CHINESE_CNS_PLANE_RE("1")));
1547 staticpro (&Vcharset_chinese_cns11643_2);
1548 Vcharset_chinese_cns11643_2 =
1549 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1550 CHARSET_TYPE_94X94, 2, 0, 'H',
1551 CHARSET_LEFT_TO_RIGHT,
1552 build_string ("CNS11643-2"),
1553 build_string ("CNS11643-2 (Chinese traditional)"),
1555 ("CNS 11643 Plane 2 Chinese traditional"),
1556 build_string (CHINESE_CNS_PLANE_RE("2")));
1557 staticpro (&Vcharset_chinese_big5_1);
1558 Vcharset_chinese_big5_1 =
1559 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1560 CHARSET_TYPE_94X94, 2, 0, '0',
1561 CHARSET_LEFT_TO_RIGHT,
1562 build_string ("Big5"),
1563 build_string ("Big5 (Level-1)"),
1565 ("Big5 Level-1 Chinese traditional"),
1566 build_string ("big5"));
1567 staticpro (&Vcharset_chinese_big5_2);
1568 Vcharset_chinese_big5_2 =
1569 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1570 CHARSET_TYPE_94X94, 2, 0, '1',
1571 CHARSET_LEFT_TO_RIGHT,
1572 build_string ("Big5"),
1573 build_string ("Big5 (Level-2)"),
1575 ("Big5 Level-2 Chinese traditional"),
1576 build_string ("big5"));
1579 #ifdef ENABLE_COMPOSITE_CHARS
1580 /* #### For simplicity, we put composite chars into a 96x96 charset.
1581 This is going to lead to problems because you can run out of
1582 room, esp. as we don't yet recycle numbers. */
1583 staticpro (&Vcharset_composite);
1584 Vcharset_composite =
1585 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1586 CHARSET_TYPE_96X96, 2, 0, 0,
1587 CHARSET_LEFT_TO_RIGHT,
1588 build_string ("Composite"),
1589 build_string ("Composite characters"),
1590 build_string ("Composite characters"),
1593 /* #### not dumped properly */
1594 composite_char_row_next = 32;
1595 composite_char_col_next = 32;
1597 Vcomposite_char_string2char_hash_table =
1598 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1599 Vcomposite_char_char2string_hash_table =
1600 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1601 staticpro (&Vcomposite_char_string2char_hash_table);
1602 staticpro (&Vcomposite_char_char2string_hash_table);
1603 #endif /* ENABLE_COMPOSITE_CHARS */