1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof(struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
97 Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 static Bufbyte next_allocated_1_byte_leading_byte;
154 static Bufbyte next_allocated_2_byte_leading_byte;
156 /* Composite characters are characters constructed by overstriking two
157 or more regular characters.
159 1) The old Mule implementation involves storing composite characters
160 in a buffer as a tag followed by all of the actual characters
161 used to make up the composite character. I think this is a bad
162 idea; it greatly complicates code that wants to handle strings
163 one character at a time because it has to deal with the possibility
164 of great big ungainly characters. It's much more reasonable to
165 simply store an index into a table of composite characters.
167 2) The current implementation only allows for 16,384 separate
168 composite characters over the lifetime of the XEmacs process.
169 This could become a potential problem if the user
170 edited lots of different files that use composite characters.
171 Due to FSF bogosity, increasing the number of allowable
172 composite characters under Mule would decrease the number
173 of possible faces that can exist. Mule already has shrunk
174 this to 2048, and further shrinkage would become uncomfortable.
175 No such problems exist in XEmacs.
177 Composite characters could be represented as 0x80 C1 C2 C3,
178 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
179 for slightly under 2^20 (one million) composite characters
180 over the XEmacs process lifetime, and you only need to
181 increase the size of a Mule character from 19 to 21 bits.
182 Or you could use 0x80 C1 C2 C3 C4, allowing for about
183 85 million (slightly over 2^26) composite characters. */
186 /************************************************************************/
187 /* Basic Emchar functions */
188 /************************************************************************/
190 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
191 string in STR. Returns the number of bytes stored.
192 Do not call this directly. Use the macro set_charptr_emchar() instead.
196 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
204 BREAKUP_CHAR (c, charset, c1, c2);
205 lb = CHAR_LEADING_BYTE (c);
206 if (LEADING_BYTE_PRIVATE_P (lb))
207 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
209 if (EQ (charset, Vcharset_control_1))
218 /* Return the first character from a Mule-encoded string in STR,
219 assuming it's non-ASCII. Do not call this directly.
220 Use the macro charptr_emchar() instead. */
223 non_ascii_charptr_emchar (CONST Bufbyte *str)
225 Bufbyte i0 = *str, i1, i2 = 0;
228 if (i0 == LEADING_BYTE_CONTROL_1)
229 return (Emchar) (*++str - 0x20);
231 if (LEADING_BYTE_PREFIX_P (i0))
236 charset = CHARSET_BY_LEADING_BYTE (i0);
237 if (XCHARSET_DIMENSION (charset) == 2)
240 return MAKE_CHAR (charset, i1, i2);
243 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
244 Do not call this directly. Use the macro valid_char_p() instead. */
247 non_ascii_valid_char_p (Emchar ch)
251 /* Must have only lowest 19 bits set */
255 f1 = CHAR_FIELD1 (ch);
256 f2 = CHAR_FIELD2 (ch);
257 f3 = CHAR_FIELD3 (ch);
263 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
264 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
265 f2 > MAX_CHAR_FIELD2_PRIVATE)
270 if (f3 != 0x20 && f3 != 0x7F)
274 NOTE: This takes advantage of the fact that
275 FIELD2_TO_OFFICIAL_LEADING_BYTE and
276 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
278 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
279 return (XCHARSET_CHARS (charset) == 96);
285 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
286 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
287 f1 > MAX_CHAR_FIELD1_PRIVATE)
289 if (f2 < 0x20 || f3 < 0x20)
292 #ifdef ENABLE_COMPOSITE_CHARS
293 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
295 if (UNBOUNDP (Fgethash (make_int (ch),
296 Vcomposite_char_char2string_hash_table,
301 #endif /* ENABLE_COMPOSITE_CHARS */
303 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
306 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
308 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
313 return (XCHARSET_CHARS (charset) == 96);
318 /************************************************************************/
319 /* Basic string functions */
320 /************************************************************************/
322 /* Copy the character pointed to by PTR into STR, assuming it's
323 non-ASCII. Do not call this directly. Use the macro
324 charptr_copy_char() instead. */
327 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
329 Bufbyte *strptr = str;
331 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
333 /* Notice fallthrough. */
334 case 4: *++strptr = *ptr++;
335 case 3: *++strptr = *ptr++;
336 case 2: *++strptr = *ptr;
341 return strptr + 1 - str;
345 /************************************************************************/
346 /* streams of Emchars */
347 /************************************************************************/
349 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
350 The functions below are not meant to be called directly; use
351 the macros in insdel.h. */
354 Lstream_get_emchar_1 (Lstream *stream, int ch)
356 Bufbyte str[MAX_EMCHAR_LEN];
357 Bufbyte *strptr = str;
359 str[0] = (Bufbyte) ch;
360 switch (REP_BYTES_BY_FIRST_BYTE (ch))
362 /* Notice fallthrough. */
364 ch = Lstream_getc (stream);
366 *++strptr = (Bufbyte) ch;
368 ch = Lstream_getc (stream);
370 *++strptr = (Bufbyte) ch;
372 ch = Lstream_getc (stream);
374 *++strptr = (Bufbyte) ch;
379 return charptr_emchar (str);
383 Lstream_fput_emchar (Lstream *stream, Emchar ch)
385 Bufbyte str[MAX_EMCHAR_LEN];
386 Bytecount len = set_charptr_emchar (str, ch);
387 return Lstream_write (stream, str, len);
391 Lstream_funget_emchar (Lstream *stream, Emchar ch)
393 Bufbyte str[MAX_EMCHAR_LEN];
394 Bytecount len = set_charptr_emchar (str, ch);
395 Lstream_unread (stream, str, len);
399 /************************************************************************/
401 /************************************************************************/
404 mark_charset (Lisp_Object obj)
406 struct Lisp_Charset *cs = XCHARSET (obj);
408 mark_object (cs->short_name);
409 mark_object (cs->long_name);
410 mark_object (cs->doc_string);
411 mark_object (cs->registry);
412 mark_object (cs->ccl_program);
417 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
419 struct Lisp_Charset *cs = XCHARSET (obj);
423 error ("printing unreadable object #<charset %s 0x%x>",
424 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
427 write_c_string ("#<charset ", printcharfun);
428 print_internal (CHARSET_NAME (cs), printcharfun, 0);
429 write_c_string (" ", printcharfun);
430 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
431 write_c_string (" ", printcharfun);
432 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
433 write_c_string (" ", printcharfun);
434 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
435 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
436 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
437 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
438 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
440 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
441 CHARSET_COLUMNS (cs),
442 CHARSET_GRAPHIC (cs),
444 write_c_string (buf, printcharfun);
445 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
446 sprintf (buf, " 0x%x>", cs->header.uid);
447 write_c_string (buf, printcharfun);
450 static const struct lrecord_description charset_description[] = {
451 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
455 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
456 mark_charset, print_charset, 0, 0, 0, charset_description,
457 struct Lisp_Charset);
458 /* Make a new charset. */
461 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
462 unsigned char type, unsigned char columns, unsigned char graphic,
463 Bufbyte final, unsigned char direction, Lisp_Object short_name,
464 Lisp_Object long_name, Lisp_Object doc,
468 struct Lisp_Charset *cs =
469 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
470 XSETCHARSET (obj, cs);
472 CHARSET_ID (cs) = id;
473 CHARSET_NAME (cs) = name;
474 CHARSET_SHORT_NAME (cs) = short_name;
475 CHARSET_LONG_NAME (cs) = long_name;
476 CHARSET_REP_BYTES (cs) = rep_bytes;
477 CHARSET_DIRECTION (cs) = direction;
478 CHARSET_TYPE (cs) = type;
479 CHARSET_COLUMNS (cs) = columns;
480 CHARSET_GRAPHIC (cs) = graphic;
481 CHARSET_FINAL (cs) = final;
482 CHARSET_DOC_STRING (cs) = doc;
483 CHARSET_REGISTRY (cs) = reg;
484 CHARSET_CCL_PROGRAM (cs) = Qnil;
485 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
487 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
488 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
489 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
490 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
494 /* some charsets do not have final characters. This includes
495 ASCII, Control-1, Composite, and the two faux private
497 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
498 chlook->charset_by_attributes[type][final][direction] = obj;
501 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
502 chlook->charset_by_leading_byte[id - 128] = obj;
504 /* official leading byte */
505 rep_bytes_by_first_byte[id] = rep_bytes;
507 /* Some charsets are "faux" and don't have names or really exist at
508 all except in the leading-byte table. */
510 Fputhash (name, obj, Vcharset_hash_table);
515 get_unallocated_leading_byte (int dimension)
521 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
524 lb = next_allocated_1_byte_leading_byte++;
528 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
531 lb = next_allocated_2_byte_leading_byte++;
536 ("No more character sets free for this dimension",
537 make_int (dimension));
543 /************************************************************************/
544 /* Basic charset Lisp functions */
545 /************************************************************************/
547 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
548 Return non-nil if OBJECT is a charset.
552 return CHARSETP (object) ? Qt : Qnil;
555 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
556 Retrieve the charset of the given name.
557 If CHARSET-OR-NAME is a charset object, it is simply returned.
558 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
559 nil is returned. Otherwise the associated charset object is returned.
563 if (CHARSETP (charset_or_name))
564 return charset_or_name;
566 CHECK_SYMBOL (charset_or_name);
567 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
570 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
571 Retrieve the charset of the given name.
572 Same as `find-charset' except an error is signalled if there is no such
573 charset instead of returning nil.
577 Lisp_Object charset = Ffind_charset (name);
580 signal_simple_error ("No such charset", name);
584 /* We store the charsets in hash tables with the names as the key and the
585 actual charset object as the value. Occasionally we need to use them
586 in a list format. These routines provide us with that. */
587 struct charset_list_closure
589 Lisp_Object *charset_list;
593 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
594 void *charset_list_closure)
596 /* This function can GC */
597 struct charset_list_closure *chcl =
598 (struct charset_list_closure*) charset_list_closure;
599 Lisp_Object *charset_list = chcl->charset_list;
601 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
605 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
606 Return a list of the names of all defined charsets.
610 Lisp_Object charset_list = Qnil;
612 struct charset_list_closure charset_list_closure;
614 GCPRO1 (charset_list);
615 charset_list_closure.charset_list = &charset_list;
616 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
617 &charset_list_closure);
623 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
624 Return the name of the given charset.
628 return XCHARSET_NAME (Fget_charset (charset));
631 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
632 Define a new character set.
633 This function is for use with Mule support.
634 NAME is a symbol, the name by which the character set is normally referred.
635 DOC-STRING is a string describing the character set.
636 PROPS is a property list, describing the specific nature of the
637 character set. Recognized properties are:
639 'short-name Short version of the charset name (ex: Latin-1)
640 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
641 'registry A regular expression matching the font registry field for
643 'dimension Number of octets used to index a character in this charset.
644 Either 1 or 2. Defaults to 1.
645 'columns Number of columns used to display a character in this charset.
646 Only used in TTY mode. (Under X, the actual width of a
647 character can be derived from the font used to display the
648 characters.) If unspecified, defaults to the dimension
649 (this is almost always the correct value).
650 'chars Number of characters in each dimension (94 or 96).
651 Defaults to 94. Note that if the dimension is 2, the
652 character set thus described is 94x94 or 96x96.
653 'final Final byte of ISO 2022 escape sequence. Must be
654 supplied. Each combination of (DIMENSION, CHARS) defines a
655 separate namespace for final bytes. Note that ISO
656 2022 restricts the final byte to the range
657 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
658 dimension == 2. Note also that final bytes in the range
659 0x30 - 0x3F are reserved for user-defined (not official)
661 'graphic 0 (use left half of font on output) or 1 (use right half
662 of font on output). Defaults to 0. For example, for
663 a font whose registry is ISO8859-1, the left half
664 (octets 0x20 - 0x7F) is the `ascii' character set, while
665 the right half (octets 0xA0 - 0xFF) is the `latin-1'
666 character set. With 'graphic set to 0, the octets
667 will have their high bit cleared; with it set to 1,
668 the octets will have their high bit set.
669 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
671 'ccl-program A compiled CCL program used to convert a character in
672 this charset into an index into the font. This is in
673 addition to the 'graphic property. The CCL program
674 is passed the octets of the character, with the high
675 bit cleared and set depending upon whether the value
676 of the 'graphic property is 0 or 1.
678 (name, doc_string, props))
680 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
681 int direction = CHARSET_LEFT_TO_RIGHT;
683 Lisp_Object registry = Qnil;
685 Lisp_Object rest, keyword, value;
686 Lisp_Object ccl_program = Qnil;
687 Lisp_Object short_name = Qnil, long_name = Qnil;
690 if (!NILP (doc_string))
691 CHECK_STRING (doc_string);
693 charset = Ffind_charset (name);
695 signal_simple_error ("Cannot redefine existing charset", name);
697 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
699 if (EQ (keyword, Qshort_name))
701 CHECK_STRING (value);
705 if (EQ (keyword, Qlong_name))
707 CHECK_STRING (value);
711 else if (EQ (keyword, Qdimension))
714 dimension = XINT (value);
715 if (dimension < 1 || dimension > 2)
716 signal_simple_error ("Invalid value for 'dimension", value);
719 else if (EQ (keyword, Qchars))
722 chars = XINT (value);
723 if (chars != 94 && chars != 96)
724 signal_simple_error ("Invalid value for 'chars", value);
727 else if (EQ (keyword, Qcolumns))
730 columns = XINT (value);
731 if (columns != 1 && columns != 2)
732 signal_simple_error ("Invalid value for 'columns", value);
735 else if (EQ (keyword, Qgraphic))
738 graphic = XINT (value);
739 if (graphic < 0 || graphic > 1)
740 signal_simple_error ("Invalid value for 'graphic", value);
743 else if (EQ (keyword, Qregistry))
745 CHECK_STRING (value);
749 else if (EQ (keyword, Qdirection))
751 if (EQ (value, Ql2r))
752 direction = CHARSET_LEFT_TO_RIGHT;
753 else if (EQ (value, Qr2l))
754 direction = CHARSET_RIGHT_TO_LEFT;
756 signal_simple_error ("Invalid value for 'direction", value);
759 else if (EQ (keyword, Qfinal))
761 CHECK_CHAR_COERCE_INT (value);
762 final = XCHAR (value);
763 if (final < '0' || final > '~')
764 signal_simple_error ("Invalid value for 'final", value);
767 else if (EQ (keyword, Qccl_program))
769 CHECK_VECTOR (value);
774 signal_simple_error ("Unrecognized property", keyword);
778 error ("'final must be specified");
779 if (dimension == 2 && final > 0x5F)
781 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
785 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
787 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
789 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
790 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
792 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
794 id = get_unallocated_leading_byte (dimension);
796 if (NILP (doc_string))
797 doc_string = build_string ("");
800 registry = build_string ("");
802 if (NILP (short_name))
803 XSETSTRING (short_name, XSYMBOL (name)->name);
805 if (NILP (long_name))
806 long_name = doc_string;
810 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
811 final, direction, short_name, long_name, doc_string, registry);
812 if (!NILP (ccl_program))
813 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
817 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
819 Make a charset equivalent to CHARSET but which goes in the opposite direction.
820 NEW-NAME is the name of the new charset. Return the new charset.
824 Lisp_Object new_charset = Qnil;
825 int id, dimension, columns, graphic, final;
827 Lisp_Object registry, doc_string, short_name, long_name;
828 struct Lisp_Charset *cs;
830 charset = Fget_charset (charset);
831 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
832 signal_simple_error ("Charset already has reverse-direction charset",
835 CHECK_SYMBOL (new_name);
836 if (!NILP (Ffind_charset (new_name)))
837 signal_simple_error ("Cannot redefine existing charset", new_name);
839 cs = XCHARSET (charset);
841 type = CHARSET_TYPE (cs);
842 columns = CHARSET_COLUMNS (cs);
843 dimension = CHARSET_DIMENSION (cs);
844 id = get_unallocated_leading_byte (dimension);
846 graphic = CHARSET_GRAPHIC (cs);
847 final = CHARSET_FINAL (cs);
848 direction = CHARSET_RIGHT_TO_LEFT;
849 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
850 direction = CHARSET_LEFT_TO_RIGHT;
851 doc_string = CHARSET_DOC_STRING (cs);
852 short_name = CHARSET_SHORT_NAME (cs);
853 long_name = CHARSET_LONG_NAME (cs);
854 registry = CHARSET_REGISTRY (cs);
856 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
857 graphic, final, direction, short_name, long_name,
858 doc_string, registry);
860 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
861 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
866 /* #### Reverse direction charsets not yet implemented. */
868 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
870 Return the reverse-direction charset parallel to CHARSET, if any.
871 This is the charset with the same properties (in particular, the same
872 dimension, number of characters per dimension, and final byte) as
873 CHARSET but whose characters are displayed in the opposite direction.
877 charset = Fget_charset (charset);
878 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
882 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
883 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
884 If DIRECTION is omitted, both directions will be checked (left-to-right
885 will be returned if character sets exist for both directions).
887 (dimension, chars, final, direction))
889 int dm, ch, fi, di = -1;
891 Lisp_Object obj = Qnil;
893 CHECK_INT (dimension);
894 dm = XINT (dimension);
895 if (dm < 1 || dm > 2)
896 signal_simple_error ("Invalid value for DIMENSION", dimension);
900 if (ch != 94 && ch != 96)
901 signal_simple_error ("Invalid value for CHARS", chars);
903 CHECK_CHAR_COERCE_INT (final);
905 if (fi < '0' || fi > '~')
906 signal_simple_error ("Invalid value for FINAL", final);
908 if (EQ (direction, Ql2r))
909 di = CHARSET_LEFT_TO_RIGHT;
910 else if (EQ (direction, Qr2l))
911 di = CHARSET_RIGHT_TO_LEFT;
912 else if (!NILP (direction))
913 signal_simple_error ("Invalid value for DIRECTION", direction);
915 if (dm == 2 && fi > 0x5F)
917 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
920 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
922 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
926 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
928 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
931 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
934 return XCHARSET_NAME (obj);
938 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
939 Return short name of CHARSET.
943 return XCHARSET_SHORT_NAME (Fget_charset (charset));
946 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
947 Return long name of CHARSET.
951 return XCHARSET_LONG_NAME (Fget_charset (charset));
954 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
955 Return description of CHARSET.
959 return XCHARSET_DOC_STRING (Fget_charset (charset));
962 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
963 Return dimension of CHARSET.
967 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
970 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
971 Return property PROP of CHARSET.
972 Recognized properties are those listed in `make-charset', as well as
973 'name and 'doc-string.
977 struct Lisp_Charset *cs;
979 charset = Fget_charset (charset);
980 cs = XCHARSET (charset);
983 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
984 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
985 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
986 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
987 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
988 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
989 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
990 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
991 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
992 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
993 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
994 if (EQ (prop, Qdirection))
995 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
996 if (EQ (prop, Qreverse_direction_charset))
998 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1002 return XCHARSET_NAME (obj);
1004 signal_simple_error ("Unrecognized charset property name", prop);
1005 return Qnil; /* not reached */
1008 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1009 Return charset identification number of CHARSET.
1013 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1016 /* #### We need to figure out which properties we really want to
1019 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1020 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1022 (charset, ccl_program))
1024 charset = Fget_charset (charset);
1025 CHECK_VECTOR (ccl_program);
1026 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1031 invalidate_charset_font_caches (Lisp_Object charset)
1033 /* Invalidate font cache entries for charset on all devices. */
1034 Lisp_Object devcons, concons, hash_table;
1035 DEVICE_LOOP_NO_BREAK (devcons, concons)
1037 struct device *d = XDEVICE (XCAR (devcons));
1038 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1039 if (!UNBOUNDP (hash_table))
1040 Fclrhash (hash_table);
1044 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1045 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1046 Set the 'registry property of CHARSET to REGISTRY.
1048 (charset, registry))
1050 charset = Fget_charset (charset);
1051 CHECK_STRING (registry);
1052 XCHARSET_REGISTRY (charset) = registry;
1053 invalidate_charset_font_caches (charset);
1054 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1059 /************************************************************************/
1060 /* Lisp primitives for working with characters */
1061 /************************************************************************/
1063 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1064 Make a character from CHARSET and octets ARG1 and ARG2.
1065 ARG2 is required only for characters from two-dimensional charsets.
1066 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1067 character s with caron.
1069 (charset, arg1, arg2))
1071 struct Lisp_Charset *cs;
1073 int lowlim, highlim;
1075 charset = Fget_charset (charset);
1076 cs = XCHARSET (charset);
1078 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1079 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1080 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1081 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1084 /* It is useful (and safe, according to Olivier Galibert) to strip
1085 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1086 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1087 Latin 2 code of the character. */
1088 a1 = XINT (arg1) & 0x7f;
1089 if (a1 < lowlim || a1 > highlim)
1090 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1092 if (CHARSET_DIMENSION (cs) == 1)
1096 ("Charset is of dimension one; second octet must be nil", arg2);
1097 return make_char (MAKE_CHAR (charset, a1, 0));
1101 a2 = XINT (arg2) & 0x7f;
1102 if (a2 < lowlim || a2 > highlim)
1103 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1105 return make_char (MAKE_CHAR (charset, a1, a2));
1108 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1109 Return the character set of char CH.
1113 CHECK_CHAR_COERCE_INT (ch);
1115 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1116 (CHAR_LEADING_BYTE (XCHAR (ch))));
1119 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1120 Return list of charset and one or two position-codes of CHAR.
1124 /* This function can GC */
1125 struct gcpro gcpro1, gcpro2;
1126 Lisp_Object charset = Qnil;
1127 Lisp_Object rc = Qnil;
1130 GCPRO2 (charset, rc);
1131 CHECK_CHAR_COERCE_INT (character);
1133 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1135 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1137 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1141 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1149 #ifdef ENABLE_COMPOSITE_CHARS
1150 /************************************************************************/
1151 /* composite character functions */
1152 /************************************************************************/
1155 lookup_composite_char (Bufbyte *str, int len)
1157 Lisp_Object lispstr = make_string (str, len);
1158 Lisp_Object ch = Fgethash (lispstr,
1159 Vcomposite_char_string2char_hash_table,
1165 if (composite_char_row_next >= 128)
1166 signal_simple_error ("No more composite chars available", lispstr);
1167 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1168 composite_char_col_next);
1169 Fputhash (make_char (emch), lispstr,
1170 Vcomposite_char_char2string_hash_table);
1171 Fputhash (lispstr, make_char (emch),
1172 Vcomposite_char_string2char_hash_table);
1173 composite_char_col_next++;
1174 if (composite_char_col_next >= 128)
1176 composite_char_col_next = 32;
1177 composite_char_row_next++;
1186 composite_char_string (Emchar ch)
1188 Lisp_Object str = Fgethash (make_char (ch),
1189 Vcomposite_char_char2string_hash_table,
1191 assert (!UNBOUNDP (str));
1195 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1196 Convert a string into a single composite character.
1197 The character is the result of overstriking all the characters in
1202 CHECK_STRING (string);
1203 return make_char (lookup_composite_char (XSTRING_DATA (string),
1204 XSTRING_LENGTH (string)));
1207 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1208 Return a string of the characters comprising a composite character.
1216 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1217 signal_simple_error ("Must be composite char", ch);
1218 return composite_char_string (emch);
1220 #endif /* ENABLE_COMPOSITE_CHARS */
1223 /************************************************************************/
1224 /* initialization */
1225 /************************************************************************/
1228 syms_of_mule_charset (void)
1230 DEFSUBR (Fcharsetp);
1231 DEFSUBR (Ffind_charset);
1232 DEFSUBR (Fget_charset);
1233 DEFSUBR (Fcharset_list);
1234 DEFSUBR (Fcharset_name);
1235 DEFSUBR (Fmake_charset);
1236 DEFSUBR (Fmake_reverse_direction_charset);
1237 /* DEFSUBR (Freverse_direction_charset); */
1238 DEFSUBR (Fcharset_from_attributes);
1239 DEFSUBR (Fcharset_short_name);
1240 DEFSUBR (Fcharset_long_name);
1241 DEFSUBR (Fcharset_description);
1242 DEFSUBR (Fcharset_dimension);
1243 DEFSUBR (Fcharset_property);
1244 DEFSUBR (Fcharset_id);
1245 DEFSUBR (Fset_charset_ccl_program);
1246 DEFSUBR (Fset_charset_registry);
1248 DEFSUBR (Fmake_char);
1249 DEFSUBR (Fchar_charset);
1250 DEFSUBR (Fsplit_char);
1252 #ifdef ENABLE_COMPOSITE_CHARS
1253 DEFSUBR (Fmake_composite_char);
1254 DEFSUBR (Fcomposite_char_string);
1257 defsymbol (&Qcharsetp, "charsetp");
1258 defsymbol (&Qregistry, "registry");
1259 defsymbol (&Qfinal, "final");
1260 defsymbol (&Qgraphic, "graphic");
1261 defsymbol (&Qdirection, "direction");
1262 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1263 defsymbol (&Qshort_name, "short-name");
1264 defsymbol (&Qlong_name, "long-name");
1266 defsymbol (&Ql2r, "l2r");
1267 defsymbol (&Qr2l, "r2l");
1269 /* Charsets, compatible with FSF 20.3
1270 Naming convention is Script-Charset[-Edition] */
1271 defsymbol (&Qascii, "ascii");
1272 defsymbol (&Qcontrol_1, "control-1");
1273 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1274 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1275 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1276 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1277 defsymbol (&Qthai_tis620, "thai-tis620");
1278 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1279 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1280 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1281 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1282 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1283 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1284 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1285 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1286 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1287 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1288 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1289 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1290 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1291 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1292 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1293 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1295 defsymbol (&Qcomposite, "composite");
1299 vars_of_mule_charset (void)
1303 chlook = xnew (struct charset_lookup);
1304 dumpstruct (&chlook, &charset_lookup_description);
1306 /* Table of charsets indexed by leading byte. */
1307 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1308 chlook->charset_by_leading_byte[i] = Qnil;
1310 /* Table of charsets indexed by type/final-byte/direction. */
1311 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1312 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1313 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1314 chlook->charset_by_attributes[i][j][k] = Qnil;
1316 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1317 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1321 complex_vars_of_mule_charset (void)
1323 staticpro (&Vcharset_hash_table);
1324 Vcharset_hash_table =
1325 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1327 /* Predefined character sets. We store them into variables for
1330 staticpro (&Vcharset_ascii);
1332 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1333 CHARSET_TYPE_94, 1, 0, 'B',
1334 CHARSET_LEFT_TO_RIGHT,
1335 build_string ("ASCII"),
1336 build_string ("ASCII)"),
1337 build_string ("ASCII (ISO646 IRV)"),
1338 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1339 staticpro (&Vcharset_control_1);
1340 Vcharset_control_1 =
1341 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1342 CHARSET_TYPE_94, 1, 1, 0,
1343 CHARSET_LEFT_TO_RIGHT,
1344 build_string ("C1"),
1345 build_string ("Control characters"),
1346 build_string ("Control characters 128-191"),
1348 staticpro (&Vcharset_latin_iso8859_1);
1349 Vcharset_latin_iso8859_1 =
1350 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1351 CHARSET_TYPE_96, 1, 1, 'A',
1352 CHARSET_LEFT_TO_RIGHT,
1353 build_string ("Latin-1"),
1354 build_string ("ISO8859-1 (Latin-1)"),
1355 build_string ("ISO8859-1 (Latin-1)"),
1356 build_string ("iso8859-1"));
1357 staticpro (&Vcharset_latin_iso8859_2);
1358 Vcharset_latin_iso8859_2 =
1359 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1360 CHARSET_TYPE_96, 1, 1, 'B',
1361 CHARSET_LEFT_TO_RIGHT,
1362 build_string ("Latin-2"),
1363 build_string ("ISO8859-2 (Latin-2)"),
1364 build_string ("ISO8859-2 (Latin-2)"),
1365 build_string ("iso8859-2"));
1366 staticpro (&Vcharset_latin_iso8859_3);
1367 Vcharset_latin_iso8859_3 =
1368 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1369 CHARSET_TYPE_96, 1, 1, 'C',
1370 CHARSET_LEFT_TO_RIGHT,
1371 build_string ("Latin-3"),
1372 build_string ("ISO8859-3 (Latin-3)"),
1373 build_string ("ISO8859-3 (Latin-3)"),
1374 build_string ("iso8859-3"));
1375 staticpro (&Vcharset_latin_iso8859_4);
1376 Vcharset_latin_iso8859_4 =
1377 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1378 CHARSET_TYPE_96, 1, 1, 'D',
1379 CHARSET_LEFT_TO_RIGHT,
1380 build_string ("Latin-4"),
1381 build_string ("ISO8859-4 (Latin-4)"),
1382 build_string ("ISO8859-4 (Latin-4)"),
1383 build_string ("iso8859-4"));
1384 staticpro (&Vcharset_thai_tis620);
1385 Vcharset_thai_tis620 =
1386 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1387 CHARSET_TYPE_96, 1, 1, 'T',
1388 CHARSET_LEFT_TO_RIGHT,
1389 build_string ("TIS620"),
1390 build_string ("TIS620 (Thai)"),
1391 build_string ("TIS620.2529 (Thai)"),
1392 build_string ("tis620"));
1393 staticpro (&Vcharset_greek_iso8859_7);
1394 Vcharset_greek_iso8859_7 =
1395 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1396 CHARSET_TYPE_96, 1, 1, 'F',
1397 CHARSET_LEFT_TO_RIGHT,
1398 build_string ("ISO8859-7"),
1399 build_string ("ISO8859-7 (Greek)"),
1400 build_string ("ISO8859-7 (Greek)"),
1401 build_string ("iso8859-7"));
1402 staticpro (&Vcharset_arabic_iso8859_6);
1403 Vcharset_arabic_iso8859_6 =
1404 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1405 CHARSET_TYPE_96, 1, 1, 'G',
1406 CHARSET_RIGHT_TO_LEFT,
1407 build_string ("ISO8859-6"),
1408 build_string ("ISO8859-6 (Arabic)"),
1409 build_string ("ISO8859-6 (Arabic)"),
1410 build_string ("iso8859-6"));
1411 staticpro (&Vcharset_hebrew_iso8859_8);
1412 Vcharset_hebrew_iso8859_8 =
1413 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1414 CHARSET_TYPE_96, 1, 1, 'H',
1415 CHARSET_RIGHT_TO_LEFT,
1416 build_string ("ISO8859-8"),
1417 build_string ("ISO8859-8 (Hebrew)"),
1418 build_string ("ISO8859-8 (Hebrew)"),
1419 build_string ("iso8859-8"));
1420 staticpro (&Vcharset_katakana_jisx0201);
1421 Vcharset_katakana_jisx0201 =
1422 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1423 CHARSET_TYPE_94, 1, 1, 'I',
1424 CHARSET_LEFT_TO_RIGHT,
1425 build_string ("JISX0201 Kana"),
1426 build_string ("JISX0201.1976 (Japanese Kana)"),
1427 build_string ("JISX0201.1976 Japanese Kana"),
1428 build_string ("jisx0201.1976"));
1429 staticpro (&Vcharset_latin_jisx0201);
1430 Vcharset_latin_jisx0201 =
1431 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1432 CHARSET_TYPE_94, 1, 0, 'J',
1433 CHARSET_LEFT_TO_RIGHT,
1434 build_string ("JISX0201 Roman"),
1435 build_string ("JISX0201.1976 (Japanese Roman)"),
1436 build_string ("JISX0201.1976 Japanese Roman"),
1437 build_string ("jisx0201.1976"));
1438 staticpro (&Vcharset_cyrillic_iso8859_5);
1439 Vcharset_cyrillic_iso8859_5 =
1440 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1441 CHARSET_TYPE_96, 1, 1, 'L',
1442 CHARSET_LEFT_TO_RIGHT,
1443 build_string ("ISO8859-5"),
1444 build_string ("ISO8859-5 (Cyrillic)"),
1445 build_string ("ISO8859-5 (Cyrillic)"),
1446 build_string ("iso8859-5"));
1447 staticpro (&Vcharset_latin_iso8859_9);
1448 Vcharset_latin_iso8859_9 =
1449 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1450 CHARSET_TYPE_96, 1, 1, 'M',
1451 CHARSET_LEFT_TO_RIGHT,
1452 build_string ("Latin-5"),
1453 build_string ("ISO8859-9 (Latin-5)"),
1454 build_string ("ISO8859-9 (Latin-5)"),
1455 build_string ("iso8859-9"));
1456 staticpro (&Vcharset_japanese_jisx0208_1978);
1457 Vcharset_japanese_jisx0208_1978 =
1458 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1459 CHARSET_TYPE_94X94, 2, 0, '@',
1460 CHARSET_LEFT_TO_RIGHT,
1461 build_string ("JISX0208.1978"),
1462 build_string ("JISX0208.1978 (Japanese)"),
1464 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1465 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1466 staticpro (&Vcharset_chinese_gb2312);
1467 Vcharset_chinese_gb2312 =
1468 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1469 CHARSET_TYPE_94X94, 2, 0, 'A',
1470 CHARSET_LEFT_TO_RIGHT,
1471 build_string ("GB2312"),
1472 build_string ("GB2312)"),
1473 build_string ("GB2312 Chinese simplified"),
1474 build_string ("gb2312"));
1475 staticpro (&Vcharset_japanese_jisx0208);
1476 Vcharset_japanese_jisx0208 =
1477 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1478 CHARSET_TYPE_94X94, 2, 0, 'B',
1479 CHARSET_LEFT_TO_RIGHT,
1480 build_string ("JISX0208"),
1481 build_string ("JISX0208.1983/1990 (Japanese)"),
1482 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1483 build_string ("jisx0208.19\\(83\\|90\\)"));
1484 staticpro (&Vcharset_korean_ksc5601);
1485 Vcharset_korean_ksc5601 =
1486 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1487 CHARSET_TYPE_94X94, 2, 0, 'C',
1488 CHARSET_LEFT_TO_RIGHT,
1489 build_string ("KSC5601"),
1490 build_string ("KSC5601 (Korean"),
1491 build_string ("KSC5601 Korean Hangul and Hanja"),
1492 build_string ("ksc5601"));
1493 staticpro (&Vcharset_japanese_jisx0212);
1494 Vcharset_japanese_jisx0212 =
1495 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1496 CHARSET_TYPE_94X94, 2, 0, 'D',
1497 CHARSET_LEFT_TO_RIGHT,
1498 build_string ("JISX0212"),
1499 build_string ("JISX0212 (Japanese)"),
1500 build_string ("JISX0212 Japanese Supplement"),
1501 build_string ("jisx0212"));
1503 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1504 staticpro (&Vcharset_chinese_cns11643_1);
1505 Vcharset_chinese_cns11643_1 =
1506 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1507 CHARSET_TYPE_94X94, 2, 0, 'G',
1508 CHARSET_LEFT_TO_RIGHT,
1509 build_string ("CNS11643-1"),
1510 build_string ("CNS11643-1 (Chinese traditional)"),
1512 ("CNS 11643 Plane 1 Chinese traditional"),
1513 build_string (CHINESE_CNS_PLANE_RE("1")));
1514 staticpro (&Vcharset_chinese_cns11643_2);
1515 Vcharset_chinese_cns11643_2 =
1516 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1517 CHARSET_TYPE_94X94, 2, 0, 'H',
1518 CHARSET_LEFT_TO_RIGHT,
1519 build_string ("CNS11643-2"),
1520 build_string ("CNS11643-2 (Chinese traditional)"),
1522 ("CNS 11643 Plane 2 Chinese traditional"),
1523 build_string (CHINESE_CNS_PLANE_RE("2")));
1524 staticpro (&Vcharset_chinese_big5_1);
1525 Vcharset_chinese_big5_1 =
1526 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1527 CHARSET_TYPE_94X94, 2, 0, '0',
1528 CHARSET_LEFT_TO_RIGHT,
1529 build_string ("Big5"),
1530 build_string ("Big5 (Level-1)"),
1532 ("Big5 Level-1 Chinese traditional"),
1533 build_string ("big5"));
1534 staticpro (&Vcharset_chinese_big5_2);
1535 Vcharset_chinese_big5_2 =
1536 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1537 CHARSET_TYPE_94X94, 2, 0, '1',
1538 CHARSET_LEFT_TO_RIGHT,
1539 build_string ("Big5"),
1540 build_string ("Big5 (Level-2)"),
1542 ("Big5 Level-2 Chinese traditional"),
1543 build_string ("big5"));
1546 #ifdef ENABLE_COMPOSITE_CHARS
1547 /* #### For simplicity, we put composite chars into a 96x96 charset.
1548 This is going to lead to problems because you can run out of
1549 room, esp. as we don't yet recycle numbers. */
1550 staticpro (&Vcharset_composite);
1551 Vcharset_composite =
1552 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1553 CHARSET_TYPE_96X96, 2, 0, 0,
1554 CHARSET_LEFT_TO_RIGHT,
1555 build_string ("Composite"),
1556 build_string ("Composite characters"),
1557 build_string ("Composite characters"),
1560 /* #### not dumped properly */
1561 composite_char_row_next = 32;
1562 composite_char_col_next = 32;
1564 Vcomposite_char_string2char_hash_table =
1565 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1566 Vcomposite_char_char2string_hash_table =
1567 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1568 staticpro (&Vcomposite_char_string2char_hash_table);
1569 staticpro (&Vcomposite_char_char2string_hash_table);
1570 #endif /* ENABLE_COMPOSITE_CHARS */