1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
102 /* 0x90 - 0x9d are for Dimension-2 official charsets */
103 /* 0x9e is for Dimension-1 private charsets */
104 /* 0x9f is for Dimension-2 private charsets */
105 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
108 Lisp_Object Vutf_2000_version;
110 int leading_code_private_11;
112 Lisp_Object Qcharsetp;
114 /* Qdoc_string, Qdimension, Qchars defined in general.c */
115 Lisp_Object Qregistry, Qfinal, Qgraphic;
116 Lisp_Object Qdirection;
117 Lisp_Object Qreverse_direction_charset;
118 Lisp_Object Qccl_program;
119 Lisp_Object Qleading_byte;
120 Lisp_Object Qshort_name, Qlong_name;
136 Qjapanese_jisx0208_1978,
147 Lisp_Object Ql2r, Qr2l;
149 Lisp_Object Vcharset_hash_table;
151 static Bufbyte next_allocated_1_byte_leading_byte;
152 static Bufbyte next_allocated_2_byte_leading_byte;
154 /* Composite characters are characters constructed by overstriking two
155 or more regular characters.
157 1) The old Mule implementation involves storing composite characters
158 in a buffer as a tag followed by all of the actual characters
159 used to make up the composite character. I think this is a bad
160 idea; it greatly complicates code that wants to handle strings
161 one character at a time because it has to deal with the possibility
162 of great big ungainly characters. It's much more reasonable to
163 simply store an index into a table of composite characters.
165 2) The current implementation only allows for 16,384 separate
166 composite characters over the lifetime of the XEmacs process.
167 This could become a potential problem if the user
168 edited lots of different files that use composite characters.
169 Due to FSF bogosity, increasing the number of allowable
170 composite characters under Mule would decrease the number
171 of possible faces that can exist. Mule already has shrunk
172 this to 2048, and further shrinkage would become uncomfortable.
173 No such problems exist in XEmacs.
175 Composite characters could be represented as 0x80 C1 C2 C3,
176 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
177 for slightly under 2^20 (one million) composite characters
178 over the XEmacs process lifetime, and you only need to
179 increase the size of a Mule character from 19 to 21 bits.
180 Or you could use 0x80 C1 C2 C3 C4, allowing for about
181 85 million (slightly over 2^26) composite characters. */
184 /************************************************************************/
185 /* Basic Emchar functions */
186 /************************************************************************/
188 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
189 string in STR. Returns the number of bytes stored.
190 Do not call this directly. Use the macro set_charptr_emchar() instead.
194 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
202 BREAKUP_CHAR (c, charset, c1, c2);
203 lb = CHAR_LEADING_BYTE (c);
204 if (LEADING_BYTE_PRIVATE_P (lb))
205 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
207 if (EQ (charset, Vcharset_control_1))
216 /* Return the first character from a Mule-encoded string in STR,
217 assuming it's non-ASCII. Do not call this directly.
218 Use the macro charptr_emchar() instead. */
221 non_ascii_charptr_emchar (CONST Bufbyte *str)
223 Bufbyte i0 = *str, i1, i2 = 0;
226 if (i0 == LEADING_BYTE_CONTROL_1)
227 return (Emchar) (*++str - 0x20);
229 if (LEADING_BYTE_PREFIX_P (i0))
234 charset = CHARSET_BY_LEADING_BYTE (i0);
235 if (XCHARSET_DIMENSION (charset) == 2)
238 return MAKE_CHAR (charset, i1, i2);
241 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
242 Do not call this directly. Use the macro valid_char_p() instead. */
245 non_ascii_valid_char_p (Emchar ch)
249 /* Must have only lowest 19 bits set */
253 f1 = CHAR_FIELD1 (ch);
254 f2 = CHAR_FIELD2 (ch);
255 f3 = CHAR_FIELD3 (ch);
261 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
262 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
263 f2 > MAX_CHAR_FIELD2_PRIVATE)
268 if (f3 != 0x20 && f3 != 0x7F)
272 NOTE: This takes advantage of the fact that
273 FIELD2_TO_OFFICIAL_LEADING_BYTE and
274 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
276 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
277 return (XCHARSET_CHARS (charset) == 96);
283 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
284 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
285 f1 > MAX_CHAR_FIELD1_PRIVATE)
287 if (f2 < 0x20 || f3 < 0x20)
290 #ifdef ENABLE_COMPOSITE_CHARS
291 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
293 if (UNBOUNDP (Fgethash (make_int (ch),
294 Vcomposite_char_char2string_hash_table,
299 #endif /* ENABLE_COMPOSITE_CHARS */
301 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
304 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
306 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
309 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
311 return (XCHARSET_CHARS (charset) == 96);
316 /************************************************************************/
317 /* Basic string functions */
318 /************************************************************************/
320 /* Copy the character pointed to by PTR into STR, assuming it's
321 non-ASCII. Do not call this directly. Use the macro
322 charptr_copy_char() instead. */
325 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
327 Bufbyte *strptr = str;
329 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
331 /* Notice fallthrough. */
332 case 4: *++strptr = *ptr++;
333 case 3: *++strptr = *ptr++;
334 case 2: *++strptr = *ptr;
339 return strptr + 1 - str;
343 /************************************************************************/
344 /* streams of Emchars */
345 /************************************************************************/
347 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
348 The functions below are not meant to be called directly; use
349 the macros in insdel.h. */
352 Lstream_get_emchar_1 (Lstream *stream, int ch)
354 Bufbyte str[MAX_EMCHAR_LEN];
355 Bufbyte *strptr = str;
357 str[0] = (Bufbyte) ch;
358 switch (REP_BYTES_BY_FIRST_BYTE (ch))
360 /* Notice fallthrough. */
362 ch = Lstream_getc (stream);
364 *++strptr = (Bufbyte) ch;
366 ch = Lstream_getc (stream);
368 *++strptr = (Bufbyte) ch;
370 ch = Lstream_getc (stream);
372 *++strptr = (Bufbyte) ch;
377 return charptr_emchar (str);
381 Lstream_fput_emchar (Lstream *stream, Emchar ch)
383 Bufbyte str[MAX_EMCHAR_LEN];
384 Bytecount len = set_charptr_emchar (str, ch);
385 return Lstream_write (stream, str, len);
389 Lstream_funget_emchar (Lstream *stream, Emchar ch)
391 Bufbyte str[MAX_EMCHAR_LEN];
392 Bytecount len = set_charptr_emchar (str, ch);
393 Lstream_unread (stream, str, len);
397 /************************************************************************/
399 /************************************************************************/
402 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
404 struct Lisp_Charset *cs = XCHARSET (obj);
406 markobj (cs->short_name);
407 markobj (cs->long_name);
408 markobj (cs->doc_string);
409 markobj (cs->registry);
410 markobj (cs->ccl_program);
415 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
417 struct Lisp_Charset *cs = XCHARSET (obj);
421 error ("printing unreadable object #<charset %s 0x%x>",
422 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
425 write_c_string ("#<charset ", printcharfun);
426 print_internal (CHARSET_NAME (cs), printcharfun, 0);
427 write_c_string (" ", printcharfun);
428 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
429 write_c_string (" ", printcharfun);
430 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
431 write_c_string (" ", printcharfun);
432 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
433 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
434 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
435 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
436 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
438 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
439 CHARSET_COLUMNS (cs),
440 CHARSET_GRAPHIC (cs),
442 write_c_string (buf, printcharfun);
443 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
444 sprintf (buf, " 0x%x>", cs->header.uid);
445 write_c_string (buf, printcharfun);
448 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
449 mark_charset, print_charset, 0, 0, 0,
450 struct Lisp_Charset);
451 /* Make a new charset. */
454 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
455 unsigned char type, unsigned char columns, unsigned char graphic,
456 Bufbyte final, unsigned char direction, Lisp_Object short_name,
457 Lisp_Object long_name, Lisp_Object doc,
461 struct Lisp_Charset *cs =
462 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
463 XSETCHARSET (obj, cs);
465 CHARSET_ID (cs) = id;
466 CHARSET_NAME (cs) = name;
467 CHARSET_SHORT_NAME (cs) = short_name;
468 CHARSET_LONG_NAME (cs) = long_name;
469 CHARSET_REP_BYTES (cs) = rep_bytes;
470 CHARSET_DIRECTION (cs) = direction;
471 CHARSET_TYPE (cs) = type;
472 CHARSET_COLUMNS (cs) = columns;
473 CHARSET_GRAPHIC (cs) = graphic;
474 CHARSET_FINAL (cs) = final;
475 CHARSET_DOC_STRING (cs) = doc;
476 CHARSET_REGISTRY (cs) = reg;
477 CHARSET_CCL_PROGRAM (cs) = Qnil;
478 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
480 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
481 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
482 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
483 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
487 /* some charsets do not have final characters. This includes
488 ASCII, Control-1, Composite, and the two faux private
490 assert (NILP (charset_by_attributes[type][final][direction]));
491 charset_by_attributes[type][final][direction] = obj;
494 assert (NILP (charset_by_leading_byte[id - 128]));
495 charset_by_leading_byte[id - 128] = obj;
497 /* official leading byte */
498 rep_bytes_by_first_byte[id] = rep_bytes;
500 /* Some charsets are "faux" and don't have names or really exist at
501 all except in the leading-byte table. */
503 Fputhash (name, obj, Vcharset_hash_table);
508 get_unallocated_leading_byte (int dimension)
514 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
517 lb = next_allocated_1_byte_leading_byte++;
521 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
524 lb = next_allocated_2_byte_leading_byte++;
529 ("No more character sets free for this dimension",
530 make_int (dimension));
536 /************************************************************************/
537 /* Basic charset Lisp functions */
538 /************************************************************************/
540 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
541 Return non-nil if OBJECT is a charset.
545 return CHARSETP (object) ? Qt : Qnil;
548 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
549 Retrieve the charset of the given name.
550 If CHARSET-OR-NAME is a charset object, it is simply returned.
551 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
552 nil is returned. Otherwise the associated charset object is returned.
556 if (CHARSETP (charset_or_name))
557 return charset_or_name;
559 CHECK_SYMBOL (charset_or_name);
560 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
563 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
564 Retrieve the charset of the given name.
565 Same as `find-charset' except an error is signalled if there is no such
566 charset instead of returning nil.
570 Lisp_Object charset = Ffind_charset (name);
573 signal_simple_error ("No such charset", name);
577 /* We store the charsets in hash tables with the names as the key and the
578 actual charset object as the value. Occasionally we need to use them
579 in a list format. These routines provide us with that. */
580 struct charset_list_closure
582 Lisp_Object *charset_list;
586 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
587 void *charset_list_closure)
589 /* This function can GC */
590 struct charset_list_closure *chcl =
591 (struct charset_list_closure*) charset_list_closure;
592 Lisp_Object *charset_list = chcl->charset_list;
594 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
598 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
599 Return a list of the names of all defined charsets.
603 Lisp_Object charset_list = Qnil;
605 struct charset_list_closure charset_list_closure;
607 GCPRO1 (charset_list);
608 charset_list_closure.charset_list = &charset_list;
609 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
610 &charset_list_closure);
616 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
617 Return the name of the given charset.
621 return XCHARSET_NAME (Fget_charset (charset));
624 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
625 Define a new character set.
626 This function is for use with Mule support.
627 NAME is a symbol, the name by which the character set is normally referred.
628 DOC-STRING is a string describing the character set.
629 PROPS is a property list, describing the specific nature of the
630 character set. Recognized properties are:
632 'short-name Short version of the charset name (ex: Latin-1)
633 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
634 'registry A regular expression matching the font registry field for
636 'dimension Number of octets used to index a character in this charset.
637 Either 1 or 2. Defaults to 1.
638 'columns Number of columns used to display a character in this charset.
639 Only used in TTY mode. (Under X, the actual width of a
640 character can be derived from the font used to display the
641 characters.) If unspecified, defaults to the dimension
642 (this is almost always the correct value).
643 'chars Number of characters in each dimension (94 or 96).
644 Defaults to 94. Note that if the dimension is 2, the
645 character set thus described is 94x94 or 96x96.
646 'final Final byte of ISO 2022 escape sequence. Must be
647 supplied. Each combination of (DIMENSION, CHARS) defines a
648 separate namespace for final bytes. Note that ISO
649 2022 restricts the final byte to the range
650 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
651 dimension == 2. Note also that final bytes in the range
652 0x30 - 0x3F are reserved for user-defined (not official)
654 'graphic 0 (use left half of font on output) or 1 (use right half
655 of font on output). Defaults to 0. For example, for
656 a font whose registry is ISO8859-1, the left half
657 (octets 0x20 - 0x7F) is the `ascii' character set, while
658 the right half (octets 0xA0 - 0xFF) is the `latin-1'
659 character set. With 'graphic set to 0, the octets
660 will have their high bit cleared; with it set to 1,
661 the octets will have their high bit set.
662 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
664 'ccl-program A compiled CCL program used to convert a character in
665 this charset into an index into the font. This is in
666 addition to the 'graphic property. The CCL program
667 is passed the octets of the character, with the high
668 bit cleared and set depending upon whether the value
669 of the 'graphic property is 0 or 1.
671 (name, doc_string, props))
673 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
674 int direction = CHARSET_LEFT_TO_RIGHT;
676 Lisp_Object registry = Qnil;
678 Lisp_Object rest, keyword, value;
679 Lisp_Object ccl_program = Qnil;
680 Lisp_Object short_name = Qnil, long_name = Qnil;
683 if (!NILP (doc_string))
684 CHECK_STRING (doc_string);
686 charset = Ffind_charset (name);
688 signal_simple_error ("Cannot redefine existing charset", name);
690 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
692 if (EQ (keyword, Qshort_name))
694 CHECK_STRING (value);
698 if (EQ (keyword, Qlong_name))
700 CHECK_STRING (value);
704 else if (EQ (keyword, Qdimension))
707 dimension = XINT (value);
708 if (dimension < 1 || dimension > 2)
709 signal_simple_error ("Invalid value for 'dimension", value);
712 else if (EQ (keyword, Qchars))
715 chars = XINT (value);
716 if (chars != 94 && chars != 96)
717 signal_simple_error ("Invalid value for 'chars", value);
720 else if (EQ (keyword, Qcolumns))
723 columns = XINT (value);
724 if (columns != 1 && columns != 2)
725 signal_simple_error ("Invalid value for 'columns", value);
728 else if (EQ (keyword, Qgraphic))
731 graphic = XINT (value);
732 if (graphic < 0 || graphic > 1)
733 signal_simple_error ("Invalid value for 'graphic", value);
736 else if (EQ (keyword, Qregistry))
738 CHECK_STRING (value);
742 else if (EQ (keyword, Qdirection))
744 if (EQ (value, Ql2r))
745 direction = CHARSET_LEFT_TO_RIGHT;
746 else if (EQ (value, Qr2l))
747 direction = CHARSET_RIGHT_TO_LEFT;
749 signal_simple_error ("Invalid value for 'direction", value);
752 else if (EQ (keyword, Qfinal))
754 CHECK_CHAR_COERCE_INT (value);
755 final = XCHAR (value);
756 if (final < '0' || final > '~')
757 signal_simple_error ("Invalid value for 'final", value);
760 else if (EQ (keyword, Qccl_program))
762 CHECK_VECTOR (value);
767 signal_simple_error ("Unrecognized property", keyword);
771 error ("'final must be specified");
772 if (dimension == 2 && final > 0x5F)
774 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
778 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
780 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
782 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
783 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
785 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
787 id = get_unallocated_leading_byte (dimension);
789 if (NILP (doc_string))
790 doc_string = build_string ("");
793 registry = build_string ("");
795 if (NILP (short_name))
796 XSETSTRING (short_name, XSYMBOL (name)->name);
798 if (NILP (long_name))
799 long_name = doc_string;
803 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
804 final, direction, short_name, long_name, doc_string, registry);
805 if (!NILP (ccl_program))
806 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
810 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
812 Make a charset equivalent to CHARSET but which goes in the opposite direction.
813 NEW-NAME is the name of the new charset. Return the new charset.
817 Lisp_Object new_charset = Qnil;
818 int id, dimension, columns, graphic, final;
820 Lisp_Object registry, doc_string, short_name, long_name;
821 struct Lisp_Charset *cs;
823 charset = Fget_charset (charset);
824 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
825 signal_simple_error ("Charset already has reverse-direction charset",
828 CHECK_SYMBOL (new_name);
829 if (!NILP (Ffind_charset (new_name)))
830 signal_simple_error ("Cannot redefine existing charset", new_name);
832 cs = XCHARSET (charset);
834 type = CHARSET_TYPE (cs);
835 columns = CHARSET_COLUMNS (cs);
836 dimension = CHARSET_DIMENSION (cs);
837 id = get_unallocated_leading_byte (dimension);
839 graphic = CHARSET_GRAPHIC (cs);
840 final = CHARSET_FINAL (cs);
841 direction = CHARSET_RIGHT_TO_LEFT;
842 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
843 direction = CHARSET_LEFT_TO_RIGHT;
844 doc_string = CHARSET_DOC_STRING (cs);
845 short_name = CHARSET_SHORT_NAME (cs);
846 long_name = CHARSET_LONG_NAME (cs);
847 registry = CHARSET_REGISTRY (cs);
849 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
850 graphic, final, direction, short_name, long_name,
851 doc_string, registry);
853 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
854 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
859 /* #### Reverse direction charsets not yet implemented. */
861 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
863 Return the reverse-direction charset parallel to CHARSET, if any.
864 This is the charset with the same properties (in particular, the same
865 dimension, number of characters per dimension, and final byte) as
866 CHARSET but whose characters are displayed in the opposite direction.
870 charset = Fget_charset (charset);
871 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
875 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
876 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
877 If DIRECTION is omitted, both directions will be checked (left-to-right
878 will be returned if character sets exist for both directions).
880 (dimension, chars, final, direction))
882 int dm, ch, fi, di = -1;
884 Lisp_Object obj = Qnil;
886 CHECK_INT (dimension);
887 dm = XINT (dimension);
888 if (dm < 1 || dm > 2)
889 signal_simple_error ("Invalid value for DIMENSION", dimension);
893 if (ch != 94 && ch != 96)
894 signal_simple_error ("Invalid value for CHARS", chars);
896 CHECK_CHAR_COERCE_INT (final);
898 if (fi < '0' || fi > '~')
899 signal_simple_error ("Invalid value for FINAL", final);
901 if (EQ (direction, Ql2r))
902 di = CHARSET_LEFT_TO_RIGHT;
903 else if (EQ (direction, Qr2l))
904 di = CHARSET_RIGHT_TO_LEFT;
905 else if (!NILP (direction))
906 signal_simple_error ("Invalid value for DIRECTION", direction);
908 if (dm == 2 && fi > 0x5F)
910 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
913 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
915 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
919 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
921 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
924 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
927 return XCHARSET_NAME (obj);
931 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
932 Return short name of CHARSET.
936 return XCHARSET_SHORT_NAME (Fget_charset (charset));
939 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
940 Return long name of CHARSET.
944 return XCHARSET_LONG_NAME (Fget_charset (charset));
947 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
948 Return description of CHARSET.
952 return XCHARSET_DOC_STRING (Fget_charset (charset));
955 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
956 Return dimension of CHARSET.
960 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
963 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
964 Return property PROP of CHARSET.
965 Recognized properties are those listed in `make-charset', as well as
966 'name and 'doc-string.
970 struct Lisp_Charset *cs;
972 charset = Fget_charset (charset);
973 cs = XCHARSET (charset);
976 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
977 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
978 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
979 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
980 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
981 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
982 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
983 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
984 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
985 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
986 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
987 if (EQ (prop, Qdirection))
988 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
989 if (EQ (prop, Qreverse_direction_charset))
991 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
995 return XCHARSET_NAME (obj);
997 signal_simple_error ("Unrecognized charset property name", prop);
998 return Qnil; /* not reached */
1001 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1002 Return charset identification number of CHARSET.
1006 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1009 /* #### We need to figure out which properties we really want to
1012 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1013 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1015 (charset, ccl_program))
1017 charset = Fget_charset (charset);
1018 CHECK_VECTOR (ccl_program);
1019 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1024 invalidate_charset_font_caches (Lisp_Object charset)
1026 /* Invalidate font cache entries for charset on all devices. */
1027 Lisp_Object devcons, concons, hash_table;
1028 DEVICE_LOOP_NO_BREAK (devcons, concons)
1030 struct device *d = XDEVICE (XCAR (devcons));
1031 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1032 if (!UNBOUNDP (hash_table))
1033 Fclrhash (hash_table);
1037 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1038 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1039 Set the 'registry property of CHARSET to REGISTRY.
1041 (charset, registry))
1043 charset = Fget_charset (charset);
1044 CHECK_STRING (registry);
1045 XCHARSET_REGISTRY (charset) = registry;
1046 invalidate_charset_font_caches (charset);
1047 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1052 /************************************************************************/
1053 /* Lisp primitives for working with characters */
1054 /************************************************************************/
1056 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1057 Make a character from CHARSET and octets ARG1 and ARG2.
1058 ARG2 is required only for characters from two-dimensional charsets.
1059 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1060 character s with caron.
1062 (charset, arg1, arg2))
1064 struct Lisp_Charset *cs;
1066 int lowlim, highlim;
1068 charset = Fget_charset (charset);
1069 cs = XCHARSET (charset);
1071 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1072 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1073 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1074 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1077 /* It is useful (and safe, according to Olivier Galibert) to strip
1078 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1079 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1080 Latin 2 code of the character. */
1081 a1 = XINT (arg1) & 0x7f;
1082 if (a1 < lowlim || a1 > highlim)
1083 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1085 if (CHARSET_DIMENSION (cs) == 1)
1089 ("Charset is of dimension one; second octet must be nil", arg2);
1090 return make_char (MAKE_CHAR (charset, a1, 0));
1094 a2 = XINT (arg2) & 0x7f;
1095 if (a2 < lowlim || a2 > highlim)
1096 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1098 return make_char (MAKE_CHAR (charset, a1, a2));
1101 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1102 Return the character set of char CH.
1106 CHECK_CHAR_COERCE_INT (ch);
1108 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1109 (CHAR_LEADING_BYTE (XCHAR (ch))));
1112 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1113 Return the octet numbered N (should be 0 or 1) of char CH.
1114 N defaults to 0 if omitted.
1118 Lisp_Object charset;
1121 CHECK_CHAR_COERCE_INT (ch);
1128 if (int_n != 0 && int_n != 1)
1129 signal_simple_error ("Octet number must be 0 or 1", n);
1131 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1132 return make_int (int_n == 0 ? c1 : c2);
1136 #ifdef ENABLE_COMPOSITE_CHARS
1137 /************************************************************************/
1138 /* composite character functions */
1139 /************************************************************************/
1142 lookup_composite_char (Bufbyte *str, int len)
1144 Lisp_Object lispstr = make_string (str, len);
1145 Lisp_Object ch = Fgethash (lispstr,
1146 Vcomposite_char_string2char_hash_table,
1152 if (composite_char_row_next >= 128)
1153 signal_simple_error ("No more composite chars available", lispstr);
1154 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1155 composite_char_col_next);
1156 Fputhash (make_char (emch), lispstr,
1157 Vcomposite_char_char2string_hash_table);
1158 Fputhash (lispstr, make_char (emch),
1159 Vcomposite_char_string2char_hash_table);
1160 composite_char_col_next++;
1161 if (composite_char_col_next >= 128)
1163 composite_char_col_next = 32;
1164 composite_char_row_next++;
1173 composite_char_string (Emchar ch)
1175 Lisp_Object str = Fgethash (make_char (ch),
1176 Vcomposite_char_char2string_hash_table,
1178 assert (!UNBOUNDP (str));
1182 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1183 Convert a string into a single composite character.
1184 The character is the result of overstriking all the characters in
1189 CHECK_STRING (string);
1190 return make_char (lookup_composite_char (XSTRING_DATA (string),
1191 XSTRING_LENGTH (string)));
1194 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1195 Return a string of the characters comprising a composite character.
1203 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1204 signal_simple_error ("Must be composite char", ch);
1205 return composite_char_string (emch);
1207 #endif /* ENABLE_COMPOSITE_CHARS */
1210 /************************************************************************/
1211 /* initialization */
1212 /************************************************************************/
1215 syms_of_mule_charset (void)
1217 DEFSUBR (Fcharsetp);
1218 DEFSUBR (Ffind_charset);
1219 DEFSUBR (Fget_charset);
1220 DEFSUBR (Fcharset_list);
1221 DEFSUBR (Fcharset_name);
1222 DEFSUBR (Fmake_charset);
1223 DEFSUBR (Fmake_reverse_direction_charset);
1224 /* DEFSUBR (Freverse_direction_charset); */
1225 DEFSUBR (Fcharset_from_attributes);
1226 DEFSUBR (Fcharset_short_name);
1227 DEFSUBR (Fcharset_long_name);
1228 DEFSUBR (Fcharset_description);
1229 DEFSUBR (Fcharset_dimension);
1230 DEFSUBR (Fcharset_property);
1231 DEFSUBR (Fcharset_id);
1232 DEFSUBR (Fset_charset_ccl_program);
1233 DEFSUBR (Fset_charset_registry);
1235 DEFSUBR (Fmake_char);
1236 DEFSUBR (Fchar_charset);
1237 DEFSUBR (Fchar_octet);
1239 #ifdef ENABLE_COMPOSITE_CHARS
1240 DEFSUBR (Fmake_composite_char);
1241 DEFSUBR (Fcomposite_char_string);
1244 defsymbol (&Qcharsetp, "charsetp");
1245 defsymbol (&Qregistry, "registry");
1246 defsymbol (&Qfinal, "final");
1247 defsymbol (&Qgraphic, "graphic");
1248 defsymbol (&Qdirection, "direction");
1249 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1250 defsymbol (&Qccl_program, "ccl-program");
1251 defsymbol (&Qshort_name, "short-name");
1252 defsymbol (&Qlong_name, "long-name");
1254 defsymbol (&Ql2r, "l2r");
1255 defsymbol (&Qr2l, "r2l");
1257 /* Charsets, compatible with FSF 20.3
1258 Naming convention is Script-Charset[-Edition] */
1259 defsymbol (&Qascii, "ascii");
1260 defsymbol (&Qcontrol_1, "control-1");
1261 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1262 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1263 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1264 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1265 defsymbol (&Qthai_tis620, "thai-tis620");
1266 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1267 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1268 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1269 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1270 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1271 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1272 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1273 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1274 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1275 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1276 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1277 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1278 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1279 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1280 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1281 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1283 defsymbol (&Qcomposite, "composite");
1285 Vutf_2000_version = build_string("0.1");
1286 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1287 Version number of UTF-2000.
1290 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1291 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1292 Leading-code of private TYPE9N charset of column-width 1.
1294 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1298 vars_of_mule_charset (void)
1302 /* Table of charsets indexed by leading byte. */
1303 for (i = 0; i < countof (charset_by_leading_byte); i++)
1304 charset_by_leading_byte[i] = Qnil;
1306 /* Table of charsets indexed by type/final-byte/direction. */
1307 for (i = 0; i < countof (charset_by_attributes); i++)
1308 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1309 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1310 charset_by_attributes[i][j][k] = Qnil;
1312 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1313 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1317 complex_vars_of_mule_charset (void)
1319 staticpro (&Vcharset_hash_table);
1320 Vcharset_hash_table =
1321 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1323 /* Predefined character sets. We store them into variables for
1327 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1328 CHARSET_TYPE_94, 1, 0, 'B',
1329 CHARSET_LEFT_TO_RIGHT,
1330 build_string ("ASCII"),
1331 build_string ("ASCII)"),
1332 build_string ("ASCII (ISO646 IRV)"),
1333 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1334 Vcharset_control_1 =
1335 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1336 CHARSET_TYPE_94, 1, 1, 0,
1337 CHARSET_LEFT_TO_RIGHT,
1338 build_string ("C1"),
1339 build_string ("Control characters"),
1340 build_string ("Control characters 128-191"),
1342 Vcharset_latin_iso8859_1 =
1343 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1344 CHARSET_TYPE_96, 1, 1, 'A',
1345 CHARSET_LEFT_TO_RIGHT,
1346 build_string ("Latin-1"),
1347 build_string ("ISO8859-1 (Latin-1)"),
1348 build_string ("ISO8859-1 (Latin-1)"),
1349 build_string ("iso8859-1"));
1350 Vcharset_latin_iso8859_2 =
1351 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1352 CHARSET_TYPE_96, 1, 1, 'B',
1353 CHARSET_LEFT_TO_RIGHT,
1354 build_string ("Latin-2"),
1355 build_string ("ISO8859-2 (Latin-2)"),
1356 build_string ("ISO8859-2 (Latin-2)"),
1357 build_string ("iso8859-2"));
1358 Vcharset_latin_iso8859_3 =
1359 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1360 CHARSET_TYPE_96, 1, 1, 'C',
1361 CHARSET_LEFT_TO_RIGHT,
1362 build_string ("Latin-3"),
1363 build_string ("ISO8859-3 (Latin-3)"),
1364 build_string ("ISO8859-3 (Latin-3)"),
1365 build_string ("iso8859-3"));
1366 Vcharset_latin_iso8859_4 =
1367 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1368 CHARSET_TYPE_96, 1, 1, 'D',
1369 CHARSET_LEFT_TO_RIGHT,
1370 build_string ("Latin-4"),
1371 build_string ("ISO8859-4 (Latin-4)"),
1372 build_string ("ISO8859-4 (Latin-4)"),
1373 build_string ("iso8859-4"));
1374 Vcharset_thai_tis620 =
1375 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1376 CHARSET_TYPE_96, 1, 1, 'T',
1377 CHARSET_LEFT_TO_RIGHT,
1378 build_string ("TIS620"),
1379 build_string ("TIS620 (Thai)"),
1380 build_string ("TIS620.2529 (Thai)"),
1381 build_string ("tis620"));
1382 Vcharset_greek_iso8859_7 =
1383 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1384 CHARSET_TYPE_96, 1, 1, 'F',
1385 CHARSET_LEFT_TO_RIGHT,
1386 build_string ("ISO8859-7"),
1387 build_string ("ISO8859-7 (Greek)"),
1388 build_string ("ISO8859-7 (Greek)"),
1389 build_string ("iso8859-7"));
1390 Vcharset_arabic_iso8859_6 =
1391 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1392 CHARSET_TYPE_96, 1, 1, 'G',
1393 CHARSET_RIGHT_TO_LEFT,
1394 build_string ("ISO8859-6"),
1395 build_string ("ISO8859-6 (Arabic)"),
1396 build_string ("ISO8859-6 (Arabic)"),
1397 build_string ("iso8859-6"));
1398 Vcharset_hebrew_iso8859_8 =
1399 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1400 CHARSET_TYPE_96, 1, 1, 'H',
1401 CHARSET_RIGHT_TO_LEFT,
1402 build_string ("ISO8859-8"),
1403 build_string ("ISO8859-8 (Hebrew)"),
1404 build_string ("ISO8859-8 (Hebrew)"),
1405 build_string ("iso8859-8"));
1406 Vcharset_katakana_jisx0201 =
1407 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1408 CHARSET_TYPE_94, 1, 1, 'I',
1409 CHARSET_LEFT_TO_RIGHT,
1410 build_string ("JISX0201 Kana"),
1411 build_string ("JISX0201.1976 (Japanese Kana)"),
1412 build_string ("JISX0201.1976 Japanese Kana"),
1413 build_string ("jisx0201.1976"));
1414 Vcharset_latin_jisx0201 =
1415 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1416 CHARSET_TYPE_94, 1, 0, 'J',
1417 CHARSET_LEFT_TO_RIGHT,
1418 build_string ("JISX0201 Roman"),
1419 build_string ("JISX0201.1976 (Japanese Roman)"),
1420 build_string ("JISX0201.1976 Japanese Roman"),
1421 build_string ("jisx0201.1976"));
1422 Vcharset_cyrillic_iso8859_5 =
1423 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1424 CHARSET_TYPE_96, 1, 1, 'L',
1425 CHARSET_LEFT_TO_RIGHT,
1426 build_string ("ISO8859-5"),
1427 build_string ("ISO8859-5 (Cyrillic)"),
1428 build_string ("ISO8859-5 (Cyrillic)"),
1429 build_string ("iso8859-5"));
1430 Vcharset_latin_iso8859_9 =
1431 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1432 CHARSET_TYPE_96, 1, 1, 'M',
1433 CHARSET_LEFT_TO_RIGHT,
1434 build_string ("Latin-5"),
1435 build_string ("ISO8859-9 (Latin-5)"),
1436 build_string ("ISO8859-9 (Latin-5)"),
1437 build_string ("iso8859-9"));
1438 Vcharset_japanese_jisx0208_1978 =
1439 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1440 CHARSET_TYPE_94X94, 2, 0, '@',
1441 CHARSET_LEFT_TO_RIGHT,
1442 build_string ("JISX0208.1978"),
1443 build_string ("JISX0208.1978 (Japanese)"),
1445 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1446 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1447 Vcharset_chinese_gb2312 =
1448 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1449 CHARSET_TYPE_94X94, 2, 0, 'A',
1450 CHARSET_LEFT_TO_RIGHT,
1451 build_string ("GB2312"),
1452 build_string ("GB2312)"),
1453 build_string ("GB2312 Chinese simplified"),
1454 build_string ("gb2312"));
1455 Vcharset_japanese_jisx0208 =
1456 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1457 CHARSET_TYPE_94X94, 2, 0, 'B',
1458 CHARSET_LEFT_TO_RIGHT,
1459 build_string ("JISX0208"),
1460 build_string ("JISX0208.1983/1990 (Japanese)"),
1461 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1462 build_string ("jisx0208.19\\(83\\|90\\)"));
1463 Vcharset_korean_ksc5601 =
1464 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1465 CHARSET_TYPE_94X94, 2, 0, 'C',
1466 CHARSET_LEFT_TO_RIGHT,
1467 build_string ("KSC5601"),
1468 build_string ("KSC5601 (Korean"),
1469 build_string ("KSC5601 Korean Hangul and Hanja"),
1470 build_string ("ksc5601"));
1471 Vcharset_japanese_jisx0212 =
1472 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1473 CHARSET_TYPE_94X94, 2, 0, 'D',
1474 CHARSET_LEFT_TO_RIGHT,
1475 build_string ("JISX0212"),
1476 build_string ("JISX0212 (Japanese)"),
1477 build_string ("JISX0212 Japanese Supplement"),
1478 build_string ("jisx0212"));
1480 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1481 Vcharset_chinese_cns11643_1 =
1482 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1483 CHARSET_TYPE_94X94, 2, 0, 'G',
1484 CHARSET_LEFT_TO_RIGHT,
1485 build_string ("CNS11643-1"),
1486 build_string ("CNS11643-1 (Chinese traditional)"),
1488 ("CNS 11643 Plane 1 Chinese traditional"),
1489 build_string (CHINESE_CNS_PLANE_RE("1")));
1490 Vcharset_chinese_cns11643_2 =
1491 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1492 CHARSET_TYPE_94X94, 2, 0, 'H',
1493 CHARSET_LEFT_TO_RIGHT,
1494 build_string ("CNS11643-2"),
1495 build_string ("CNS11643-2 (Chinese traditional)"),
1497 ("CNS 11643 Plane 2 Chinese traditional"),
1498 build_string (CHINESE_CNS_PLANE_RE("2")));
1499 Vcharset_chinese_big5_1 =
1500 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1501 CHARSET_TYPE_94X94, 2, 0, '0',
1502 CHARSET_LEFT_TO_RIGHT,
1503 build_string ("Big5"),
1504 build_string ("Big5 (Level-1)"),
1506 ("Big5 Level-1 Chinese traditional"),
1507 build_string ("big5"));
1508 Vcharset_chinese_big5_2 =
1509 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1510 CHARSET_TYPE_94X94, 2, 0, '1',
1511 CHARSET_LEFT_TO_RIGHT,
1512 build_string ("Big5"),
1513 build_string ("Big5 (Level-2)"),
1515 ("Big5 Level-2 Chinese traditional"),
1516 build_string ("big5"));
1519 #ifdef ENABLE_COMPOSITE_CHARS
1520 /* #### For simplicity, we put composite chars into a 96x96 charset.
1521 This is going to lead to problems because you can run out of
1522 room, esp. as we don't yet recycle numbers. */
1523 Vcharset_composite =
1524 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1525 CHARSET_TYPE_96X96, 2, 0, 0,
1526 CHARSET_LEFT_TO_RIGHT,
1527 build_string ("Composite"),
1528 build_string ("Composite characters"),
1529 build_string ("Composite characters"),
1532 composite_char_row_next = 32;
1533 composite_char_col_next = 32;
1535 Vcomposite_char_string2char_hash_table =
1536 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1537 Vcomposite_char_char2string_hash_table =
1538 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1539 staticpro (&Vcomposite_char_string2char_hash_table);
1540 staticpro (&Vcomposite_char_char2string_hash_table);
1541 #endif /* ENABLE_COMPOSITE_CHARS */