1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 /* 0x90 - 0x9d are for Dimension-2 official charsets */
103 /* 0x9e is for Dimension-1 private charsets */
104 /* 0x9f is for Dimension-2 private charsets */
105 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
108 Lisp_Object Qcharsetp;
110 /* Qdoc_string, Qdimension, Qchars defined in general.c */
111 Lisp_Object Qregistry, Qfinal, Qgraphic;
112 Lisp_Object Qdirection;
113 Lisp_Object Qreverse_direction_charset;
114 Lisp_Object Qccl_program;
115 Lisp_Object Qleading_byte;
116 Lisp_Object Qshort_name, Qlong_name;
132 Qjapanese_jisx0208_1978,
143 Lisp_Object Ql2r, Qr2l;
145 Lisp_Object Vcharset_hash_table;
147 static Bufbyte next_allocated_1_byte_leading_byte;
148 static Bufbyte next_allocated_2_byte_leading_byte;
150 /* Composite characters are characters constructed by overstriking two
151 or more regular characters.
153 1) The old Mule implementation involves storing composite characters
154 in a buffer as a tag followed by all of the actual characters
155 used to make up the composite character. I think this is a bad
156 idea; it greatly complicates code that wants to handle strings
157 one character at a time because it has to deal with the possibility
158 of great big ungainly characters. It's much more reasonable to
159 simply store an index into a table of composite characters.
161 2) The current implementation only allows for 16,384 separate
162 composite characters over the lifetime of the XEmacs process.
163 This could become a potential problem if the user
164 edited lots of different files that use composite characters.
165 Due to FSF bogosity, increasing the number of allowable
166 composite characters under Mule would decrease the number
167 of possible faces that can exist. Mule already has shrunk
168 this to 2048, and further shrinkage would become uncomfortable.
169 No such problems exist in XEmacs.
171 Composite characters could be represented as 0x80 C1 C2 C3,
172 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
173 for slightly under 2^20 (one million) composite characters
174 over the XEmacs process lifetime, and you only need to
175 increase the size of a Mule character from 19 to 21 bits.
176 Or you could use 0x80 C1 C2 C3 C4, allowing for about
177 85 million (slightly over 2^26) composite characters. */
180 /************************************************************************/
181 /* Basic Emchar functions */
182 /************************************************************************/
184 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
185 string in STR. Returns the number of bytes stored.
186 Do not call this directly. Use the macro set_charptr_emchar() instead.
190 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
198 BREAKUP_CHAR (c, charset, c1, c2);
199 lb = CHAR_LEADING_BYTE (c);
200 if (LEADING_BYTE_PRIVATE_P (lb))
201 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
203 if (EQ (charset, Vcharset_control_1))
212 /* Return the first character from a Mule-encoded string in STR,
213 assuming it's non-ASCII. Do not call this directly.
214 Use the macro charptr_emchar() instead. */
217 non_ascii_charptr_emchar (CONST Bufbyte *str)
219 Bufbyte i0 = *str, i1, i2 = 0;
222 if (i0 == LEADING_BYTE_CONTROL_1)
223 return (Emchar) (*++str - 0x20);
225 if (LEADING_BYTE_PREFIX_P (i0))
230 charset = CHARSET_BY_LEADING_BYTE (i0);
231 if (XCHARSET_DIMENSION (charset) == 2)
234 return MAKE_CHAR (charset, i1, i2);
237 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
238 Do not call this directly. Use the macro valid_char_p() instead. */
241 non_ascii_valid_char_p (Emchar ch)
245 /* Must have only lowest 19 bits set */
249 f1 = CHAR_FIELD1 (ch);
250 f2 = CHAR_FIELD2 (ch);
251 f3 = CHAR_FIELD3 (ch);
257 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
258 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
259 f2 > MAX_CHAR_FIELD2_PRIVATE)
264 if (f3 != 0x20 && f3 != 0x7F)
268 NOTE: This takes advantage of the fact that
269 FIELD2_TO_OFFICIAL_LEADING_BYTE and
270 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
272 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
273 return (XCHARSET_CHARS (charset) == 96);
279 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
280 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
281 f1 > MAX_CHAR_FIELD1_PRIVATE)
283 if (f2 < 0x20 || f3 < 0x20)
286 #ifdef ENABLE_COMPOSITE_CHARS
287 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
289 if (UNBOUNDP (Fgethash (make_int (ch),
290 Vcomposite_char_char2string_hash_table,
295 #endif /* ENABLE_COMPOSITE_CHARS */
297 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
300 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
302 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
305 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
307 return (XCHARSET_CHARS (charset) == 96);
312 /************************************************************************/
313 /* Basic string functions */
314 /************************************************************************/
316 /* Copy the character pointed to by PTR into STR, assuming it's
317 non-ASCII. Do not call this directly. Use the macro
318 charptr_copy_char() instead. */
321 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
323 Bufbyte *strptr = str;
325 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
327 /* Notice fallthrough. */
328 case 4: *++strptr = *ptr++;
329 case 3: *++strptr = *ptr++;
330 case 2: *++strptr = *ptr;
335 return strptr + 1 - str;
339 /************************************************************************/
340 /* streams of Emchars */
341 /************************************************************************/
343 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
344 The functions below are not meant to be called directly; use
345 the macros in insdel.h. */
348 Lstream_get_emchar_1 (Lstream *stream, int ch)
350 Bufbyte str[MAX_EMCHAR_LEN];
351 Bufbyte *strptr = str;
353 str[0] = (Bufbyte) ch;
354 switch (REP_BYTES_BY_FIRST_BYTE (ch))
356 /* Notice fallthrough. */
358 ch = Lstream_getc (stream);
360 *++strptr = (Bufbyte) ch;
362 ch = Lstream_getc (stream);
364 *++strptr = (Bufbyte) ch;
366 ch = Lstream_getc (stream);
368 *++strptr = (Bufbyte) ch;
373 return charptr_emchar (str);
377 Lstream_fput_emchar (Lstream *stream, Emchar ch)
379 Bufbyte str[MAX_EMCHAR_LEN];
380 Bytecount len = set_charptr_emchar (str, ch);
381 return Lstream_write (stream, str, len);
385 Lstream_funget_emchar (Lstream *stream, Emchar ch)
387 Bufbyte str[MAX_EMCHAR_LEN];
388 Bytecount len = set_charptr_emchar (str, ch);
389 Lstream_unread (stream, str, len);
393 /************************************************************************/
395 /************************************************************************/
398 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
400 struct Lisp_Charset *cs = XCHARSET (obj);
402 markobj (cs->short_name);
403 markobj (cs->long_name);
404 markobj (cs->doc_string);
405 markobj (cs->registry);
406 markobj (cs->ccl_program);
411 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
413 struct Lisp_Charset *cs = XCHARSET (obj);
417 error ("printing unreadable object #<charset %s 0x%x>",
418 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
421 write_c_string ("#<charset ", printcharfun);
422 print_internal (CHARSET_NAME (cs), printcharfun, 0);
423 write_c_string (" ", printcharfun);
424 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
425 write_c_string (" ", printcharfun);
426 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
427 write_c_string (" ", printcharfun);
428 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
429 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
430 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
431 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
432 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
434 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
435 CHARSET_COLUMNS (cs),
436 CHARSET_GRAPHIC (cs),
438 write_c_string (buf, printcharfun);
439 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
440 sprintf (buf, " 0x%x>", cs->header.uid);
441 write_c_string (buf, printcharfun);
444 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
445 mark_charset, print_charset, 0, 0, 0,
446 struct Lisp_Charset);
447 /* Make a new charset. */
450 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
451 unsigned char type, unsigned char columns, unsigned char graphic,
452 Bufbyte final, unsigned char direction, Lisp_Object short_name,
453 Lisp_Object long_name, Lisp_Object doc,
457 struct Lisp_Charset *cs =
458 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
459 XSETCHARSET (obj, cs);
461 CHARSET_ID (cs) = id;
462 CHARSET_NAME (cs) = name;
463 CHARSET_SHORT_NAME (cs) = short_name;
464 CHARSET_LONG_NAME (cs) = long_name;
465 CHARSET_REP_BYTES (cs) = rep_bytes;
466 CHARSET_DIRECTION (cs) = direction;
467 CHARSET_TYPE (cs) = type;
468 CHARSET_COLUMNS (cs) = columns;
469 CHARSET_GRAPHIC (cs) = graphic;
470 CHARSET_FINAL (cs) = final;
471 CHARSET_DOC_STRING (cs) = doc;
472 CHARSET_REGISTRY (cs) = reg;
473 CHARSET_CCL_PROGRAM (cs) = Qnil;
474 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
476 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
477 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
478 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
479 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
483 /* some charsets do not have final characters. This includes
484 ASCII, Control-1, Composite, and the two faux private
486 assert (NILP (charset_by_attributes[type][final][direction]));
487 charset_by_attributes[type][final][direction] = obj;
490 assert (NILP (charset_by_leading_byte[id - 128]));
491 charset_by_leading_byte[id - 128] = obj;
493 /* official leading byte */
494 rep_bytes_by_first_byte[id] = rep_bytes;
496 /* Some charsets are "faux" and don't have names or really exist at
497 all except in the leading-byte table. */
499 Fputhash (name, obj, Vcharset_hash_table);
504 get_unallocated_leading_byte (int dimension)
510 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
513 lb = next_allocated_1_byte_leading_byte++;
517 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
520 lb = next_allocated_2_byte_leading_byte++;
525 ("No more character sets free for this dimension",
526 make_int (dimension));
532 /************************************************************************/
533 /* Basic charset Lisp functions */
534 /************************************************************************/
536 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
537 Return non-nil if OBJECT is a charset.
541 return CHARSETP (object) ? Qt : Qnil;
544 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
545 Retrieve the charset of the given name.
546 If CHARSET-OR-NAME is a charset object, it is simply returned.
547 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
548 nil is returned. Otherwise the associated charset object is returned.
552 if (CHARSETP (charset_or_name))
553 return charset_or_name;
555 CHECK_SYMBOL (charset_or_name);
556 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
559 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
560 Retrieve the charset of the given name.
561 Same as `find-charset' except an error is signalled if there is no such
562 charset instead of returning nil.
566 Lisp_Object charset = Ffind_charset (name);
569 signal_simple_error ("No such charset", name);
573 /* We store the charsets in hash tables with the names as the key and the
574 actual charset object as the value. Occasionally we need to use them
575 in a list format. These routines provide us with that. */
576 struct charset_list_closure
578 Lisp_Object *charset_list;
582 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
583 void *charset_list_closure)
585 /* This function can GC */
586 struct charset_list_closure *chcl =
587 (struct charset_list_closure*) charset_list_closure;
588 Lisp_Object *charset_list = chcl->charset_list;
590 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
594 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
595 Return a list of the names of all defined charsets.
599 Lisp_Object charset_list = Qnil;
601 struct charset_list_closure charset_list_closure;
603 GCPRO1 (charset_list);
604 charset_list_closure.charset_list = &charset_list;
605 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
606 &charset_list_closure);
612 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
613 Return the name of the given charset.
617 return XCHARSET_NAME (Fget_charset (charset));
620 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
621 Define a new character set.
622 This function is for use with Mule support.
623 NAME is a symbol, the name by which the character set is normally referred.
624 DOC-STRING is a string describing the character set.
625 PROPS is a property list, describing the specific nature of the
626 character set. Recognized properties are:
628 'short-name Short version of the charset name (ex: Latin-1)
629 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
630 'registry A regular expression matching the font registry field for
632 'dimension Number of octets used to index a character in this charset.
633 Either 1 or 2. Defaults to 1.
634 'columns Number of columns used to display a character in this charset.
635 Only used in TTY mode. (Under X, the actual width of a
636 character can be derived from the font used to display the
637 characters.) If unspecified, defaults to the dimension
638 (this is almost always the correct value).
639 'chars Number of characters in each dimension (94 or 96).
640 Defaults to 94. Note that if the dimension is 2, the
641 character set thus described is 94x94 or 96x96.
642 'final Final byte of ISO 2022 escape sequence. Must be
643 supplied. Each combination of (DIMENSION, CHARS) defines a
644 separate namespace for final bytes. Note that ISO
645 2022 restricts the final byte to the range
646 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
647 dimension == 2. Note also that final bytes in the range
648 0x30 - 0x3F are reserved for user-defined (not official)
650 'graphic 0 (use left half of font on output) or 1 (use right half
651 of font on output). Defaults to 0. For example, for
652 a font whose registry is ISO8859-1, the left half
653 (octets 0x20 - 0x7F) is the `ascii' character set, while
654 the right half (octets 0xA0 - 0xFF) is the `latin-1'
655 character set. With 'graphic set to 0, the octets
656 will have their high bit cleared; with it set to 1,
657 the octets will have their high bit set.
658 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
660 'ccl-program A compiled CCL program used to convert a character in
661 this charset into an index into the font. This is in
662 addition to the 'graphic property. The CCL program
663 is passed the octets of the character, with the high
664 bit cleared and set depending upon whether the value
665 of the 'graphic property is 0 or 1.
667 (name, doc_string, props))
669 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
670 int direction = CHARSET_LEFT_TO_RIGHT;
672 Lisp_Object registry = Qnil;
674 Lisp_Object rest, keyword, value;
675 Lisp_Object ccl_program = Qnil;
676 Lisp_Object short_name = Qnil, long_name = Qnil;
679 if (!NILP (doc_string))
680 CHECK_STRING (doc_string);
682 charset = Ffind_charset (name);
684 signal_simple_error ("Cannot redefine existing charset", name);
686 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
688 if (EQ (keyword, Qshort_name))
690 CHECK_STRING (value);
694 if (EQ (keyword, Qlong_name))
696 CHECK_STRING (value);
700 else if (EQ (keyword, Qdimension))
703 dimension = XINT (value);
704 if (dimension < 1 || dimension > 2)
705 signal_simple_error ("Invalid value for 'dimension", value);
708 else if (EQ (keyword, Qchars))
711 chars = XINT (value);
712 if (chars != 94 && chars != 96)
713 signal_simple_error ("Invalid value for 'chars", value);
716 else if (EQ (keyword, Qcolumns))
719 columns = XINT (value);
720 if (columns != 1 && columns != 2)
721 signal_simple_error ("Invalid value for 'columns", value);
724 else if (EQ (keyword, Qgraphic))
727 graphic = XINT (value);
728 if (graphic < 0 || graphic > 1)
729 signal_simple_error ("Invalid value for 'graphic", value);
732 else if (EQ (keyword, Qregistry))
734 CHECK_STRING (value);
738 else if (EQ (keyword, Qdirection))
740 if (EQ (value, Ql2r))
741 direction = CHARSET_LEFT_TO_RIGHT;
742 else if (EQ (value, Qr2l))
743 direction = CHARSET_RIGHT_TO_LEFT;
745 signal_simple_error ("Invalid value for 'direction", value);
748 else if (EQ (keyword, Qfinal))
750 CHECK_CHAR_COERCE_INT (value);
751 final = XCHAR (value);
752 if (final < '0' || final > '~')
753 signal_simple_error ("Invalid value for 'final", value);
756 else if (EQ (keyword, Qccl_program))
758 CHECK_VECTOR (value);
763 signal_simple_error ("Unrecognized property", keyword);
767 error ("'final must be specified");
768 if (dimension == 2 && final > 0x5F)
770 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
774 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
776 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
778 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
779 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
781 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
783 id = get_unallocated_leading_byte (dimension);
785 if (NILP (doc_string))
786 doc_string = build_string ("");
789 registry = build_string ("");
791 if (NILP (short_name))
792 XSETSTRING (short_name, XSYMBOL (name)->name);
794 if (NILP (long_name))
795 long_name = doc_string;
799 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
800 final, direction, short_name, long_name, doc_string, registry);
801 if (!NILP (ccl_program))
802 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
806 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
808 Make a charset equivalent to CHARSET but which goes in the opposite direction.
809 NEW-NAME is the name of the new charset. Return the new charset.
813 Lisp_Object new_charset = Qnil;
814 int id, dimension, columns, graphic, final;
816 Lisp_Object registry, doc_string, short_name, long_name;
817 struct Lisp_Charset *cs;
819 charset = Fget_charset (charset);
820 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
821 signal_simple_error ("Charset already has reverse-direction charset",
824 CHECK_SYMBOL (new_name);
825 if (!NILP (Ffind_charset (new_name)))
826 signal_simple_error ("Cannot redefine existing charset", new_name);
828 cs = XCHARSET (charset);
830 type = CHARSET_TYPE (cs);
831 columns = CHARSET_COLUMNS (cs);
832 dimension = CHARSET_DIMENSION (cs);
833 id = get_unallocated_leading_byte (dimension);
835 graphic = CHARSET_GRAPHIC (cs);
836 final = CHARSET_FINAL (cs);
837 direction = CHARSET_RIGHT_TO_LEFT;
838 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
839 direction = CHARSET_LEFT_TO_RIGHT;
840 doc_string = CHARSET_DOC_STRING (cs);
841 short_name = CHARSET_SHORT_NAME (cs);
842 long_name = CHARSET_LONG_NAME (cs);
843 registry = CHARSET_REGISTRY (cs);
845 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
846 graphic, final, direction, short_name, long_name,
847 doc_string, registry);
849 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
850 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
855 /* #### Reverse direction charsets not yet implemented. */
857 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
859 Return the reverse-direction charset parallel to CHARSET, if any.
860 This is the charset with the same properties (in particular, the same
861 dimension, number of characters per dimension, and final byte) as
862 CHARSET but whose characters are displayed in the opposite direction.
866 charset = Fget_charset (charset);
867 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
871 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
872 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
873 If DIRECTION is omitted, both directions will be checked (left-to-right
874 will be returned if character sets exist for both directions).
876 (dimension, chars, final, direction))
878 int dm, ch, fi, di = -1;
880 Lisp_Object obj = Qnil;
882 CHECK_INT (dimension);
883 dm = XINT (dimension);
884 if (dm < 1 || dm > 2)
885 signal_simple_error ("Invalid value for DIMENSION", dimension);
889 if (ch != 94 && ch != 96)
890 signal_simple_error ("Invalid value for CHARS", chars);
892 CHECK_CHAR_COERCE_INT (final);
894 if (fi < '0' || fi > '~')
895 signal_simple_error ("Invalid value for FINAL", final);
897 if (EQ (direction, Ql2r))
898 di = CHARSET_LEFT_TO_RIGHT;
899 else if (EQ (direction, Qr2l))
900 di = CHARSET_RIGHT_TO_LEFT;
901 else if (!NILP (direction))
902 signal_simple_error ("Invalid value for DIRECTION", direction);
904 if (dm == 2 && fi > 0x5F)
906 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
909 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
911 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
915 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
917 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
920 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
923 return XCHARSET_NAME (obj);
927 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
928 Return short name of CHARSET.
932 return XCHARSET_SHORT_NAME (Fget_charset (charset));
935 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
936 Return long name of CHARSET.
940 return XCHARSET_LONG_NAME (Fget_charset (charset));
943 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
944 Return description of CHARSET.
948 return XCHARSET_DOC_STRING (Fget_charset (charset));
951 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
952 Return dimension of CHARSET.
956 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
959 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
960 Return property PROP of CHARSET.
961 Recognized properties are those listed in `make-charset', as well as
962 'name and 'doc-string.
966 struct Lisp_Charset *cs;
968 charset = Fget_charset (charset);
969 cs = XCHARSET (charset);
972 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
973 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
974 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
975 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
976 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
977 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
978 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
979 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
980 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
981 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
982 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
983 if (EQ (prop, Qdirection))
984 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
985 if (EQ (prop, Qreverse_direction_charset))
987 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
991 return XCHARSET_NAME (obj);
993 signal_simple_error ("Unrecognized charset property name", prop);
994 return Qnil; /* not reached */
997 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
998 Return charset identification number of CHARSET.
1002 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1005 /* #### We need to figure out which properties we really want to
1008 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1009 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1011 (charset, ccl_program))
1013 charset = Fget_charset (charset);
1014 CHECK_VECTOR (ccl_program);
1015 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1020 invalidate_charset_font_caches (Lisp_Object charset)
1022 /* Invalidate font cache entries for charset on all devices. */
1023 Lisp_Object devcons, concons, hash_table;
1024 DEVICE_LOOP_NO_BREAK (devcons, concons)
1026 struct device *d = XDEVICE (XCAR (devcons));
1027 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1028 if (!UNBOUNDP (hash_table))
1029 Fclrhash (hash_table);
1033 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1034 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1035 Set the 'registry property of CHARSET to REGISTRY.
1037 (charset, registry))
1039 charset = Fget_charset (charset);
1040 CHECK_STRING (registry);
1041 XCHARSET_REGISTRY (charset) = registry;
1042 invalidate_charset_font_caches (charset);
1043 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1048 /************************************************************************/
1049 /* Lisp primitives for working with characters */
1050 /************************************************************************/
1052 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1053 Make a character from CHARSET and octets ARG1 and ARG2.
1054 ARG2 is required only for characters from two-dimensional charsets.
1055 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1056 character s with caron.
1058 (charset, arg1, arg2))
1060 struct Lisp_Charset *cs;
1062 int lowlim, highlim;
1064 charset = Fget_charset (charset);
1065 cs = XCHARSET (charset);
1067 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1068 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1069 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1070 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1073 /* It is useful (and safe, according to Olivier Galibert) to strip
1074 the 8th bit off ARG1 and ARG2 because it allows programmers to
1075 write (make-char 'latin-iso8859-2 CODE) where CODE is the actual
1076 Latin 2 code of the character. */
1077 a1 = XINT (arg1) & 0x7f;
1078 if (a1 < lowlim || a1 > highlim)
1079 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1081 if (CHARSET_DIMENSION (cs) == 1)
1085 ("Charset is of dimension one; second octet must be nil", arg2);
1086 return make_char (MAKE_CHAR (charset, a1, 0));
1090 a2 = XINT (arg2) & 0x7f;
1091 if (a2 < lowlim || a2 > highlim)
1092 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1094 return make_char (MAKE_CHAR (charset, a1, a2));
1097 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1098 Return the character set of char CH.
1102 CHECK_CHAR_COERCE_INT (ch);
1104 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1105 (CHAR_LEADING_BYTE (XCHAR (ch))));
1108 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1109 Return list of charset and one or two position-codes of CHAR.
1113 /* This function can GC */
1114 struct gcpro gcpro1, gcpro2;
1115 Lisp_Object charset = Qnil;
1116 Lisp_Object rc = Qnil;
1119 GCPRO2 (charset, rc);
1120 CHECK_CHAR_COERCE_INT (character);
1122 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1124 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1126 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1130 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1138 #ifdef ENABLE_COMPOSITE_CHARS
1139 /************************************************************************/
1140 /* composite character functions */
1141 /************************************************************************/
1144 lookup_composite_char (Bufbyte *str, int len)
1146 Lisp_Object lispstr = make_string (str, len);
1147 Lisp_Object ch = Fgethash (lispstr,
1148 Vcomposite_char_string2char_hash_table,
1154 if (composite_char_row_next >= 128)
1155 signal_simple_error ("No more composite chars available", lispstr);
1156 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1157 composite_char_col_next);
1158 Fputhash (make_char (emch), lispstr,
1159 Vcomposite_char_char2string_hash_table);
1160 Fputhash (lispstr, make_char (emch),
1161 Vcomposite_char_string2char_hash_table);
1162 composite_char_col_next++;
1163 if (composite_char_col_next >= 128)
1165 composite_char_col_next = 32;
1166 composite_char_row_next++;
1175 composite_char_string (Emchar ch)
1177 Lisp_Object str = Fgethash (make_char (ch),
1178 Vcomposite_char_char2string_hash_table,
1180 assert (!UNBOUNDP (str));
/* Lisp-level entry point `make-composite-char': checks that STRING is a
   Lisp string, passes its data and length to lookup_composite_char, and
   wraps the result with make_char.

   NOTE(review): the end of the doc string, the argument list and the
   braces are not visible in this copy; the doc string below stops in
   mid-sentence.  */
1184 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1185 Convert a string into a single composite character.
1186 The character is the result of overstriking all the characters in
1191 CHECK_STRING (string);
1192 return make_char (lookup_composite_char (XSTRING_DATA (string),
1193 XSTRING_LENGTH (string)));
/* Lisp-level entry point `composite-char-string': signals "Must be
   composite char" unless the leading byte of `emch' is
   LEADING_BYTE_COMPOSITE, and otherwise returns
   composite_char_string (emch).

   NOTE(review): the lines that validate CH and derive `emch' from it
   are not visible in this copy -- confirm them against the complete
   source.  */
1196 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1197 Return a string of the characters comprising a composite character.
1205 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1206 signal_simple_error ("Must be composite char", ch);
1207 return composite_char_string (emch);
1209 #endif /* ENABLE_COMPOSITE_CHARS */
1212 /************************************************************************/
1213 /* initialization */
1214 /************************************************************************/
1217 syms_of_mule_charset (void)
1219 DEFSUBR (Fcharsetp);
1220 DEFSUBR (Ffind_charset);
1221 DEFSUBR (Fget_charset);
1222 DEFSUBR (Fcharset_list);
1223 DEFSUBR (Fcharset_name);
1224 DEFSUBR (Fmake_charset);
1225 DEFSUBR (Fmake_reverse_direction_charset);
1226 /* DEFSUBR (Freverse_direction_charset); */
1227 DEFSUBR (Fcharset_from_attributes);
1228 DEFSUBR (Fcharset_short_name);
1229 DEFSUBR (Fcharset_long_name);
1230 DEFSUBR (Fcharset_description);
1231 DEFSUBR (Fcharset_dimension);
1232 DEFSUBR (Fcharset_property);
1233 DEFSUBR (Fcharset_id);
1234 DEFSUBR (Fset_charset_ccl_program);
1235 DEFSUBR (Fset_charset_registry);
1237 DEFSUBR (Fmake_char);
1238 DEFSUBR (Fchar_charset);
1239 DEFSUBR (Fsplit_char);
1241 #ifdef ENABLE_COMPOSITE_CHARS
1242 DEFSUBR (Fmake_composite_char);
1243 DEFSUBR (Fcomposite_char_string);
1246 defsymbol (&Qcharsetp, "charsetp");
1247 defsymbol (&Qregistry, "registry");
1248 defsymbol (&Qfinal, "final");
1249 defsymbol (&Qgraphic, "graphic");
1250 defsymbol (&Qdirection, "direction");
1251 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1252 defsymbol (&Qccl_program, "ccl-program");
1253 defsymbol (&Qshort_name, "short-name");
1254 defsymbol (&Qlong_name, "long-name");
1256 defsymbol (&Ql2r, "l2r");
1257 defsymbol (&Qr2l, "r2l");
1259 /* Charsets, compatible with FSF 20.3
1260 Naming convention is Script-Charset[-Edition] */
1261 defsymbol (&Qascii, "ascii");
1262 defsymbol (&Qcontrol_1, "control-1");
1263 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1264 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1265 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1266 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1267 defsymbol (&Qthai_tis620, "thai-tis620");
1268 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1269 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1270 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1271 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1272 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1273 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1274 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1275 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1276 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1277 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1278 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1279 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1280 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1281 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1282 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1283 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1285 defsymbol (&Qcomposite, "composite");
1289 vars_of_mule_charset (void)
1293 /* Table of charsets indexed by leading byte. */
1294 for (i = 0; i < countof (charset_by_leading_byte); i++)
1295 charset_by_leading_byte[i] = Qnil;
1297 /* Table of charsets indexed by type/final-byte/direction. */
1298 for (i = 0; i < countof (charset_by_attributes); i++)
1299 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1300 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1301 charset_by_attributes[i][j][k] = Qnil;
1303 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1304 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1308 complex_vars_of_mule_charset (void)
1310 staticpro (&Vcharset_hash_table);
1311 Vcharset_hash_table =
1312 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1314 /* Predefined character sets. We store them into variables for
1318 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1319 CHARSET_TYPE_94, 1, 0, 'B',
1320 CHARSET_LEFT_TO_RIGHT,
1321 build_string ("ASCII"),
1322 build_string ("ASCII)"),
1323 build_string ("ASCII (ISO646 IRV)"),
1324 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1325 Vcharset_control_1 =
1326 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1327 CHARSET_TYPE_94, 1, 1, 0,
1328 CHARSET_LEFT_TO_RIGHT,
1329 build_string ("C1"),
1330 build_string ("Control characters"),
1331 build_string ("Control characters 128-191"),
1333 Vcharset_latin_iso8859_1 =
1334 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1335 CHARSET_TYPE_96, 1, 1, 'A',
1336 CHARSET_LEFT_TO_RIGHT,
1337 build_string ("Latin-1"),
1338 build_string ("ISO8859-1 (Latin-1)"),
1339 build_string ("ISO8859-1 (Latin-1)"),
1340 build_string ("iso8859-1"));
1341 Vcharset_latin_iso8859_2 =
1342 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1343 CHARSET_TYPE_96, 1, 1, 'B',
1344 CHARSET_LEFT_TO_RIGHT,
1345 build_string ("Latin-2"),
1346 build_string ("ISO8859-2 (Latin-2)"),
1347 build_string ("ISO8859-2 (Latin-2)"),
1348 build_string ("iso8859-2"));
1349 Vcharset_latin_iso8859_3 =
1350 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1351 CHARSET_TYPE_96, 1, 1, 'C',
1352 CHARSET_LEFT_TO_RIGHT,
1353 build_string ("Latin-3"),
1354 build_string ("ISO8859-3 (Latin-3)"),
1355 build_string ("ISO8859-3 (Latin-3)"),
1356 build_string ("iso8859-3"));
1357 Vcharset_latin_iso8859_4 =
1358 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1359 CHARSET_TYPE_96, 1, 1, 'D',
1360 CHARSET_LEFT_TO_RIGHT,
1361 build_string ("Latin-4"),
1362 build_string ("ISO8859-4 (Latin-4)"),
1363 build_string ("ISO8859-4 (Latin-4)"),
1364 build_string ("iso8859-4"));
1365 Vcharset_thai_tis620 =
1366 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1367 CHARSET_TYPE_96, 1, 1, 'T',
1368 CHARSET_LEFT_TO_RIGHT,
1369 build_string ("TIS620"),
1370 build_string ("TIS620 (Thai)"),
1371 build_string ("TIS620.2529 (Thai)"),
1372 build_string ("tis620"));
1373 Vcharset_greek_iso8859_7 =
1374 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1375 CHARSET_TYPE_96, 1, 1, 'F',
1376 CHARSET_LEFT_TO_RIGHT,
1377 build_string ("ISO8859-7"),
1378 build_string ("ISO8859-7 (Greek)"),
1379 build_string ("ISO8859-7 (Greek)"),
1380 build_string ("iso8859-7"));
1381 Vcharset_arabic_iso8859_6 =
1382 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1383 CHARSET_TYPE_96, 1, 1, 'G',
1384 CHARSET_RIGHT_TO_LEFT,
1385 build_string ("ISO8859-6"),
1386 build_string ("ISO8859-6 (Arabic)"),
1387 build_string ("ISO8859-6 (Arabic)"),
1388 build_string ("iso8859-6"));
1389 Vcharset_hebrew_iso8859_8 =
1390 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1391 CHARSET_TYPE_96, 1, 1, 'H',
1392 CHARSET_RIGHT_TO_LEFT,
1393 build_string ("ISO8859-8"),
1394 build_string ("ISO8859-8 (Hebrew)"),
1395 build_string ("ISO8859-8 (Hebrew)"),
1396 build_string ("iso8859-8"));
1397 Vcharset_katakana_jisx0201 =
1398 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1399 CHARSET_TYPE_94, 1, 1, 'I',
1400 CHARSET_LEFT_TO_RIGHT,
1401 build_string ("JISX0201 Kana"),
1402 build_string ("JISX0201.1976 (Japanese Kana)"),
1403 build_string ("JISX0201.1976 Japanese Kana"),
1404 build_string ("jisx0201.1976"));
1405 Vcharset_latin_jisx0201 =
1406 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1407 CHARSET_TYPE_94, 1, 0, 'J',
1408 CHARSET_LEFT_TO_RIGHT,
1409 build_string ("JISX0201 Roman"),
1410 build_string ("JISX0201.1976 (Japanese Roman)"),
1411 build_string ("JISX0201.1976 Japanese Roman"),
1412 build_string ("jisx0201.1976"));
1413 Vcharset_cyrillic_iso8859_5 =
1414 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1415 CHARSET_TYPE_96, 1, 1, 'L',
1416 CHARSET_LEFT_TO_RIGHT,
1417 build_string ("ISO8859-5"),
1418 build_string ("ISO8859-5 (Cyrillic)"),
1419 build_string ("ISO8859-5 (Cyrillic)"),
1420 build_string ("iso8859-5"));
1421 Vcharset_latin_iso8859_9 =
1422 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1423 CHARSET_TYPE_96, 1, 1, 'M',
1424 CHARSET_LEFT_TO_RIGHT,
1425 build_string ("Latin-5"),
1426 build_string ("ISO8859-9 (Latin-5)"),
1427 build_string ("ISO8859-9 (Latin-5)"),
1428 build_string ("iso8859-9"));
1429 Vcharset_japanese_jisx0208_1978 =
1430 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1431 CHARSET_TYPE_94X94, 2, 0, '@',
1432 CHARSET_LEFT_TO_RIGHT,
1433 build_string ("JISX0208.1978"),
1434 build_string ("JISX0208.1978 (Japanese)"),
1436 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1437 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1438 Vcharset_chinese_gb2312 =
1439 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1440 CHARSET_TYPE_94X94, 2, 0, 'A',
1441 CHARSET_LEFT_TO_RIGHT,
1442 build_string ("GB2312"),
1443 build_string ("GB2312)"),
1444 build_string ("GB2312 Chinese simplified"),
1445 build_string ("gb2312"));
1446 Vcharset_japanese_jisx0208 =
1447 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1448 CHARSET_TYPE_94X94, 2, 0, 'B',
1449 CHARSET_LEFT_TO_RIGHT,
1450 build_string ("JISX0208"),
1451 build_string ("JISX0208.1983/1990 (Japanese)"),
1452 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1453 build_string ("jisx0208.19\\(83\\|90\\)"));
1454 Vcharset_korean_ksc5601 =
1455 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1456 CHARSET_TYPE_94X94, 2, 0, 'C',
1457 CHARSET_LEFT_TO_RIGHT,
1458 build_string ("KSC5601"),
1459 build_string ("KSC5601 (Korean"),
1460 build_string ("KSC5601 Korean Hangul and Hanja"),
1461 build_string ("ksc5601"));
1462 Vcharset_japanese_jisx0212 =
1463 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1464 CHARSET_TYPE_94X94, 2, 0, 'D',
1465 CHARSET_LEFT_TO_RIGHT,
1466 build_string ("JISX0212"),
1467 build_string ("JISX0212 (Japanese)"),
1468 build_string ("JISX0212 Japanese Supplement"),
1469 build_string ("jisx0212"));
1471 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1472 Vcharset_chinese_cns11643_1 =
1473 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1474 CHARSET_TYPE_94X94, 2, 0, 'G',
1475 CHARSET_LEFT_TO_RIGHT,
1476 build_string ("CNS11643-1"),
1477 build_string ("CNS11643-1 (Chinese traditional)"),
1479 ("CNS 11643 Plane 1 Chinese traditional"),
1480 build_string (CHINESE_CNS_PLANE_RE("1")));
1481 Vcharset_chinese_cns11643_2 =
1482 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1483 CHARSET_TYPE_94X94, 2, 0, 'H',
1484 CHARSET_LEFT_TO_RIGHT,
1485 build_string ("CNS11643-2"),
1486 build_string ("CNS11643-2 (Chinese traditional)"),
1488 ("CNS 11643 Plane 2 Chinese traditional"),
1489 build_string (CHINESE_CNS_PLANE_RE("2")));
1490 Vcharset_chinese_big5_1 =
1491 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1492 CHARSET_TYPE_94X94, 2, 0, '0',
1493 CHARSET_LEFT_TO_RIGHT,
1494 build_string ("Big5"),
1495 build_string ("Big5 (Level-1)"),
1497 ("Big5 Level-1 Chinese traditional"),
1498 build_string ("big5"));
1499 Vcharset_chinese_big5_2 =
1500 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1501 CHARSET_TYPE_94X94, 2, 0, '1',
1502 CHARSET_LEFT_TO_RIGHT,
1503 build_string ("Big5"),
1504 build_string ("Big5 (Level-2)"),
1506 ("Big5 Level-2 Chinese traditional"),
1507 build_string ("big5"));
1510 #ifdef ENABLE_COMPOSITE_CHARS
1511 /* #### For simplicity, we put composite chars into a 96x96 charset.
1512 This is going to lead to problems because you can run out of
1513 room, esp. as we don't yet recycle numbers. */
1514 Vcharset_composite =
1515 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1516 CHARSET_TYPE_96X96, 2, 0, 0,
1517 CHARSET_LEFT_TO_RIGHT,
1518 build_string ("Composite"),
1519 build_string ("Composite characters"),
1520 build_string ("Composite characters"),
1523 composite_char_row_next = 32;
1524 composite_char_col_next = 32;
1526 Vcomposite_char_string2char_hash_table =
1527 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1528 Vcomposite_char_char2string_hash_table =
1529 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1530 staticpro (&Vcomposite_char_string2char_hash_table);
1531 staticpro (&Vcomposite_char_char2string_hash_table);
1532 #endif /* ENABLE_COMPOSITE_CHARS */