1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 /* 0x90 - 0x9d are for Dimension-2 official charsets */
103 /* 0x9e is for Dimension-1 private charsets */
104 /* 0x9f is for Dimension-2 private charsets */
105 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
108 Lisp_Object Qcharsetp;
110 /* Qdoc_string, Qdimension, Qchars defined in general.c */
111 Lisp_Object Qregistry, Qfinal, Qgraphic;
112 Lisp_Object Qdirection;
113 Lisp_Object Qreverse_direction_charset;
114 Lisp_Object Qccl_program;
115 Lisp_Object Qleading_byte;
116 Lisp_Object Qshort_name, Qlong_name;
132 Qjapanese_jisx0208_1978,
143 Lisp_Object Ql2r, Qr2l;
145 Lisp_Object Vcharset_hash_table;
147 static Bufbyte next_allocated_1_byte_leading_byte;
148 static Bufbyte next_allocated_2_byte_leading_byte;
150 /* Composite characters are characters constructed by overstriking two
151 or more regular characters.
153 1) The old Mule implementation involves storing composite characters
154 in a buffer as a tag followed by all of the actual characters
155 used to make up the composite character. I think this is a bad
156 idea; it greatly complicates code that wants to handle strings
157 one character at a time because it has to deal with the possibility
158 of great big ungainly characters. It's much more reasonable to
159 simply store an index into a table of composite characters.
161 2) The current implementation only allows for 16,384 separate
162 composite characters over the lifetime of the XEmacs process.
163 This could become a potential problem if the user
164 edited lots of different files that use composite characters.
165 Due to FSF bogosity, increasing the number of allowable
166 composite characters under Mule would decrease the number
167 of possible faces that can exist. Mule already has shrunk
168 this to 2048, and further shrinkage would become uncomfortable.
169 No such problems exist in XEmacs.
171 Composite characters could be represented as 0x80 C1 C2 C3,
172 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
173 for slightly under 2^20 (one million) composite characters
174 over the XEmacs process lifetime, and you only need to
175 increase the size of a Mule character from 19 to 21 bits.
176 Or you could use 0x80 C1 C2 C3 C4, allowing for about
177 85 million (slightly over 2^26) composite characters. */
180 /************************************************************************/
181 /* Basic Emchar functions */
182 /************************************************************************/
184 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
185 string in STR. Returns the number of bytes stored.
186 Do not call this directly. Use the macro set_charptr_emchar() instead.
190 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
198 BREAKUP_CHAR (c, charset, c1, c2);
199 lb = CHAR_LEADING_BYTE (c);
200 if (LEADING_BYTE_PRIVATE_P (lb))
201 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
203 if (EQ (charset, Vcharset_control_1))
212 /* Return the first character from a Mule-encoded string in STR,
213 assuming it's non-ASCII. Do not call this directly.
214 Use the macro charptr_emchar() instead. */
217 non_ascii_charptr_emchar (CONST Bufbyte *str)
219 Bufbyte i0 = *str, i1, i2 = 0;
222 if (i0 == LEADING_BYTE_CONTROL_1)
223 return (Emchar) (*++str - 0x20);
225 if (LEADING_BYTE_PREFIX_P (i0))
230 charset = CHARSET_BY_LEADING_BYTE (i0);
231 if (XCHARSET_DIMENSION (charset) == 2)
234 return MAKE_CHAR (charset, i1, i2);
237 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
238 Do not call this directly. Use the macro valid_char_p() instead. */
241 non_ascii_valid_char_p (Emchar ch)
245 /* Must have only lowest 19 bits set */
249 f1 = CHAR_FIELD1 (ch);
250 f2 = CHAR_FIELD2 (ch);
251 f3 = CHAR_FIELD3 (ch);
257 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
258 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
259 f2 > MAX_CHAR_FIELD2_PRIVATE)
264 if (f3 != 0x20 && f3 != 0x7F)
268 NOTE: This takes advantage of the fact that
269 FIELD2_TO_OFFICIAL_LEADING_BYTE and
270 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
272 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
273 return (XCHARSET_CHARS (charset) == 96);
279 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
280 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
281 f1 > MAX_CHAR_FIELD1_PRIVATE)
283 if (f2 < 0x20 || f3 < 0x20)
286 #ifdef ENABLE_COMPOSITE_CHARS
287 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
289 if (UNBOUNDP (Fgethash (make_int (ch),
290 Vcomposite_char_char2string_hash_table,
295 #endif /* ENABLE_COMPOSITE_CHARS */
297 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
300 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
302 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
305 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
307 return (XCHARSET_CHARS (charset) == 96);
312 /************************************************************************/
313 /* Basic string functions */
314 /************************************************************************/
316 /* Copy the character pointed to by PTR into STR, assuming it's
317 non-ASCII. Do not call this directly. Use the macro
318 charptr_copy_char() instead. */
321 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
323 Bufbyte *strptr = str;
325 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
327 /* Notice fallthrough. */
328 case 4: *++strptr = *ptr++;
329 case 3: *++strptr = *ptr++;
330 case 2: *++strptr = *ptr;
335 return strptr + 1 - str;
339 /************************************************************************/
340 /* streams of Emchars */
341 /************************************************************************/
343 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
344 The functions below are not meant to be called directly; use
345 the macros in insdel.h. */
348 Lstream_get_emchar_1 (Lstream *stream, int ch)
350 Bufbyte str[MAX_EMCHAR_LEN];
351 Bufbyte *strptr = str;
353 str[0] = (Bufbyte) ch;
354 switch (REP_BYTES_BY_FIRST_BYTE (ch))
356 /* Notice fallthrough. */
358 ch = Lstream_getc (stream);
360 *++strptr = (Bufbyte) ch;
362 ch = Lstream_getc (stream);
364 *++strptr = (Bufbyte) ch;
366 ch = Lstream_getc (stream);
368 *++strptr = (Bufbyte) ch;
373 return charptr_emchar (str);
377 Lstream_fput_emchar (Lstream *stream, Emchar ch)
379 Bufbyte str[MAX_EMCHAR_LEN];
380 Bytecount len = set_charptr_emchar (str, ch);
381 return Lstream_write (stream, str, len);
385 Lstream_funget_emchar (Lstream *stream, Emchar ch)
387 Bufbyte str[MAX_EMCHAR_LEN];
388 Bytecount len = set_charptr_emchar (str, ch);
389 Lstream_unread (stream, str, len);
393 /************************************************************************/
395 /************************************************************************/
398 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
400 struct Lisp_Charset *cs = XCHARSET (obj);
402 markobj (cs->short_name);
403 markobj (cs->long_name);
404 markobj (cs->doc_string);
405 markobj (cs->registry);
406 markobj (cs->ccl_program);
411 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
413 struct Lisp_Charset *cs = XCHARSET (obj);
417 error ("printing unreadable object #<charset %s 0x%x>",
418 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
421 write_c_string ("#<charset ", printcharfun);
422 print_internal (CHARSET_NAME (cs), printcharfun, 0);
423 write_c_string (" ", printcharfun);
424 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
425 write_c_string (" ", printcharfun);
426 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
427 write_c_string (" ", printcharfun);
428 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
429 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
430 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
431 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
432 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
434 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
435 CHARSET_COLUMNS (cs),
436 CHARSET_GRAPHIC (cs),
438 write_c_string (buf, printcharfun);
439 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
440 sprintf (buf, " 0x%x>", cs->header.uid);
441 write_c_string (buf, printcharfun);
444 static const struct lrecord_description charset_description[] = {
445 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
449 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
450 mark_charset, print_charset, 0, 0, 0, charset_description,
451 struct Lisp_Charset);
452 /* Make a new charset. */
455 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
456 unsigned char type, unsigned char columns, unsigned char graphic,
457 Bufbyte final, unsigned char direction, Lisp_Object short_name,
458 Lisp_Object long_name, Lisp_Object doc,
462 struct Lisp_Charset *cs =
463 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
464 XSETCHARSET (obj, cs);
466 CHARSET_ID (cs) = id;
467 CHARSET_NAME (cs) = name;
468 CHARSET_SHORT_NAME (cs) = short_name;
469 CHARSET_LONG_NAME (cs) = long_name;
470 CHARSET_REP_BYTES (cs) = rep_bytes;
471 CHARSET_DIRECTION (cs) = direction;
472 CHARSET_TYPE (cs) = type;
473 CHARSET_COLUMNS (cs) = columns;
474 CHARSET_GRAPHIC (cs) = graphic;
475 CHARSET_FINAL (cs) = final;
476 CHARSET_DOC_STRING (cs) = doc;
477 CHARSET_REGISTRY (cs) = reg;
478 CHARSET_CCL_PROGRAM (cs) = Qnil;
479 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
481 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
482 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
483 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
484 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
488 /* some charsets do not have final characters. This includes
489 ASCII, Control-1, Composite, and the two faux private
491 assert (NILP (charset_by_attributes[type][final][direction]));
492 charset_by_attributes[type][final][direction] = obj;
495 assert (NILP (charset_by_leading_byte[id - 128]));
496 charset_by_leading_byte[id - 128] = obj;
498 /* official leading byte */
499 rep_bytes_by_first_byte[id] = rep_bytes;
501 /* Some charsets are "faux" and don't have names or really exist at
502 all except in the leading-byte table. */
504 Fputhash (name, obj, Vcharset_hash_table);
509 get_unallocated_leading_byte (int dimension)
515 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
518 lb = next_allocated_1_byte_leading_byte++;
522 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
525 lb = next_allocated_2_byte_leading_byte++;
530 ("No more character sets free for this dimension",
531 make_int (dimension));
537 /************************************************************************/
538 /* Basic charset Lisp functions */
539 /************************************************************************/
541 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
542 Return non-nil if OBJECT is a charset.
546 return CHARSETP (object) ? Qt : Qnil;
549 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
550 Retrieve the charset of the given name.
551 If CHARSET-OR-NAME is a charset object, it is simply returned.
552 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
553 nil is returned. Otherwise the associated charset object is returned.
557 if (CHARSETP (charset_or_name))
558 return charset_or_name;
560 CHECK_SYMBOL (charset_or_name);
561 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
564 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
565 Retrieve the charset of the given name.
566 Same as `find-charset' except an error is signalled if there is no such
567 charset instead of returning nil.
571 Lisp_Object charset = Ffind_charset (name);
574 signal_simple_error ("No such charset", name);
578 /* We store the charsets in hash tables with the names as the key and the
579 actual charset object as the value. Occasionally we need to use them
580 in a list format. These routines provide us with that. */
581 struct charset_list_closure
583 Lisp_Object *charset_list;
587 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
588 void *charset_list_closure)
590 /* This function can GC */
591 struct charset_list_closure *chcl =
592 (struct charset_list_closure*) charset_list_closure;
593 Lisp_Object *charset_list = chcl->charset_list;
595 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
599 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
600 Return a list of the names of all defined charsets.
604 Lisp_Object charset_list = Qnil;
606 struct charset_list_closure charset_list_closure;
608 GCPRO1 (charset_list);
609 charset_list_closure.charset_list = &charset_list;
610 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
611 &charset_list_closure);
617 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
618 Return the name of the given charset.
622 return XCHARSET_NAME (Fget_charset (charset));
625 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
626 Define a new character set.
627 This function is for use with Mule support.
628 NAME is a symbol, the name by which the character set is normally referred.
629 DOC-STRING is a string describing the character set.
630 PROPS is a property list, describing the specific nature of the
631 character set. Recognized properties are:
633 'short-name Short version of the charset name (ex: Latin-1)
634 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
635 'registry A regular expression matching the font registry field for
637 'dimension Number of octets used to index a character in this charset.
638 Either 1 or 2. Defaults to 1.
639 'columns Number of columns used to display a character in this charset.
640 Only used in TTY mode. (Under X, the actual width of a
641 character can be derived from the font used to display the
642 characters.) If unspecified, defaults to the dimension
643 (this is almost always the correct value).
644 'chars Number of characters in each dimension (94 or 96).
645 Defaults to 94. Note that if the dimension is 2, the
646 character set thus described is 94x94 or 96x96.
647 'final Final byte of ISO 2022 escape sequence. Must be
648 supplied. Each combination of (DIMENSION, CHARS) defines a
649 separate namespace for final bytes. Note that ISO
650 2022 restricts the final byte to the range
651 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
652 dimension == 2. Note also that final bytes in the range
653 0x30 - 0x3F are reserved for user-defined (not official)
655 'graphic 0 (use left half of font on output) or 1 (use right half
656 of font on output). Defaults to 0. For example, for
657 a font whose registry is ISO8859-1, the left half
658 (octets 0x20 - 0x7F) is the `ascii' character set, while
659 the right half (octets 0xA0 - 0xFF) is the `latin-1'
660 character set. With 'graphic set to 0, the octets
661 will have their high bit cleared; with it set to 1,
662 the octets will have their high bit set.
663 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
665 'ccl-program A compiled CCL program used to convert a character in
666 this charset into an index into the font. This is in
667 addition to the 'graphic property. The CCL program
668 is passed the octets of the character, with the high
669 bit cleared and set depending upon whether the value
670 of the 'graphic property is 0 or 1.
672 (name, doc_string, props))
674 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
675 int direction = CHARSET_LEFT_TO_RIGHT;
677 Lisp_Object registry = Qnil;
679 Lisp_Object rest, keyword, value;
680 Lisp_Object ccl_program = Qnil;
681 Lisp_Object short_name = Qnil, long_name = Qnil;
684 if (!NILP (doc_string))
685 CHECK_STRING (doc_string);
687 charset = Ffind_charset (name);
689 signal_simple_error ("Cannot redefine existing charset", name);
691 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
693 if (EQ (keyword, Qshort_name))
695 CHECK_STRING (value);
699 if (EQ (keyword, Qlong_name))
701 CHECK_STRING (value);
705 else if (EQ (keyword, Qdimension))
708 dimension = XINT (value);
709 if (dimension < 1 || dimension > 2)
710 signal_simple_error ("Invalid value for 'dimension", value);
713 else if (EQ (keyword, Qchars))
716 chars = XINT (value);
717 if (chars != 94 && chars != 96)
718 signal_simple_error ("Invalid value for 'chars", value);
721 else if (EQ (keyword, Qcolumns))
724 columns = XINT (value);
725 if (columns != 1 && columns != 2)
726 signal_simple_error ("Invalid value for 'columns", value);
729 else if (EQ (keyword, Qgraphic))
732 graphic = XINT (value);
733 if (graphic < 0 || graphic > 1)
734 signal_simple_error ("Invalid value for 'graphic", value);
737 else if (EQ (keyword, Qregistry))
739 CHECK_STRING (value);
743 else if (EQ (keyword, Qdirection))
745 if (EQ (value, Ql2r))
746 direction = CHARSET_LEFT_TO_RIGHT;
747 else if (EQ (value, Qr2l))
748 direction = CHARSET_RIGHT_TO_LEFT;
750 signal_simple_error ("Invalid value for 'direction", value);
753 else if (EQ (keyword, Qfinal))
755 CHECK_CHAR_COERCE_INT (value);
756 final = XCHAR (value);
757 if (final < '0' || final > '~')
758 signal_simple_error ("Invalid value for 'final", value);
761 else if (EQ (keyword, Qccl_program))
763 CHECK_VECTOR (value);
768 signal_simple_error ("Unrecognized property", keyword);
772 error ("'final must be specified");
773 if (dimension == 2 && final > 0x5F)
775 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
779 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
781 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
783 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
784 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
786 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
788 id = get_unallocated_leading_byte (dimension);
790 if (NILP (doc_string))
791 doc_string = build_string ("");
794 registry = build_string ("");
796 if (NILP (short_name))
797 XSETSTRING (short_name, XSYMBOL (name)->name);
799 if (NILP (long_name))
800 long_name = doc_string;
804 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
805 final, direction, short_name, long_name, doc_string, registry);
806 if (!NILP (ccl_program))
807 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
811 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
813 Make a charset equivalent to CHARSET but which goes in the opposite direction.
814 NEW-NAME is the name of the new charset. Return the new charset.
818 Lisp_Object new_charset = Qnil;
819 int id, dimension, columns, graphic, final;
821 Lisp_Object registry, doc_string, short_name, long_name;
822 struct Lisp_Charset *cs;
824 charset = Fget_charset (charset);
825 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
826 signal_simple_error ("Charset already has reverse-direction charset",
829 CHECK_SYMBOL (new_name);
830 if (!NILP (Ffind_charset (new_name)))
831 signal_simple_error ("Cannot redefine existing charset", new_name);
833 cs = XCHARSET (charset);
835 type = CHARSET_TYPE (cs);
836 columns = CHARSET_COLUMNS (cs);
837 dimension = CHARSET_DIMENSION (cs);
838 id = get_unallocated_leading_byte (dimension);
840 graphic = CHARSET_GRAPHIC (cs);
841 final = CHARSET_FINAL (cs);
842 direction = CHARSET_RIGHT_TO_LEFT;
843 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
844 direction = CHARSET_LEFT_TO_RIGHT;
845 doc_string = CHARSET_DOC_STRING (cs);
846 short_name = CHARSET_SHORT_NAME (cs);
847 long_name = CHARSET_LONG_NAME (cs);
848 registry = CHARSET_REGISTRY (cs);
850 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
851 graphic, final, direction, short_name, long_name,
852 doc_string, registry);
854 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
855 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
860 /* #### Reverse direction charsets not yet implemented. */
862 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
864 Return the reverse-direction charset parallel to CHARSET, if any.
865 This is the charset with the same properties (in particular, the same
866 dimension, number of characters per dimension, and final byte) as
867 CHARSET but whose characters are displayed in the opposite direction.
871 charset = Fget_charset (charset);
872 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
876 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
877 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
878 If DIRECTION is omitted, both directions will be checked (left-to-right
879 will be returned if character sets exist for both directions).
881 (dimension, chars, final, direction))
883 int dm, ch, fi, di = -1;
885 Lisp_Object obj = Qnil;
887 CHECK_INT (dimension);
888 dm = XINT (dimension);
889 if (dm < 1 || dm > 2)
890 signal_simple_error ("Invalid value for DIMENSION", dimension);
894 if (ch != 94 && ch != 96)
895 signal_simple_error ("Invalid value for CHARS", chars);
897 CHECK_CHAR_COERCE_INT (final);
899 if (fi < '0' || fi > '~')
900 signal_simple_error ("Invalid value for FINAL", final);
902 if (EQ (direction, Ql2r))
903 di = CHARSET_LEFT_TO_RIGHT;
904 else if (EQ (direction, Qr2l))
905 di = CHARSET_RIGHT_TO_LEFT;
906 else if (!NILP (direction))
907 signal_simple_error ("Invalid value for DIRECTION", direction);
909 if (dm == 2 && fi > 0x5F)
911 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
914 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
916 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
920 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
922 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
925 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
928 return XCHARSET_NAME (obj);
932 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
933 Return short name of CHARSET.
937 return XCHARSET_SHORT_NAME (Fget_charset (charset));
940 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
941 Return long name of CHARSET.
945 return XCHARSET_LONG_NAME (Fget_charset (charset));
948 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
949 Return description of CHARSET.
953 return XCHARSET_DOC_STRING (Fget_charset (charset));
956 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
957 Return dimension of CHARSET.
961 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
964 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
965 Return property PROP of CHARSET.
966 Recognized properties are those listed in `make-charset', as well as
967 'name and 'doc-string.
971 struct Lisp_Charset *cs;
973 charset = Fget_charset (charset);
974 cs = XCHARSET (charset);
977 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
978 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
979 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
980 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
981 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
982 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
983 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
984 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
985 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
986 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
987 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
988 if (EQ (prop, Qdirection))
989 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
990 if (EQ (prop, Qreverse_direction_charset))
992 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
996 return XCHARSET_NAME (obj);
998 signal_simple_error ("Unrecognized charset property name", prop);
999 return Qnil; /* not reached */
1002 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1003 Return charset identification number of CHARSET.
1007 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1010 /* #### We need to figure out which properties we really want to
1013 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1014 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1016 (charset, ccl_program))
1018 charset = Fget_charset (charset);
1019 CHECK_VECTOR (ccl_program);
1020 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1025 invalidate_charset_font_caches (Lisp_Object charset)
1027 /* Invalidate font cache entries for charset on all devices. */
1028 Lisp_Object devcons, concons, hash_table;
1029 DEVICE_LOOP_NO_BREAK (devcons, concons)
1031 struct device *d = XDEVICE (XCAR (devcons));
1032 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1033 if (!UNBOUNDP (hash_table))
1034 Fclrhash (hash_table);
1038 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1039 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1040 Set the 'registry property of CHARSET to REGISTRY.
1042 (charset, registry))
1044 charset = Fget_charset (charset);
1045 CHECK_STRING (registry);
1046 XCHARSET_REGISTRY (charset) = registry;
1047 invalidate_charset_font_caches (charset);
1048 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1053 /************************************************************************/
1054 /* Lisp primitives for working with characters */
1055 /************************************************************************/
1057 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1058 Make a character from CHARSET and octets ARG1 and ARG2.
1059 ARG2 is required only for characters from two-dimensional charsets.
1060 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1061 character s with caron.
1063 (charset, arg1, arg2))
1065 struct Lisp_Charset *cs;
1067 int lowlim, highlim;
1069 charset = Fget_charset (charset);
1070 cs = XCHARSET (charset);
1072 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1073 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1074 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1075 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1078 /* It is useful (and safe, according to Olivier Galibert) to strip
1079 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1080 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1081 Latin 2 code of the character. */
1082 a1 = XINT (arg1) & 0x7f;
1083 if (a1 < lowlim || a1 > highlim)
1084 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1086 if (CHARSET_DIMENSION (cs) == 1)
1090 ("Charset is of dimension one; second octet must be nil", arg2);
1091 return make_char (MAKE_CHAR (charset, a1, 0));
1095 a2 = XINT (arg2) & 0x7f;
1096 if (a2 < lowlim || a2 > highlim)
1097 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1099 return make_char (MAKE_CHAR (charset, a1, a2));
1102 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1103 Return the character set of char CH.
1107 CHECK_CHAR_COERCE_INT (ch);
1109 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1110 (CHAR_LEADING_BYTE (XCHAR (ch))));
1113 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1114 Return list of charset and one or two position-codes of CHAR.
1118 /* This function can GC */
1119 struct gcpro gcpro1, gcpro2;
1120 Lisp_Object charset = Qnil;
1121 Lisp_Object rc = Qnil;
1124 GCPRO2 (charset, rc);
1125 CHECK_CHAR_COERCE_INT (character);
1127 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1129 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1131 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1135 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1143 #ifdef ENABLE_COMPOSITE_CHARS
1144 /************************************************************************/
1145 /* composite character functions */
1146 /************************************************************************/
1149 lookup_composite_char (Bufbyte *str, int len)
1151 Lisp_Object lispstr = make_string (str, len);
1152 Lisp_Object ch = Fgethash (lispstr,
1153 Vcomposite_char_string2char_hash_table,
1159 if (composite_char_row_next >= 128)
1160 signal_simple_error ("No more composite chars available", lispstr);
1161 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1162 composite_char_col_next);
1163 Fputhash (make_char (emch), lispstr,
1164 Vcomposite_char_char2string_hash_table);
1165 Fputhash (lispstr, make_char (emch),
1166 Vcomposite_char_string2char_hash_table);
1167 composite_char_col_next++;
1168 if (composite_char_col_next >= 128)
1170 composite_char_col_next = 32;
1171 composite_char_row_next++;
1180 composite_char_string (Emchar ch)
1182 Lisp_Object str = Fgethash (make_char (ch),
1183 Vcomposite_char_char2string_hash_table,
1185 assert (!UNBOUNDP (str));
1189 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1190 Convert a string into a single composite character.
1191 The character is the result of overstriking all the characters in
1196 CHECK_STRING (string);
1197 return make_char (lookup_composite_char (XSTRING_DATA (string),
1198 XSTRING_LENGTH (string)));
1201 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1202 Return a string of the characters comprising a composite character.
1210 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1211 signal_simple_error ("Must be composite char", ch);
1212 return composite_char_string (emch);
1214 #endif /* ENABLE_COMPOSITE_CHARS */
1217 /************************************************************************/
1218 /* initialization */
1219 /************************************************************************/
1222 syms_of_mule_charset (void)
1224 DEFSUBR (Fcharsetp);
1225 DEFSUBR (Ffind_charset);
1226 DEFSUBR (Fget_charset);
1227 DEFSUBR (Fcharset_list);
1228 DEFSUBR (Fcharset_name);
1229 DEFSUBR (Fmake_charset);
1230 DEFSUBR (Fmake_reverse_direction_charset);
1231 /* DEFSUBR (Freverse_direction_charset); */
1232 DEFSUBR (Fcharset_from_attributes);
1233 DEFSUBR (Fcharset_short_name);
1234 DEFSUBR (Fcharset_long_name);
1235 DEFSUBR (Fcharset_description);
1236 DEFSUBR (Fcharset_dimension);
1237 DEFSUBR (Fcharset_property);
1238 DEFSUBR (Fcharset_id);
1239 DEFSUBR (Fset_charset_ccl_program);
1240 DEFSUBR (Fset_charset_registry);
1242 DEFSUBR (Fmake_char);
1243 DEFSUBR (Fchar_charset);
1244 DEFSUBR (Fsplit_char);
1246 #ifdef ENABLE_COMPOSITE_CHARS
1247 DEFSUBR (Fmake_composite_char);
1248 DEFSUBR (Fcomposite_char_string);
1251 defsymbol (&Qcharsetp, "charsetp");
1252 defsymbol (&Qregistry, "registry");
1253 defsymbol (&Qfinal, "final");
1254 defsymbol (&Qgraphic, "graphic");
1255 defsymbol (&Qdirection, "direction");
1256 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1257 defsymbol (&Qccl_program, "ccl-program");
1258 defsymbol (&Qshort_name, "short-name");
1259 defsymbol (&Qlong_name, "long-name");
1261 defsymbol (&Ql2r, "l2r");
1262 defsymbol (&Qr2l, "r2l");
1264 /* Charsets, compatible with FSF 20.3
1265 Naming convention is Script-Charset[-Edition] */
1266 defsymbol (&Qascii, "ascii");
1267 defsymbol (&Qcontrol_1, "control-1");
1268 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1269 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1270 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1271 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1272 defsymbol (&Qthai_tis620, "thai-tis620");
1273 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1274 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1275 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1276 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1277 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1278 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1279 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1280 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1281 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1282 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1283 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1284 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1285 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1286 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1287 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1288 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1290 defsymbol (&Qcomposite, "composite");
1294 vars_of_mule_charset (void)
1298 /* Table of charsets indexed by leading byte. */
1299 for (i = 0; i < countof (charset_by_leading_byte); i++)
1300 charset_by_leading_byte[i] = Qnil;
1302 /* Table of charsets indexed by type/final-byte/direction. */
1303 for (i = 0; i < countof (charset_by_attributes); i++)
1304 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1305 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1306 charset_by_attributes[i][j][k] = Qnil;
1308 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1309 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1313 complex_vars_of_mule_charset (void)
1315 staticpro (&Vcharset_hash_table);
1316 Vcharset_hash_table =
1317 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1319 /* Predefined character sets. We store them into variables for
1323 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1324 CHARSET_TYPE_94, 1, 0, 'B',
1325 CHARSET_LEFT_TO_RIGHT,
1326 build_string ("ASCII"),
1327 build_string ("ASCII)"),
1328 build_string ("ASCII (ISO646 IRV)"),
1329 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1330 Vcharset_control_1 =
1331 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1332 CHARSET_TYPE_94, 1, 1, 0,
1333 CHARSET_LEFT_TO_RIGHT,
1334 build_string ("C1"),
1335 build_string ("Control characters"),
1336 build_string ("Control characters 128-191"),
1338 Vcharset_latin_iso8859_1 =
1339 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1340 CHARSET_TYPE_96, 1, 1, 'A',
1341 CHARSET_LEFT_TO_RIGHT,
1342 build_string ("Latin-1"),
1343 build_string ("ISO8859-1 (Latin-1)"),
1344 build_string ("ISO8859-1 (Latin-1)"),
1345 build_string ("iso8859-1"));
1346 Vcharset_latin_iso8859_2 =
1347 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1348 CHARSET_TYPE_96, 1, 1, 'B',
1349 CHARSET_LEFT_TO_RIGHT,
1350 build_string ("Latin-2"),
1351 build_string ("ISO8859-2 (Latin-2)"),
1352 build_string ("ISO8859-2 (Latin-2)"),
1353 build_string ("iso8859-2"));
1354 Vcharset_latin_iso8859_3 =
1355 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1356 CHARSET_TYPE_96, 1, 1, 'C',
1357 CHARSET_LEFT_TO_RIGHT,
1358 build_string ("Latin-3"),
1359 build_string ("ISO8859-3 (Latin-3)"),
1360 build_string ("ISO8859-3 (Latin-3)"),
1361 build_string ("iso8859-3"));
1362 Vcharset_latin_iso8859_4 =
1363 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1364 CHARSET_TYPE_96, 1, 1, 'D',
1365 CHARSET_LEFT_TO_RIGHT,
1366 build_string ("Latin-4"),
1367 build_string ("ISO8859-4 (Latin-4)"),
1368 build_string ("ISO8859-4 (Latin-4)"),
1369 build_string ("iso8859-4"));
1370 Vcharset_thai_tis620 =
1371 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1372 CHARSET_TYPE_96, 1, 1, 'T',
1373 CHARSET_LEFT_TO_RIGHT,
1374 build_string ("TIS620"),
1375 build_string ("TIS620 (Thai)"),
1376 build_string ("TIS620.2529 (Thai)"),
1377 build_string ("tis620"));
1378 Vcharset_greek_iso8859_7 =
1379 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1380 CHARSET_TYPE_96, 1, 1, 'F',
1381 CHARSET_LEFT_TO_RIGHT,
1382 build_string ("ISO8859-7"),
1383 build_string ("ISO8859-7 (Greek)"),
1384 build_string ("ISO8859-7 (Greek)"),
1385 build_string ("iso8859-7"));
1386 Vcharset_arabic_iso8859_6 =
1387 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1388 CHARSET_TYPE_96, 1, 1, 'G',
1389 CHARSET_RIGHT_TO_LEFT,
1390 build_string ("ISO8859-6"),
1391 build_string ("ISO8859-6 (Arabic)"),
1392 build_string ("ISO8859-6 (Arabic)"),
1393 build_string ("iso8859-6"));
1394 Vcharset_hebrew_iso8859_8 =
1395 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1396 CHARSET_TYPE_96, 1, 1, 'H',
1397 CHARSET_RIGHT_TO_LEFT,
1398 build_string ("ISO8859-8"),
1399 build_string ("ISO8859-8 (Hebrew)"),
1400 build_string ("ISO8859-8 (Hebrew)"),
1401 build_string ("iso8859-8"));
1402 Vcharset_katakana_jisx0201 =
1403 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1404 CHARSET_TYPE_94, 1, 1, 'I',
1405 CHARSET_LEFT_TO_RIGHT,
1406 build_string ("JISX0201 Kana"),
1407 build_string ("JISX0201.1976 (Japanese Kana)"),
1408 build_string ("JISX0201.1976 Japanese Kana"),
1409 build_string ("jisx0201.1976"));
1410 Vcharset_latin_jisx0201 =
1411 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1412 CHARSET_TYPE_94, 1, 0, 'J',
1413 CHARSET_LEFT_TO_RIGHT,
1414 build_string ("JISX0201 Roman"),
1415 build_string ("JISX0201.1976 (Japanese Roman)"),
1416 build_string ("JISX0201.1976 Japanese Roman"),
1417 build_string ("jisx0201.1976"));
1418 Vcharset_cyrillic_iso8859_5 =
1419 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1420 CHARSET_TYPE_96, 1, 1, 'L',
1421 CHARSET_LEFT_TO_RIGHT,
1422 build_string ("ISO8859-5"),
1423 build_string ("ISO8859-5 (Cyrillic)"),
1424 build_string ("ISO8859-5 (Cyrillic)"),
1425 build_string ("iso8859-5"));
1426 Vcharset_latin_iso8859_9 =
1427 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1428 CHARSET_TYPE_96, 1, 1, 'M',
1429 CHARSET_LEFT_TO_RIGHT,
1430 build_string ("Latin-5"),
1431 build_string ("ISO8859-9 (Latin-5)"),
1432 build_string ("ISO8859-9 (Latin-5)"),
1433 build_string ("iso8859-9"));
1434 Vcharset_japanese_jisx0208_1978 =
1435 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1436 CHARSET_TYPE_94X94, 2, 0, '@',
1437 CHARSET_LEFT_TO_RIGHT,
1438 build_string ("JISX0208.1978"),
1439 build_string ("JISX0208.1978 (Japanese)"),
1441 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1442 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1443 Vcharset_chinese_gb2312 =
1444 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1445 CHARSET_TYPE_94X94, 2, 0, 'A',
1446 CHARSET_LEFT_TO_RIGHT,
1447 build_string ("GB2312"),
1448 build_string ("GB2312)"),
1449 build_string ("GB2312 Chinese simplified"),
1450 build_string ("gb2312"));
1451 Vcharset_japanese_jisx0208 =
1452 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1453 CHARSET_TYPE_94X94, 2, 0, 'B',
1454 CHARSET_LEFT_TO_RIGHT,
1455 build_string ("JISX0208"),
1456 build_string ("JISX0208.1983/1990 (Japanese)"),
1457 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1458 build_string ("jisx0208.19\\(83\\|90\\)"));
1459 Vcharset_korean_ksc5601 =
1460 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1461 CHARSET_TYPE_94X94, 2, 0, 'C',
1462 CHARSET_LEFT_TO_RIGHT,
1463 build_string ("KSC5601"),
1464 build_string ("KSC5601 (Korean"),
1465 build_string ("KSC5601 Korean Hangul and Hanja"),
1466 build_string ("ksc5601"));
1467 Vcharset_japanese_jisx0212 =
1468 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1469 CHARSET_TYPE_94X94, 2, 0, 'D',
1470 CHARSET_LEFT_TO_RIGHT,
1471 build_string ("JISX0212"),
1472 build_string ("JISX0212 (Japanese)"),
1473 build_string ("JISX0212 Japanese Supplement"),
1474 build_string ("jisx0212"));
1476 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1477 Vcharset_chinese_cns11643_1 =
1478 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1479 CHARSET_TYPE_94X94, 2, 0, 'G',
1480 CHARSET_LEFT_TO_RIGHT,
1481 build_string ("CNS11643-1"),
1482 build_string ("CNS11643-1 (Chinese traditional)"),
1484 ("CNS 11643 Plane 1 Chinese traditional"),
1485 build_string (CHINESE_CNS_PLANE_RE("1")));
1486 Vcharset_chinese_cns11643_2 =
1487 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1488 CHARSET_TYPE_94X94, 2, 0, 'H',
1489 CHARSET_LEFT_TO_RIGHT,
1490 build_string ("CNS11643-2"),
1491 build_string ("CNS11643-2 (Chinese traditional)"),
1493 ("CNS 11643 Plane 2 Chinese traditional"),
1494 build_string (CHINESE_CNS_PLANE_RE("2")));
1495 Vcharset_chinese_big5_1 =
1496 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1497 CHARSET_TYPE_94X94, 2, 0, '0',
1498 CHARSET_LEFT_TO_RIGHT,
1499 build_string ("Big5"),
1500 build_string ("Big5 (Level-1)"),
1502 ("Big5 Level-1 Chinese traditional"),
1503 build_string ("big5"));
1504 Vcharset_chinese_big5_2 =
1505 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1506 CHARSET_TYPE_94X94, 2, 0, '1',
1507 CHARSET_LEFT_TO_RIGHT,
1508 build_string ("Big5"),
1509 build_string ("Big5 (Level-2)"),
1511 ("Big5 Level-2 Chinese traditional"),
1512 build_string ("big5"));
1515 #ifdef ENABLE_COMPOSITE_CHARS
1516 /* #### For simplicity, we put composite chars into a 96x96 charset.
1517 This is going to lead to problems because you can run out of
1518 room, esp. as we don't yet recycle numbers. */
1519 Vcharset_composite =
1520 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1521 CHARSET_TYPE_96X96, 2, 0, 0,
1522 CHARSET_LEFT_TO_RIGHT,
1523 build_string ("Composite"),
1524 build_string ("Composite characters"),
1525 build_string ("Composite characters"),
1528 composite_char_row_next = 32;
1529 composite_char_col_next = 32;
1531 Vcomposite_char_string2char_hash_table =
1532 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1533 Vcomposite_char_char2string_hash_table =
1534 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1535 staticpro (&Vcomposite_char_string2char_hash_table);
1536 staticpro (&Vcomposite_char_char2string_hash_table);
1537 #endif /* ENABLE_COMPOSITE_CHARS */