1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 /* Table of charsets indexed by leading byte. */
78 Lisp_Object charset_by_leading_byte[128];
80 /* Table of charsets indexed by type/final-byte/direction. */
81 Lisp_Object charset_by_attributes[4][128][2];
83 /* Table of number of bytes in the string representation of a character
84 indexed by the first byte of that representation.
86 rep_bytes_by_first_byte(c) is more efficient than the equivalent
87 canonical computation:
89 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
91 Bytecount rep_bytes_by_first_byte[0xA0] =
92 { /* 0x00 - 0x7f are for straight ASCII */
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 /* 0x80 - 0x8f are for Dimension-1 official charsets */
102 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 /* 0x90 - 0x9d are for Dimension-2 official charsets */
104 /* 0x9e is for Dimension-1 private charsets */
105 /* 0x9f is for Dimension-2 private charsets */
106 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
109 Lisp_Object Qcharsetp;
111 /* Qdoc_string, Qdimension, Qchars defined in general.c */
112 Lisp_Object Qregistry, Qfinal, Qgraphic;
113 Lisp_Object Qdirection;
114 Lisp_Object Qreverse_direction_charset;
115 Lisp_Object Qccl_program;
116 Lisp_Object Qleading_byte;
117 Lisp_Object Qshort_name, Qlong_name;
133 Qjapanese_jisx0208_1978,
144 Lisp_Object Ql2r, Qr2l;
146 Lisp_Object Vcharset_hash_table;
148 static Bufbyte next_allocated_1_byte_leading_byte;
149 static Bufbyte next_allocated_2_byte_leading_byte;
151 /* Composite characters are characters constructed by overstriking two
152 or more regular characters.
154 1) The old Mule implementation involves storing composite characters
155 in a buffer as a tag followed by all of the actual characters
156 used to make up the composite character. I think this is a bad
157 idea; it greatly complicates code that wants to handle strings
158 one character at a time because it has to deal with the possibility
159 of great big ungainly characters. It's much more reasonable to
160 simply store an index into a table of composite characters.
162 2) The current implementation only allows for 16,384 separate
163 composite characters over the lifetime of the XEmacs process.
164 This could become a potential problem if the user
165 edited lots of different files that use composite characters.
166 Due to FSF bogosity, increasing the number of allowable
167 composite characters under Mule would decrease the number
168 of possible faces that can exist. Mule already has shrunk
169 this to 2048, and further shrinkage would become uncomfortable.
170 No such problems exist in XEmacs.
172 Composite characters could be represented as 0x80 C1 C2 C3,
173 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
174 for slightly under 2^20 (one million) composite characters
175 over the XEmacs process lifetime, and you only need to
176 increase the size of a Mule character from 19 to 21 bits.
177 Or you could use 0x80 C1 C2 C3 C4, allowing for about
178 85 million (slightly over 2^26) composite characters. */
181 /************************************************************************/
182 /* Basic Emchar functions */
183 /************************************************************************/
185 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
186 string in STR. Returns the number of bytes stored.
187 Do not call this directly. Use the macro set_charptr_emchar() instead.
191 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
199 BREAKUP_CHAR (c, charset, c1, c2);
200 lb = CHAR_LEADING_BYTE (c);
201 if (LEADING_BYTE_PRIVATE_P (lb))
202 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
204 if (EQ (charset, Vcharset_control_1))
213 /* Return the first character from a Mule-encoded string in STR,
214 assuming it's non-ASCII. Do not call this directly.
215 Use the macro charptr_emchar() instead. */
218 non_ascii_charptr_emchar (CONST Bufbyte *str)
220 Bufbyte i0 = *str, i1, i2 = 0;
223 if (i0 == LEADING_BYTE_CONTROL_1)
224 return (Emchar) (*++str - 0x20);
226 if (LEADING_BYTE_PREFIX_P (i0))
231 charset = CHARSET_BY_LEADING_BYTE (i0);
232 if (XCHARSET_DIMENSION (charset) == 2)
235 return MAKE_CHAR (charset, i1, i2);
238 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
239 Do not call this directly. Use the macro valid_char_p() instead. */
242 non_ascii_valid_char_p (Emchar ch)
246 /* Must have only lowest 19 bits set */
250 f1 = CHAR_FIELD1 (ch);
251 f2 = CHAR_FIELD2 (ch);
252 f3 = CHAR_FIELD3 (ch);
258 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
259 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
260 f2 > MAX_CHAR_FIELD2_PRIVATE)
265 if (f3 != 0x20 && f3 != 0x7F)
269 NOTE: This takes advantage of the fact that
270 FIELD2_TO_OFFICIAL_LEADING_BYTE and
271 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
273 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
274 return (XCHARSET_CHARS (charset) == 96);
280 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
281 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
282 f1 > MAX_CHAR_FIELD1_PRIVATE)
284 if (f2 < 0x20 || f3 < 0x20)
287 #ifdef ENABLE_COMPOSITE_CHARS
288 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
290 if (UNBOUNDP (Fgethash (make_int (ch),
291 Vcomposite_char_char2string_hash_table,
296 #endif /* ENABLE_COMPOSITE_CHARS */
298 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
301 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
303 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
306 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
308 return (XCHARSET_CHARS (charset) == 96);
313 /************************************************************************/
314 /* Basic string functions */
315 /************************************************************************/
317 /* Copy the character pointed to by PTR into STR, assuming it's
318 non-ASCII. Do not call this directly. Use the macro
319 charptr_copy_char() instead. */
322 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
324 Bufbyte *strptr = str;
326 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
328 /* Notice fallthrough. */
329 case 4: *++strptr = *ptr++;
330 case 3: *++strptr = *ptr++;
331 case 2: *++strptr = *ptr;
336 return strptr + 1 - str;
340 /************************************************************************/
341 /* streams of Emchars */
342 /************************************************************************/
344 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
345 The functions below are not meant to be called directly; use
346 the macros in insdel.h. */
349 Lstream_get_emchar_1 (Lstream *stream, int ch)
351 Bufbyte str[MAX_EMCHAR_LEN];
352 Bufbyte *strptr = str;
354 str[0] = (Bufbyte) ch;
355 switch (REP_BYTES_BY_FIRST_BYTE (ch))
357 /* Notice fallthrough. */
359 ch = Lstream_getc (stream);
361 *++strptr = (Bufbyte) ch;
363 ch = Lstream_getc (stream);
365 *++strptr = (Bufbyte) ch;
367 ch = Lstream_getc (stream);
369 *++strptr = (Bufbyte) ch;
374 return charptr_emchar (str);
378 Lstream_fput_emchar (Lstream *stream, Emchar ch)
380 Bufbyte str[MAX_EMCHAR_LEN];
381 Bytecount len = set_charptr_emchar (str, ch);
382 return Lstream_write (stream, str, len);
386 Lstream_funget_emchar (Lstream *stream, Emchar ch)
388 Bufbyte str[MAX_EMCHAR_LEN];
389 Bytecount len = set_charptr_emchar (str, ch);
390 Lstream_unread (stream, str, len);
394 /************************************************************************/
396 /************************************************************************/
399 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
401 struct Lisp_Charset *cs = XCHARSET (obj);
403 markobj (cs->short_name);
404 markobj (cs->long_name);
405 markobj (cs->doc_string);
406 markobj (cs->registry);
407 markobj (cs->ccl_program);
412 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
414 struct Lisp_Charset *cs = XCHARSET (obj);
418 error ("printing unreadable object #<charset %s 0x%x>",
419 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
422 write_c_string ("#<charset ", printcharfun);
423 print_internal (CHARSET_NAME (cs), printcharfun, 0);
424 write_c_string (" ", printcharfun);
425 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
426 write_c_string (" ", printcharfun);
427 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
428 write_c_string (" ", printcharfun);
429 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
430 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
431 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
432 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
433 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
435 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
436 CHARSET_COLUMNS (cs),
437 CHARSET_GRAPHIC (cs),
439 write_c_string (buf, printcharfun);
440 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
441 sprintf (buf, " 0x%x>", cs->header.uid);
442 write_c_string (buf, printcharfun);
445 static const struct lrecord_description charset_description[] = {
446 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
450 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
451 mark_charset, print_charset, 0, 0, 0, charset_description,
452 struct Lisp_Charset);
453 /* Make a new charset. */
456 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
457 unsigned char type, unsigned char columns, unsigned char graphic,
458 Bufbyte final, unsigned char direction, Lisp_Object short_name,
459 Lisp_Object long_name, Lisp_Object doc,
463 struct Lisp_Charset *cs =
464 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
465 XSETCHARSET (obj, cs);
467 CHARSET_ID (cs) = id;
468 CHARSET_NAME (cs) = name;
469 CHARSET_SHORT_NAME (cs) = short_name;
470 CHARSET_LONG_NAME (cs) = long_name;
471 CHARSET_REP_BYTES (cs) = rep_bytes;
472 CHARSET_DIRECTION (cs) = direction;
473 CHARSET_TYPE (cs) = type;
474 CHARSET_COLUMNS (cs) = columns;
475 CHARSET_GRAPHIC (cs) = graphic;
476 CHARSET_FINAL (cs) = final;
477 CHARSET_DOC_STRING (cs) = doc;
478 CHARSET_REGISTRY (cs) = reg;
479 CHARSET_CCL_PROGRAM (cs) = Qnil;
480 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
482 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
483 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
484 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
485 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
489 /* some charsets do not have final characters. This includes
490 ASCII, Control-1, Composite, and the two faux private
492 assert (NILP (charset_by_attributes[type][final][direction]));
493 charset_by_attributes[type][final][direction] = obj;
496 assert (NILP (charset_by_leading_byte[id - 128]));
497 charset_by_leading_byte[id - 128] = obj;
499 /* official leading byte */
500 rep_bytes_by_first_byte[id] = rep_bytes;
502 /* Some charsets are "faux" and don't have names or really exist at
503 all except in the leading-byte table. */
505 Fputhash (name, obj, Vcharset_hash_table);
510 get_unallocated_leading_byte (int dimension)
516 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
519 lb = next_allocated_1_byte_leading_byte++;
523 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
526 lb = next_allocated_2_byte_leading_byte++;
531 ("No more character sets free for this dimension",
532 make_int (dimension));
538 /************************************************************************/
539 /* Basic charset Lisp functions */
540 /************************************************************************/
542 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
543 Return non-nil if OBJECT is a charset.
547 return CHARSETP (object) ? Qt : Qnil;
550 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
551 Retrieve the charset of the given name.
552 If CHARSET-OR-NAME is a charset object, it is simply returned.
553 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
554 nil is returned. Otherwise the associated charset object is returned.
558 if (CHARSETP (charset_or_name))
559 return charset_or_name;
561 CHECK_SYMBOL (charset_or_name);
562 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
565 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
566 Retrieve the charset of the given name.
567 Same as `find-charset' except an error is signalled if there is no such
568 charset instead of returning nil.
572 Lisp_Object charset = Ffind_charset (name);
575 signal_simple_error ("No such charset", name);
579 /* We store the charsets in hash tables with the names as the key and the
580 actual charset object as the value. Occasionally we need to use them
581 in a list format. These routines provide us with that. */
582 struct charset_list_closure
584 Lisp_Object *charset_list;
588 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
589 void *charset_list_closure)
591 /* This function can GC */
592 struct charset_list_closure *chcl =
593 (struct charset_list_closure*) charset_list_closure;
594 Lisp_Object *charset_list = chcl->charset_list;
596 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
600 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
601 Return a list of the names of all defined charsets.
605 Lisp_Object charset_list = Qnil;
607 struct charset_list_closure charset_list_closure;
609 GCPRO1 (charset_list);
610 charset_list_closure.charset_list = &charset_list;
611 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
612 &charset_list_closure);
618 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
619 Return the name of the given charset.
623 return XCHARSET_NAME (Fget_charset (charset));
626 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
627 Define a new character set.
628 This function is for use with Mule support.
629 NAME is a symbol, the name by which the character set is normally referred.
630 DOC-STRING is a string describing the character set.
631 PROPS is a property list, describing the specific nature of the
632 character set. Recognized properties are:
634 'short-name Short version of the charset name (ex: Latin-1)
635 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
636 'registry A regular expression matching the font registry field for
638 'dimension Number of octets used to index a character in this charset.
639 Either 1 or 2. Defaults to 1.
640 'columns Number of columns used to display a character in this charset.
641 Only used in TTY mode. (Under X, the actual width of a
642 character can be derived from the font used to display the
643 characters.) If unspecified, defaults to the dimension
644 (this is almost always the correct value).
645 'chars Number of characters in each dimension (94 or 96).
646 Defaults to 94. Note that if the dimension is 2, the
647 character set thus described is 94x94 or 96x96.
648 'final Final byte of ISO 2022 escape sequence. Must be
649 supplied. Each combination of (DIMENSION, CHARS) defines a
650 separate namespace for final bytes. Note that ISO
651 2022 restricts the final byte to the range
652 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
653 dimension == 2. Note also that final bytes in the range
654 0x30 - 0x3F are reserved for user-defined (not official)
656 'graphic 0 (use left half of font on output) or 1 (use right half
657 of font on output). Defaults to 0. For example, for
658 a font whose registry is ISO8859-1, the left half
659 (octets 0x20 - 0x7F) is the `ascii' character set, while
660 the right half (octets 0xA0 - 0xFF) is the `latin-1'
661 character set. With 'graphic set to 0, the octets
662 will have their high bit cleared; with it set to 1,
663 the octets will have their high bit set.
664 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
666 'ccl-program A compiled CCL program used to convert a character in
667 this charset into an index into the font. This is in
668 addition to the 'graphic property. The CCL program
669 is passed the octets of the character, with the high
670 bit cleared and set depending upon whether the value
671 of the 'graphic property is 0 or 1.
673 (name, doc_string, props))
675 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
676 int direction = CHARSET_LEFT_TO_RIGHT;
678 Lisp_Object registry = Qnil;
680 Lisp_Object rest, keyword, value;
681 Lisp_Object ccl_program = Qnil;
682 Lisp_Object short_name = Qnil, long_name = Qnil;
685 if (!NILP (doc_string))
686 CHECK_STRING (doc_string);
688 charset = Ffind_charset (name);
690 signal_simple_error ("Cannot redefine existing charset", name);
692 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
694 if (EQ (keyword, Qshort_name))
696 CHECK_STRING (value);
700 if (EQ (keyword, Qlong_name))
702 CHECK_STRING (value);
706 else if (EQ (keyword, Qdimension))
709 dimension = XINT (value);
710 if (dimension < 1 || dimension > 2)
711 signal_simple_error ("Invalid value for 'dimension", value);
714 else if (EQ (keyword, Qchars))
717 chars = XINT (value);
718 if (chars != 94 && chars != 96)
719 signal_simple_error ("Invalid value for 'chars", value);
722 else if (EQ (keyword, Qcolumns))
725 columns = XINT (value);
726 if (columns != 1 && columns != 2)
727 signal_simple_error ("Invalid value for 'columns", value);
730 else if (EQ (keyword, Qgraphic))
733 graphic = XINT (value);
734 if (graphic < 0 || graphic > 1)
735 signal_simple_error ("Invalid value for 'graphic", value);
738 else if (EQ (keyword, Qregistry))
740 CHECK_STRING (value);
744 else if (EQ (keyword, Qdirection))
746 if (EQ (value, Ql2r))
747 direction = CHARSET_LEFT_TO_RIGHT;
748 else if (EQ (value, Qr2l))
749 direction = CHARSET_RIGHT_TO_LEFT;
751 signal_simple_error ("Invalid value for 'direction", value);
754 else if (EQ (keyword, Qfinal))
756 CHECK_CHAR_COERCE_INT (value);
757 final = XCHAR (value);
758 if (final < '0' || final > '~')
759 signal_simple_error ("Invalid value for 'final", value);
762 else if (EQ (keyword, Qccl_program))
764 CHECK_VECTOR (value);
769 signal_simple_error ("Unrecognized property", keyword);
773 error ("'final must be specified");
774 if (dimension == 2 && final > 0x5F)
776 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
780 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
782 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
784 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
785 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
787 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
789 id = get_unallocated_leading_byte (dimension);
791 if (NILP (doc_string))
792 doc_string = build_string ("");
795 registry = build_string ("");
797 if (NILP (short_name))
798 XSETSTRING (short_name, XSYMBOL (name)->name);
800 if (NILP (long_name))
801 long_name = doc_string;
805 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
806 final, direction, short_name, long_name, doc_string, registry);
807 if (!NILP (ccl_program))
808 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
812 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
814 Make a charset equivalent to CHARSET but which goes in the opposite direction.
815 NEW-NAME is the name of the new charset. Return the new charset.
819 Lisp_Object new_charset = Qnil;
820 int id, dimension, columns, graphic, final;
822 Lisp_Object registry, doc_string, short_name, long_name;
823 struct Lisp_Charset *cs;
825 charset = Fget_charset (charset);
826 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
827 signal_simple_error ("Charset already has reverse-direction charset",
830 CHECK_SYMBOL (new_name);
831 if (!NILP (Ffind_charset (new_name)))
832 signal_simple_error ("Cannot redefine existing charset", new_name);
834 cs = XCHARSET (charset);
836 type = CHARSET_TYPE (cs);
837 columns = CHARSET_COLUMNS (cs);
838 dimension = CHARSET_DIMENSION (cs);
839 id = get_unallocated_leading_byte (dimension);
841 graphic = CHARSET_GRAPHIC (cs);
842 final = CHARSET_FINAL (cs);
843 direction = CHARSET_RIGHT_TO_LEFT;
844 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
845 direction = CHARSET_LEFT_TO_RIGHT;
846 doc_string = CHARSET_DOC_STRING (cs);
847 short_name = CHARSET_SHORT_NAME (cs);
848 long_name = CHARSET_LONG_NAME (cs);
849 registry = CHARSET_REGISTRY (cs);
851 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
852 graphic, final, direction, short_name, long_name,
853 doc_string, registry);
855 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
856 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
861 /* #### Reverse direction charsets not yet implemented. */
863 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
865 Return the reverse-direction charset parallel to CHARSET, if any.
866 This is the charset with the same properties (in particular, the same
867 dimension, number of characters per dimension, and final byte) as
868 CHARSET but whose characters are displayed in the opposite direction.
872 charset = Fget_charset (charset);
873 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
877 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
878 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
879 If DIRECTION is omitted, both directions will be checked (left-to-right
880 will be returned if character sets exist for both directions).
882 (dimension, chars, final, direction))
884 int dm, ch, fi, di = -1;
886 Lisp_Object obj = Qnil;
888 CHECK_INT (dimension);
889 dm = XINT (dimension);
890 if (dm < 1 || dm > 2)
891 signal_simple_error ("Invalid value for DIMENSION", dimension);
895 if (ch != 94 && ch != 96)
896 signal_simple_error ("Invalid value for CHARS", chars);
898 CHECK_CHAR_COERCE_INT (final);
900 if (fi < '0' || fi > '~')
901 signal_simple_error ("Invalid value for FINAL", final);
903 if (EQ (direction, Ql2r))
904 di = CHARSET_LEFT_TO_RIGHT;
905 else if (EQ (direction, Qr2l))
906 di = CHARSET_RIGHT_TO_LEFT;
907 else if (!NILP (direction))
908 signal_simple_error ("Invalid value for DIRECTION", direction);
910 if (dm == 2 && fi > 0x5F)
912 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
915 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
917 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
921 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
923 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
926 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
929 return XCHARSET_NAME (obj);
933 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
934 Return short name of CHARSET.
938 return XCHARSET_SHORT_NAME (Fget_charset (charset));
941 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
942 Return long name of CHARSET.
946 return XCHARSET_LONG_NAME (Fget_charset (charset));
949 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
950 Return description of CHARSET.
954 return XCHARSET_DOC_STRING (Fget_charset (charset));
957 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
958 Return dimension of CHARSET.
962 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
965 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
966 Return property PROP of CHARSET.
967 Recognized properties are those listed in `make-charset', as well as
968 'name and 'doc-string.
972 struct Lisp_Charset *cs;
974 charset = Fget_charset (charset);
975 cs = XCHARSET (charset);
978 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
979 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
980 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
981 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
982 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
983 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
984 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
985 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
986 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
987 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
988 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
989 if (EQ (prop, Qdirection))
990 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
991 if (EQ (prop, Qreverse_direction_charset))
993 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
997 return XCHARSET_NAME (obj);
999 signal_simple_error ("Unrecognized charset property name", prop);
1000 return Qnil; /* not reached */
1003 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1004 Return charset identification number of CHARSET.
1008 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1011 /* #### We need to figure out which properties we really want to
1014 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1015 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1017 (charset, ccl_program))
1019 charset = Fget_charset (charset);
1020 CHECK_VECTOR (ccl_program);
1021 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1026 invalidate_charset_font_caches (Lisp_Object charset)
1028 /* Invalidate font cache entries for charset on all devices. */
1029 Lisp_Object devcons, concons, hash_table;
1030 DEVICE_LOOP_NO_BREAK (devcons, concons)
1032 struct device *d = XDEVICE (XCAR (devcons));
1033 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1034 if (!UNBOUNDP (hash_table))
1035 Fclrhash (hash_table);
1039 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1040 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1041 Set the 'registry property of CHARSET to REGISTRY.
1043 (charset, registry))
1045 charset = Fget_charset (charset);
1046 CHECK_STRING (registry);
1047 XCHARSET_REGISTRY (charset) = registry;
1048 invalidate_charset_font_caches (charset);
1049 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1054 /************************************************************************/
1055 /* Lisp primitives for working with characters */
1056 /************************************************************************/
1058 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1059 Make a character from CHARSET and octets ARG1 and ARG2.
1060 ARG2 is required only for characters from two-dimensional charsets.
1061 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1062 character s with caron.
1064 (charset, arg1, arg2))
1066 struct Lisp_Charset *cs;
1068 int lowlim, highlim;
1070 charset = Fget_charset (charset);
1071 cs = XCHARSET (charset);
1073 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1074 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1075 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1076 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1079 /* It is useful (and safe, according to Olivier Galibert) to strip
1080 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1081 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1082 Latin 2 code of the character. */
1083 a1 = XINT (arg1) & 0x7f;
1084 if (a1 < lowlim || a1 > highlim)
1085 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1087 if (CHARSET_DIMENSION (cs) == 1)
1091 ("Charset is of dimension one; second octet must be nil", arg2);
1092 return make_char (MAKE_CHAR (charset, a1, 0));
1096 a2 = XINT (arg2) & 0x7f;
1097 if (a2 < lowlim || a2 > highlim)
1098 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1100 return make_char (MAKE_CHAR (charset, a1, a2));
1103 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1104 Return the character set of char CH.
1108 CHECK_CHAR_COERCE_INT (ch);
1110 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1111 (CHAR_LEADING_BYTE (XCHAR (ch))));
1114 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1115 Return list of charset and one or two position-codes of CHAR.
1119 /* This function can GC */
1120 struct gcpro gcpro1, gcpro2;
1121 Lisp_Object charset = Qnil;
1122 Lisp_Object rc = Qnil;
1125 GCPRO2 (charset, rc);
1126 CHECK_CHAR_COERCE_INT (character);
1128 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1130 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1132 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1136 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1144 #ifdef ENABLE_COMPOSITE_CHARS
1145 /************************************************************************/
1146 /* composite character functions */
1147 /************************************************************************/
1150 lookup_composite_char (Bufbyte *str, int len)
1152 Lisp_Object lispstr = make_string (str, len);
1153 Lisp_Object ch = Fgethash (lispstr,
1154 Vcomposite_char_string2char_hash_table,
1160 if (composite_char_row_next >= 128)
1161 signal_simple_error ("No more composite chars available", lispstr);
1162 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1163 composite_char_col_next);
1164 Fputhash (make_char (emch), lispstr,
1165 Vcomposite_char_char2string_hash_table);
1166 Fputhash (lispstr, make_char (emch),
1167 Vcomposite_char_string2char_hash_table);
1168 composite_char_col_next++;
1169 if (composite_char_col_next >= 128)
1171 composite_char_col_next = 32;
1172 composite_char_row_next++;
1181 composite_char_string (Emchar ch)
1183 Lisp_Object str = Fgethash (make_char (ch),
1184 Vcomposite_char_char2string_hash_table,
1186 assert (!UNBOUNDP (str));
1190 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1191 Convert a string into a single composite character.
1192 The character is the result of overstriking all the characters in
1197 CHECK_STRING (string);
1198 return make_char (lookup_composite_char (XSTRING_DATA (string),
1199 XSTRING_LENGTH (string)));
1202 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1203 Return a string of the characters comprising a composite character.
1211 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1212 signal_simple_error ("Must be composite char", ch);
1213 return composite_char_string (emch);
1215 #endif /* ENABLE_COMPOSITE_CHARS */
1218 /************************************************************************/
1219 /* initialization */
1220 /************************************************************************/
1223 syms_of_mule_charset (void)
1225 DEFSUBR (Fcharsetp);
1226 DEFSUBR (Ffind_charset);
1227 DEFSUBR (Fget_charset);
1228 DEFSUBR (Fcharset_list);
1229 DEFSUBR (Fcharset_name);
1230 DEFSUBR (Fmake_charset);
1231 DEFSUBR (Fmake_reverse_direction_charset);
1232 /* DEFSUBR (Freverse_direction_charset); */
1233 DEFSUBR (Fcharset_from_attributes);
1234 DEFSUBR (Fcharset_short_name);
1235 DEFSUBR (Fcharset_long_name);
1236 DEFSUBR (Fcharset_description);
1237 DEFSUBR (Fcharset_dimension);
1238 DEFSUBR (Fcharset_property);
1239 DEFSUBR (Fcharset_id);
1240 DEFSUBR (Fset_charset_ccl_program);
1241 DEFSUBR (Fset_charset_registry);
1243 DEFSUBR (Fmake_char);
1244 DEFSUBR (Fchar_charset);
1245 DEFSUBR (Fsplit_char);
1247 #ifdef ENABLE_COMPOSITE_CHARS
1248 DEFSUBR (Fmake_composite_char);
1249 DEFSUBR (Fcomposite_char_string);
1252 defsymbol (&Qcharsetp, "charsetp");
1253 defsymbol (&Qregistry, "registry");
1254 defsymbol (&Qfinal, "final");
1255 defsymbol (&Qgraphic, "graphic");
1256 defsymbol (&Qdirection, "direction");
1257 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1258 defsymbol (&Qccl_program, "ccl-program");
1259 defsymbol (&Qshort_name, "short-name");
1260 defsymbol (&Qlong_name, "long-name");
1262 defsymbol (&Ql2r, "l2r");
1263 defsymbol (&Qr2l, "r2l");
1265 /* Charsets, compatible with FSF 20.3
1266 Naming convention is Script-Charset[-Edition] */
1267 defsymbol (&Qascii, "ascii");
1268 defsymbol (&Qcontrol_1, "control-1");
1269 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1270 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1271 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1272 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1273 defsymbol (&Qthai_tis620, "thai-tis620");
1274 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1275 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1276 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1277 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1278 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1279 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1280 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1281 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1282 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1283 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1284 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1285 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1286 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1287 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1288 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1289 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1291 defsymbol (&Qcomposite, "composite");
1295 vars_of_mule_charset (void)
1299 /* Table of charsets indexed by leading byte. */
1300 for (i = 0; i < countof (charset_by_leading_byte); i++)
1301 charset_by_leading_byte[i] = Qnil;
1303 /* Table of charsets indexed by type/final-byte/direction. */
1304 for (i = 0; i < countof (charset_by_attributes); i++)
1305 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1306 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1307 charset_by_attributes[i][j][k] = Qnil;
1309 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1310 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1314 complex_vars_of_mule_charset (void)
1316 staticpro (&Vcharset_hash_table);
1317 Vcharset_hash_table =
1318 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1320 /* Predefined character sets. We store them into variables for
1324 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1325 CHARSET_TYPE_94, 1, 0, 'B',
1326 CHARSET_LEFT_TO_RIGHT,
1327 build_string ("ASCII"),
1328 build_string ("ASCII)"),
1329 build_string ("ASCII (ISO646 IRV)"),
1330 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1331 Vcharset_control_1 =
1332 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1333 CHARSET_TYPE_94, 1, 1, 0,
1334 CHARSET_LEFT_TO_RIGHT,
1335 build_string ("C1"),
1336 build_string ("Control characters"),
1337 build_string ("Control characters 128-191"),
1339 Vcharset_latin_iso8859_1 =
1340 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1341 CHARSET_TYPE_96, 1, 1, 'A',
1342 CHARSET_LEFT_TO_RIGHT,
1343 build_string ("Latin-1"),
1344 build_string ("ISO8859-1 (Latin-1)"),
1345 build_string ("ISO8859-1 (Latin-1)"),
1346 build_string ("iso8859-1"));
1347 Vcharset_latin_iso8859_2 =
1348 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1349 CHARSET_TYPE_96, 1, 1, 'B',
1350 CHARSET_LEFT_TO_RIGHT,
1351 build_string ("Latin-2"),
1352 build_string ("ISO8859-2 (Latin-2)"),
1353 build_string ("ISO8859-2 (Latin-2)"),
1354 build_string ("iso8859-2"));
1355 Vcharset_latin_iso8859_3 =
1356 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1357 CHARSET_TYPE_96, 1, 1, 'C',
1358 CHARSET_LEFT_TO_RIGHT,
1359 build_string ("Latin-3"),
1360 build_string ("ISO8859-3 (Latin-3)"),
1361 build_string ("ISO8859-3 (Latin-3)"),
1362 build_string ("iso8859-3"));
1363 Vcharset_latin_iso8859_4 =
1364 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1365 CHARSET_TYPE_96, 1, 1, 'D',
1366 CHARSET_LEFT_TO_RIGHT,
1367 build_string ("Latin-4"),
1368 build_string ("ISO8859-4 (Latin-4)"),
1369 build_string ("ISO8859-4 (Latin-4)"),
1370 build_string ("iso8859-4"));
1371 Vcharset_thai_tis620 =
1372 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1373 CHARSET_TYPE_96, 1, 1, 'T',
1374 CHARSET_LEFT_TO_RIGHT,
1375 build_string ("TIS620"),
1376 build_string ("TIS620 (Thai)"),
1377 build_string ("TIS620.2529 (Thai)"),
1378 build_string ("tis620"));
1379 Vcharset_greek_iso8859_7 =
1380 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1381 CHARSET_TYPE_96, 1, 1, 'F',
1382 CHARSET_LEFT_TO_RIGHT,
1383 build_string ("ISO8859-7"),
1384 build_string ("ISO8859-7 (Greek)"),
1385 build_string ("ISO8859-7 (Greek)"),
1386 build_string ("iso8859-7"));
1387 Vcharset_arabic_iso8859_6 =
1388 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1389 CHARSET_TYPE_96, 1, 1, 'G',
1390 CHARSET_RIGHT_TO_LEFT,
1391 build_string ("ISO8859-6"),
1392 build_string ("ISO8859-6 (Arabic)"),
1393 build_string ("ISO8859-6 (Arabic)"),
1394 build_string ("iso8859-6"));
1395 Vcharset_hebrew_iso8859_8 =
1396 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1397 CHARSET_TYPE_96, 1, 1, 'H',
1398 CHARSET_RIGHT_TO_LEFT,
1399 build_string ("ISO8859-8"),
1400 build_string ("ISO8859-8 (Hebrew)"),
1401 build_string ("ISO8859-8 (Hebrew)"),
1402 build_string ("iso8859-8"));
1403 Vcharset_katakana_jisx0201 =
1404 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1405 CHARSET_TYPE_94, 1, 1, 'I',
1406 CHARSET_LEFT_TO_RIGHT,
1407 build_string ("JISX0201 Kana"),
1408 build_string ("JISX0201.1976 (Japanese Kana)"),
1409 build_string ("JISX0201.1976 Japanese Kana"),
1410 build_string ("jisx0201.1976"));
1411 Vcharset_latin_jisx0201 =
1412 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1413 CHARSET_TYPE_94, 1, 0, 'J',
1414 CHARSET_LEFT_TO_RIGHT,
1415 build_string ("JISX0201 Roman"),
1416 build_string ("JISX0201.1976 (Japanese Roman)"),
1417 build_string ("JISX0201.1976 Japanese Roman"),
1418 build_string ("jisx0201.1976"));
1419 Vcharset_cyrillic_iso8859_5 =
1420 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1421 CHARSET_TYPE_96, 1, 1, 'L',
1422 CHARSET_LEFT_TO_RIGHT,
1423 build_string ("ISO8859-5"),
1424 build_string ("ISO8859-5 (Cyrillic)"),
1425 build_string ("ISO8859-5 (Cyrillic)"),
1426 build_string ("iso8859-5"));
1427 Vcharset_latin_iso8859_9 =
1428 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1429 CHARSET_TYPE_96, 1, 1, 'M',
1430 CHARSET_LEFT_TO_RIGHT,
1431 build_string ("Latin-5"),
1432 build_string ("ISO8859-9 (Latin-5)"),
1433 build_string ("ISO8859-9 (Latin-5)"),
1434 build_string ("iso8859-9"));
1435 Vcharset_japanese_jisx0208_1978 =
1436 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1437 CHARSET_TYPE_94X94, 2, 0, '@',
1438 CHARSET_LEFT_TO_RIGHT,
1439 build_string ("JISX0208.1978"),
1440 build_string ("JISX0208.1978 (Japanese)"),
1442 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1443 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1444 Vcharset_chinese_gb2312 =
1445 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1446 CHARSET_TYPE_94X94, 2, 0, 'A',
1447 CHARSET_LEFT_TO_RIGHT,
1448 build_string ("GB2312"),
1449 build_string ("GB2312)"),
1450 build_string ("GB2312 Chinese simplified"),
1451 build_string ("gb2312"));
1452 Vcharset_japanese_jisx0208 =
1453 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1454 CHARSET_TYPE_94X94, 2, 0, 'B',
1455 CHARSET_LEFT_TO_RIGHT,
1456 build_string ("JISX0208"),
1457 build_string ("JISX0208.1983/1990 (Japanese)"),
1458 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1459 build_string ("jisx0208.19\\(83\\|90\\)"));
1460 Vcharset_korean_ksc5601 =
1461 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1462 CHARSET_TYPE_94X94, 2, 0, 'C',
1463 CHARSET_LEFT_TO_RIGHT,
1464 build_string ("KSC5601"),
1465 build_string ("KSC5601 (Korean"),
1466 build_string ("KSC5601 Korean Hangul and Hanja"),
1467 build_string ("ksc5601"));
1468 Vcharset_japanese_jisx0212 =
1469 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1470 CHARSET_TYPE_94X94, 2, 0, 'D',
1471 CHARSET_LEFT_TO_RIGHT,
1472 build_string ("JISX0212"),
1473 build_string ("JISX0212 (Japanese)"),
1474 build_string ("JISX0212 Japanese Supplement"),
1475 build_string ("jisx0212"));
1477 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1478 Vcharset_chinese_cns11643_1 =
1479 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1480 CHARSET_TYPE_94X94, 2, 0, 'G',
1481 CHARSET_LEFT_TO_RIGHT,
1482 build_string ("CNS11643-1"),
1483 build_string ("CNS11643-1 (Chinese traditional)"),
1485 ("CNS 11643 Plane 1 Chinese traditional"),
1486 build_string (CHINESE_CNS_PLANE_RE("1")));
1487 Vcharset_chinese_cns11643_2 =
1488 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1489 CHARSET_TYPE_94X94, 2, 0, 'H',
1490 CHARSET_LEFT_TO_RIGHT,
1491 build_string ("CNS11643-2"),
1492 build_string ("CNS11643-2 (Chinese traditional)"),
1494 ("CNS 11643 Plane 2 Chinese traditional"),
1495 build_string (CHINESE_CNS_PLANE_RE("2")));
1496 Vcharset_chinese_big5_1 =
1497 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1498 CHARSET_TYPE_94X94, 2, 0, '0',
1499 CHARSET_LEFT_TO_RIGHT,
1500 build_string ("Big5"),
1501 build_string ("Big5 (Level-1)"),
1503 ("Big5 Level-1 Chinese traditional"),
1504 build_string ("big5"));
1505 Vcharset_chinese_big5_2 =
1506 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1507 CHARSET_TYPE_94X94, 2, 0, '1',
1508 CHARSET_LEFT_TO_RIGHT,
1509 build_string ("Big5"),
1510 build_string ("Big5 (Level-2)"),
1512 ("Big5 Level-2 Chinese traditional"),
1513 build_string ("big5"));
1516 #ifdef ENABLE_COMPOSITE_CHARS
1517 /* #### For simplicity, we put composite chars into a 96x96 charset.
1518 This is going to lead to problems because you can run out of
1519 room, esp. as we don't yet recycle numbers. */
1520 Vcharset_composite =
1521 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1522 CHARSET_TYPE_96X96, 2, 0, 0,
1523 CHARSET_LEFT_TO_RIGHT,
1524 build_string ("Composite"),
1525 build_string ("Composite characters"),
1526 build_string ("Composite characters"),
1529 composite_char_row_next = 32;
1530 composite_char_col_next = 32;
1532 Vcomposite_char_string2char_hash_table =
1533 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1534 Vcomposite_char_char2string_hash_table =
1535 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1536 staticpro (&Vcomposite_char_string2char_hash_table);
1537 staticpro (&Vcomposite_char_char2string_hash_table);
1538 #endif /* ENABLE_COMPOSITE_CHARS */