1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
83 /* Table of number of bytes in the string representation of a character
84 indexed by the first byte of that representation.
86 rep_bytes_by_first_byte(c) is more efficient than the equivalent
87 canonical computation:
89 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
91 Bytecount rep_bytes_by_first_byte[0xA0] =
92 { /* 0x00 - 0x7f are for straight ASCII */
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 /* 0x80 - 0x8f are for Dimension-1 official charsets */
103 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 /* 0x90 - 0x9d are for Dimension-2 official charsets */
108 /* 0x9e is for Dimension-1 private charsets */
109 /* 0x9f is for Dimension-2 private charsets */
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
114 Lisp_Object Vutf_2000_version;
116 int leading_code_private_11;
118 Lisp_Object Qcharsetp;
120 /* Qdoc_string, Qdimension, Qchars defined in general.c */
121 Lisp_Object Qregistry, Qfinal, Qgraphic;
122 Lisp_Object Qdirection;
123 Lisp_Object Qreverse_direction_charset;
124 Lisp_Object Qccl_program;
125 Lisp_Object Qleading_byte;
126 Lisp_Object Qshort_name, Qlong_name;
142 Qjapanese_jisx0208_1978,
153 Lisp_Object Ql2r, Qr2l;
155 Lisp_Object Vcharset_hash_table;
157 static Bufbyte next_allocated_1_byte_leading_byte;
158 static Bufbyte next_allocated_2_byte_leading_byte;
160 /* Composite characters are characters constructed by overstriking two
161 or more regular characters.
163 1) The old Mule implementation involves storing composite characters
164 in a buffer as a tag followed by all of the actual characters
165 used to make up the composite character. I think this is a bad
166 idea; it greatly complicates code that wants to handle strings
167 one character at a time because it has to deal with the possibility
168 of great big ungainly characters. It's much more reasonable to
169 simply store an index into a table of composite characters.
171 2) The current implementation only allows for 16,384 separate
172 composite characters over the lifetime of the XEmacs process.
173 This could become a potential problem if the user
174 edited lots of different files that use composite characters.
175 Due to FSF bogosity, increasing the number of allowable
176 composite characters under Mule would decrease the number
177 of possible faces that can exist. Mule already has shrunk
178 this to 2048, and further shrinkage would become uncomfortable.
179 No such problems exist in XEmacs.
181 Composite characters could be represented as 0x80 C1 C2 C3,
182 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
183 for slightly under 2^20 (one million) composite characters
184 over the XEmacs process lifetime, and you only need to
185 increase the size of a Mule character from 19 to 21 bits.
186 Or you could use 0x80 C1 C2 C3 C4, allowing for about
187 85 million (slightly over 2^26) composite characters. */
190 /************************************************************************/
191 /* Basic Emchar functions */
192 /************************************************************************/
194 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
195 string in STR. Returns the number of bytes stored.
196 Do not call this directly. Use the macro set_charptr_emchar() instead.
200 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
215 else if ( c <= 0x7ff )
217 *p++ = (c >> 6) | 0xc0;
218 *p++ = (c & 0x3f) | 0x80;
220 else if ( c <= 0xffff )
222 *p++ = (c >> 12) | 0xe0;
223 *p++ = ((c >> 6) & 0x3f) | 0x80;
224 *p++ = (c & 0x3f) | 0x80;
226 else if ( c <= 0x1fffff )
228 *p++ = (c >> 18) | 0xf0;
229 *p++ = ((c >> 12) & 0x3f) | 0x80;
230 *p++ = ((c >> 6) & 0x3f) | 0x80;
231 *p++ = (c & 0x3f) | 0x80;
233 else if ( c <= 0x3ffffff )
235 *p++ = (c >> 24) | 0xf8;
236 *p++ = ((c >> 18) & 0x3f) | 0x80;
237 *p++ = ((c >> 12) & 0x3f) | 0x80;
238 *p++ = ((c >> 6) & 0x3f) | 0x80;
239 *p++ = (c & 0x3f) | 0x80;
243 *p++ = (c >> 30) | 0xfc;
244 *p++ = ((c >> 24) & 0x3f) | 0x80;
245 *p++ = ((c >> 18) & 0x3f) | 0x80;
246 *p++ = ((c >> 12) & 0x3f) | 0x80;
247 *p++ = ((c >> 6) & 0x3f) | 0x80;
248 *p++ = (c & 0x3f) | 0x80;
251 BREAKUP_CHAR (c, charset, c1, c2);
252 lb = CHAR_LEADING_BYTE (c);
253 if (LEADING_BYTE_PRIVATE_P (lb))
254 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
256 if (EQ (charset, Vcharset_control_1))
265 /* Return the first character from a Mule-encoded string in STR,
266 assuming it's non-ASCII. Do not call this directly.
267 Use the macro charptr_emchar() instead. */
270 non_ascii_charptr_emchar (CONST Bufbyte *str)
283 else if ( b >= 0xf8 )
288 else if ( b >= 0xf0 )
293 else if ( b >= 0xe0 )
298 else if ( b >= 0xc0 )
308 for( ; len > 0; len-- )
311 ch = ( ch << 6 ) | ( b & 0x3f );
315 Bufbyte i0 = *str, i1, i2 = 0;
318 if (i0 == LEADING_BYTE_CONTROL_1)
319 return (Emchar) (*++str - 0x20);
321 if (LEADING_BYTE_PREFIX_P (i0))
326 charset = CHARSET_BY_LEADING_BYTE (i0);
327 if (XCHARSET_DIMENSION (charset) == 2)
330 return MAKE_CHAR (charset, i1, i2);
334 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
335 Do not call this directly. Use the macro valid_char_p() instead. */
339 non_ascii_valid_char_p (Emchar ch)
343 /* Must have only lowest 19 bits set */
347 f1 = CHAR_FIELD1 (ch);
348 f2 = CHAR_FIELD2 (ch);
349 f3 = CHAR_FIELD3 (ch);
355 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
356 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
357 f2 > MAX_CHAR_FIELD2_PRIVATE)
362 if (f3 != 0x20 && f3 != 0x7F)
366 NOTE: This takes advantage of the fact that
367 FIELD2_TO_OFFICIAL_LEADING_BYTE and
368 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
370 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
371 return (XCHARSET_CHARS (charset) == 96);
377 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
378 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
379 f1 > MAX_CHAR_FIELD1_PRIVATE)
381 if (f2 < 0x20 || f3 < 0x20)
384 #ifdef ENABLE_COMPOSITE_CHARS
385 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
387 if (UNBOUNDP (Fgethash (make_int (ch),
388 Vcomposite_char_char2string_hash_table,
393 #endif /* ENABLE_COMPOSITE_CHARS */
395 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
398 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
400 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
403 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
405 return (XCHARSET_CHARS (charset) == 96);
411 /************************************************************************/
412 /* Basic string functions */
413 /************************************************************************/
415 /* Copy the character pointed to by PTR into STR, assuming it's
416 non-ASCII. Do not call this directly. Use the macro
417 charptr_copy_char() instead. */
420 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
422 Bufbyte *strptr = str;
424 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
426 /* Notice fallthrough. */
428 case 6: *++strptr = *ptr++;
429 case 5: *++strptr = *ptr++;
431 case 4: *++strptr = *ptr++;
432 case 3: *++strptr = *ptr++;
433 case 2: *++strptr = *ptr;
438 return strptr + 1 - str;
442 /************************************************************************/
443 /* streams of Emchars */
444 /************************************************************************/
446 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
447 The functions below are not meant to be called directly; use
448 the macros in insdel.h. */
451 Lstream_get_emchar_1 (Lstream *stream, int ch)
453 Bufbyte str[MAX_EMCHAR_LEN];
454 Bufbyte *strptr = str;
456 str[0] = (Bufbyte) ch;
457 switch (REP_BYTES_BY_FIRST_BYTE (ch))
459 /* Notice fallthrough. */
462 ch = Lstream_getc (stream);
464 *++strptr = (Bufbyte) ch;
466 ch = Lstream_getc (stream);
468 *++strptr = (Bufbyte) ch;
471 ch = Lstream_getc (stream);
473 *++strptr = (Bufbyte) ch;
475 ch = Lstream_getc (stream);
477 *++strptr = (Bufbyte) ch;
479 ch = Lstream_getc (stream);
481 *++strptr = (Bufbyte) ch;
486 return charptr_emchar (str);
490 Lstream_fput_emchar (Lstream *stream, Emchar ch)
492 Bufbyte str[MAX_EMCHAR_LEN];
493 Bytecount len = set_charptr_emchar (str, ch);
494 return Lstream_write (stream, str, len);
498 Lstream_funget_emchar (Lstream *stream, Emchar ch)
500 Bufbyte str[MAX_EMCHAR_LEN];
501 Bytecount len = set_charptr_emchar (str, ch);
502 Lstream_unread (stream, str, len);
506 /************************************************************************/
508 /************************************************************************/
511 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
513 struct Lisp_Charset *cs = XCHARSET (obj);
515 markobj (cs->short_name);
516 markobj (cs->long_name);
517 markobj (cs->doc_string);
518 markobj (cs->registry);
519 markobj (cs->ccl_program);
524 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
526 struct Lisp_Charset *cs = XCHARSET (obj);
530 error ("printing unreadable object #<charset %s 0x%x>",
531 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
534 write_c_string ("#<charset ", printcharfun);
535 print_internal (CHARSET_NAME (cs), printcharfun, 0);
536 write_c_string (" ", printcharfun);
537 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
538 write_c_string (" ", printcharfun);
539 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
540 write_c_string (" ", printcharfun);
541 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
542 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
543 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
544 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
545 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
547 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
548 CHARSET_COLUMNS (cs),
549 CHARSET_GRAPHIC (cs),
551 write_c_string (buf, printcharfun);
552 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
553 sprintf (buf, " 0x%x>", cs->header.uid);
554 write_c_string (buf, printcharfun);
557 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
558 mark_charset, print_charset, 0, 0, 0,
559 struct Lisp_Charset);
560 /* Make a new charset. */
563 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
564 unsigned char type, unsigned char columns, unsigned char graphic,
565 Bufbyte final, unsigned char direction, Lisp_Object short_name,
566 Lisp_Object long_name, Lisp_Object doc,
570 struct Lisp_Charset *cs =
571 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
572 XSETCHARSET (obj, cs);
574 CHARSET_ID (cs) = id;
575 CHARSET_NAME (cs) = name;
576 CHARSET_SHORT_NAME (cs) = short_name;
577 CHARSET_LONG_NAME (cs) = long_name;
578 CHARSET_REP_BYTES (cs) = rep_bytes;
579 CHARSET_DIRECTION (cs) = direction;
580 CHARSET_TYPE (cs) = type;
581 CHARSET_COLUMNS (cs) = columns;
582 CHARSET_GRAPHIC (cs) = graphic;
583 CHARSET_FINAL (cs) = final;
584 CHARSET_DOC_STRING (cs) = doc;
585 CHARSET_REGISTRY (cs) = reg;
586 CHARSET_CCL_PROGRAM (cs) = Qnil;
587 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
589 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
590 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
591 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
592 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
596 /* some charsets do not have final characters. This includes
597 ASCII, Control-1, Composite, and the two faux private
599 assert (NILP (charset_by_attributes[type][final][direction]));
600 charset_by_attributes[type][final][direction] = obj;
603 assert (NILP (charset_by_leading_byte[id - 128]));
604 charset_by_leading_byte[id - 128] = obj;
607 /* official leading byte */
608 rep_bytes_by_first_byte[id] = rep_bytes;
611 /* Some charsets are "faux" and don't have names or really exist at
612 all except in the leading-byte table. */
614 Fputhash (name, obj, Vcharset_hash_table);
619 get_unallocated_leading_byte (int dimension)
625 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
628 lb = next_allocated_1_byte_leading_byte++;
632 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
635 lb = next_allocated_2_byte_leading_byte++;
640 ("No more character sets free for this dimension",
641 make_int (dimension));
647 /************************************************************************/
648 /* Basic charset Lisp functions */
649 /************************************************************************/
651 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
652 Return non-nil if OBJECT is a charset.
656 return CHARSETP (object) ? Qt : Qnil;
659 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
660 Retrieve the charset of the given name.
661 If CHARSET-OR-NAME is a charset object, it is simply returned.
662 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
663 nil is returned. Otherwise the associated charset object is returned.
667 if (CHARSETP (charset_or_name))
668 return charset_or_name;
670 CHECK_SYMBOL (charset_or_name);
671 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
674 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
675 Retrieve the charset of the given name.
676 Same as `find-charset' except an error is signalled if there is no such
677 charset instead of returning nil.
681 Lisp_Object charset = Ffind_charset (name);
684 signal_simple_error ("No such charset", name);
688 /* We store the charsets in hash tables with the names as the key and the
689 actual charset object as the value. Occasionally we need to use them
690 in a list format. These routines provide us with that. */
691 struct charset_list_closure
693 Lisp_Object *charset_list;
697 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
698 void *charset_list_closure)
700 /* This function can GC */
701 struct charset_list_closure *chcl =
702 (struct charset_list_closure*) charset_list_closure;
703 Lisp_Object *charset_list = chcl->charset_list;
705 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
709 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
710 Return a list of the names of all defined charsets.
714 Lisp_Object charset_list = Qnil;
716 struct charset_list_closure charset_list_closure;
718 GCPRO1 (charset_list);
719 charset_list_closure.charset_list = &charset_list;
720 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
721 &charset_list_closure);
727 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
728 Return the name of the given charset.
732 return XCHARSET_NAME (Fget_charset (charset));
735 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
736 Define a new character set.
737 This function is for use with Mule support.
738 NAME is a symbol, the name by which the character set is normally referred.
739 DOC-STRING is a string describing the character set.
740 PROPS is a property list, describing the specific nature of the
741 character set. Recognized properties are:
743 'short-name Short version of the charset name (ex: Latin-1)
744 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
745 'registry A regular expression matching the font registry field for
747 'dimension Number of octets used to index a character in this charset.
748 Either 1 or 2. Defaults to 1.
749 'columns Number of columns used to display a character in this charset.
750 Only used in TTY mode. (Under X, the actual width of a
751 character can be derived from the font used to display the
752 characters.) If unspecified, defaults to the dimension
753 (this is almost always the correct value).
754 'chars Number of characters in each dimension (94 or 96).
755 Defaults to 94. Note that if the dimension is 2, the
756 character set thus described is 94x94 or 96x96.
757 'final Final byte of ISO 2022 escape sequence. Must be
758 supplied. Each combination of (DIMENSION, CHARS) defines a
759 separate namespace for final bytes. Note that ISO
760 2022 restricts the final byte to the range
761 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
762 dimension == 2. Note also that final bytes in the range
763 0x30 - 0x3F are reserved for user-defined (not official)
765 'graphic 0 (use left half of font on output) or 1 (use right half
766 of font on output). Defaults to 0. For example, for
767 a font whose registry is ISO8859-1, the left half
768 (octets 0x20 - 0x7F) is the `ascii' character set, while
769 the right half (octets 0xA0 - 0xFF) is the `latin-1'
770 character set. With 'graphic set to 0, the octets
771 will have their high bit cleared; with it set to 1,
772 the octets will have their high bit set.
773 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
775 'ccl-program A compiled CCL program used to convert a character in
776 this charset into an index into the font. This is in
777 addition to the 'graphic property. The CCL program
778 is passed the octets of the character, with the high
779 bit cleared and set depending upon whether the value
780 of the 'graphic property is 0 or 1.
782 (name, doc_string, props))
784 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
785 int direction = CHARSET_LEFT_TO_RIGHT;
787 Lisp_Object registry = Qnil;
789 Lisp_Object rest, keyword, value;
790 Lisp_Object ccl_program = Qnil;
791 Lisp_Object short_name = Qnil, long_name = Qnil;
794 if (!NILP (doc_string))
795 CHECK_STRING (doc_string);
797 charset = Ffind_charset (name);
799 signal_simple_error ("Cannot redefine existing charset", name);
801 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
803 if (EQ (keyword, Qshort_name))
805 CHECK_STRING (value);
809 if (EQ (keyword, Qlong_name))
811 CHECK_STRING (value);
815 else if (EQ (keyword, Qdimension))
818 dimension = XINT (value);
819 if (dimension < 1 || dimension > 2)
820 signal_simple_error ("Invalid value for 'dimension", value);
823 else if (EQ (keyword, Qchars))
826 chars = XINT (value);
827 if (chars != 94 && chars != 96)
828 signal_simple_error ("Invalid value for 'chars", value);
831 else if (EQ (keyword, Qcolumns))
834 columns = XINT (value);
835 if (columns != 1 && columns != 2)
836 signal_simple_error ("Invalid value for 'columns", value);
839 else if (EQ (keyword, Qgraphic))
842 graphic = XINT (value);
843 if (graphic < 0 || graphic > 1)
844 signal_simple_error ("Invalid value for 'graphic", value);
847 else if (EQ (keyword, Qregistry))
849 CHECK_STRING (value);
853 else if (EQ (keyword, Qdirection))
855 if (EQ (value, Ql2r))
856 direction = CHARSET_LEFT_TO_RIGHT;
857 else if (EQ (value, Qr2l))
858 direction = CHARSET_RIGHT_TO_LEFT;
860 signal_simple_error ("Invalid value for 'direction", value);
863 else if (EQ (keyword, Qfinal))
865 CHECK_CHAR_COERCE_INT (value);
866 final = XCHAR (value);
867 if (final < '0' || final > '~')
868 signal_simple_error ("Invalid value for 'final", value);
871 else if (EQ (keyword, Qccl_program))
873 CHECK_VECTOR (value);
878 signal_simple_error ("Unrecognized property", keyword);
882 error ("'final must be specified");
883 if (dimension == 2 && final > 0x5F)
885 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
889 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
891 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
893 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
894 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
896 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
898 id = get_unallocated_leading_byte (dimension);
900 if (NILP (doc_string))
901 doc_string = build_string ("");
904 registry = build_string ("");
906 if (NILP (short_name))
907 XSETSTRING (short_name, XSYMBOL (name)->name);
909 if (NILP (long_name))
910 long_name = doc_string;
914 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
915 final, direction, short_name, long_name, doc_string, registry);
916 if (!NILP (ccl_program))
917 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
921 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
923 Make a charset equivalent to CHARSET but which goes in the opposite direction.
924 NEW-NAME is the name of the new charset. Return the new charset.
928 Lisp_Object new_charset = Qnil;
929 int id, dimension, columns, graphic, final;
931 Lisp_Object registry, doc_string, short_name, long_name;
932 struct Lisp_Charset *cs;
934 charset = Fget_charset (charset);
935 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
936 signal_simple_error ("Charset already has reverse-direction charset",
939 CHECK_SYMBOL (new_name);
940 if (!NILP (Ffind_charset (new_name)))
941 signal_simple_error ("Cannot redefine existing charset", new_name);
943 cs = XCHARSET (charset);
945 type = CHARSET_TYPE (cs);
946 columns = CHARSET_COLUMNS (cs);
947 dimension = CHARSET_DIMENSION (cs);
948 id = get_unallocated_leading_byte (dimension);
950 graphic = CHARSET_GRAPHIC (cs);
951 final = CHARSET_FINAL (cs);
952 direction = CHARSET_RIGHT_TO_LEFT;
953 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
954 direction = CHARSET_LEFT_TO_RIGHT;
955 doc_string = CHARSET_DOC_STRING (cs);
956 short_name = CHARSET_SHORT_NAME (cs);
957 long_name = CHARSET_LONG_NAME (cs);
958 registry = CHARSET_REGISTRY (cs);
960 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
961 graphic, final, direction, short_name, long_name,
962 doc_string, registry);
964 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
965 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
970 /* #### Reverse direction charsets not yet implemented. */
972 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
974 Return the reverse-direction charset parallel to CHARSET, if any.
975 This is the charset with the same properties (in particular, the same
976 dimension, number of characters per dimension, and final byte) as
977 CHARSET but whose characters are displayed in the opposite direction.
981 charset = Fget_charset (charset);
982 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
986 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
987 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
988 If DIRECTION is omitted, both directions will be checked (left-to-right
989 will be returned if character sets exist for both directions).
991 (dimension, chars, final, direction))
993 int dm, ch, fi, di = -1;
995 Lisp_Object obj = Qnil;
997 CHECK_INT (dimension);
998 dm = XINT (dimension);
999 if (dm < 1 || dm > 2)
1000 signal_simple_error ("Invalid value for DIMENSION", dimension);
1004 if (ch != 94 && ch != 96)
1005 signal_simple_error ("Invalid value for CHARS", chars);
1007 CHECK_CHAR_COERCE_INT (final);
1009 if (fi < '0' || fi > '~')
1010 signal_simple_error ("Invalid value for FINAL", final);
1012 if (EQ (direction, Ql2r))
1013 di = CHARSET_LEFT_TO_RIGHT;
1014 else if (EQ (direction, Qr2l))
1015 di = CHARSET_RIGHT_TO_LEFT;
1016 else if (!NILP (direction))
1017 signal_simple_error ("Invalid value for DIRECTION", direction);
1019 if (dm == 2 && fi > 0x5F)
1021 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1024 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1026 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1030 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1032 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1035 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1038 return XCHARSET_NAME (obj);
1042 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1043 Return short name of CHARSET.
1047 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1050 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1051 Return long name of CHARSET.
1055 return XCHARSET_LONG_NAME (Fget_charset (charset));
1058 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1059 Return description of CHARSET.
1063 return XCHARSET_DOC_STRING (Fget_charset (charset));
1066 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1067 Return dimension of CHARSET.
1071 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1074 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1075 Return property PROP of CHARSET.
1076 Recognized properties are those listed in `make-charset', as well as
1077 'name and 'doc-string.
1081 struct Lisp_Charset *cs;
1083 charset = Fget_charset (charset);
1084 cs = XCHARSET (charset);
1086 CHECK_SYMBOL (prop);
1087 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1088 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1089 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1090 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1091 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1092 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1093 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1094 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1095 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1096 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1097 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1098 if (EQ (prop, Qdirection))
1099 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1100 if (EQ (prop, Qreverse_direction_charset))
1102 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1106 return XCHARSET_NAME (obj);
1108 signal_simple_error ("Unrecognized charset property name", prop);
1109 return Qnil; /* not reached */
1112 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1113 Return charset identification number of CHARSET.
1117 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1120 /* #### We need to figure out which properties we really want to
1123 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1124 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1126 (charset, ccl_program))
1128 charset = Fget_charset (charset);
1129 CHECK_VECTOR (ccl_program);
1130 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1135 invalidate_charset_font_caches (Lisp_Object charset)
1137 /* Invalidate font cache entries for charset on all devices. */
1138 Lisp_Object devcons, concons, hash_table;
1139 DEVICE_LOOP_NO_BREAK (devcons, concons)
1141 struct device *d = XDEVICE (XCAR (devcons));
1142 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1143 if (!UNBOUNDP (hash_table))
1144 Fclrhash (hash_table);
1148 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1149 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1150 Set the 'registry property of CHARSET to REGISTRY.
1152 (charset, registry))
1154 charset = Fget_charset (charset);
1155 CHECK_STRING (registry);
1156 XCHARSET_REGISTRY (charset) = registry;
1157 invalidate_charset_font_caches (charset);
1158 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1163 /************************************************************************/
1164 /* Lisp primitives for working with characters */
1165 /************************************************************************/
1167 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1168 Make a character from CHARSET and octets ARG1 and ARG2.
1169 ARG2 is required only for characters from two-dimensional charsets.
1170 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1171 character s with caron.
1173 (charset, arg1, arg2))
1175 struct Lisp_Charset *cs;
1177 int lowlim, highlim;
1179 charset = Fget_charset (charset);
1180 cs = XCHARSET (charset);
1182 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1183 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1184 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1185 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1188 /* It is useful (and safe, according to Olivier Galibert) to strip
1189 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1190 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1191 Latin 2 code of the character. */
1192 a1 = XINT (arg1) & 0x7f;
1193 if (a1 < lowlim || a1 > highlim)
1194 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1196 if (CHARSET_DIMENSION (cs) == 1)
1200 ("Charset is of dimension one; second octet must be nil", arg2);
1201 return make_char (MAKE_CHAR (charset, a1, 0));
1205 a2 = XINT (arg2) & 0x7f;
1206 if (a2 < lowlim || a2 > highlim)
1207 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1209 return make_char (MAKE_CHAR (charset, a1, a2));
1212 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1213 Return the character set of char CH.
1217 CHECK_CHAR_COERCE_INT (ch);
1219 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1220 (CHAR_LEADING_BYTE (XCHAR (ch))));
1223 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1224 Return the octet numbered N (should be 0 or 1) of char CH.
1225 N defaults to 0 if omitted.
1229 Lisp_Object charset;
1232 CHECK_CHAR_COERCE_INT (ch);
1239 if (int_n != 0 && int_n != 1)
1240 signal_simple_error ("Octet number must be 0 or 1", n);
1242 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1243 return make_int (int_n == 0 ? c1 : c2);
1247 #ifdef ENABLE_COMPOSITE_CHARS
1248 /************************************************************************/
1249 /* composite character functions */
1250 /************************************************************************/
1253 lookup_composite_char (Bufbyte *str, int len)
1255 Lisp_Object lispstr = make_string (str, len);
1256 Lisp_Object ch = Fgethash (lispstr,
1257 Vcomposite_char_string2char_hash_table,
1263 if (composite_char_row_next >= 128)
1264 signal_simple_error ("No more composite chars available", lispstr);
1265 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1266 composite_char_col_next);
1267 Fputhash (make_char (emch), lispstr,
1268 Vcomposite_char_char2string_hash_table);
1269 Fputhash (lispstr, make_char (emch),
1270 Vcomposite_char_string2char_hash_table);
1271 composite_char_col_next++;
1272 if (composite_char_col_next >= 128)
1274 composite_char_col_next = 32;
1275 composite_char_row_next++;
1284 composite_char_string (Emchar ch)
1286 Lisp_Object str = Fgethash (make_char (ch),
1287 Vcomposite_char_char2string_hash_table,
1289 assert (!UNBOUNDP (str));
1293 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1294 Convert a string into a single composite character.
1295 The character is the result of overstriking all the characters in
1300 CHECK_STRING (string);
1301 return make_char (lookup_composite_char (XSTRING_DATA (string),
1302 XSTRING_LENGTH (string)));
1305 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1306 Return a string of the characters comprising a composite character.
1314 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1315 signal_simple_error ("Must be composite char", ch);
1316 return composite_char_string (emch);
1318 #endif /* ENABLE_COMPOSITE_CHARS */
1321 /************************************************************************/
1322 /* initialization */
1323 /************************************************************************/
1326 syms_of_mule_charset (void)
1328 DEFSUBR (Fcharsetp);
1329 DEFSUBR (Ffind_charset);
1330 DEFSUBR (Fget_charset);
1331 DEFSUBR (Fcharset_list);
1332 DEFSUBR (Fcharset_name);
1333 DEFSUBR (Fmake_charset);
1334 DEFSUBR (Fmake_reverse_direction_charset);
1335 /* DEFSUBR (Freverse_direction_charset); */
1336 DEFSUBR (Fcharset_from_attributes);
1337 DEFSUBR (Fcharset_short_name);
1338 DEFSUBR (Fcharset_long_name);
1339 DEFSUBR (Fcharset_description);
1340 DEFSUBR (Fcharset_dimension);
1341 DEFSUBR (Fcharset_property);
1342 DEFSUBR (Fcharset_id);
1343 DEFSUBR (Fset_charset_ccl_program);
1344 DEFSUBR (Fset_charset_registry);
1346 DEFSUBR (Fmake_char);
1347 DEFSUBR (Fchar_charset);
1348 DEFSUBR (Fchar_octet);
1350 #ifdef ENABLE_COMPOSITE_CHARS
1351 DEFSUBR (Fmake_composite_char);
1352 DEFSUBR (Fcomposite_char_string);
1355 defsymbol (&Qcharsetp, "charsetp");
1356 defsymbol (&Qregistry, "registry");
1357 defsymbol (&Qfinal, "final");
1358 defsymbol (&Qgraphic, "graphic");
1359 defsymbol (&Qdirection, "direction");
1360 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1361 defsymbol (&Qccl_program, "ccl-program");
1362 defsymbol (&Qshort_name, "short-name");
1363 defsymbol (&Qlong_name, "long-name");
1365 defsymbol (&Ql2r, "l2r");
1366 defsymbol (&Qr2l, "r2l");
1368 /* Charsets, compatible with FSF 20.3
1369 Naming convention is Script-Charset[-Edition] */
1370 defsymbol (&Qascii, "ascii");
1371 defsymbol (&Qcontrol_1, "control-1");
1372 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1373 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1374 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1375 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1376 defsymbol (&Qthai_tis620, "thai-tis620");
1377 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1378 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1379 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1380 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1381 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1382 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1383 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1384 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1385 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1386 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1387 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1388 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1389 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1390 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1391 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1392 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1394 defsymbol (&Qcomposite, "composite");
1397 Vutf_2000_version = build_string("0.3 (Imamiya)");
1398 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1399 Version number of UTF-2000.
1403 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1404 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1405 Leading-code of private TYPE9N charset of column-width 1.
1407 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1411 vars_of_mule_charset (void)
1415 /* Table of charsets indexed by leading byte. */
1416 for (i = 0; i < countof (charset_by_leading_byte); i++)
1417 charset_by_leading_byte[i] = Qnil;
1419 /* Table of charsets indexed by type/final-byte/direction. */
1420 for (i = 0; i < countof (charset_by_attributes); i++)
1421 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1422 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1423 charset_by_attributes[i][j][k] = Qnil;
1425 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1426 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1430 complex_vars_of_mule_charset (void)
1432 staticpro (&Vcharset_hash_table);
1433 Vcharset_hash_table =
1434 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1436 /* Predefined character sets. We store them into variables for
1440 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1441 CHARSET_TYPE_94, 1, 0, 'B',
1442 CHARSET_LEFT_TO_RIGHT,
1443 build_string ("ASCII"),
1444 build_string ("ASCII)"),
1445 build_string ("ASCII (ISO646 IRV)"),
1446 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1447 Vcharset_control_1 =
1448 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1449 CHARSET_TYPE_94, 1, 1, 0,
1450 CHARSET_LEFT_TO_RIGHT,
1451 build_string ("C1"),
1452 build_string ("Control characters"),
1453 build_string ("Control characters 128-191"),
1455 Vcharset_latin_iso8859_1 =
1456 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1457 CHARSET_TYPE_96, 1, 1, 'A',
1458 CHARSET_LEFT_TO_RIGHT,
1459 build_string ("Latin-1"),
1460 build_string ("ISO8859-1 (Latin-1)"),
1461 build_string ("ISO8859-1 (Latin-1)"),
1462 build_string ("iso8859-1"));
1463 Vcharset_latin_iso8859_2 =
1464 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1465 CHARSET_TYPE_96, 1, 1, 'B',
1466 CHARSET_LEFT_TO_RIGHT,
1467 build_string ("Latin-2"),
1468 build_string ("ISO8859-2 (Latin-2)"),
1469 build_string ("ISO8859-2 (Latin-2)"),
1470 build_string ("iso8859-2"));
1471 Vcharset_latin_iso8859_3 =
1472 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1473 CHARSET_TYPE_96, 1, 1, 'C',
1474 CHARSET_LEFT_TO_RIGHT,
1475 build_string ("Latin-3"),
1476 build_string ("ISO8859-3 (Latin-3)"),
1477 build_string ("ISO8859-3 (Latin-3)"),
1478 build_string ("iso8859-3"));
1479 Vcharset_latin_iso8859_4 =
1480 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1481 CHARSET_TYPE_96, 1, 1, 'D',
1482 CHARSET_LEFT_TO_RIGHT,
1483 build_string ("Latin-4"),
1484 build_string ("ISO8859-4 (Latin-4)"),
1485 build_string ("ISO8859-4 (Latin-4)"),
1486 build_string ("iso8859-4"));
1487 Vcharset_thai_tis620 =
1488 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1489 CHARSET_TYPE_96, 1, 1, 'T',
1490 CHARSET_LEFT_TO_RIGHT,
1491 build_string ("TIS620"),
1492 build_string ("TIS620 (Thai)"),
1493 build_string ("TIS620.2529 (Thai)"),
1494 build_string ("tis620"));
1495 Vcharset_greek_iso8859_7 =
1496 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1497 CHARSET_TYPE_96, 1, 1, 'F',
1498 CHARSET_LEFT_TO_RIGHT,
1499 build_string ("ISO8859-7"),
1500 build_string ("ISO8859-7 (Greek)"),
1501 build_string ("ISO8859-7 (Greek)"),
1502 build_string ("iso8859-7"));
1503 Vcharset_arabic_iso8859_6 =
1504 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1505 CHARSET_TYPE_96, 1, 1, 'G',
1506 CHARSET_RIGHT_TO_LEFT,
1507 build_string ("ISO8859-6"),
1508 build_string ("ISO8859-6 (Arabic)"),
1509 build_string ("ISO8859-6 (Arabic)"),
1510 build_string ("iso8859-6"));
1511 Vcharset_hebrew_iso8859_8 =
1512 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1513 CHARSET_TYPE_96, 1, 1, 'H',
1514 CHARSET_RIGHT_TO_LEFT,
1515 build_string ("ISO8859-8"),
1516 build_string ("ISO8859-8 (Hebrew)"),
1517 build_string ("ISO8859-8 (Hebrew)"),
1518 build_string ("iso8859-8"));
1519 Vcharset_katakana_jisx0201 =
1520 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1521 CHARSET_TYPE_94, 1, 1, 'I',
1522 CHARSET_LEFT_TO_RIGHT,
1523 build_string ("JISX0201 Kana"),
1524 build_string ("JISX0201.1976 (Japanese Kana)"),
1525 build_string ("JISX0201.1976 Japanese Kana"),
1526 build_string ("jisx0201.1976"));
1527 Vcharset_latin_jisx0201 =
1528 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1529 CHARSET_TYPE_94, 1, 0, 'J',
1530 CHARSET_LEFT_TO_RIGHT,
1531 build_string ("JISX0201 Roman"),
1532 build_string ("JISX0201.1976 (Japanese Roman)"),
1533 build_string ("JISX0201.1976 Japanese Roman"),
1534 build_string ("jisx0201.1976"));
1535 Vcharset_cyrillic_iso8859_5 =
1536 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1537 CHARSET_TYPE_96, 1, 1, 'L',
1538 CHARSET_LEFT_TO_RIGHT,
1539 build_string ("ISO8859-5"),
1540 build_string ("ISO8859-5 (Cyrillic)"),
1541 build_string ("ISO8859-5 (Cyrillic)"),
1542 build_string ("iso8859-5"));
1543 Vcharset_latin_iso8859_9 =
1544 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1545 CHARSET_TYPE_96, 1, 1, 'M',
1546 CHARSET_LEFT_TO_RIGHT,
1547 build_string ("Latin-5"),
1548 build_string ("ISO8859-9 (Latin-5)"),
1549 build_string ("ISO8859-9 (Latin-5)"),
1550 build_string ("iso8859-9"));
1551 Vcharset_japanese_jisx0208_1978 =
1552 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1553 CHARSET_TYPE_94X94, 2, 0, '@',
1554 CHARSET_LEFT_TO_RIGHT,
1555 build_string ("JISX0208.1978"),
1556 build_string ("JISX0208.1978 (Japanese)"),
1558 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1559 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1560 Vcharset_chinese_gb2312 =
1561 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1562 CHARSET_TYPE_94X94, 2, 0, 'A',
1563 CHARSET_LEFT_TO_RIGHT,
1564 build_string ("GB2312"),
1565 build_string ("GB2312)"),
1566 build_string ("GB2312 Chinese simplified"),
1567 build_string ("gb2312"));
1568 Vcharset_japanese_jisx0208 =
1569 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1570 CHARSET_TYPE_94X94, 2, 0, 'B',
1571 CHARSET_LEFT_TO_RIGHT,
1572 build_string ("JISX0208"),
1573 build_string ("JISX0208.1983/1990 (Japanese)"),
1574 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1575 build_string ("jisx0208.19\\(83\\|90\\)"));
1576 Vcharset_korean_ksc5601 =
1577 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1578 CHARSET_TYPE_94X94, 2, 0, 'C',
1579 CHARSET_LEFT_TO_RIGHT,
1580 build_string ("KSC5601"),
1581 build_string ("KSC5601 (Korean"),
1582 build_string ("KSC5601 Korean Hangul and Hanja"),
1583 build_string ("ksc5601"));
1584 Vcharset_japanese_jisx0212 =
1585 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1586 CHARSET_TYPE_94X94, 2, 0, 'D',
1587 CHARSET_LEFT_TO_RIGHT,
1588 build_string ("JISX0212"),
1589 build_string ("JISX0212 (Japanese)"),
1590 build_string ("JISX0212 Japanese Supplement"),
1591 build_string ("jisx0212"));
1593 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1594 Vcharset_chinese_cns11643_1 =
1595 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1596 CHARSET_TYPE_94X94, 2, 0, 'G',
1597 CHARSET_LEFT_TO_RIGHT,
1598 build_string ("CNS11643-1"),
1599 build_string ("CNS11643-1 (Chinese traditional)"),
1601 ("CNS 11643 Plane 1 Chinese traditional"),
1602 build_string (CHINESE_CNS_PLANE_RE("1")));
1603 Vcharset_chinese_cns11643_2 =
1604 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1605 CHARSET_TYPE_94X94, 2, 0, 'H',
1606 CHARSET_LEFT_TO_RIGHT,
1607 build_string ("CNS11643-2"),
1608 build_string ("CNS11643-2 (Chinese traditional)"),
1610 ("CNS 11643 Plane 2 Chinese traditional"),
1611 build_string (CHINESE_CNS_PLANE_RE("2")));
1612 Vcharset_chinese_big5_1 =
1613 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1614 CHARSET_TYPE_94X94, 2, 0, '0',
1615 CHARSET_LEFT_TO_RIGHT,
1616 build_string ("Big5"),
1617 build_string ("Big5 (Level-1)"),
1619 ("Big5 Level-1 Chinese traditional"),
1620 build_string ("big5"));
1621 Vcharset_chinese_big5_2 =
1622 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1623 CHARSET_TYPE_94X94, 2, 0, '1',
1624 CHARSET_LEFT_TO_RIGHT,
1625 build_string ("Big5"),
1626 build_string ("Big5 (Level-2)"),
1628 ("Big5 Level-2 Chinese traditional"),
1629 build_string ("big5"));
1632 #ifdef ENABLE_COMPOSITE_CHARS
1633 /* #### For simplicity, we put composite chars into a 96x96 charset.
1634 This is going to lead to problems because you can run out of
1635 room, esp. as we don't yet recycle numbers. */
1636 Vcharset_composite =
1637 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1638 CHARSET_TYPE_96X96, 2, 0, 0,
1639 CHARSET_LEFT_TO_RIGHT,
1640 build_string ("Composite"),
1641 build_string ("Composite characters"),
1642 build_string ("Composite characters"),
1645 composite_char_row_next = 32;
1646 composite_char_col_next = 32;
1648 Vcomposite_char_string2char_hash_table =
1649 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1650 Vcomposite_char_char2string_hash_table =
1651 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1652 staticpro (&Vcomposite_char_string2char_hash_table);
1653 staticpro (&Vcomposite_char_char2string_hash_table);
1654 #endif /* ENABLE_COMPOSITE_CHARS */