1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Mule 2.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_cyrillic_iso8859_5;
45 Lisp_Object Vcharset_arabic_iso8859_6;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_latin_iso8859_9;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_japanese_jisx0208;
54 Lisp_Object Vcharset_japanese_jisx0212;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601;
61 Lisp_Object Vcharset_composite;
63 /* Hash tables for composite chars. One maps strings representing
64 composed chars to their equivalent chars; the other maps the chars back to their strings. */
66 Lisp_Object Vcomposite_char_char2string_hash_table;
67 Lisp_Object Vcomposite_char_string2char_hash_table;
69 /* Table of charsets indexed by leading byte. */
70 Lisp_Object charset_by_leading_byte[128];
72 /* Table of charsets indexed by type/final-byte/direction. */
73 Lisp_Object charset_by_attributes[4][128][2];
75 static int composite_char_row_next;
76 static int composite_char_col_next;
78 /* Table of number of bytes in the string representation of a character
79 indexed by the first byte of that representation.
81 rep_bytes_by_first_byte(c) is more efficient than the equivalent
82 canonical computation:
84 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* Not const: make_charset stores REP_BYTES into this table when it
   registers a leading byte below 0xA0. */
86 Bytecount rep_bytes_by_first_byte[0xA0] =
87 { /* 0x00 - 0x7f are for straight ASCII */
88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 /* 0x80 - 0x8f are for Dimension-1 official charsets */
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 /* 0x90 - 0x9d are for Dimension-2 official charsets */
99 /* 0x9e is for Dimension-1 private charsets */
100 /* 0x9f is for Dimension-2 private charsets */
/* The fifteen 3s below cover 0x90 - 0x9e; the trailing 4 is the entry
   for 0x9f. */
101 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
104 Lisp_Object Qcharsetp;
106 /* Qdoc_string, Qdimension, Qchars defined in general.c */
107 Lisp_Object Qregistry, Qfinal, Qgraphic;
108 Lisp_Object Qdirection;
109 Lisp_Object Qreverse_direction_charset;
110 Lisp_Object Qccl_program;
111 Lisp_Object Qleading_byte;
113 Lisp_Object Qascii, Qcontrol_1,
127 Qkatakana_jisx0201, Qlatin_jisx0201,
128 Qjapanese_jisx0208_1978,
133 Qchinese_big5_1, Qchinese_big5_2,
134 Qchinese_cns11643_1, Qchinese_cns11643_2,
136 Qkorean_ksc5601, Qcomposite;
138 Lisp_Object Ql2r, Qr2l;
140 Lisp_Object Vcharset_hash_table;
142 static Bufbyte next_allocated_1_byte_leading_byte;
143 static Bufbyte next_allocated_2_byte_leading_byte;
145 /* Composite characters are characters constructed by overstriking two
146 or more regular characters.
148 1) The old Mule implementation involves storing composite characters
149 in a buffer as a tag followed by all of the actual characters
150 used to make up the composite character. I think this is a bad
151 idea; it greatly complicates code that wants to handle strings
152 one character at a time because it has to deal with the possibility
153 of great big ungainly characters. It's much more reasonable to
154 simply store an index into a table of composite characters.
156 2) The current implementation only allows for 16,384 separate
157 composite characters over the lifetime of the XEmacs process.
158 This could become a potential problem if the user
159 edited lots of different files that use composite characters.
160 Due to FSF bogosity, increasing the number of allowable
161 composite characters under Mule would decrease the number
162 of possible faces that can exist. Mule already has shrunk
163 this to 2048, and further shrinkage would become uncomfortable.
164 No such problems exist in XEmacs.
166 Composite characters could be represented as 0x80 C1 C2 C3,
167 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
168 for slightly under 2^20 (one million) composite characters
169 over the XEmacs process lifetime, and you only need to
170 increase the size of a Mule character from 19 to 21 bits.
171 Or you could use 0x80 C1 C2 C3 C4, allowing for about
172 85 million (slightly over 2^26) composite characters. */
175 /************************************************************************/
176 /* Basic Emchar functions */
177 /************************************************************************/
179 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
180 string in STR. Returns the number of bytes stored.
181 Do not call this directly. Use the macro set_charptr_emchar() instead. */
185 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
193 BREAKUP_CHAR (c, charset, c1, c2);
194 lb = CHAR_LEADING_BYTE (c);
195 if (LEADING_BYTE_PRIVATE_P (lb))
196 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
198 if (EQ (charset, Vcharset_control_1))
207 /* Return the first character from a Mule-encoded string in STR,
208 assuming it's non-ASCII. Do not call this directly.
209 Use the macro charptr_emchar() instead. */
/* STR points at the first byte of an encoded non-ASCII character.
   The result is built with MAKE_CHAR from the charset found for the
   leading byte and the two values i1 and i2. */
212 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* i0 is the first byte of the encoding. i2 starts at 0 and keeps that
   value unless a later statement assigns it. */
214 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 is special-cased: the byte that follows the leading byte,
   minus 0x20, is returned directly as the character. */
217 if (i0 == LEADING_BYTE_CONTROL_1)
218 return (Emchar) (*++str - 0x20);
/* NOTE(review): the statements controlled by this prefix test are not
   visible in this excerpt; presumably they step past the prefix byte --
   confirm against the full source. */
220 if (LEADING_BYTE_PREFIX_P (i0))
225 charset = CHARSET_BY_LEADING_BYTE (i0);
/* NOTE(review): the body of this dimension test is not visible here;
   presumably it fetches the second position octet into i2 -- confirm. */
226 if (XCHARSET_DIMENSION (charset) == 2)
229 return MAKE_CHAR (charset, i1, i2);
232 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
233 Do not call this directly. Use the macro valid_char_p() instead. */
236 non_ascii_valid_char_p (Emchar ch)
240 /* Must have only lowest 19 bits set */
244 f1 = CHAR_FIELD1 (ch);
245 f2 = CHAR_FIELD2 (ch);
246 f3 = CHAR_FIELD3 (ch);
252 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
253 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
254 f2 > MAX_CHAR_FIELD2_PRIVATE)
259 if (f3 != 0x20 && f3 != 0x7F)
263 /* NOTE: This takes advantage of the fact that
264 FIELD2_TO_OFFICIAL_LEADING_BYTE and
265 FIELD2_TO_PRIVATE_LEADING_BYTE are the same, so the single offset used in the lookup below serves both official and private values of f2. */
267 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
268 return (XCHARSET_CHARS (charset) == 96);
274 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
275 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
276 f1 > MAX_CHAR_FIELD1_PRIVATE)
278 if (f2 < 0x20 || f3 < 0x20)
281 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
283 if (UNBOUNDP (Fgethash (make_int (ch),
284 Vcomposite_char_char2string_hash_table,
290 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
293 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
295 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
298 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
300 return (XCHARSET_CHARS (charset) == 96);
305 /************************************************************************/
306 /* Basic string functions */
307 /************************************************************************/
309 /* Copy the character pointed to by PTR into STR, assuming it's
310 non-ASCII. Do not call this directly. Use the macro
311 charptr_copy_char() instead. */
/* PTR is the source character, STR the destination buffer.
   The value returned is strptr + 1 - str, i.e. the distance from STR to
   one past the last byte written through strptr. */
314 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
316 Bufbyte *strptr = str;
/* NOTE(review): the switch below reads *strptr, which is the first byte
   of the DESTINATION. That byte must already hold the character's first
   byte at this point; the statement that stores it is not visible in
   this excerpt -- confirm against the full source. */
318 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
320 /* Notice fallthrough. */
/* Each case copies one trailing byte, so entering at case N copies
   N - 1 bytes after the first. The last copy does not advance ptr. */
321 case 4: *++strptr = *ptr++;
322 case 3: *++strptr = *ptr++;
323 case 2: *++strptr = *ptr;
328 return strptr + 1 - str;
332 /************************************************************************/
333 /* streams of Emchars */
334 /************************************************************************/
336 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
337 The functions below are not meant to be called directly; use
338 the macros in insdel.h. */
/* CH is used as the first byte of the character. Further bytes are
   fetched from STREAM with Lstream_getc, collected in a local buffer,
   and the buffer is decoded with charptr_emchar. */
341 Lstream_get_emchar_1 (Lstream *stream, int ch)
343 Bufbyte str[MAX_EMCHAR_LEN];
344 Bufbyte *strptr = str;
346 str[0] = (Bufbyte) ch;
/* The representation length looked up for the first byte selects how
   many of the reads below are performed. */
347 switch (REP_BYTES_BY_FIRST_BYTE (ch))
349 /* Notice fallthrough. */
/* NOTE(review): the case labels and whatever checks follow each
   Lstream_getc call (for example for a failed read) are not visible in
   this excerpt -- confirm against the full source. */
351 ch = Lstream_getc (stream);
353 *++strptr = (Bufbyte) ch;
355 ch = Lstream_getc (stream);
357 *++strptr = (Bufbyte) ch;
359 ch = Lstream_getc (stream);
361 *++strptr = (Bufbyte) ch;
366 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting LEN bytes to STREAM. Returns whatever Lstream_write
   returns. */
370 Lstream_fput_emchar (Lstream *stream, Emchar ch)
372 Bufbyte str[MAX_EMCHAR_LEN];
373 Bytecount len = set_charptr_emchar (str, ch);
374 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand the
   resulting LEN bytes to Lstream_unread on STREAM. */
378 Lstream_funget_emchar (Lstream *stream, Emchar ch)
380 Bufbyte str[MAX_EMCHAR_LEN];
381 Bytecount len = set_charptr_emchar (str, ch);
382 Lstream_unread (stream, str, len);
386 /************************************************************************/
388 /************************************************************************/
/* Mark function named in the DEFINE_LRECORD_IMPLEMENTATION for
   charsets. Passes the doc_string, registry and ccl_program slots of
   OBJ to MARKOBJ. The return statement is not visible in this
   excerpt. */
391 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
393 struct Lisp_Charset *cs = XCHARSET (obj);
395 markobj (cs->doc_string);
396 markobj (cs->registry);
397 markobj (cs->ccl_program);
/* Print function named in the DEFINE_LRECORD_IMPLEMENTATION for
   charsets. Writes to PRINTCHARFUN, in order: "#<charset ", the name,
   a space, the doc string, a formatted summary (type, direction,
   columns, graphic, final byte), the registry, and the uid in hex
   followed by ">". */
402 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
404 struct Lisp_Charset *cs = XCHARSET (obj);
/* Signals an error instead of printing. The test that guards this call
   is not visible in this excerpt. */
408 error ("printing unreadable object #<charset %s 0x%x>",
409 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
412 write_c_string ("#<charset ", printcharfun);
413 print_internal (CHARSET_NAME (cs), printcharfun, 0);
414 write_c_string (" ", printcharfun);
415 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): sprintf writes into buf without a bound, and buf's
   declaration is not visible here -- confirm it is large enough for
   the longest text this format can produce. */
416 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
417 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
418 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
419 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
421 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
422 CHARSET_COLUMNS (cs),
423 CHARSET_GRAPHIC (cs),
425 write_c_string (buf, printcharfun);
426 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* buf is reused for the closing " 0x<uid>>" piece. */
427 sprintf (buf, " 0x%x>", cs->header.uid);
428 write_c_string (buf, printcharfun);
431 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
432 mark_charset, print_charset, 0, 0, 0,
433 struct Lisp_Charset);
434 /* Make a new charset. */
/* Allocates a struct Lisp_Charset record and fills in its slots.
   ID, NAME, LEADING_BYTE, REP_BYTES, DIRECTION, TYPE, COLUMNS, GRAPHIC,
   FINAL and DOC are stored unchanged in the corresponding CHARSET_*
   slots; the registry slot is set from `reg'. The remaining slots are
   initialised or derived as noted below. */
437 make_charset (int id, Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
438 unsigned char type, unsigned char columns, unsigned char graphic,
439 Bufbyte final, unsigned char direction, Lisp_Object doc,
443 struct Lisp_Charset *cs =
444 alloc_lcrecord_type (struct Lisp_Charset, lrecord_charset);
445 XSETCHARSET (obj, cs);
447 CHARSET_ID (cs) = id;
448 CHARSET_NAME (cs) = name;
449 CHARSET_LEADING_BYTE (cs) = leading_byte;
450 CHARSET_REP_BYTES (cs) = rep_bytes;
451 CHARSET_DIRECTION (cs) = direction;
452 CHARSET_TYPE (cs) = type;
453 CHARSET_COLUMNS (cs) = columns;
454 CHARSET_GRAPHIC (cs) = graphic;
455 CHARSET_FINAL (cs) = final;
456 CHARSET_DOC_STRING (cs) = doc;
457 CHARSET_REGISTRY (cs) = reg;
/* A new charset starts with no CCL program and no reverse-direction
   charset. */
458 CHARSET_CCL_PROGRAM (cs) = Qnil;
459 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* Dimension and chars are derived from the type: types 94 and 96 give
   dimension 1, any other type gives 2; types 94 and 94X94 give 94
   chars, any other type gives 96. */
461 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
462 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
463 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
464 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
468 /* Some charsets do not have final characters. This includes
469 ASCII, Control-1, Composite, and the two faux private charsets. */
471 assert (NILP (charset_by_attributes[type][final][direction]));
472 charset_by_attributes[type][final][direction] = obj;
475 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
476 charset_by_leading_byte[leading_byte - 128] = obj;
477 if (leading_byte < 0xA0)
478 /* official leading byte */
479 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
481 /* Some charsets are "faux" and don't have names or really exist at
482 all except in the leading-byte table. */
484 Fputhash (name, obj, Vcharset_hash_table);
/* Hands out a private leading byte for a charset of the given
   DIMENSION, using two file-level counters
   (next_allocated_1_byte_leading_byte and
   next_allocated_2_byte_leading_byte). Each counter is compared
   against the top of its range (MAX_LEADING_BYTE_PRIVATE_1 or
   MAX_LEADING_BYTE_PRIVATE_2) and post-incremented when a byte is
   taken from it.
   NOTE(review): the test that chooses between the two counters and the
   statements taken when a range is exhausted are not visible in this
   excerpt; presumably the first counter serves dimension 1 and the
   second dimension 2 -- confirm against the full source. */
489 get_unallocated_leading_byte (int dimension)
495 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
498 lb = next_allocated_1_byte_leading_byte++;
502 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
505 lb = next_allocated_2_byte_leading_byte++;
/* Error reported with DIMENSION as its datum; the call it belongs to
   and its guarding condition are not visible here. */
510 ("No more character sets free for this dimension",
511 make_int (dimension));
517 /************************************************************************/
518 /* Basic charset Lisp functions */
519 /************************************************************************/
521 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
522 Return non-nil if OBJECT is a charset.
526 return CHARSETP (object) ? Qt : Qnil;
529 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
530 Retrieve the charset of the given name.
531 If CHARSET-OR-NAME is a charset object, it is simply returned.
532 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
533 nil is returned. Otherwise the associated charset object is returned.
537 if (CHARSETP (charset_or_name))
538 return charset_or_name;
540 CHECK_SYMBOL (charset_or_name);
541 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
544 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
545 Retrieve the charset of the given name.
546 Same as `find-charset' except an error is signalled if there is no such
547 charset instead of returning nil.
551 Lisp_Object charset = Ffind_charset (name);
554 signal_simple_error ("No such charset", name);
558 /* We store the charsets in hash tables with the names as the key and the
559 actual charset object as the value. Occasionally we need to use them
560 in a list format. These routines provide us with that. */
/* Closure passed (as a void pointer) to add_charset_to_list_mapper. */
561 struct charset_list_closure
/* Address of the list that the mapper conses onto. */
563 Lisp_Object *charset_list;
/* Mapping function passed to elisp_maphash by Fcharset_list.
   VALUE is a charset; its name is consed onto the front of the list
   that the closure points to. KEY is not used in the visible code.
   CHARSET_LIST_CLOSURE is a struct charset_list_closure passed as a
   void pointer. */
567 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
568 void *charset_list_closure)
570 /* This function can GC */
571 struct charset_list_closure *chcl =
572 (struct charset_list_closure*) charset_list_closure;
573 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend, so the list ends up in reverse visiting order. */
575 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
579 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
580 Return a list of the names of all defined charsets.
584 Lisp_Object charset_list = Qnil;
586 struct charset_list_closure charset_list_closure;
588 GCPRO1 (charset_list);
589 charset_list_closure.charset_list = &charset_list;
590 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
591 &charset_list_closure);
597 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
598 Return the name of the given charset.
602 return XCHARSET_NAME (Fget_charset (charset));
605 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
606 Define a new character set.
607 This function is for use with Mule support.
608 NAME is a symbol, the name by which the character set is normally referred.
609 DOC-STRING is a string describing the character set.
610 PROPS is a property list, describing the specific nature of the
611 character set. Recognized properties are:
613 'registry A regular expression matching the font registry field for
615 'dimension Number of octets used to index a character in this charset.
616 Either 1 or 2. Defaults to 1.
617 'columns Number of columns used to display a character in this charset.
618 Only used in TTY mode. (Under X, the actual width of a
619 character can be derived from the font used to display the
620 characters.) If unspecified, defaults to the dimension
621 (this is almost always the correct value).
622 'chars Number of characters in each dimension (94 or 96).
623 Defaults to 94. Note that if the dimension is 2, the
624 character set thus described is 94x94 or 96x96.
625 'final Final byte of ISO 2022 escape sequence. Must be
626 supplied. Each combination of (DIMENSION, CHARS) defines a
627 separate namespace for final bytes. Note that ISO
628 2022 restricts the final byte to the range
629 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
630 dimension == 2. Note also that final bytes in the range
631 0x30 - 0x3F are reserved for user-defined (not official)
633 'graphic 0 (use left half of font on output) or 1 (use right half
634 of font on output). Defaults to 0. For example, for
635 a font whose registry is ISO8859-1, the left half
636 (octets 0x20 - 0x7F) is the `ascii' character set, while
637 the right half (octets 0xA0 - 0xFF) is the `latin-1'
638 character set. With 'graphic set to 0, the octets
639 will have their high bit cleared; with it set to 1,
640 the octets will have their high bit set.
641 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
643 'ccl-program A compiled CCL program used to convert a character in
644 this charset into an index into the font. This is in
645 addition to the 'graphic property. The CCL program
646 is passed the octets of the character, with the high
647 bit cleared and set depending upon whether the value
648 of the 'graphic property is 0 or 1.
650 (name, doc_string, props))
652 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
653 int direction = CHARSET_LEFT_TO_RIGHT;
655 Lisp_Object registry = Qnil;
657 Lisp_Object rest, keyword, value;
658 Lisp_Object ccl_program = Qnil;
661 if (!NILP (doc_string))
662 CHECK_STRING (doc_string);
664 charset = Ffind_charset (name);
666 signal_simple_error ("Cannot redefine existing charset", name);
668 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
670 if (EQ (keyword, Qdimension))
673 dimension = XINT (value);
674 if (dimension < 1 || dimension > 2)
675 signal_simple_error ("Invalid value for 'dimension", value);
678 else if (EQ (keyword, Qchars))
681 chars = XINT (value);
682 if (chars != 94 && chars != 96)
683 signal_simple_error ("Invalid value for 'chars", value);
686 else if (EQ (keyword, Qcolumns))
689 columns = XINT (value);
690 if (columns != 1 && columns != 2)
691 signal_simple_error ("Invalid value for 'columns", value);
694 else if (EQ (keyword, Qgraphic))
697 graphic = XINT (value);
698 if (graphic < 0 || graphic > 1)
699 signal_simple_error ("Invalid value for 'graphic", value);
702 else if (EQ (keyword, Qregistry))
704 CHECK_STRING (value);
708 else if (EQ (keyword, Qdirection))
710 if (EQ (value, Ql2r))
711 direction = CHARSET_LEFT_TO_RIGHT;
712 else if (EQ (value, Qr2l))
713 direction = CHARSET_RIGHT_TO_LEFT;
715 signal_simple_error ("Invalid value for 'direction", value);
718 else if (EQ (keyword, Qfinal))
720 CHECK_CHAR_COERCE_INT (value);
721 final = XCHAR (value);
722 if (final < '0' || final > '~')
723 signal_simple_error ("Invalid value for 'final", value);
726 else if (EQ (keyword, Qccl_program))
728 CHECK_VECTOR (value);
733 signal_simple_error ("Unrecognized property", keyword);
737 error ("'final must be specified");
738 if (dimension == 2 && final > 0x5F)
740 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
744 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
746 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
748 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
749 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
751 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
753 lb = get_unallocated_leading_byte (dimension);
755 if (NILP (doc_string))
756 doc_string = build_string ("");
759 registry = build_string ("");
763 charset = make_charset (-1, name, lb, dimension + 2, type, columns, graphic,
764 final, direction, doc_string, registry);
765 if (!NILP (ccl_program))
766 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
770 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
772 Make a charset equivalent to CHARSET but which goes in the opposite direction.
773 NEW-NAME is the name of the new charset. Return the new charset.
777 Lisp_Object new_charset = Qnil;
778 int lb, dimension, columns, graphic, final;
780 Lisp_Object registry, doc_string;
781 struct Lisp_Charset *cs;
783 charset = Fget_charset (charset);
784 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
785 signal_simple_error ("Charset already has reverse-direction charset",
788 CHECK_SYMBOL (new_name);
789 if (!NILP (Ffind_charset (new_name)))
790 signal_simple_error ("Cannot redefine existing charset", new_name);
792 cs = XCHARSET (charset);
794 type = CHARSET_TYPE (cs);
795 columns = CHARSET_COLUMNS (cs);
796 dimension = CHARSET_DIMENSION (cs);
797 lb = get_unallocated_leading_byte (dimension);
799 graphic = CHARSET_GRAPHIC (cs);
800 final = CHARSET_FINAL (cs);
801 direction = CHARSET_RIGHT_TO_LEFT;
802 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
803 direction = CHARSET_LEFT_TO_RIGHT;
804 doc_string = CHARSET_DOC_STRING (cs);
805 registry = CHARSET_REGISTRY (cs);
807 new_charset = make_charset (-1, new_name, lb, dimension + 2, type, columns,
808 graphic, final, direction, doc_string, registry);
810 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
811 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
816 /* #### Reverse direction charsets not yet implemented. */
818 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
820 Return the reverse-direction charset parallel to CHARSET, if any.
821 This is the charset with the same properties (in particular, the same
822 dimension, number of characters per dimension, and final byte) as
823 CHARSET but whose characters are displayed in the opposite direction.
827 charset = Fget_charset (charset);
828 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
832 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
833 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
834 If DIRECTION is omitted, both directions will be checked (left-to-right
835 will be returned if character sets exist for both directions).
837 (dimension, chars, final, direction))
839 int dm, ch, fi, di = -1;
841 Lisp_Object obj = Qnil;
843 CHECK_INT (dimension);
844 dm = XINT (dimension);
845 if (dm < 1 || dm > 2)
846 signal_simple_error ("Invalid value for DIMENSION", dimension);
850 if (ch != 94 && ch != 96)
851 signal_simple_error ("Invalid value for CHARS", chars);
853 CHECK_CHAR_COERCE_INT (final);
855 if (fi < '0' || fi > '~')
856 signal_simple_error ("Invalid value for FINAL", final);
858 if (EQ (direction, Ql2r))
859 di = CHARSET_LEFT_TO_RIGHT;
860 else if (EQ (direction, Qr2l))
861 di = CHARSET_RIGHT_TO_LEFT;
862 else if (!NILP (direction))
863 signal_simple_error ("Invalid value for DIRECTION", direction);
865 if (dm == 2 && fi > 0x5F)
867 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
870 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
872 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
876 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
878 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
881 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
884 return XCHARSET_NAME (obj);
888 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
889 Return doc string of CHARSET.
893 return XCHARSET_DOC_STRING (Fget_charset (charset));
896 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
897 Return dimension of CHARSET.
901 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
904 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
905 Return property PROP of CHARSET.
906 Recognized properties are those listed in `make-charset', as well as
907 'name and 'doc-string.
911 struct Lisp_Charset *cs;
913 charset = Fget_charset (charset);
914 cs = XCHARSET (charset);
917 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
918 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
919 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
920 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
921 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
922 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
923 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
924 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
925 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
926 if (EQ (prop, Qleading_byte)) return make_char (CHARSET_LEADING_BYTE (cs));
927 if (EQ (prop, Qdirection))
928 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
929 if (EQ (prop, Qreverse_direction_charset))
931 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
935 return XCHARSET_NAME (obj);
937 signal_simple_error ("Unrecognized charset property name", prop);
938 return Qnil; /* not reached */
941 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
942 Return charset identification number of CHARSET.
946 return make_int(XCHARSET_ID (Fget_charset (charset)));
949 /* #### We need to figure out which properties we really want to allow to be set. */
952 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
953 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
955 (charset, ccl_program))
957 charset = Fget_charset (charset);
958 CHECK_VECTOR (ccl_program);
959 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
964 invalidate_charset_font_caches (Lisp_Object charset)
966 /* Invalidate font cache entries for charset on all devices. */
967 Lisp_Object devcons, concons, hash_table;
968 DEVICE_LOOP_NO_BREAK (devcons, concons)
970 struct device *d = XDEVICE (XCAR (devcons));
971 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
972 if (!UNBOUNDP (hash_table))
973 Fclrhash (hash_table);
977 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
978 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
979 Set the 'registry property of CHARSET to REGISTRY.
983 charset = Fget_charset (charset);
984 CHECK_STRING (registry);
985 XCHARSET_REGISTRY (charset) = registry;
986 invalidate_charset_font_caches (charset);
987 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
992 /************************************************************************/
993 /* Lisp primitives for working with characters */
994 /************************************************************************/
996 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
997 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
999 (charset, arg1, arg2))
1001 struct Lisp_Charset *cs;
1003 int lowlim, highlim;
1005 charset = Fget_charset (charset);
1006 cs = XCHARSET (charset);
1008 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1009 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1010 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1011 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1015 if (a1 < lowlim || a1 > highlim)
1016 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1018 if (CHARSET_DIMENSION (cs) == 1)
1022 ("Charset is of dimension one; second octet must be nil", arg2);
1023 return make_char (MAKE_CHAR (charset, a1, 0));
1028 if (a2 < lowlim || a2 > highlim)
1029 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1031 return make_char (MAKE_CHAR (charset, a1, a2));
1034 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1035 Return the character set of char CH.
1039 CHECK_CHAR_COERCE_INT (ch);
1041 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1042 (CHAR_LEADING_BYTE (XCHAR (ch))));
1045 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1046 Return the octet numbered N (should be 0 or 1) of char CH.
1047 N defaults to 0 if omitted.
1051 Lisp_Object charset;
1054 CHECK_CHAR_COERCE_INT (ch);
1061 if (int_n != 0 && int_n != 1)
1062 signal_simple_error ("Octet number must be 0 or 1", n);
1064 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1065 return make_int (int_n == 0 ? c1 : c2);
1069 /************************************************************************/
1070 /* composite character functions */
1071 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.
   The bytes are made into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table. The visible remainder takes
   the next (row, column) position in Vcharset_composite, builds the
   character with MAKE_CHAR and records it in both hash tables.
   NOTE(review): the test on the lookup result and the final return are
   not visible in this excerpt -- confirm against the full source. */
1074 lookup_composite_char (Bufbyte *str, int len)
1076 Lisp_Object lispstr = make_string (str, len);
1077 Lisp_Object ch = Fgethash (lispstr,
1078 Vcomposite_char_string2char_hash_table,
/* Once the row counter reaches 128 no further characters are handed
   out; an error is signalled with the string as datum. */
1084 if (composite_char_row_next >= 128)
1085 signal_simple_error ("No more composite chars available", lispstr);
1086 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1087 composite_char_col_next);
/* Record the mapping in both directions: char to string, then string
   to char. */
1088 Fputhash (make_char (emch), lispstr,
1089 Vcomposite_char_char2string_hash_table);
1090 Fputhash (lispstr, make_char (emch),
1091 Vcomposite_char_string2char_hash_table);
/* Advance the column; when it reaches 128, restart it at 32 and move
   on to the next row. */
1092 composite_char_col_next++;
1093 if (composite_char_col_next >= 128)
1095 composite_char_col_next = 32;
1096 composite_char_row_next++;
/* Looks CH up in Vcomposite_char_char2string_hash_table and asserts
   that the result is not unbound, so CH must be a character that was
   previously recorded in that table.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the string found is returned -- confirm. */
1105 composite_char_string (Emchar ch)
1107 Lisp_Object str = Fgethash (make_char (ch),
1108 Vcomposite_char_char2string_hash_table,
1110 assert (!UNBOUNDP (str));
1114 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1115 Convert a string into a single composite character.
1116 The character is the result of overstriking all the characters in
1121 CHECK_STRING (string);
1122 return make_char (lookup_composite_char (XSTRING_DATA (string),
1123 XSTRING_LENGTH (string)));
1126 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1127 Return a string of the characters comprising a composite character.
1135 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1136 signal_simple_error ("Must be composite char", ch);
1137 return composite_char_string (emch);
1141 /************************************************************************/
1142 /* initialization */
1143 /************************************************************************/
/* Calls DEFSUBR for the Lisp primitives defined in this file and
   defsymbol for the Q... symbol variables declared near the top of the
   file, pairing each variable with its Lisp name. */
1146 syms_of_mule_charset (void)
/* Charset primitives. */
1148 DEFSUBR (Fcharsetp);
1149 DEFSUBR (Ffind_charset);
1150 DEFSUBR (Fget_charset);
1151 DEFSUBR (Fcharset_list);
1152 DEFSUBR (Fcharset_name);
1153 DEFSUBR (Fmake_charset);
1154 DEFSUBR (Fmake_reverse_direction_charset);
/* NOTE(review): the disabled line below names
   Freverse_direction_charset, while the function defined in this file
   is Fcharset_reverse_direction_charset, which has no DEFSUBR here --
   confirm that leaving it unregistered is intended. */
1155 /* DEFSUBR (Freverse_direction_charset); */
1156 DEFSUBR (Fcharset_from_attributes);
1157 DEFSUBR (Fcharset_doc_string);
1158 DEFSUBR (Fcharset_dimension);
1159 DEFSUBR (Fcharset_property);
1160 DEFSUBR (Fcharset_id);
1161 DEFSUBR (Fset_charset_ccl_program);
1162 DEFSUBR (Fset_charset_registry);
/* Character primitives. */
1164 DEFSUBR (Fmake_char);
1165 DEFSUBR (Fchar_charset);
1166 DEFSUBR (Fchar_octet);
/* Composite-character primitives. */
1168 DEFSUBR (Fmake_composite_char);
1169 DEFSUBR (Fcomposite_char_string);
/* Property-name symbols. */
1171 defsymbol (&Qcharsetp, "charsetp");
1172 defsymbol (&Qregistry, "registry");
1173 defsymbol (&Qfinal, "final");
1174 defsymbol (&Qgraphic, "graphic");
1175 defsymbol (&Qdirection, "direction");
1176 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1177 defsymbol (&Qccl_program, "ccl-program");
1178 defsymbol (&Qleading_byte, "leading-byte");
/* Direction values. */
1180 defsymbol (&Ql2r, "l2r");
1181 defsymbol (&Qr2l, "r2l");
1183 /* Charsets, compatible with Emacs/Mule 19.33-delta
1184 Naming convention is Script-Charset[-Edition] */
1185 defsymbol (&Qascii, "ascii");
1186 defsymbol (&Qcontrol_1, "control-1");
1187 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1188 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1189 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1190 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1191 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1192 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1193 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1194 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1195 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1196 defsymbol (&Qthai_tis620, "thai-tis620");
1198 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1199 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1200 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1201 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1202 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1204 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1205 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1206 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1207 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1208 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1210 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1211 defsymbol (&Qcomposite, "composite");
/* Resets the charset lookup tables and the private leading-byte
   allocators.  Every slot of charset_by_leading_byte and of the
   three-dimensional charset_by_attributes table is set to Qnil, and the
   two next_allocated_*_leading_byte counters are set to the
   MIN_LEADING_BYTE_PRIVATE_1 / MIN_LEADING_BYTE_PRIVATE_2 constants.
   Takes no arguments. */
1215 vars_of_mule_charset (void)
1219 /* Table of charsets indexed by leading byte. */
1220 for (i = 0; i < countof (charset_by_leading_byte); i++)
1221 charset_by_leading_byte[i] = Qnil;
1223 /* Table of charsets indexed by type/final-byte/direction. */
/* The bounds of the inner loops are taken from the sub-array sizes at
   index 0, so all three dimensions are covered in full. */
1224 for (i = 0; i < countof (charset_by_attributes); i++)
1225 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1226 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1227 charset_by_attributes[i][j][k] = Qnil;
/* Presumably the first leading bytes handed out to privately defined
   1-byte and 2-byte charsets -- confirm where these counters are
   consumed. */
1229 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1230 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* Builds the predefined charset objects and the composite-char tables.
   Steps visible in the body:
     1. Vcharset_hash_table is registered with staticpro and then set to
        a fresh non-weak EQ hash table of initial size 50.
     2. Each predefined charset is created with make_charset and stored
        in the matching Vcharset_* variable.
     3. The composite-char row/column counters are set to 32 and the two
        composite-char hash tables are created and registered with
        staticpro.
   NOTE(review): judging from the call sites, the make_charset arguments
   appear to be: id, name symbol, leading byte, bytes per character in
   the internal representation, charset type, display columns, graphic
   (0 or 1), final byte, direction, doc string, registry regexp.  This
   is inferred from the values only -- confirm against the definition
   of make_charset.
   Takes no arguments. */
1234 complex_vars_of_mule_charset (void)
1236 staticpro (&Vcharset_hash_table);
1237 Vcharset_hash_table =
1238 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1240 /* Predefined character sets. We store them into variables for
1244 make_charset (0, Qascii, LEADING_BYTE_ASCII, 1,
1245 CHARSET_TYPE_94, 1, 0, 'B',
1246 CHARSET_LEFT_TO_RIGHT,
1247 build_string ("ASCII (ISO 646 IRV)"),
1248 build_string ("iso8859-1"));
1249 Vcharset_control_1 =
1250 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1251 CHARSET_TYPE_94, 1, 1, 0,
1252 CHARSET_LEFT_TO_RIGHT,
1253 build_string ("Control characters"),
1255 Vcharset_latin_iso8859_1 =
1256 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
1257 CHARSET_TYPE_96, 1, 1, 'A',
1258 CHARSET_LEFT_TO_RIGHT,
1259 build_string ("ISO 8859-1 (Latin-1)"),
1260 build_string ("iso8859-1"));
1261 Vcharset_latin_iso8859_2 =
1262 make_charset (130, Qlatin_iso8859_2, LEADING_BYTE_LATIN_ISO8859_2, 2,
1263 CHARSET_TYPE_96, 1, 1, 'B',
1264 CHARSET_LEFT_TO_RIGHT,
1265 build_string ("ISO 8859-2 (Latin-2)"),
1266 build_string ("iso8859-2"));
1267 Vcharset_latin_iso8859_3 =
1268 make_charset (131, Qlatin_iso8859_3, LEADING_BYTE_LATIN_ISO8859_3, 2,
1269 CHARSET_TYPE_96, 1, 1, 'C',
1270 CHARSET_LEFT_TO_RIGHT,
1271 build_string ("ISO 8859-3 (Latin-3)"),
1272 build_string ("iso8859-3"));
1273 Vcharset_latin_iso8859_4 =
1274 make_charset (132, Qlatin_iso8859_4, LEADING_BYTE_LATIN_ISO8859_4, 2,
1275 CHARSET_TYPE_96, 1, 1, 'D',
1276 CHARSET_LEFT_TO_RIGHT,
1277 build_string ("ISO 8859-4 (Latin-4)"),
1278 build_string ("iso8859-4"));
1279 Vcharset_cyrillic_iso8859_5 =
1280 make_charset (140, Qcyrillic_iso8859_5, LEADING_BYTE_CYRILLIC_ISO8859_5, 2,
1281 CHARSET_TYPE_96, 1, 1, 'L',
1282 CHARSET_LEFT_TO_RIGHT,
1283 build_string ("ISO 8859-5 (Cyrillic)"),
1284 build_string ("iso8859-5"));
1285 Vcharset_arabic_iso8859_6 =
1286 make_charset (135, Qarabic_iso8859_6, LEADING_BYTE_ARABIC_ISO8859_6, 2,
1287 CHARSET_TYPE_96, 1, 1, 'G',
1288 CHARSET_RIGHT_TO_LEFT,
1289 build_string ("ISO 8859-6 (Arabic)"),
1290 build_string ("iso8859-6"));
1291 Vcharset_greek_iso8859_7 =
1292 make_charset (134, Qgreek_iso8859_7, LEADING_BYTE_GREEK_ISO8859_7, 2,
1293 CHARSET_TYPE_96, 1, 1, 'F',
1294 CHARSET_LEFT_TO_RIGHT,
1295 build_string ("ISO 8859-7 (Greek)"),
1296 build_string ("iso8859-7"));
1297 Vcharset_hebrew_iso8859_8 =
1298 make_charset (136, Qhebrew_iso8859_8, LEADING_BYTE_HEBREW_ISO8859_8, 2,
1299 CHARSET_TYPE_96, 1, 1, 'H',
1300 CHARSET_RIGHT_TO_LEFT,
1301 build_string ("ISO 8859-8 (Hebrew)"),
1302 build_string ("iso8859-8"));
1303 Vcharset_latin_iso8859_9 =
1304 make_charset (141, Qlatin_iso8859_9, LEADING_BYTE_LATIN_ISO8859_9, 2,
1305 CHARSET_TYPE_96, 1, 1, 'M',
1306 CHARSET_LEFT_TO_RIGHT,
1307 build_string ("ISO 8859-9 (Latin-5)"),
1308 build_string ("iso8859-9"));
1309 Vcharset_thai_tis620 =
1310 make_charset (133, Qthai_tis620, LEADING_BYTE_THAI_TIS620, 2,
1311 CHARSET_TYPE_96, 1, 1, 'T',
1312 CHARSET_LEFT_TO_RIGHT,
1313 build_string ("TIS 620.2529 (Thai)"),
1314 build_string ("tis620"));
1317 Vcharset_katakana_jisx0201 =
1318 make_charset (137, Qkatakana_jisx0201,
1319 LEADING_BYTE_KATAKANA_JISX0201, 2,
1320 CHARSET_TYPE_94, 1, 1, 'I',
1321 CHARSET_LEFT_TO_RIGHT,
1322 build_string ("JIS X0201-Katakana"),
1323 build_string ("jisx0201.1976"));
1324 Vcharset_latin_jisx0201 =
1325 make_charset (138, Qlatin_jisx0201,
1326 LEADING_BYTE_LATIN_JISX0201, 2,
1327 CHARSET_TYPE_94, 1, 0, 'J',
1328 CHARSET_LEFT_TO_RIGHT,
1329 build_string ("JIS X0201-Latin"),
1330 build_string ("jisx0201.1976"));
1331 Vcharset_japanese_jisx0208_1978 =
1332 make_charset (144, Qjapanese_jisx0208_1978,
1333 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1334 CHARSET_TYPE_94X94, 2, 0, '@',
1335 CHARSET_LEFT_TO_RIGHT,
1337 ("JIS X0208-1978 (Japanese Kanji; Old Version)"),
1338 build_string ("\\(jisx0208\\|jisc6226\\).19"));
1339 Vcharset_japanese_jisx0208 =
1340 make_charset (146, Qjapanese_jisx0208,
1341 LEADING_BYTE_JAPANESE_JISX0208, 3,
1342 CHARSET_TYPE_94X94, 2, 0, 'B',
1343 CHARSET_LEFT_TO_RIGHT,
1344 build_string ("JIS X0208-1983 (Japanese Kanji)"),
1345 build_string ("jisx0208.19\\(83\\|90\\)"));
1346 Vcharset_japanese_jisx0212 =
1347 make_charset (148, Qjapanese_jisx0212,
1348 LEADING_BYTE_JAPANESE_JISX0212, 3,
1349 CHARSET_TYPE_94X94, 2, 0, 'D',
1350 CHARSET_LEFT_TO_RIGHT,
1351 build_string ("JIS X0212 (Japanese Supplement)"),
1352 build_string ("jisx0212"));
1355 Vcharset_chinese_gb2312 =
1356 make_charset (145, Qchinese_gb2312, LEADING_BYTE_CHINESE_GB2312, 3,
1357 CHARSET_TYPE_94X94, 2, 0, 'A',
1358 CHARSET_LEFT_TO_RIGHT,
1359 build_string ("GB 2312 (Simplified Chinese)"),
1360 build_string ("gb2312"));
1361 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1362 Vcharset_chinese_cns11643_1 =
1363 make_charset (149, Qchinese_cns11643_1,
1364 LEADING_BYTE_CHINESE_CNS11643_1, 3,
1365 CHARSET_TYPE_94X94, 2, 0, 'G',
1366 CHARSET_LEFT_TO_RIGHT,
1368 ("CNS 11643 Plane 1 (Traditional Chinese for daily use)"),
1369 build_string (CHINESE_CNS_PLANE_RE("1")));
1370 Vcharset_chinese_cns11643_2 =
1371 make_charset (150, Qchinese_cns11643_2,
1372 LEADING_BYTE_CHINESE_CNS11643_2, 3,
1373 CHARSET_TYPE_94X94, 2, 0, 'H',
1374 CHARSET_LEFT_TO_RIGHT,
1376 ("CNS 11643 Plane 2 (Traditional Chinese for daily use)"),
1377 build_string (CHINESE_CNS_PLANE_RE("2")));
1378 Vcharset_chinese_big5_1 =
1379 make_charset (152, Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
1380 CHARSET_TYPE_94X94, 2, 0, '0',
1381 CHARSET_LEFT_TO_RIGHT,
1383 ("Big5 Level 1 (Traditional Chinese for daily use)"),
1384 build_string ("big5"));
1385 Vcharset_chinese_big5_2 =
1386 make_charset (153, Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
1387 CHARSET_TYPE_94X94, 2, 0, '1',
1388 CHARSET_LEFT_TO_RIGHT,
1390 ("Big5 Level 2 (Traditional Chinese for daily use)"),
1391 build_string ("big5"));
1393 Vcharset_korean_ksc5601 =
1394 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1395 CHARSET_TYPE_94X94, 2, 0, 'C',
1396 CHARSET_LEFT_TO_RIGHT,
1397 build_string ("KS C5601 (Hangul and Korean Hanja)"),
1398 build_string ("ksc5601"));
1399 /* #### For simplicity, we put composite chars into a 96x96 charset.
1400 This is going to lead to problems because you can run out of
1401 room, esp. as we don't yet recycle numbers. */
1402 Vcharset_composite =
1403 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1404 CHARSET_TYPE_96X96, 2, 0, 0,
1405 CHARSET_LEFT_TO_RIGHT,
1406 build_string ("Composite characters"),
/* Starting values for the next composite-char row and column counters
   (presumably 32 is the lowest position code of a 96x96 charset --
   confirm where the counters are advanced). */
1409 composite_char_row_next = 32;
1410 composite_char_col_next = 32;
/* Two non-weak tables of initial size 500: the string-to-char table
   is created with HASH_TABLE_EQUAL, the char-to-string table with
   HASH_TABLE_EQ.  Both are registered with staticpro right after
   creation. */
1412 Vcomposite_char_string2char_hash_table =
1413 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1414 Vcomposite_char_char2string_hash_table =
1415 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1416 staticpro (&Vcomposite_char_string2char_hash_table);
1417 staticpro (&Vcomposite_char_char2string_hash_table);