1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Mule 2.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_cyrillic_iso8859_5;
45 Lisp_Object Vcharset_arabic_iso8859_6;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_latin_iso8859_9;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_japanese_jisx0208;
54 Lisp_Object Vcharset_japanese_jisx0212;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601;
61 Lisp_Object Vcharset_composite;
63 /* Hash tables for composite chars. One maps string representing
64 composed chars to their equivalent chars; one goes the
66 Lisp_Object Vcomposite_char_char2string_hash_table;
67 Lisp_Object Vcomposite_char_string2char_hash_table;
69 /* Table of charsets indexed by leading byte. */
70 Lisp_Object charset_by_leading_byte[128];
72 /* Table of charsets indexed by type/final-byte/direction. */
73 Lisp_Object charset_by_attributes[4][128][2];
75 static int composite_char_row_next;
76 static int composite_char_col_next;
78 /* Table of number of bytes in the string representation of a character
79 indexed by the first byte of that representation.
81 rep_bytes_by_first_byte(c) is more efficient than the equivalent
82 canonical computation:
84 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
86 Bytecount rep_bytes_by_first_byte[0xA0] =
87 { /* 0x00 - 0x7f are for straight ASCII */
88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 /* 0x80 - 0x8f are for Dimension-1 official charsets */
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 /* 0x90 - 0x9d are for Dimension-2 official charsets */
99 /* 0x9e is for Dimension-1 private charsets */
100 /* 0x9f is for Dimension-2 private charsets */
101 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
104 Lisp_Object Qcharsetp;
106 /* Qdoc_string, Qdimension, Qchars defined in general.c */
107 Lisp_Object Qregistry, Qfinal, Qgraphic;
108 Lisp_Object Qdirection;
109 Lisp_Object Qreverse_direction_charset;
110 Lisp_Object Qccl_program;
112 Lisp_Object Qascii, Qcontrol_1,
126 Qkatakana_jisx0201, Qlatin_jisx0201,
127 Qjapanese_jisx0208_1978,
132 Qchinese_big5_1, Qchinese_big5_2,
133 Qchinese_cns11643_1, Qchinese_cns11643_2,
135 Qkorean_ksc5601, Qcomposite;
137 Lisp_Object Ql2r, Qr2l;
139 Lisp_Object Vcharset_hash_table;
141 static Bufbyte next_allocated_1_byte_leading_byte;
142 static Bufbyte next_allocated_2_byte_leading_byte;
144 /* Composite characters are characters constructed by overstriking two
145 or more regular characters.
147 1) The old Mule implementation involves storing composite characters
148 in a buffer as a tag followed by all of the actual characters
149 used to make up the composite character. I think this is a bad
150 idea; it greatly complicates code that wants to handle strings
151 one character at a time because it has to deal with the possibility
152 of great big ungainly characters. It's much more reasonable to
153 simply store an index into a table of composite characters.
155 2) The current implementation only allows for 16,384 separate
156 composite characters over the lifetime of the XEmacs process.
157 This could become a potential problem if the user
158 edited lots of different files that use composite characters.
159 Due to FSF bogosity, increasing the number of allowable
160 composite characters under Mule would decrease the number
161 of possible faces that can exist. Mule already has shrunk
162 this to 2048, and further shrinkage would become uncomfortable.
163 No such problems exist in XEmacs.
165 Composite characters could be represented as 0x80 C1 C2 C3,
166 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
167 for slightly under 2^20 (one million) composite characters
168 over the XEmacs process lifetime, and you only need to
169 increase the size of a Mule character from 19 to 21 bits.
170 Or you could use 0x80 C1 C2 C3 C4, allowing for about
171 85 million (slightly over 2^26) composite characters. */
174 /************************************************************************/
175 /* Basic Emchar functions */
176 /************************************************************************/
178 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
179 string in STR. Returns the number of bytes stored.
180 Do not call this directly. Use the macro set_charptr_emchar() instead.
184 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
192 BREAKUP_CHAR (c, charset, c1, c2);
193 lb = CHAR_LEADING_BYTE (c);
194 if (LEADING_BYTE_PRIVATE_P (lb))
195 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
197 if (EQ (charset, Vcharset_control_1))
206 /* Return the first character from a Mule-encoded string in STR,
207 assuming it's non-ASCII. Do not call this directly.
208 Use the macro charptr_emchar() instead. */
211 non_ascii_charptr_emchar (CONST Bufbyte *str)
213 Bufbyte i0 = *str, i1, i2 = 0;
216 if (i0 == LEADING_BYTE_CONTROL_1)
217 return (Emchar) (*++str - 0x20);
219 if (LEADING_BYTE_PREFIX_P (i0))
224 charset = CHARSET_BY_LEADING_BYTE (i0);
225 if (XCHARSET_DIMENSION (charset) == 2)
228 return MAKE_CHAR (charset, i1, i2);
231 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
232 Do not call this directly. Use the macro valid_char_p() instead. */
235 non_ascii_valid_char_p (Emchar ch)
239 /* Must have only lowest 19 bits set */
243 f1 = CHAR_FIELD1 (ch);
244 f2 = CHAR_FIELD2 (ch);
245 f3 = CHAR_FIELD3 (ch);
251 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
252 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
253 f2 > MAX_CHAR_FIELD2_PRIVATE)
258 if (f3 != 0x20 && f3 != 0x7F)
262 NOTE: This takes advantage of the fact that
263 FIELD2_TO_OFFICIAL_LEADING_BYTE and
264 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
266 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
267 return (XCHARSET_CHARS (charset) == 96);
273 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
274 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
275 f1 > MAX_CHAR_FIELD1_PRIVATE)
277 if (f2 < 0x20 || f3 < 0x20)
280 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
282 if (UNBOUNDP (Fgethash (make_int (ch),
283 Vcomposite_char_char2string_hash_table,
289 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
292 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
294 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
297 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
299 return (XCHARSET_CHARS (charset) == 96);
304 /************************************************************************/
305 /* Basic string functions */
306 /************************************************************************/
308 /* Copy the character pointed to by PTR into STR, assuming it's
309 non-ASCII. Do not call this directly. Use the macro
310 charptr_copy_char() instead. */
313 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
315 Bufbyte *strptr = str;
317 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
319 /* Notice fallthrough. */
320 case 4: *++strptr = *ptr++;
321 case 3: *++strptr = *ptr++;
322 case 2: *++strptr = *ptr;
327 return strptr + 1 - str;
331 /************************************************************************/
332 /* streams of Emchars */
333 /************************************************************************/
335 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
336 The functions below are not meant to be called directly; use
337 the macros in insdel.h. */
340 Lstream_get_emchar_1 (Lstream *stream, int ch)
342 Bufbyte str[MAX_EMCHAR_LEN];
343 Bufbyte *strptr = str;
345 str[0] = (Bufbyte) ch;
346 switch (REP_BYTES_BY_FIRST_BYTE (ch))
348 /* Notice fallthrough. */
350 ch = Lstream_getc (stream);
352 *++strptr = (Bufbyte) ch;
354 ch = Lstream_getc (stream);
356 *++strptr = (Bufbyte) ch;
358 ch = Lstream_getc (stream);
360 *++strptr = (Bufbyte) ch;
365 return charptr_emchar (str);
369 Lstream_fput_emchar (Lstream *stream, Emchar ch)
371 Bufbyte str[MAX_EMCHAR_LEN];
372 Bytecount len = set_charptr_emchar (str, ch);
373 return Lstream_write (stream, str, len);
377 Lstream_funget_emchar (Lstream *stream, Emchar ch)
379 Bufbyte str[MAX_EMCHAR_LEN];
380 Bytecount len = set_charptr_emchar (str, ch);
381 Lstream_unread (stream, str, len);
385 /************************************************************************/
387 /************************************************************************/
390 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
392 struct Lisp_Charset *cs = XCHARSET (obj);
394 markobj (cs->doc_string);
395 markobj (cs->registry);
396 markobj (cs->ccl_program);
401 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
403 struct Lisp_Charset *cs = XCHARSET (obj);
407 error ("printing unreadable object #<charset %s 0x%x>",
408 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
411 write_c_string ("#<charset ", printcharfun);
412 print_internal (CHARSET_NAME (cs), printcharfun, 0);
413 write_c_string (" ", printcharfun);
414 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
415 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
416 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
417 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
418 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
420 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
421 CHARSET_COLUMNS (cs),
422 CHARSET_GRAPHIC (cs),
424 write_c_string (buf, printcharfun);
425 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
426 sprintf (buf, " 0x%x>", cs->header.uid);
427 write_c_string (buf, printcharfun);
430 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
431 mark_charset, print_charset, 0, 0, 0,
432 struct Lisp_Charset);
433 /* Make a new charset. */
436 make_charset (int id, Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
437 unsigned char type, unsigned char columns, unsigned char graphic,
438 Bufbyte final, unsigned char direction, Lisp_Object doc,
442 struct Lisp_Charset *cs =
443 alloc_lcrecord_type (struct Lisp_Charset, lrecord_charset);
444 XSETCHARSET (obj, cs);
446 CHARSET_ID (cs) = id;
447 CHARSET_NAME (cs) = name;
448 CHARSET_LEADING_BYTE (cs) = leading_byte;
449 CHARSET_REP_BYTES (cs) = rep_bytes;
450 CHARSET_DIRECTION (cs) = direction;
451 CHARSET_TYPE (cs) = type;
452 CHARSET_COLUMNS (cs) = columns;
453 CHARSET_GRAPHIC (cs) = graphic;
454 CHARSET_FINAL (cs) = final;
455 CHARSET_DOC_STRING (cs) = doc;
456 CHARSET_REGISTRY (cs) = reg;
457 CHARSET_CCL_PROGRAM (cs) = Qnil;
458 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
460 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
461 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
462 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
463 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
467 /* some charsets do not have final characters. This includes
468 ASCII, Control-1, Composite, and the two faux private
470 assert (NILP (charset_by_attributes[type][final][direction]));
471 charset_by_attributes[type][final][direction] = obj;
474 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
475 charset_by_leading_byte[leading_byte - 128] = obj;
476 if (leading_byte < 0xA0)
477 /* official leading byte */
478 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
480 /* Some charsets are "faux" and don't have names or really exist at
481 all except in the leading-byte table. */
483 Fputhash (name, obj, Vcharset_hash_table);
488 get_unallocated_leading_byte (int dimension)
494 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
497 lb = next_allocated_1_byte_leading_byte++;
501 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
504 lb = next_allocated_2_byte_leading_byte++;
509 ("No more character sets free for this dimension",
510 make_int (dimension));
516 /************************************************************************/
517 /* Basic charset Lisp functions */
518 /************************************************************************/
520 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
521 Return non-nil if OBJECT is a charset.
525 return CHARSETP (object) ? Qt : Qnil;
528 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
529 Retrieve the charset of the given name.
530 If CHARSET-OR-NAME is a charset object, it is simply returned.
531 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
532 nil is returned. Otherwise the associated charset object is returned.
536 if (CHARSETP (charset_or_name))
537 return charset_or_name;
539 CHECK_SYMBOL (charset_or_name);
540 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
543 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
544 Retrieve the charset of the given name.
545 Same as `find-charset' except an error is signalled if there is no such
546 charset instead of returning nil.
550 Lisp_Object charset = Ffind_charset (name);
553 signal_simple_error ("No such charset", name);
557 /* We store the charsets in hash tables with the names as the key and the
558 actual charset object as the value. Occasionally we need to use them
559 in a list format. These routines provide us with that. */
560 struct charset_list_closure
562 Lisp_Object *charset_list;
566 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
567 void *charset_list_closure)
569 /* This function can GC */
570 struct charset_list_closure *chcl =
571 (struct charset_list_closure*) charset_list_closure;
572 Lisp_Object *charset_list = chcl->charset_list;
574 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
578 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
579 Return a list of the names of all defined charsets.
583 Lisp_Object charset_list = Qnil;
585 struct charset_list_closure charset_list_closure;
587 GCPRO1 (charset_list);
588 charset_list_closure.charset_list = &charset_list;
589 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
590 &charset_list_closure);
596 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
597 Return the name of the given charset.
601 return XCHARSET_NAME (Fget_charset (charset));
604 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
605 Define a new character set.
606 This function is for use with Mule support.
607 NAME is a symbol, the name by which the character set is normally referred.
608 DOC-STRING is a string describing the character set.
609 PROPS is a property list, describing the specific nature of the
610 character set. Recognized properties are:
612 'registry A regular expression matching the font registry field for
614 'dimension Number of octets used to index a character in this charset.
615 Either 1 or 2. Defaults to 1.
616 'columns Number of columns used to display a character in this charset.
617 Only used in TTY mode. (Under X, the actual width of a
618 character can be derived from the font used to display the
619 characters.) If unspecified, defaults to the dimension
620 (this is almost always the correct value).
621 'chars Number of characters in each dimension (94 or 96).
622 Defaults to 94. Note that if the dimension is 2, the
623 character set thus described is 94x94 or 96x96.
624 'final Final byte of ISO 2022 escape sequence. Must be
625 supplied. Each combination of (DIMENSION, CHARS) defines a
626 separate namespace for final bytes. Note that ISO
627 2022 restricts the final byte to the range
628 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
629 dimension == 2. Note also that final bytes in the range
630 0x30 - 0x3F are reserved for user-defined (not official)
632 'graphic 0 (use left half of font on output) or 1 (use right half
633 of font on output). Defaults to 0. For example, for
634 a font whose registry is ISO8859-1, the left half
635 (octets 0x20 - 0x7F) is the `ascii' character set, while
636 the right half (octets 0xA0 - 0xFF) is the `latin-1'
637 character set. With 'graphic set to 0, the octets
638 will have their high bit cleared; with it set to 1,
639 the octets will have their high bit set.
640 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
642 'ccl-program A compiled CCL program used to convert a character in
643 this charset into an index into the font. This is in
644 addition to the 'graphic property. The CCL program
645 is passed the octets of the character, with the high
646 bit cleared and set depending upon whether the value
647 of the 'graphic property is 0 or 1.
649 (name, doc_string, props))
651 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
652 int direction = CHARSET_LEFT_TO_RIGHT;
654 Lisp_Object registry = Qnil;
656 Lisp_Object rest, keyword, value;
657 Lisp_Object ccl_program = Qnil;
660 if (!NILP (doc_string))
661 CHECK_STRING (doc_string);
663 charset = Ffind_charset (name);
665 signal_simple_error ("Cannot redefine existing charset", name);
667 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
669 if (EQ (keyword, Qdimension))
672 dimension = XINT (value);
673 if (dimension < 1 || dimension > 2)
674 signal_simple_error ("Invalid value for 'dimension", value);
677 else if (EQ (keyword, Qchars))
680 chars = XINT (value);
681 if (chars != 94 && chars != 96)
682 signal_simple_error ("Invalid value for 'chars", value);
685 else if (EQ (keyword, Qcolumns))
688 columns = XINT (value);
689 if (columns != 1 && columns != 2)
690 signal_simple_error ("Invalid value for 'columns", value);
693 else if (EQ (keyword, Qgraphic))
696 graphic = XINT (value);
697 if (graphic < 0 || graphic > 1)
698 signal_simple_error ("Invalid value for 'graphic", value);
701 else if (EQ (keyword, Qregistry))
703 CHECK_STRING (value);
707 else if (EQ (keyword, Qdirection))
709 if (EQ (value, Ql2r))
710 direction = CHARSET_LEFT_TO_RIGHT;
711 else if (EQ (value, Qr2l))
712 direction = CHARSET_RIGHT_TO_LEFT;
714 signal_simple_error ("Invalid value for 'direction", value);
717 else if (EQ (keyword, Qfinal))
719 CHECK_CHAR_COERCE_INT (value);
720 final = XCHAR (value);
721 if (final < '0' || final > '~')
722 signal_simple_error ("Invalid value for 'final", value);
725 else if (EQ (keyword, Qccl_program))
727 CHECK_VECTOR (value);
732 signal_simple_error ("Unrecognized property", keyword);
736 error ("'final must be specified");
737 if (dimension == 2 && final > 0x5F)
739 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
743 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
745 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
747 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
748 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
750 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
752 lb = get_unallocated_leading_byte (dimension);
754 if (NILP (doc_string))
755 doc_string = build_string ("");
758 registry = build_string ("");
762 charset = make_charset (-1, name, lb, dimension + 2, type, columns, graphic,
763 final, direction, doc_string, registry);
764 if (!NILP (ccl_program))
765 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
769 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
771 Make a charset equivalent to CHARSET but which goes in the opposite direction.
772 NEW-NAME is the name of the new charset. Return the new charset.
776 Lisp_Object new_charset = Qnil;
777 int lb, dimension, columns, graphic, final;
779 Lisp_Object registry, doc_string;
780 struct Lisp_Charset *cs;
782 charset = Fget_charset (charset);
783 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
784 signal_simple_error ("Charset already has reverse-direction charset",
787 CHECK_SYMBOL (new_name);
788 if (!NILP (Ffind_charset (new_name)))
789 signal_simple_error ("Cannot redefine existing charset", new_name);
791 cs = XCHARSET (charset);
793 type = CHARSET_TYPE (cs);
794 columns = CHARSET_COLUMNS (cs);
795 dimension = CHARSET_DIMENSION (cs);
796 lb = get_unallocated_leading_byte (dimension);
798 graphic = CHARSET_GRAPHIC (cs);
799 final = CHARSET_FINAL (cs);
800 direction = CHARSET_RIGHT_TO_LEFT;
801 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
802 direction = CHARSET_LEFT_TO_RIGHT;
803 doc_string = CHARSET_DOC_STRING (cs);
804 registry = CHARSET_REGISTRY (cs);
806 new_charset = make_charset (-1, new_name, lb, dimension + 2, type, columns,
807 graphic, final, direction, doc_string, registry);
809 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
810 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
815 /* #### Reverse direction charsets not yet implemented. */
817 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
819 Return the reverse-direction charset parallel to CHARSET, if any.
820 This is the charset with the same properties (in particular, the same
821 dimension, number of characters per dimension, and final byte) as
822 CHARSET but whose characters are displayed in the opposite direction.
826 charset = Fget_charset (charset);
827 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
831 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
832 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
833 If DIRECTION is omitted, both directions will be checked (left-to-right
834 will be returned if character sets exist for both directions).
836 (dimension, chars, final, direction))
838 int dm, ch, fi, di = -1;
840 Lisp_Object obj = Qnil;
842 CHECK_INT (dimension);
843 dm = XINT (dimension);
844 if (dm < 1 || dm > 2)
845 signal_simple_error ("Invalid value for DIMENSION", dimension);
849 if (ch != 94 && ch != 96)
850 signal_simple_error ("Invalid value for CHARS", chars);
852 CHECK_CHAR_COERCE_INT (final);
854 if (fi < '0' || fi > '~')
855 signal_simple_error ("Invalid value for FINAL", final);
857 if (EQ (direction, Ql2r))
858 di = CHARSET_LEFT_TO_RIGHT;
859 else if (EQ (direction, Qr2l))
860 di = CHARSET_RIGHT_TO_LEFT;
861 else if (!NILP (direction))
862 signal_simple_error ("Invalid value for DIRECTION", direction);
864 if (dm == 2 && fi > 0x5F)
866 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
869 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
871 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
875 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
877 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
880 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
883 return XCHARSET_NAME (obj);
887 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
888 Return doc string of CHARSET.
892 return XCHARSET_DOC_STRING (Fget_charset (charset));
895 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
896 Return dimension of CHARSET.
900 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
903 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
904 Return property PROP of CHARSET.
905 Recognized properties are those listed in `make-charset', as well as
906 'name and 'doc-string.
910 struct Lisp_Charset *cs;
912 charset = Fget_charset (charset);
913 cs = XCHARSET (charset);
916 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
917 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
918 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
919 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
920 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
921 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
922 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
923 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
924 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
925 if (EQ (prop, Qdirection))
926 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
927 if (EQ (prop, Qreverse_direction_charset))
929 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
933 return XCHARSET_NAME (obj);
935 signal_simple_error ("Unrecognized charset property name", prop);
936 return Qnil; /* not reached */
939 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
940 Return charset identification number of CHARSET.
944 return make_int(XCHARSET_ID (Fget_charset (charset)));
947 /* #### We need to figure out which properties we really want to
950 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
951 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
953 (charset, ccl_program))
955 charset = Fget_charset (charset);
956 CHECK_VECTOR (ccl_program);
957 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
962 invalidate_charset_font_caches (Lisp_Object charset)
964 /* Invalidate font cache entries for charset on all devices. */
965 Lisp_Object devcons, concons, hash_table;
966 DEVICE_LOOP_NO_BREAK (devcons, concons)
968 struct device *d = XDEVICE (XCAR (devcons));
969 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
970 if (!UNBOUNDP (hash_table))
971 Fclrhash (hash_table);
975 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
976 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
977 Set the 'registry property of CHARSET to REGISTRY.
981 charset = Fget_charset (charset);
982 CHECK_STRING (registry);
983 XCHARSET_REGISTRY (charset) = registry;
984 invalidate_charset_font_caches (charset);
985 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
990 /************************************************************************/
991 /* Lisp primitives for working with characters */
992 /************************************************************************/
994 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
995 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
997 (charset, arg1, arg2))
999 struct Lisp_Charset *cs;
1001 int lowlim, highlim;
1003 charset = Fget_charset (charset);
1004 cs = XCHARSET (charset);
1006 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1007 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1008 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1009 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1013 if (a1 < lowlim || a1 > highlim)
1014 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1016 if (CHARSET_DIMENSION (cs) == 1)
1020 ("Charset is of dimension one; second octet must be nil", arg2);
1021 return make_char (MAKE_CHAR (charset, a1, 0));
1026 if (a2 < lowlim || a2 > highlim)
1027 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1029 return make_char (MAKE_CHAR (charset, a1, a2));
1032 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1033 Return the character set of char CH.
1037 CHECK_CHAR_COERCE_INT (ch);
1039 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1040 (CHAR_LEADING_BYTE (XCHAR (ch))));
1043 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1044 Return the octet numbered N (should be 0 or 1) of char CH.
1045 N defaults to 0 if omitted.
1049 Lisp_Object charset;
1052 CHECK_CHAR_COERCE_INT (ch);
1059 if (int_n != 0 && int_n != 1)
1060 signal_simple_error ("Octet number must be 0 or 1", n);
1062 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1063 return make_int (int_n == 0 ? c1 : c2);
1067 /************************************************************************/
1068 /* composite character functions */
1069 /************************************************************************/
1072 lookup_composite_char (Bufbyte *str, int len)
1074 Lisp_Object lispstr = make_string (str, len);
1075 Lisp_Object ch = Fgethash (lispstr,
1076 Vcomposite_char_string2char_hash_table,
1082 if (composite_char_row_next >= 128)
1083 signal_simple_error ("No more composite chars available", lispstr);
1084 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1085 composite_char_col_next);
1086 Fputhash (make_char (emch), lispstr,
1087 Vcomposite_char_char2string_hash_table);
1088 Fputhash (lispstr, make_char (emch),
1089 Vcomposite_char_string2char_hash_table);
1090 composite_char_col_next++;
1091 if (composite_char_col_next >= 128)
1093 composite_char_col_next = 32;
1094 composite_char_row_next++;
1103 composite_char_string (Emchar ch)
1105 Lisp_Object str = Fgethash (make_char (ch),
1106 Vcomposite_char_char2string_hash_table,
1108 assert (!UNBOUNDP (str));
1112 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1113 Convert a string into a single composite character.
1114 The character is the result of overstriking all the characters in
1119 CHECK_STRING (string);
1120 return make_char (lookup_composite_char (XSTRING_DATA (string),
1121 XSTRING_LENGTH (string)));
1124 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1125 Return a string of the characters comprising a composite character.
1133 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1134 signal_simple_error ("Must be composite char", ch);
1135 return composite_char_string (emch);
1139 /************************************************************************/
1140 /* initialization */
1141 /************************************************************************/
1144 syms_of_mule_charset (void)
1146 DEFSUBR (Fcharsetp);
1147 DEFSUBR (Ffind_charset);
1148 DEFSUBR (Fget_charset);
1149 DEFSUBR (Fcharset_list);
1150 DEFSUBR (Fcharset_name);
1151 DEFSUBR (Fmake_charset);
1152 DEFSUBR (Fmake_reverse_direction_charset);
1153 /* DEFSUBR (Freverse_direction_charset); */
1154 DEFSUBR (Fcharset_from_attributes);
1155 DEFSUBR (Fcharset_doc_string);
1156 DEFSUBR (Fcharset_dimension);
1157 DEFSUBR (Fcharset_property);
1158 DEFSUBR (Fcharset_id);
1159 DEFSUBR (Fset_charset_ccl_program);
1160 DEFSUBR (Fset_charset_registry);
1162 DEFSUBR (Fmake_char);
1163 DEFSUBR (Fchar_charset);
1164 DEFSUBR (Fchar_octet);
1166 DEFSUBR (Fmake_composite_char);
1167 DEFSUBR (Fcomposite_char_string);
1169 defsymbol (&Qcharsetp, "charsetp");
1170 defsymbol (&Qregistry, "registry");
1171 defsymbol (&Qfinal, "final");
1172 defsymbol (&Qgraphic, "graphic");
1173 defsymbol (&Qdirection, "direction");
1174 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1175 defsymbol (&Qccl_program, "ccl-program");
1177 defsymbol (&Ql2r, "l2r");
1178 defsymbol (&Qr2l, "r2l");
1180 /* Charsets, compatible with Emacs/Mule 19.33-delta
1181 Naming convention is Script-Charset[-Edition] */
1182 defsymbol (&Qascii, "ascii");
1183 defsymbol (&Qcontrol_1, "control-1");
1184 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1185 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1186 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1187 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1188 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1189 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1190 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1191 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1192 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1193 defsymbol (&Qthai_tis620, "thai-tis620");
1195 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1196 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1197 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1198 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1199 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1201 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1202 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1203 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1204 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1205 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1207 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1208 defsymbol (&Qcomposite, "composite");
1212 vars_of_mule_charset (void)
1216 /* Table of charsets indexed by leading byte. */
1217 for (i = 0; i < countof (charset_by_leading_byte); i++)
1218 charset_by_leading_byte[i] = Qnil;
1220 /* Table of charsets indexed by type/final-byte/direction. */
1221 for (i = 0; i < countof (charset_by_attributes); i++)
1222 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1223 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1224 charset_by_attributes[i][j][k] = Qnil;
1226 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1227 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1231 complex_vars_of_mule_charset (void)
1233 staticpro (&Vcharset_hash_table);
1234 Vcharset_hash_table =
1235 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1237 /* Predefined character sets. We store them into variables for
1241 make_charset (0, Qascii, LEADING_BYTE_ASCII, 1,
1242 CHARSET_TYPE_94, 1, 0, 'B',
1243 CHARSET_LEFT_TO_RIGHT,
1244 build_string ("ASCII (ISO 646 IRV)"),
1245 build_string ("iso8859-1"));
1246 Vcharset_control_1 =
1247 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1248 CHARSET_TYPE_94, 1, 1, 0,
1249 CHARSET_LEFT_TO_RIGHT,
1250 build_string ("Control characters"),
1252 Vcharset_latin_iso8859_1 =
1253 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
1254 CHARSET_TYPE_96, 1, 1, 'A',
1255 CHARSET_LEFT_TO_RIGHT,
1256 build_string ("ISO 8859-1 (Latin-1)"),
1257 build_string ("iso8859-1"));
1258 Vcharset_latin_iso8859_2 =
1259 make_charset (130, Qlatin_iso8859_2, LEADING_BYTE_LATIN_ISO8859_2, 2,
1260 CHARSET_TYPE_96, 1, 1, 'B',
1261 CHARSET_LEFT_TO_RIGHT,
1262 build_string ("ISO 8859-2 (Latin-2)"),
1263 build_string ("iso8859-2"));
1264 Vcharset_latin_iso8859_3 =
1265 make_charset (131, Qlatin_iso8859_3, LEADING_BYTE_LATIN_ISO8859_3, 2,
1266 CHARSET_TYPE_96, 1, 1, 'C',
1267 CHARSET_LEFT_TO_RIGHT,
1268 build_string ("ISO 8859-3 (Latin-3)"),
1269 build_string ("iso8859-3"));
1270 Vcharset_latin_iso8859_4 =
1271 make_charset (132, Qlatin_iso8859_4, LEADING_BYTE_LATIN_ISO8859_4, 2,
1272 CHARSET_TYPE_96, 1, 1, 'D',
1273 CHARSET_LEFT_TO_RIGHT,
1274 build_string ("ISO 8859-4 (Latin-4)"),
1275 build_string ("iso8859-4"));
1276 Vcharset_cyrillic_iso8859_5 =
1277 make_charset (140, Qcyrillic_iso8859_5, LEADING_BYTE_CYRILLIC_ISO8859_5, 2,
1278 CHARSET_TYPE_96, 1, 1, 'L',
1279 CHARSET_LEFT_TO_RIGHT,
1280 build_string ("ISO 8859-5 (Cyrillic)"),
1281 build_string ("iso8859-5"));
1282 Vcharset_arabic_iso8859_6 =
1283 make_charset (135, Qarabic_iso8859_6, LEADING_BYTE_ARABIC_ISO8859_6, 2,
1284 CHARSET_TYPE_96, 1, 1, 'G',
1285 CHARSET_RIGHT_TO_LEFT,
1286 build_string ("ISO 8859-6 (Arabic)"),
1287 build_string ("iso8859-6"));
1288 Vcharset_greek_iso8859_7 =
1289 make_charset (134, Qgreek_iso8859_7, LEADING_BYTE_GREEK_ISO8859_7, 2,
1290 CHARSET_TYPE_96, 1, 1, 'F',
1291 CHARSET_LEFT_TO_RIGHT,
1292 build_string ("ISO 8859-7 (Greek)"),
1293 build_string ("iso8859-7"));
1294 Vcharset_hebrew_iso8859_8 =
1295 make_charset (136, Qhebrew_iso8859_8, LEADING_BYTE_HEBREW_ISO8859_8, 2,
1296 CHARSET_TYPE_96, 1, 1, 'H',
1297 CHARSET_RIGHT_TO_LEFT,
1298 build_string ("ISO 8859-8 (Hebrew)"),
1299 build_string ("iso8859-8"));
1300 Vcharset_latin_iso8859_9 =
1301 make_charset (141, Qlatin_iso8859_9, LEADING_BYTE_LATIN_ISO8859_9, 2,
1302 CHARSET_TYPE_96, 1, 1, 'M',
1303 CHARSET_LEFT_TO_RIGHT,
1304 build_string ("ISO 8859-9 (Latin-5)"),
1305 build_string ("iso8859-9"));
1306 Vcharset_thai_tis620 =
1307 make_charset (133, Qthai_tis620, LEADING_BYTE_THAI_TIS620, 2,
1308 CHARSET_TYPE_96, 1, 1, 'T',
1309 CHARSET_LEFT_TO_RIGHT,
1310 build_string ("TIS 620.2529 (Thai)"),
1311 build_string ("tis620"));
1314 Vcharset_katakana_jisx0201 =
1315 make_charset (137, Qkatakana_jisx0201,
1316 LEADING_BYTE_KATAKANA_JISX0201, 2,
1317 CHARSET_TYPE_94, 1, 1, 'I',
1318 CHARSET_LEFT_TO_RIGHT,
1319 build_string ("JIS X0201-Katakana"),
1320 build_string ("jisx0201.1976"));
1321 Vcharset_latin_jisx0201 =
1322 make_charset (138, Qlatin_jisx0201,
1323 LEADING_BYTE_LATIN_JISX0201, 2,
1324 CHARSET_TYPE_94, 1, 0, 'J',
1325 CHARSET_LEFT_TO_RIGHT,
1326 build_string ("JIS X0201-Latin"),
1327 build_string ("jisx0201.1976"));
1328 Vcharset_japanese_jisx0208_1978 =
1329 make_charset (144, Qjapanese_jisx0208_1978,
1330 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1331 CHARSET_TYPE_94X94, 2, 0, '@',
1332 CHARSET_LEFT_TO_RIGHT,
1334 ("JIS X0208-1978 (Japanese Kanji; Old Version)"),
1335 build_string ("\\(jisx0208\\|jisc6226\\).19"));
1336 Vcharset_japanese_jisx0208 =
1337 make_charset (146, Qjapanese_jisx0208,
1338 LEADING_BYTE_JAPANESE_JISX0208, 3,
1339 CHARSET_TYPE_94X94, 2, 0, 'B',
1340 CHARSET_LEFT_TO_RIGHT,
1341 build_string ("JIS X0208-1983 (Japanese Kanji)"),
1342 build_string ("jisx0208.19\\(83\\|90\\)"));
1343 Vcharset_japanese_jisx0212 =
1344 make_charset (148, Qjapanese_jisx0212,
1345 LEADING_BYTE_JAPANESE_JISX0212, 3,
1346 CHARSET_TYPE_94X94, 2, 0, 'D',
1347 CHARSET_LEFT_TO_RIGHT,
1348 build_string ("JIS X0212 (Japanese Supplement)"),
1349 build_string ("jisx0212"));
1352 Vcharset_chinese_gb2312 =
1353 make_charset (145, Qchinese_gb2312, LEADING_BYTE_CHINESE_GB2312, 3,
1354 CHARSET_TYPE_94X94, 2, 0, 'A',
1355 CHARSET_LEFT_TO_RIGHT,
1356 build_string ("GB 2312 (Simplified Chinese)"),
1357 build_string ("gb2312"));
1358 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1359 Vcharset_chinese_cns11643_1 =
1360 make_charset (149, Qchinese_cns11643_1,
1361 LEADING_BYTE_CHINESE_CNS11643_1, 3,
1362 CHARSET_TYPE_94X94, 2, 0, 'G',
1363 CHARSET_LEFT_TO_RIGHT,
1365 ("CNS 11643 Plane 1 (Traditional Chinese for daily use)"),
1366 build_string (CHINESE_CNS_PLANE_RE("1")));
1367 Vcharset_chinese_cns11643_2 =
1368 make_charset (150, Qchinese_cns11643_2,
1369 LEADING_BYTE_CHINESE_CNS11643_2, 3,
1370 CHARSET_TYPE_94X94, 2, 0, 'H',
1371 CHARSET_LEFT_TO_RIGHT,
1373 ("CNS 11643 Plane 2 (Traditional Chinese for daily use)"),
1374 build_string (CHINESE_CNS_PLANE_RE("2")));
1375 Vcharset_chinese_big5_1 =
1376 make_charset (152, Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
1377 CHARSET_TYPE_94X94, 2, 0, '0',
1378 CHARSET_LEFT_TO_RIGHT,
1380 ("Big5 Level 1 (Traditional Chinese for daily use)"),
1381 build_string ("big5"));
1382 Vcharset_chinese_big5_2 =
1383 make_charset (153, Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
1384 CHARSET_TYPE_94X94, 2, 0, '1',
1385 CHARSET_LEFT_TO_RIGHT,
1387 ("Big5 Level 2 (Traditional Chinese for daily use)"),
1388 build_string ("big5"));
1390 Vcharset_korean_ksc5601 =
1391 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1392 CHARSET_TYPE_94X94, 2, 0, 'C',
1393 CHARSET_LEFT_TO_RIGHT,
1394 build_string ("KS C5601 (Hangul and Korean Hanja)"),
1395 build_string ("ksc5601"));
1396 /* #### For simplicity, we put composite chars into a 96x96 charset.
1397 This is going to lead to problems because you can run out of
1398 room, esp. as we don't yet recycle numbers. */
1399 Vcharset_composite =
1400 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1401 CHARSET_TYPE_96X96, 2, 0, 0,
1402 CHARSET_LEFT_TO_RIGHT,
1403 build_string ("Composite characters"),
1406 composite_char_row_next = 32;
1407 composite_char_col_next = 32;
1409 Vcomposite_char_string2char_hash_table =
1410 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1411 Vcomposite_char_char2string_hash_table =
1412 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1413 staticpro (&Vcomposite_char_string2char_hash_table);
1414 staticpro (&Vcomposite_char_char2string_hash_table);