1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Mule 2.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_cyrillic_iso8859_5;
45 Lisp_Object Vcharset_arabic_iso8859_6;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_latin_iso8859_9;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_japanese_jisx0208;
54 Lisp_Object Vcharset_japanese_jisx0212;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601;
61 Lisp_Object Vcharset_composite;
63 /* Hashtables for composite chars. One maps string representing
64 composed chars to their equivalent chars; one goes the
66 Lisp_Object Vcomposite_char_char2string_hashtable;
67 Lisp_Object Vcomposite_char_string2char_hashtable;
69 /* Table of charsets indexed by leading byte. */
70 Lisp_Object charset_by_leading_byte[128];
72 /* Table of charsets indexed by type/final-byte/direction. */
73 Lisp_Object charset_by_attributes[4][128][2];
75 static int composite_char_row_next;
76 static int composite_char_col_next;
78 /* Table of number of bytes in the string representation of a character
79 indexed by the first byte of that representation.
81 rep_bytes_by_first_byte(c) is more efficient than the equivalent
82 canonical computation:
84 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
86 Bytecount rep_bytes_by_first_byte[0xA0] =
87 { /* 0x00 - 0x7f are for straight ASCII */
88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 /* 0x80 - 0x8f are for Dimension-1 official charsets */
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 /* 0x90 - 0x9d are for Dimension-2 official charsets */
99 /* 0x9e is for Dimension-1 private charsets */
100 /* 0x9f is for Dimension-2 private charsets */
101 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
104 Lisp_Object Qcharsetp;
106 /* Qdoc_string, Qdimension, Qchars defined in general.c */
107 Lisp_Object Qregistry, Qfinal, Qgraphic;
108 Lisp_Object Qdirection;
109 Lisp_Object Qreverse_direction_charset;
110 Lisp_Object Qccl_program;
112 Lisp_Object Qascii, Qcontrol_1,
126 Qkatakana_jisx0201, Qlatin_jisx0201,
127 Qjapanese_jisx0208_1978,
132 Qchinese_big5_1, Qchinese_big5_2,
133 Qchinese_cns11643_1, Qchinese_cns11643_2,
135 Qkorean_ksc5601, Qcomposite;
137 Lisp_Object Ql2r, Qr2l;
139 Lisp_Object Vcharset_hashtable;
141 static Bufbyte next_allocated_1_byte_leading_byte;
142 static Bufbyte next_allocated_2_byte_leading_byte;
144 /* Composite characters are characters constructed by overstriking two
145 or more regular characters.
147 1) The old Mule implementation involves storing composite characters
148 in a buffer as a tag followed by all of the actual characters
149 used to make up the composite character. I think this is a bad
150 idea; it greatly complicates code that wants to handle strings
151 one character at a time because it has to deal with the possibility
152 of great big ungainly characters. It's much more reasonable to
153 simply store an index into a table of composite characters.
155 2) The current implementation only allows for 16,384 separate
156 composite characters over the lifetime of the XEmacs process.
157 This could become a potential problem if the user
158 edited lots of different files that use composite characters.
159 Due to FSF bogosity, increasing the number of allowable
160 composite characters under Mule would decrease the number
161 of possible faces that can exist. Mule already has shrunk
162 this to 2048, and further shrinkage would become uncomfortable.
163 No such problems exist in XEmacs.
165 Composite characters could be represented as 0x80 C1 C2 C3,
166 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
167 for slightly under 2^20 (one million) composite characters
168 over the XEmacs process lifetime, and you only need to
169 increase the size of a Mule character from 19 to 21 bits.
170 Or you could use 0x80 C1 C2 C3 C4, allowing for about
171 85 million (slightly over 2^26) composite characters. */
174 /************************************************************************/
175 /* Basic Emchar functions */
176 /************************************************************************/
178 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
179 string in STR. Returns the number of bytes stored.
180 Do not call this directly. Use the macro set_charptr_emchar() instead.
184 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
192 BREAKUP_CHAR (c, charset, c1, c2);
193 lb = CHAR_LEADING_BYTE (c);
194 if (LEADING_BYTE_PRIVATE_P (lb))
195 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
197 if (EQ (charset, Vcharset_control_1))
206 /* Return the first character from a Mule-encoded string in STR,
207 assuming it's non-ASCII. Do not call this directly.
208 Use the macro charptr_emchar() instead. */
211 non_ascii_charptr_emchar (CONST Bufbyte *str)
213 Bufbyte i0 = *str, i1, i2 = 0;
216 if (i0 == LEADING_BYTE_CONTROL_1)
217 return (Emchar) (*++str - 0x20);
219 if (LEADING_BYTE_PREFIX_P (i0))
224 charset = CHARSET_BY_LEADING_BYTE (i0);
225 if (XCHARSET_DIMENSION (charset) == 2)
228 return MAKE_CHAR (charset, i1, i2);
231 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
232 Do not call this directly. Use the macro valid_char_p() instead. */
235 non_ascii_valid_char_p (Emchar ch)
239 /* Must have only lowest 19 bits set */
243 f1 = CHAR_FIELD1 (ch);
244 f2 = CHAR_FIELD2 (ch);
245 f3 = CHAR_FIELD3 (ch);
251 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
252 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
253 f2 > MAX_CHAR_FIELD2_PRIVATE)
258 if (f3 != 0x20 && f3 != 0x7F)
262 NOTE: This takes advantage of the fact that
263 FIELD2_TO_OFFICIAL_LEADING_BYTE and
264 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
266 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
267 return (XCHARSET_CHARS (charset) == 96);
273 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
274 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
275 f1 > MAX_CHAR_FIELD1_PRIVATE)
277 if (f2 < 0x20 || f3 < 0x20)
280 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
282 if (UNBOUNDP (Fgethash (make_int (ch),
283 Vcomposite_char_char2string_hashtable,
289 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
292 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
294 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
297 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
299 return (XCHARSET_CHARS (charset) == 96);
304 /************************************************************************/
305 /* Basic string functions */
306 /************************************************************************/
308 /* Copy the character pointed to by PTR into STR, assuming it's
309 non-ASCII. Do not call this directly. Use the macro
310 charptr_copy_char() instead. */
313 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
315 Bufbyte *strptr = str;
317 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
319 /* Notice fallthrough. */
320 case 4: *++strptr = *ptr++;
321 case 3: *++strptr = *ptr++;
322 case 2: *++strptr = *ptr;
327 return strptr + 1 - str;
331 /************************************************************************/
332 /* streams of Emchars */
333 /************************************************************************/
335 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
336 The functions below are not meant to be called directly; use
337 the macros in insdel.h. */
340 Lstream_get_emchar_1 (Lstream *stream, int ch)
342 Bufbyte str[MAX_EMCHAR_LEN];
343 Bufbyte *strptr = str;
345 str[0] = (Bufbyte) ch;
346 switch (REP_BYTES_BY_FIRST_BYTE (ch))
348 /* Notice fallthrough. */
350 ch = Lstream_getc (stream);
352 *++strptr = (Bufbyte) ch;
354 ch = Lstream_getc (stream);
356 *++strptr = (Bufbyte) ch;
358 ch = Lstream_getc (stream);
360 *++strptr = (Bufbyte) ch;
365 return charptr_emchar (str);
369 Lstream_fput_emchar (Lstream *stream, Emchar ch)
371 Bufbyte str[MAX_EMCHAR_LEN];
372 Bytecount len = set_charptr_emchar (str, ch);
373 return Lstream_write (stream, str, len);
377 Lstream_funget_emchar (Lstream *stream, Emchar ch)
379 Bufbyte str[MAX_EMCHAR_LEN];
380 Bytecount len = set_charptr_emchar (str, ch);
381 Lstream_unread (stream, str, len);
385 /************************************************************************/
387 /************************************************************************/
390 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
392 struct Lisp_Charset *cs = XCHARSET (obj);
394 (markobj) (cs->doc_string);
395 (markobj) (cs->registry);
396 (markobj) (cs->ccl_program);
401 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
403 struct Lisp_Charset *cs = XCHARSET (obj);
407 error ("printing unreadable object #<charset %s 0x%x>",
408 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
411 write_c_string ("#<charset ", printcharfun);
412 print_internal (CHARSET_NAME (cs), printcharfun, 0);
413 write_c_string (" ", printcharfun);
414 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
415 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
416 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
417 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
418 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
420 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
421 CHARSET_COLUMNS (cs),
422 CHARSET_GRAPHIC (cs),
424 write_c_string (buf, printcharfun);
425 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
426 sprintf (buf, " 0x%x>", cs->header.uid);
427 write_c_string (buf, printcharfun);
430 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
431 mark_charset, print_charset, 0, 0, 0,
432 struct Lisp_Charset);
433 /* Make a new charset. */
436 make_charset (int id, Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
437 unsigned char type, unsigned char columns, unsigned char graphic,
438 Bufbyte final, unsigned char direction, Lisp_Object doc,
442 struct Lisp_Charset *cs =
443 alloc_lcrecord_type (struct Lisp_Charset, lrecord_charset);
444 XSETCHARSET (obj, cs);
446 CHARSET_ID (cs) = id;
447 CHARSET_NAME (cs) = name;
448 CHARSET_LEADING_BYTE (cs) = leading_byte;
449 CHARSET_REP_BYTES (cs) = rep_bytes;
450 CHARSET_DIRECTION (cs) = direction;
451 CHARSET_TYPE (cs) = type;
452 CHARSET_COLUMNS (cs) = columns;
453 CHARSET_GRAPHIC (cs) = graphic;
454 CHARSET_FINAL (cs) = final;
455 CHARSET_DOC_STRING (cs) = doc;
456 CHARSET_REGISTRY (cs) = reg;
457 CHARSET_CCL_PROGRAM (cs) = Qnil;
458 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
460 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
461 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
462 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
463 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
467 /* some charsets do not have final characters. This includes
468 ASCII, Control-1, Composite, and the two faux private
470 assert (NILP (charset_by_attributes[type][final][direction]));
471 charset_by_attributes[type][final][direction] = obj;
474 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
475 charset_by_leading_byte[leading_byte - 128] = obj;
476 if (leading_byte < 0xA0)
477 /* official leading byte */
478 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
480 /* Some charsets are "faux" and don't have names or really exist at
481 all except in the leading-byte table. */
483 Fputhash (name, obj, Vcharset_hashtable);
488 get_unallocated_leading_byte (int dimension)
494 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
497 lb = next_allocated_1_byte_leading_byte++;
501 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
504 lb = next_allocated_2_byte_leading_byte++;
509 ("No more character sets free for this dimension",
510 make_int (dimension));
516 /************************************************************************/
517 /* Basic charset Lisp functions */
518 /************************************************************************/
520 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
521 Return non-nil if OBJECT is a charset.
525 return CHARSETP (object) ? Qt : Qnil;
528 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
529 Retrieve the charset of the given name.
530 If CHARSET-OR-NAME is a charset object, it is simply returned.
531 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
532 nil is returned. Otherwise the associated charset object is returned.
536 if (CHARSETP (charset_or_name))
537 return charset_or_name;
539 CHECK_SYMBOL (charset_or_name);
540 return Fgethash (charset_or_name, Vcharset_hashtable, Qnil);
543 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
544 Retrieve the charset of the given name.
545 Same as `find-charset' except an error is signalled if there is no such
546 charset instead of returning nil.
550 Lisp_Object charset = Ffind_charset (name);
553 signal_simple_error ("No such charset", name);
557 /* We store the charsets in hash tables with the names as the key and the
558 actual charset object as the value. Occasionally we need to use them
559 in a list format. These routines provide us with that. */
560 struct charset_list_closure
562 Lisp_Object *charset_list;
566 add_charset_to_list_mapper (CONST void *hash_key, void *hash_contents,
567 void *charset_list_closure)
569 /* This function can GC */
570 Lisp_Object key, contents;
571 Lisp_Object *charset_list;
572 struct charset_list_closure *chcl =
573 (struct charset_list_closure*) charset_list_closure;
574 CVOID_TO_LISP (key, hash_key);
575 VOID_TO_LISP (contents, hash_contents);
576 charset_list = chcl->charset_list;
578 *charset_list = Fcons (XCHARSET_NAME (contents), *charset_list);
582 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
583 Return a list of the names of all defined charsets.
587 Lisp_Object charset_list = Qnil;
589 struct charset_list_closure charset_list_closure;
591 GCPRO1 (charset_list);
592 charset_list_closure.charset_list = &charset_list;
593 elisp_maphash (add_charset_to_list_mapper, Vcharset_hashtable,
594 &charset_list_closure);
600 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
601 Return the name of the given charset.
605 return XCHARSET_NAME (Fget_charset (charset));
608 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
609 Define a new character set.
610 This function is for use with Mule support.
611 NAME is a symbol, the name by which the character set is normally referred.
612 DOC-STRING is a string describing the character set.
613 PROPS is a property list, describing the specific nature of the
614 character set. Recognized properties are:
616 'registry A regular expression matching the font registry field for
618 'dimension Number of octets used to index a character in this charset.
619 Either 1 or 2. Defaults to 1.
620 'columns Number of columns used to display a character in this charset.
621 Only used in TTY mode. (Under X, the actual width of a
622 character can be derived from the font used to display the
623 characters.) If unspecified, defaults to the dimension
624 (this is almost always the correct value).
625 'chars Number of characters in each dimension (94 or 96).
626 Defaults to 94. Note that if the dimension is 2, the
627 character set thus described is 94x94 or 96x96.
628 'final Final byte of ISO 2022 escape sequence. Must be
629 supplied. Each combination of (DIMENSION, CHARS) defines a
630 separate namespace for final bytes. Note that ISO
631 2022 restricts the final byte to the range
632 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
633 dimension == 2. Note also that final bytes in the range
634 0x30 - 0x3F are reserved for user-defined (not official)
636 'graphic 0 (use left half of font on output) or 1 (use right half
637 of font on output). Defaults to 0. For example, for
638 a font whose registry is ISO8859-1, the left half
639 (octets 0x20 - 0x7F) is the `ascii' character set, while
640 the right half (octets 0xA0 - 0xFF) is the `latin-1'
641 character set. With 'graphic set to 0, the octets
642 will have their high bit cleared; with it set to 1,
643 the octets will have their high bit set.
644 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
646 'ccl-program A compiled CCL program used to convert a character in
647 this charset into an index into the font. This is in
648 addition to the 'graphic property. The CCL program
649 is passed the octets of the character, with the high
650 bit cleared and set depending upon whether the value
651 of the 'graphic property is 0 or 1.
653 (name, doc_string, props))
655 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
656 int direction = CHARSET_LEFT_TO_RIGHT;
658 Lisp_Object registry = Qnil;
660 Lisp_Object rest, keyword, value;
661 Lisp_Object ccl_program = Qnil;
664 if (!NILP (doc_string))
665 CHECK_STRING (doc_string);
667 charset = Ffind_charset (name);
669 signal_simple_error ("Cannot redefine existing charset", name);
671 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
673 if (EQ (keyword, Qdimension))
676 dimension = XINT (value);
677 if (dimension < 1 || dimension > 2)
678 signal_simple_error ("Invalid value for 'dimension", value);
681 else if (EQ (keyword, Qchars))
684 chars = XINT (value);
685 if (chars != 94 && chars != 96)
686 signal_simple_error ("Invalid value for 'chars", value);
689 else if (EQ (keyword, Qcolumns))
692 columns = XINT (value);
693 if (columns != 1 && columns != 2)
694 signal_simple_error ("Invalid value for 'columns", value);
697 else if (EQ (keyword, Qgraphic))
700 graphic = XINT (value);
701 if (graphic < 0 || graphic > 1)
702 signal_simple_error ("Invalid value for 'graphic", value);
705 else if (EQ (keyword, Qregistry))
707 CHECK_STRING (value);
711 else if (EQ (keyword, Qdirection))
713 if (EQ (value, Ql2r))
714 direction = CHARSET_LEFT_TO_RIGHT;
715 else if (EQ (value, Qr2l))
716 direction = CHARSET_RIGHT_TO_LEFT;
718 signal_simple_error ("Invalid value for 'direction", value);
721 else if (EQ (keyword, Qfinal))
723 CHECK_CHAR_COERCE_INT (value);
724 final = XCHAR (value);
725 if (final < '0' || final > '~')
726 signal_simple_error ("Invalid value for 'final", value);
729 else if (EQ (keyword, Qccl_program))
731 CHECK_VECTOR (value);
736 signal_simple_error ("Unrecognized property", keyword);
740 error ("'final must be specified");
741 if (dimension == 2 && final > 0x5F)
743 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
747 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
749 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
751 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
752 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
754 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
756 lb = get_unallocated_leading_byte (dimension);
758 if (NILP (doc_string))
759 doc_string = build_string ("");
762 registry = build_string ("");
766 charset = make_charset (-1, name, lb, dimension + 2, type, columns, graphic,
767 final, direction, doc_string, registry);
768 if (!NILP (ccl_program))
769 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
773 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
775 Make a charset equivalent to CHARSET but which goes in the opposite direction.
776 NEW-NAME is the name of the new charset. Return the new charset.
780 Lisp_Object new_charset = Qnil;
781 int lb, dimension, columns, graphic, final;
783 Lisp_Object registry, doc_string;
784 struct Lisp_Charset *cs;
786 charset = Fget_charset (charset);
787 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
788 signal_simple_error ("Charset already has reverse-direction charset",
791 CHECK_SYMBOL (new_name);
792 if (!NILP (Ffind_charset (new_name)))
793 signal_simple_error ("Cannot redefine existing charset", new_name);
795 cs = XCHARSET (charset);
797 type = CHARSET_TYPE (cs);
798 columns = CHARSET_COLUMNS (cs);
799 dimension = CHARSET_DIMENSION (cs);
800 lb = get_unallocated_leading_byte (dimension);
802 graphic = CHARSET_GRAPHIC (cs);
803 final = CHARSET_FINAL (cs);
804 direction = CHARSET_RIGHT_TO_LEFT;
805 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
806 direction = CHARSET_LEFT_TO_RIGHT;
807 doc_string = CHARSET_DOC_STRING (cs);
808 registry = CHARSET_REGISTRY (cs);
810 new_charset = make_charset (-1, new_name, lb, dimension + 2, type, columns,
811 graphic, final, direction, doc_string, registry);
813 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
814 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
819 /* #### Reverse direction charsets not yet implemented. */
821 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
823 Return the reverse-direction charset parallel to CHARSET, if any.
824 This is the charset with the same properties (in particular, the same
825 dimension, number of characters per dimension, and final byte) as
826 CHARSET but whose characters are displayed in the opposite direction.
830 charset = Fget_charset (charset);
831 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
835 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
836 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
837 If DIRECTION is omitted, both directions will be checked (left-to-right
838 will be returned if character sets exist for both directions).
840 (dimension, chars, final, direction))
842 int dm, ch, fi, di = -1;
844 Lisp_Object obj = Qnil;
846 CHECK_INT (dimension);
847 dm = XINT (dimension);
848 if (dm < 1 || dm > 2)
849 signal_simple_error ("Invalid value for DIMENSION", dimension);
853 if (ch != 94 && ch != 96)
854 signal_simple_error ("Invalid value for CHARS", chars);
856 CHECK_CHAR_COERCE_INT (final);
858 if (fi < '0' || fi > '~')
859 signal_simple_error ("Invalid value for FINAL", final);
861 if (EQ (direction, Ql2r))
862 di = CHARSET_LEFT_TO_RIGHT;
863 else if (EQ (direction, Qr2l))
864 di = CHARSET_RIGHT_TO_LEFT;
865 else if (!NILP (direction))
866 signal_simple_error ("Invalid value for DIRECTION", direction);
868 if (dm == 2 && fi > 0x5F)
870 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
873 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
875 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
879 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
881 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
884 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
887 return XCHARSET_NAME (obj);
891 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
892 Return doc string of CHARSET.
896 return XCHARSET_DOC_STRING (Fget_charset (charset));
899 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
900 Return dimension of CHARSET.
904 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
907 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
908 Return property PROP of CHARSET.
909 Recognized properties are those listed in `make-charset', as well as
910 'name and 'doc-string.
914 struct Lisp_Charset *cs;
916 charset = Fget_charset (charset);
917 cs = XCHARSET (charset);
920 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
921 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
922 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
923 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
924 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
925 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
926 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
927 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
928 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
929 if (EQ (prop, Qdirection))
930 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
931 if (EQ (prop, Qreverse_direction_charset))
933 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
937 return XCHARSET_NAME (obj);
939 signal_simple_error ("Unrecognized charset property name", prop);
940 return Qnil; /* not reached */
943 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
944 Return charset identification number of CHARSET.
948 return make_int(XCHARSET_ID (Fget_charset (charset)));
951 /* #### We need to figure out which properties we really want to
954 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
955 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
957 (charset, ccl_program))
959 charset = Fget_charset (charset);
960 CHECK_VECTOR (ccl_program);
961 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
966 invalidate_charset_font_caches (Lisp_Object charset)
968 /* Invalidate font cache entries for charset on all devices. */
969 Lisp_Object devcons, concons, hashtab;
970 DEVICE_LOOP_NO_BREAK (devcons, concons)
972 struct device *d = XDEVICE (XCAR (devcons));
973 hashtab = Fgethash (charset, d->charset_font_cache, Qunbound);
974 if (!UNBOUNDP (hashtab))
979 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
980 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
981 Set the 'registry property of CHARSET to REGISTRY.
985 charset = Fget_charset (charset);
986 CHECK_STRING (registry);
987 XCHARSET_REGISTRY (charset) = registry;
988 invalidate_charset_font_caches (charset);
989 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
994 /************************************************************************/
995 /* Lisp primitives for working with characters */
996 /************************************************************************/
998 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
999 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
1001 (charset, arg1, arg2))
1003 struct Lisp_Charset *cs;
1005 int lowlim, highlim;
1007 charset = Fget_charset (charset);
1008 cs = XCHARSET (charset);
1010 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1011 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1012 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1013 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1017 if (a1 < lowlim || a1 > highlim)
1018 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1020 if (CHARSET_DIMENSION (cs) == 1)
1024 ("Charset is of dimension one; second octet must be nil", arg2);
1025 return make_char (MAKE_CHAR (charset, a1, 0));
1030 if (a2 < lowlim || a2 > highlim)
1031 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1033 return make_char (MAKE_CHAR (charset, a1, a2));
1036 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1037 Return the character set of char CH.
1041 CHECK_CHAR_COERCE_INT (ch);
1043 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1044 (CHAR_LEADING_BYTE (XCHAR (ch))));
1047 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1048 Return the octet numbered N (should be 0 or 1) of char CH.
1049 N defaults to 0 if omitted.
1053 Lisp_Object charset;
1056 CHECK_CHAR_COERCE_INT (ch);
1063 if (int_n != 0 && int_n != 1)
1064 signal_simple_error ("Octet number must be 0 or 1", n);
1066 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1067 return make_int (int_n == 0 ? c1 : c2);
1071 /************************************************************************/
1072 /* composite character functions */
1073 /************************************************************************/
1076 lookup_composite_char (Bufbyte *str, int len)
1078 Lisp_Object lispstr = make_string (str, len);
1079 Lisp_Object ch = Fgethash (lispstr,
1080 Vcomposite_char_string2char_hashtable,
1086 if (composite_char_row_next >= 128)
1087 signal_simple_error ("No more composite chars available", lispstr);
1088 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1089 composite_char_col_next);
1090 Fputhash (make_char (emch), lispstr,
1091 Vcomposite_char_char2string_hashtable);
1092 Fputhash (lispstr, make_char (emch),
1093 Vcomposite_char_string2char_hashtable);
1094 composite_char_col_next++;
1095 if (composite_char_col_next >= 128)
1097 composite_char_col_next = 32;
1098 composite_char_row_next++;
1107 composite_char_string (Emchar ch)
1109 Lisp_Object str = Fgethash (make_char (ch),
1110 Vcomposite_char_char2string_hashtable,
1112 assert (!UNBOUNDP (str));
1116 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1117 Convert a string into a single composite character.
1118 The character is the result of overstriking all the characters in
1123 CHECK_STRING (string);
1124 return make_char (lookup_composite_char (XSTRING_DATA (string),
1125 XSTRING_LENGTH (string)));
1128 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1129 Return a string of the characters comprising a composite character.
1137 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1138 signal_simple_error ("Must be composite char", ch);
1139 return composite_char_string (emch);
1143 /************************************************************************/
1144 /* initialization */
1145 /************************************************************************/
1148 syms_of_mule_charset (void)
1150 DEFSUBR (Fcharsetp);
1151 DEFSUBR (Ffind_charset);
1152 DEFSUBR (Fget_charset);
1153 DEFSUBR (Fcharset_list);
1154 DEFSUBR (Fcharset_name);
1155 DEFSUBR (Fmake_charset);
1156 DEFSUBR (Fmake_reverse_direction_charset);
1157 /* DEFSUBR (Freverse_direction_charset); */
1158 DEFSUBR (Fcharset_from_attributes);
1159 DEFSUBR (Fcharset_doc_string);
1160 DEFSUBR (Fcharset_dimension);
1161 DEFSUBR (Fcharset_property);
1162 DEFSUBR (Fcharset_id);
1163 DEFSUBR (Fset_charset_ccl_program);
1164 DEFSUBR (Fset_charset_registry);
1166 DEFSUBR (Fmake_char);
1167 DEFSUBR (Fchar_charset);
1168 DEFSUBR (Fchar_octet);
1170 DEFSUBR (Fmake_composite_char);
1171 DEFSUBR (Fcomposite_char_string);
1173 defsymbol (&Qcharsetp, "charsetp");
1174 defsymbol (&Qregistry, "registry");
1175 defsymbol (&Qfinal, "final");
1176 defsymbol (&Qgraphic, "graphic");
1177 defsymbol (&Qdirection, "direction");
1178 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1179 defsymbol (&Qccl_program, "ccl-program");
1181 defsymbol (&Ql2r, "l2r");
1182 defsymbol (&Qr2l, "r2l");
1184 /* Charsets, compatible with Emacs/Mule 19.33-delta
1185 Naming convention is Script-Charset[-Edition] */
1186 defsymbol (&Qascii, "ascii");
1187 defsymbol (&Qcontrol_1, "control-1");
1188 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1189 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1190 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1191 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1192 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1193 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1194 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1195 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1196 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1197 defsymbol (&Qthai_tis620, "thai-tis620");
1199 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1200 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1201 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1202 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1203 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1205 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1206 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1207 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1208 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1209 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1211 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1212 defsymbol (&Qcomposite, "composite");
1216 vars_of_mule_charset (void)
1220 /* Table of charsets indexed by leading byte. */
1221 for (i = 0; i < countof (charset_by_leading_byte); i++)
1222 charset_by_leading_byte[i] = Qnil;
1224 /* Table of charsets indexed by type/final-byte/direction. */
1225 for (i = 0; i < countof (charset_by_attributes); i++)
1226 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1227 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1228 charset_by_attributes[i][j][k] = Qnil;
1230 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1231 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1235 complex_vars_of_mule_charset (void)
1237 staticpro (&Vcharset_hashtable);
1238 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
1241 /* Predefined character sets. We store them into variables for
1245 make_charset (0, Qascii, LEADING_BYTE_ASCII, 1,
1246 CHARSET_TYPE_94, 1, 0, 'B',
1247 CHARSET_LEFT_TO_RIGHT,
1248 build_string ("ASCII (ISO 646 IRV)"),
1249 build_string ("iso8859-1"));
1250 Vcharset_control_1 =
1251 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1252 CHARSET_TYPE_94, 1, 0, 0,
1253 CHARSET_LEFT_TO_RIGHT,
1254 build_string ("Control characters"),
1256 Vcharset_latin_iso8859_1 =
1257 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
1258 CHARSET_TYPE_96, 1, 1, 'A',
1259 CHARSET_LEFT_TO_RIGHT,
1260 build_string ("ISO 8859-1 (Latin-1)"),
1261 build_string ("iso8859-1"));
1262 Vcharset_latin_iso8859_2 =
1263 make_charset (130, Qlatin_iso8859_2, LEADING_BYTE_LATIN_ISO8859_2, 2,
1264 CHARSET_TYPE_96, 1, 1, 'B',
1265 CHARSET_LEFT_TO_RIGHT,
1266 build_string ("ISO 8859-2 (Latin-2)"),
1267 build_string ("iso8859-2"));
1268 Vcharset_latin_iso8859_3 =
1269 make_charset (131, Qlatin_iso8859_3, LEADING_BYTE_LATIN_ISO8859_3, 2,
1270 CHARSET_TYPE_96, 1, 1, 'C',
1271 CHARSET_LEFT_TO_RIGHT,
1272 build_string ("ISO 8859-3 (Latin-3)"),
1273 build_string ("iso8859-3"));
1274 Vcharset_latin_iso8859_4 =
1275 make_charset (132, Qlatin_iso8859_4, LEADING_BYTE_LATIN_ISO8859_4, 2,
1276 CHARSET_TYPE_96, 1, 1, 'D',
1277 CHARSET_LEFT_TO_RIGHT,
1278 build_string ("ISO 8859-4 (Latin-4)"),
1279 build_string ("iso8859-4"));
1280 Vcharset_cyrillic_iso8859_5 =
1281 make_charset (140, Qcyrillic_iso8859_5, LEADING_BYTE_CYRILLIC_ISO8859_5, 2,
1282 CHARSET_TYPE_96, 1, 1, 'L',
1283 CHARSET_LEFT_TO_RIGHT,
1284 build_string ("ISO 8859-5 (Cyrillic)"),
1285 build_string ("iso8859-5"));
1286 Vcharset_arabic_iso8859_6 =
1287 make_charset (135, Qarabic_iso8859_6, LEADING_BYTE_ARABIC_ISO8859_6, 2,
1288 CHARSET_TYPE_96, 1, 1, 'G',
1289 CHARSET_RIGHT_TO_LEFT,
1290 build_string ("ISO 8859-6 (Arabic)"),
1291 build_string ("iso8859-6"));
1292 Vcharset_greek_iso8859_7 =
1293 make_charset (134, Qgreek_iso8859_7, LEADING_BYTE_GREEK_ISO8859_7, 2,
1294 CHARSET_TYPE_96, 1, 1, 'F',
1295 CHARSET_LEFT_TO_RIGHT,
1296 build_string ("ISO 8859-7 (Greek)"),
1297 build_string ("iso8859-7"));
1298 Vcharset_hebrew_iso8859_8 =
1299 make_charset (136, Qhebrew_iso8859_8, LEADING_BYTE_HEBREW_ISO8859_8, 2,
1300 CHARSET_TYPE_96, 1, 1, 'H',
1301 CHARSET_RIGHT_TO_LEFT,
1302 build_string ("ISO 8859-8 (Hebrew)"),
1303 build_string ("iso8859-8"));
1304 Vcharset_latin_iso8859_9 =
1305 make_charset (141, Qlatin_iso8859_9, LEADING_BYTE_LATIN_ISO8859_9, 2,
1306 CHARSET_TYPE_96, 1, 1, 'M',
1307 CHARSET_LEFT_TO_RIGHT,
1308 build_string ("ISO 8859-9 (Latin-5)"),
1309 build_string ("iso8859-9"));
1310 Vcharset_thai_tis620 =
1311 make_charset (133, Qthai_tis620, LEADING_BYTE_THAI_TIS620, 2,
1312 CHARSET_TYPE_96, 1, 1, 'T',
1313 CHARSET_LEFT_TO_RIGHT,
1314 build_string ("TIS 620.2529 (Thai)"),
1315 build_string ("tis620"));
1318 Vcharset_katakana_jisx0201 =
1319 make_charset (137, Qkatakana_jisx0201,
1320 LEADING_BYTE_KATAKANA_JISX0201, 2,
1321 CHARSET_TYPE_94, 1, 1, 'I',
1322 CHARSET_LEFT_TO_RIGHT,
1323 build_string ("JIS X0201-Katakana"),
1324 build_string ("jisx0201.1976"));
1325 Vcharset_latin_jisx0201 =
1326 make_charset (138, Qlatin_jisx0201,
1327 LEADING_BYTE_LATIN_JISX0201, 2,
1328 CHARSET_TYPE_94, 1, 0, 'J',
1329 CHARSET_LEFT_TO_RIGHT,
1330 build_string ("JIS X0201-Latin"),
1331 build_string ("jisx0201.1976"));
1332 Vcharset_japanese_jisx0208_1978 =
1333 make_charset (144, Qjapanese_jisx0208_1978,
1334 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1335 CHARSET_TYPE_94X94, 2, 0, '@',
1336 CHARSET_LEFT_TO_RIGHT,
1338 ("JIS X0208-1978 (Japanese Kanji; Old Version)"),
1339 build_string ("\\(jisx0208\\|jisc6226\\).19"));
1340 Vcharset_japanese_jisx0208 =
1341 make_charset (146, Qjapanese_jisx0208,
1342 LEADING_BYTE_JAPANESE_JISX0208, 3,
1343 CHARSET_TYPE_94X94, 2, 0, 'B',
1344 CHARSET_LEFT_TO_RIGHT,
1345 build_string ("JIS X0208-1983 (Japanese Kanji)"),
1346 build_string ("jisx0208.19\\(83\\|90\\)"));
1347 Vcharset_japanese_jisx0212 =
1348 make_charset (148, Qjapanese_jisx0212,
1349 LEADING_BYTE_JAPANESE_JISX0212, 3,
1350 CHARSET_TYPE_94X94, 2, 0, 'D',
1351 CHARSET_LEFT_TO_RIGHT,
1352 build_string ("JIS X0212 (Japanese Supplement)"),
1353 build_string ("jisx0212"));
1356 Vcharset_chinese_gb2312 =
1357 make_charset (145, Qchinese_gb2312, LEADING_BYTE_CHINESE_GB2312, 3,
1358 CHARSET_TYPE_94X94, 2, 0, 'A',
1359 CHARSET_LEFT_TO_RIGHT,
1360 build_string ("GB 2312 (Simplified Chinese)"),
1361 build_string ("gb2312"));
1362 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1363 Vcharset_chinese_cns11643_1 =
1364 make_charset (149, Qchinese_cns11643_1,
1365 LEADING_BYTE_CHINESE_CNS11643_1, 3,
1366 CHARSET_TYPE_94X94, 2, 0, 'G',
1367 CHARSET_LEFT_TO_RIGHT,
1369 ("CNS 11643 Plane 1 (Traditional Chinese for daily use)"),
1370 build_string (CHINESE_CNS_PLANE_RE("1")));
1371 Vcharset_chinese_cns11643_2 =
1372 make_charset (150, Qchinese_cns11643_2,
1373 LEADING_BYTE_CHINESE_CNS11643_2, 3,
1374 CHARSET_TYPE_94X94, 2, 0, 'H',
1375 CHARSET_LEFT_TO_RIGHT,
1377 ("CNS 11643 Plane 2 (Traditional Chinese for daily use)"),
1378 build_string (CHINESE_CNS_PLANE_RE("2")));
1379 Vcharset_chinese_big5_1 =
1380 make_charset (152, Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
1381 CHARSET_TYPE_94X94, 2, 0, '0',
1382 CHARSET_LEFT_TO_RIGHT,
1384 ("Big5 Level 1 (Traditional Chinese for daily use)"),
1385 build_string ("big5"));
1386 Vcharset_chinese_big5_2 =
1387 make_charset (153, Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
1388 CHARSET_TYPE_94X94, 2, 0, '1',
1389 CHARSET_LEFT_TO_RIGHT,
1391 ("Big5 Level 2 (Traditional Chinese for daily use)"),
1392 build_string ("big5"));
1394 Vcharset_korean_ksc5601 =
1395 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1396 CHARSET_TYPE_94X94, 2, 0, 'C',
1397 CHARSET_LEFT_TO_RIGHT,
1398 build_string ("KS C5601 (Hangul and Korean Hanja)"),
1399 build_string ("ksc5601"));
1400 /* #### For simplicity, we put composite chars into a 96x96 charset.
1401 This is going to lead to problems because you can run out of
1402 room, esp. as we don't yet recycle numbers. */
1403 Vcharset_composite =
1404 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1405 CHARSET_TYPE_96X96, 2, 0, 0,
1406 CHARSET_LEFT_TO_RIGHT,
1407 build_string ("Composite characters"),
1410 composite_char_row_next = 32;
1411 composite_char_col_next = 32;
1413 Vcomposite_char_string2char_hashtable =
1414 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL);
1415 Vcomposite_char_char2string_hashtable =
1416 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ);
1417 staticpro (&Vcomposite_char_string2char_hashtable);
1418 staticpro (&Vcomposite_char_char2string_hashtable);