1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Mule 2.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_cyrillic_iso8859_5;
45 Lisp_Object Vcharset_arabic_iso8859_6;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_latin_iso8859_9;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_japanese_jisx0208;
54 Lisp_Object Vcharset_japanese_jisx0212;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
82 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation.
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent
86 canonical computation:
88 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
90 Bytecount rep_bytes_by_first_byte[0xA0] =
91 { /* 0x00 - 0x7f are for straight ASCII */
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 /* 0x80 - 0x8f are for Dimension-1 official charsets */
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 /* 0x90 - 0x9d are for Dimension-2 official charsets */
103 /* 0x9e is for Dimension-1 private charsets */
104 /* 0x9f is for Dimension-2 private charsets */
105 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
108 Lisp_Object Qcharsetp;
110 /* Qdoc_string, Qdimension, Qchars defined in general.c */
111 Lisp_Object Qregistry, Qfinal, Qgraphic;
112 Lisp_Object Qdirection;
113 Lisp_Object Qreverse_direction_charset;
114 Lisp_Object Qccl_program;
115 Lisp_Object Qleading_byte;
117 Lisp_Object Qascii, Qcontrol_1,
131 Qkatakana_jisx0201, Qlatin_jisx0201,
132 Qjapanese_jisx0208_1978,
137 Qchinese_big5_1, Qchinese_big5_2,
138 Qchinese_cns11643_1, Qchinese_cns11643_2,
140 Qkorean_ksc5601, Qcomposite;
142 Lisp_Object Ql2r, Qr2l;
144 Lisp_Object Vcharset_hash_table;
146 static Bufbyte next_allocated_1_byte_leading_byte;
147 static Bufbyte next_allocated_2_byte_leading_byte;
149 /* Composite characters are characters constructed by overstriking two
150 or more regular characters.
152 1) The old Mule implementation involves storing composite characters
153 in a buffer as a tag followed by all of the actual characters
154 used to make up the composite character. I think this is a bad
155 idea; it greatly complicates code that wants to handle strings
156 one character at a time because it has to deal with the possibility
157 of great big ungainly characters. It's much more reasonable to
158 simply store an index into a table of composite characters.
160 2) The current implementation only allows for 16,384 separate
161 composite characters over the lifetime of the XEmacs process.
162 This could become a potential problem if the user
163 edited lots of different files that use composite characters.
164 Due to FSF bogosity, increasing the number of allowable
165 composite characters under Mule would decrease the number
166 of possible faces that can exist. Mule already has shrunk
167 this to 2048, and further shrinkage would become uncomfortable.
168 No such problems exist in XEmacs.
170 Composite characters could be represented as 0x80 C1 C2 C3,
171 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
172 for slightly under 2^20 (one million) composite characters
173 over the XEmacs process lifetime, and you only need to
174 increase the size of a Mule character from 19 to 21 bits.
175 Or you could use 0x80 C1 C2 C3 C4, allowing for about
176 85 million (slightly over 2^26) composite characters. */
179 /************************************************************************/
180 /* Basic Emchar functions */
181 /************************************************************************/
183 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
184 string in STR. Returns the number of bytes stored.
185 Do not call this directly. Use the macro set_charptr_emchar() instead.
189 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
197 BREAKUP_CHAR (c, charset, c1, c2);
198 lb = CHAR_LEADING_BYTE (c);
199 if (LEADING_BYTE_PRIVATE_P (lb))
200 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
202 if (EQ (charset, Vcharset_control_1))
211 /* Return the first character from a Mule-encoded string in STR,
212 assuming it's non-ASCII. Do not call this directly.
213 Use the macro charptr_emchar() instead. */
216 non_ascii_charptr_emchar (CONST Bufbyte *str)
218 Bufbyte i0 = *str, i1, i2 = 0;
221 if (i0 == LEADING_BYTE_CONTROL_1)
222 return (Emchar) (*++str - 0x20);
224 if (LEADING_BYTE_PREFIX_P (i0))
229 charset = CHARSET_BY_LEADING_BYTE (i0);
230 if (XCHARSET_DIMENSION (charset) == 2)
233 return MAKE_CHAR (charset, i1, i2);
236 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
237 Do not call this directly. Use the macro valid_char_p() instead. */
240 non_ascii_valid_char_p (Emchar ch)
244 /* Must have only lowest 19 bits set */
248 f1 = CHAR_FIELD1 (ch);
249 f2 = CHAR_FIELD2 (ch);
250 f3 = CHAR_FIELD3 (ch);
256 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
257 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
258 f2 > MAX_CHAR_FIELD2_PRIVATE)
263 if (f3 != 0x20 && f3 != 0x7F)
267 NOTE: This takes advantage of the fact that
268 FIELD2_TO_OFFICIAL_LEADING_BYTE and
269 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
271 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
272 return (XCHARSET_CHARS (charset) == 96);
278 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
279 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
280 f1 > MAX_CHAR_FIELD1_PRIVATE)
282 if (f2 < 0x20 || f3 < 0x20)
285 #ifdef ENABLE_COMPOSITE_CHARS
286 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
288 if (UNBOUNDP (Fgethash (make_int (ch),
289 Vcomposite_char_char2string_hash_table,
294 #endif /* ENABLE_COMPOSITE_CHARS */
296 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
299 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
301 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
304 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
306 return (XCHARSET_CHARS (charset) == 96);
311 /************************************************************************/
312 /* Basic string functions */
313 /************************************************************************/
315 /* Copy the character pointed to by PTR into STR, assuming it's
316 non-ASCII. Do not call this directly. Use the macro
317 charptr_copy_char() instead. */
320 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
322 Bufbyte *strptr = str;
324 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
326 /* Notice fallthrough. */
327 case 4: *++strptr = *ptr++;
328 case 3: *++strptr = *ptr++;
329 case 2: *++strptr = *ptr;
334 return strptr + 1 - str;
338 /************************************************************************/
339 /* streams of Emchars */
340 /************************************************************************/
342 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
343 The functions below are not meant to be called directly; use
344 the macros in insdel.h. */
347 Lstream_get_emchar_1 (Lstream *stream, int ch)
349 Bufbyte str[MAX_EMCHAR_LEN];
350 Bufbyte *strptr = str;
352 str[0] = (Bufbyte) ch;
353 switch (REP_BYTES_BY_FIRST_BYTE (ch))
355 /* Notice fallthrough. */
357 ch = Lstream_getc (stream);
359 *++strptr = (Bufbyte) ch;
361 ch = Lstream_getc (stream);
363 *++strptr = (Bufbyte) ch;
365 ch = Lstream_getc (stream);
367 *++strptr = (Bufbyte) ch;
372 return charptr_emchar (str);
376 Lstream_fput_emchar (Lstream *stream, Emchar ch)
378 Bufbyte str[MAX_EMCHAR_LEN];
379 Bytecount len = set_charptr_emchar (str, ch);
380 return Lstream_write (stream, str, len);
384 Lstream_funget_emchar (Lstream *stream, Emchar ch)
386 Bufbyte str[MAX_EMCHAR_LEN];
387 Bytecount len = set_charptr_emchar (str, ch);
388 Lstream_unread (stream, str, len);
392 /************************************************************************/
394 /************************************************************************/
397 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
399 struct Lisp_Charset *cs = XCHARSET (obj);
401 markobj (cs->doc_string);
402 markobj (cs->registry);
403 markobj (cs->ccl_program);
408 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
410 struct Lisp_Charset *cs = XCHARSET (obj);
414 error ("printing unreadable object #<charset %s 0x%x>",
415 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
418 write_c_string ("#<charset ", printcharfun);
419 print_internal (CHARSET_NAME (cs), printcharfun, 0);
420 write_c_string (" ", printcharfun);
421 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
422 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
423 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
424 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
425 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
427 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
428 CHARSET_COLUMNS (cs),
429 CHARSET_GRAPHIC (cs),
431 write_c_string (buf, printcharfun);
432 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
433 sprintf (buf, " 0x%x>", cs->header.uid);
434 write_c_string (buf, printcharfun);
437 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
438 mark_charset, print_charset, 0, 0, 0,
439 struct Lisp_Charset);
440 /* Make a new charset. */
443 make_charset (int id, Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
444 unsigned char type, unsigned char columns, unsigned char graphic,
445 Bufbyte final, unsigned char direction, Lisp_Object doc,
449 struct Lisp_Charset *cs =
450 alloc_lcrecord_type (struct Lisp_Charset, lrecord_charset);
451 XSETCHARSET (obj, cs);
453 CHARSET_ID (cs) = id;
454 CHARSET_NAME (cs) = name;
455 CHARSET_LEADING_BYTE (cs) = leading_byte;
456 CHARSET_REP_BYTES (cs) = rep_bytes;
457 CHARSET_DIRECTION (cs) = direction;
458 CHARSET_TYPE (cs) = type;
459 CHARSET_COLUMNS (cs) = columns;
460 CHARSET_GRAPHIC (cs) = graphic;
461 CHARSET_FINAL (cs) = final;
462 CHARSET_DOC_STRING (cs) = doc;
463 CHARSET_REGISTRY (cs) = reg;
464 CHARSET_CCL_PROGRAM (cs) = Qnil;
465 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
467 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
468 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
469 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
470 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
474 /* some charsets do not have final characters. This includes
475 ASCII, Control-1, Composite, and the two faux private
477 assert (NILP (charset_by_attributes[type][final][direction]));
478 charset_by_attributes[type][final][direction] = obj;
481 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
482 charset_by_leading_byte[leading_byte - 128] = obj;
483 if (leading_byte < 0xA0)
484 /* official leading byte */
485 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
487 /* Some charsets are "faux" and don't have names or really exist at
488 all except in the leading-byte table. */
490 Fputhash (name, obj, Vcharset_hash_table);
495 get_unallocated_leading_byte (int dimension)
501 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
504 lb = next_allocated_1_byte_leading_byte++;
508 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
511 lb = next_allocated_2_byte_leading_byte++;
516 ("No more character sets free for this dimension",
517 make_int (dimension));
523 /************************************************************************/
524 /* Basic charset Lisp functions */
525 /************************************************************************/
527 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
528 Return non-nil if OBJECT is a charset.
532 return CHARSETP (object) ? Qt : Qnil;
535 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
536 Retrieve the charset of the given name.
537 If CHARSET-OR-NAME is a charset object, it is simply returned.
538 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
539 nil is returned. Otherwise the associated charset object is returned.
543 if (CHARSETP (charset_or_name))
544 return charset_or_name;
546 CHECK_SYMBOL (charset_or_name);
547 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
550 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
551 Retrieve the charset of the given name.
552 Same as `find-charset' except an error is signalled if there is no such
553 charset instead of returning nil.
557 Lisp_Object charset = Ffind_charset (name);
560 signal_simple_error ("No such charset", name);
564 /* We store the charsets in hash tables with the names as the key and the
565 actual charset object as the value. Occasionally we need to use them
566 in a list format. These routines provide us with that. */
567 struct charset_list_closure
569 Lisp_Object *charset_list;
573 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
574 void *charset_list_closure)
576 /* This function can GC */
577 struct charset_list_closure *chcl =
578 (struct charset_list_closure*) charset_list_closure;
579 Lisp_Object *charset_list = chcl->charset_list;
581 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
585 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
586 Return a list of the names of all defined charsets.
590 Lisp_Object charset_list = Qnil;
592 struct charset_list_closure charset_list_closure;
594 GCPRO1 (charset_list);
595 charset_list_closure.charset_list = &charset_list;
596 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
597 &charset_list_closure);
603 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
604 Return the name of the given charset.
608 return XCHARSET_NAME (Fget_charset (charset));
611 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
612 Define a new character set.
613 This function is for use with Mule support.
614 NAME is a symbol, the name by which the character set is normally referred.
615 DOC-STRING is a string describing the character set.
616 PROPS is a property list, describing the specific nature of the
617 character set. Recognized properties are:
619 'registry A regular expression matching the font registry field for
621 'dimension Number of octets used to index a character in this charset.
622 Either 1 or 2. Defaults to 1.
623 'columns Number of columns used to display a character in this charset.
624 Only used in TTY mode. (Under X, the actual width of a
625 character can be derived from the font used to display the
626 characters.) If unspecified, defaults to the dimension
627 (this is almost always the correct value).
628 'chars Number of characters in each dimension (94 or 96).
629 Defaults to 94. Note that if the dimension is 2, the
630 character set thus described is 94x94 or 96x96.
631 'final Final byte of ISO 2022 escape sequence. Must be
632 supplied. Each combination of (DIMENSION, CHARS) defines a
633 separate namespace for final bytes. Note that ISO
634 2022 restricts the final byte to the range
635 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
636 dimension == 2. Note also that final bytes in the range
637 0x30 - 0x3F are reserved for user-defined (not official)
639 'graphic 0 (use left half of font on output) or 1 (use right half
640 of font on output). Defaults to 0. For example, for
641 a font whose registry is ISO8859-1, the left half
642 (octets 0x20 - 0x7F) is the `ascii' character set, while
643 the right half (octets 0xA0 - 0xFF) is the `latin-1'
644 character set. With 'graphic set to 0, the octets
645 will have their high bit cleared; with it set to 1,
646 the octets will have their high bit set.
647 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
649 'ccl-program A compiled CCL program used to convert a character in
650 this charset into an index into the font. This is in
651 addition to the 'graphic property. The CCL program
652 is passed the octets of the character, with the high
653 bit cleared and set depending upon whether the value
654 of the 'graphic property is 0 or 1.
656 (name, doc_string, props))
658 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
659 int direction = CHARSET_LEFT_TO_RIGHT;
661 Lisp_Object registry = Qnil;
663 Lisp_Object rest, keyword, value;
664 Lisp_Object ccl_program = Qnil;
667 if (!NILP (doc_string))
668 CHECK_STRING (doc_string);
670 charset = Ffind_charset (name);
672 signal_simple_error ("Cannot redefine existing charset", name);
674 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
676 if (EQ (keyword, Qdimension))
679 dimension = XINT (value);
680 if (dimension < 1 || dimension > 2)
681 signal_simple_error ("Invalid value for 'dimension", value);
684 else if (EQ (keyword, Qchars))
687 chars = XINT (value);
688 if (chars != 94 && chars != 96)
689 signal_simple_error ("Invalid value for 'chars", value);
692 else if (EQ (keyword, Qcolumns))
695 columns = XINT (value);
696 if (columns != 1 && columns != 2)
697 signal_simple_error ("Invalid value for 'columns", value);
700 else if (EQ (keyword, Qgraphic))
703 graphic = XINT (value);
704 if (graphic < 0 || graphic > 1)
705 signal_simple_error ("Invalid value for 'graphic", value);
708 else if (EQ (keyword, Qregistry))
710 CHECK_STRING (value);
714 else if (EQ (keyword, Qdirection))
716 if (EQ (value, Ql2r))
717 direction = CHARSET_LEFT_TO_RIGHT;
718 else if (EQ (value, Qr2l))
719 direction = CHARSET_RIGHT_TO_LEFT;
721 signal_simple_error ("Invalid value for 'direction", value);
724 else if (EQ (keyword, Qfinal))
726 CHECK_CHAR_COERCE_INT (value);
727 final = XCHAR (value);
728 if (final < '0' || final > '~')
729 signal_simple_error ("Invalid value for 'final", value);
732 else if (EQ (keyword, Qccl_program))
734 CHECK_VECTOR (value);
739 signal_simple_error ("Unrecognized property", keyword);
743 error ("'final must be specified");
744 if (dimension == 2 && final > 0x5F)
746 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
750 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
752 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
754 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
755 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
757 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
759 lb = get_unallocated_leading_byte (dimension);
761 if (NILP (doc_string))
762 doc_string = build_string ("");
765 registry = build_string ("");
769 charset = make_charset (-1, name, lb, dimension + 2, type, columns, graphic,
770 final, direction, doc_string, registry);
771 if (!NILP (ccl_program))
772 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
776 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
778 Make a charset equivalent to CHARSET but which goes in the opposite direction.
779 NEW-NAME is the name of the new charset. Return the new charset.
783 Lisp_Object new_charset = Qnil;
784 int lb, dimension, columns, graphic, final;
786 Lisp_Object registry, doc_string;
787 struct Lisp_Charset *cs;
789 charset = Fget_charset (charset);
790 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
791 signal_simple_error ("Charset already has reverse-direction charset",
794 CHECK_SYMBOL (new_name);
795 if (!NILP (Ffind_charset (new_name)))
796 signal_simple_error ("Cannot redefine existing charset", new_name);
798 cs = XCHARSET (charset);
800 type = CHARSET_TYPE (cs);
801 columns = CHARSET_COLUMNS (cs);
802 dimension = CHARSET_DIMENSION (cs);
803 lb = get_unallocated_leading_byte (dimension);
805 graphic = CHARSET_GRAPHIC (cs);
806 final = CHARSET_FINAL (cs);
807 direction = CHARSET_RIGHT_TO_LEFT;
808 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
809 direction = CHARSET_LEFT_TO_RIGHT;
810 doc_string = CHARSET_DOC_STRING (cs);
811 registry = CHARSET_REGISTRY (cs);
813 new_charset = make_charset (-1, new_name, lb, dimension + 2, type, columns,
814 graphic, final, direction, doc_string, registry);
816 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
817 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
822 /* #### Reverse direction charsets not yet implemented. */
824 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
826 Return the reverse-direction charset parallel to CHARSET, if any.
827 This is the charset with the same properties (in particular, the same
828 dimension, number of characters per dimension, and final byte) as
829 CHARSET but whose characters are displayed in the opposite direction.
833 charset = Fget_charset (charset);
834 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
838 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
839 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
840 If DIRECTION is omitted, both directions will be checked (left-to-right
841 will be returned if character sets exist for both directions).
843 (dimension, chars, final, direction))
845 int dm, ch, fi, di = -1;
847 Lisp_Object obj = Qnil;
849 CHECK_INT (dimension);
850 dm = XINT (dimension);
851 if (dm < 1 || dm > 2)
852 signal_simple_error ("Invalid value for DIMENSION", dimension);
856 if (ch != 94 && ch != 96)
857 signal_simple_error ("Invalid value for CHARS", chars);
859 CHECK_CHAR_COERCE_INT (final);
861 if (fi < '0' || fi > '~')
862 signal_simple_error ("Invalid value for FINAL", final);
864 if (EQ (direction, Ql2r))
865 di = CHARSET_LEFT_TO_RIGHT;
866 else if (EQ (direction, Qr2l))
867 di = CHARSET_RIGHT_TO_LEFT;
868 else if (!NILP (direction))
869 signal_simple_error ("Invalid value for DIRECTION", direction);
871 if (dm == 2 && fi > 0x5F)
873 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
876 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
878 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
882 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
884 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
887 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
890 return XCHARSET_NAME (obj);
894 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
895 Return doc string of CHARSET.
899 return XCHARSET_DOC_STRING (Fget_charset (charset));
902 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
903 Return dimension of CHARSET.
907 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
910 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
911 Return property PROP of CHARSET.
912 Recognized properties are those listed in `make-charset', as well as
913 'name and 'doc-string.
917 struct Lisp_Charset *cs;
919 charset = Fget_charset (charset);
920 cs = XCHARSET (charset);
923 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
924 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
925 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
926 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
927 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
928 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
929 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
930 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
931 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
932 if (EQ (prop, Qleading_byte)) return make_char (CHARSET_LEADING_BYTE (cs));
933 if (EQ (prop, Qdirection))
934 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
935 if (EQ (prop, Qreverse_direction_charset))
937 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
941 return XCHARSET_NAME (obj);
943 signal_simple_error ("Unrecognized charset property name", prop);
944 return Qnil; /* not reached */
947 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
948 Return charset identification number of CHARSET.
952 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
955 /* #### We need to figure out which properties we really want to
958 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
959 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
961 (charset, ccl_program))
963 charset = Fget_charset (charset);
964 CHECK_VECTOR (ccl_program);
965 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
970 invalidate_charset_font_caches (Lisp_Object charset)
972 /* Invalidate font cache entries for charset on all devices. */
973 Lisp_Object devcons, concons, hash_table;
974 DEVICE_LOOP_NO_BREAK (devcons, concons)
976 struct device *d = XDEVICE (XCAR (devcons));
977 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
978 if (!UNBOUNDP (hash_table))
979 Fclrhash (hash_table);
983 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
984 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
985 Set the 'registry property of CHARSET to REGISTRY.
989 charset = Fget_charset (charset);
990 CHECK_STRING (registry);
991 XCHARSET_REGISTRY (charset) = registry;
992 invalidate_charset_font_caches (charset);
993 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
998 /************************************************************************/
999 /* Lisp primitives for working with characters */
1000 /************************************************************************/
1002 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1003 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
1005 (charset, arg1, arg2))
1007 struct Lisp_Charset *cs;
1009 int lowlim, highlim;
1011 charset = Fget_charset (charset);
1012 cs = XCHARSET (charset);
1014 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1015 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1016 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1017 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1021 if (a1 < lowlim || a1 > highlim)
1022 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1024 if (CHARSET_DIMENSION (cs) == 1)
1028 ("Charset is of dimension one; second octet must be nil", arg2);
1029 return make_char (MAKE_CHAR (charset, a1, 0));
1034 if (a2 < lowlim || a2 > highlim)
1035 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1037 return make_char (MAKE_CHAR (charset, a1, a2));
1040 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1041 Return the character set of char CH.
1045 CHECK_CHAR_COERCE_INT (ch);
1047 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1048 (CHAR_LEADING_BYTE (XCHAR (ch))));
1051 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1052 Return the octet numbered N (should be 0 or 1) of char CH.
1053 N defaults to 0 if omitted.
1057 Lisp_Object charset;
1060 CHECK_CHAR_COERCE_INT (ch);
1067 if (int_n != 0 && int_n != 1)
1068 signal_simple_error ("Octet number must be 0 or 1", n);
1070 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1071 return make_int (int_n == 0 ? c1 : c2);
1075 #ifdef ENABLE_COMPOSITE_CHARS
1076 /************************************************************************/
1077 /* composite character functions */
1078 /************************************************************************/
1081 lookup_composite_char (Bufbyte *str, int len)
1083 Lisp_Object lispstr = make_string (str, len);
1084 Lisp_Object ch = Fgethash (lispstr,
1085 Vcomposite_char_string2char_hash_table,
1091 if (composite_char_row_next >= 128)
1092 signal_simple_error ("No more composite chars available", lispstr);
1093 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1094 composite_char_col_next);
1095 Fputhash (make_char (emch), lispstr,
1096 Vcomposite_char_char2string_hash_table);
1097 Fputhash (lispstr, make_char (emch),
1098 Vcomposite_char_string2char_hash_table);
1099 composite_char_col_next++;
1100 if (composite_char_col_next >= 128)
1102 composite_char_col_next = 32;
1103 composite_char_row_next++;
1112 composite_char_string (Emchar ch)
1114 Lisp_Object str = Fgethash (make_char (ch),
1115 Vcomposite_char_char2string_hash_table,
1117 assert (!UNBOUNDP (str));
1121 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1122 Convert a string into a single composite character.
1123 The character is the result of overstriking all the characters in
1128 CHECK_STRING (string);
1129 return make_char (lookup_composite_char (XSTRING_DATA (string),
1130 XSTRING_LENGTH (string)));
1133 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1134 Return a string of the characters comprising a composite character.
1142 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1143 signal_simple_error ("Must be composite char", ch);
1144 return composite_char_string (emch);
1146 #endif /* ENABLE_COMPOSITE_CHARS */
1149 /************************************************************************/
1150 /* initialization */
1151 /************************************************************************/
1154 syms_of_mule_charset (void)
1156 DEFSUBR (Fcharsetp);
1157 DEFSUBR (Ffind_charset);
1158 DEFSUBR (Fget_charset);
1159 DEFSUBR (Fcharset_list);
1160 DEFSUBR (Fcharset_name);
1161 DEFSUBR (Fmake_charset);
1162 DEFSUBR (Fmake_reverse_direction_charset);
1163 /* DEFSUBR (Freverse_direction_charset); */
1164 DEFSUBR (Fcharset_from_attributes);
1165 DEFSUBR (Fcharset_doc_string);
1166 DEFSUBR (Fcharset_dimension);
1167 DEFSUBR (Fcharset_property);
1168 DEFSUBR (Fcharset_id);
1169 DEFSUBR (Fset_charset_ccl_program);
1170 DEFSUBR (Fset_charset_registry);
1172 DEFSUBR (Fmake_char);
1173 DEFSUBR (Fchar_charset);
1174 DEFSUBR (Fchar_octet);
1176 #ifdef ENABLE_COMPOSITE_CHARS
1177 DEFSUBR (Fmake_composite_char);
1178 DEFSUBR (Fcomposite_char_string);
1181 defsymbol (&Qcharsetp, "charsetp");
1182 defsymbol (&Qregistry, "registry");
1183 defsymbol (&Qfinal, "final");
1184 defsymbol (&Qgraphic, "graphic");
1185 defsymbol (&Qdirection, "direction");
1186 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1187 defsymbol (&Qccl_program, "ccl-program");
1188 defsymbol (&Qleading_byte, "leading-byte");
1190 defsymbol (&Ql2r, "l2r");
1191 defsymbol (&Qr2l, "r2l");
1193 /* Charsets, compatible with Emacs/Mule 19.33-delta
1194 Naming convention is Script-Charset[-Edition] */
1195 defsymbol (&Qascii, "ascii");
1196 defsymbol (&Qcontrol_1, "control-1");
1197 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1198 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1199 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1200 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1201 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1202 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1203 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1204 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1205 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1206 defsymbol (&Qthai_tis620, "thai-tis620");
1208 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1209 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1210 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1211 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1212 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1214 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1215 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1216 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1217 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1218 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1220 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1221 defsymbol (&Qcomposite, "composite");
1225 vars_of_mule_charset (void)
1229 /* Table of charsets indexed by leading byte. */
1230 for (i = 0; i < countof (charset_by_leading_byte); i++)
1231 charset_by_leading_byte[i] = Qnil;
1233 /* Table of charsets indexed by type/final-byte/direction. */
1234 for (i = 0; i < countof (charset_by_attributes); i++)
1235 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1236 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1237 charset_by_attributes[i][j][k] = Qnil;
1239 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1240 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1244 complex_vars_of_mule_charset (void)
1246 staticpro (&Vcharset_hash_table);
1247 Vcharset_hash_table =
1248 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1250 /* Predefined character sets. We store them into variables for
1254 make_charset (0, Qascii, LEADING_BYTE_ASCII, 1,
1255 CHARSET_TYPE_94, 1, 0, 'B',
1256 CHARSET_LEFT_TO_RIGHT,
1257 build_string ("ASCII (ISO 646 IRV)"),
1258 build_string ("iso8859-1"));
1259 Vcharset_control_1 =
1260 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1261 CHARSET_TYPE_94, 1, 1, 0,
1262 CHARSET_LEFT_TO_RIGHT,
1263 build_string ("Control characters"),
1265 Vcharset_latin_iso8859_1 =
1266 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
1267 CHARSET_TYPE_96, 1, 1, 'A',
1268 CHARSET_LEFT_TO_RIGHT,
1269 build_string ("ISO 8859-1 (Latin-1)"),
1270 build_string ("iso8859-1"));
1271 Vcharset_latin_iso8859_2 =
1272 make_charset (130, Qlatin_iso8859_2, LEADING_BYTE_LATIN_ISO8859_2, 2,
1273 CHARSET_TYPE_96, 1, 1, 'B',
1274 CHARSET_LEFT_TO_RIGHT,
1275 build_string ("ISO 8859-2 (Latin-2)"),
1276 build_string ("iso8859-2"));
1277 Vcharset_latin_iso8859_3 =
1278 make_charset (131, Qlatin_iso8859_3, LEADING_BYTE_LATIN_ISO8859_3, 2,
1279 CHARSET_TYPE_96, 1, 1, 'C',
1280 CHARSET_LEFT_TO_RIGHT,
1281 build_string ("ISO 8859-3 (Latin-3)"),
1282 build_string ("iso8859-3"));
1283 Vcharset_latin_iso8859_4 =
1284 make_charset (132, Qlatin_iso8859_4, LEADING_BYTE_LATIN_ISO8859_4, 2,
1285 CHARSET_TYPE_96, 1, 1, 'D',
1286 CHARSET_LEFT_TO_RIGHT,
1287 build_string ("ISO 8859-4 (Latin-4)"),
1288 build_string ("iso8859-4"));
1289 Vcharset_cyrillic_iso8859_5 =
1290 make_charset (140, Qcyrillic_iso8859_5, LEADING_BYTE_CYRILLIC_ISO8859_5, 2,
1291 CHARSET_TYPE_96, 1, 1, 'L',
1292 CHARSET_LEFT_TO_RIGHT,
1293 build_string ("ISO 8859-5 (Cyrillic)"),
1294 build_string ("iso8859-5"));
1295 Vcharset_arabic_iso8859_6 =
1296 make_charset (135, Qarabic_iso8859_6, LEADING_BYTE_ARABIC_ISO8859_6, 2,
1297 CHARSET_TYPE_96, 1, 1, 'G',
1298 CHARSET_RIGHT_TO_LEFT,
1299 build_string ("ISO 8859-6 (Arabic)"),
1300 build_string ("iso8859-6"));
1301 Vcharset_greek_iso8859_7 =
1302 make_charset (134, Qgreek_iso8859_7, LEADING_BYTE_GREEK_ISO8859_7, 2,
1303 CHARSET_TYPE_96, 1, 1, 'F',
1304 CHARSET_LEFT_TO_RIGHT,
1305 build_string ("ISO 8859-7 (Greek)"),
1306 build_string ("iso8859-7"));
1307 Vcharset_hebrew_iso8859_8 =
1308 make_charset (136, Qhebrew_iso8859_8, LEADING_BYTE_HEBREW_ISO8859_8, 2,
1309 CHARSET_TYPE_96, 1, 1, 'H',
1310 CHARSET_RIGHT_TO_LEFT,
1311 build_string ("ISO 8859-8 (Hebrew)"),
1312 build_string ("iso8859-8"));
1313 Vcharset_latin_iso8859_9 =
1314 make_charset (141, Qlatin_iso8859_9, LEADING_BYTE_LATIN_ISO8859_9, 2,
1315 CHARSET_TYPE_96, 1, 1, 'M',
1316 CHARSET_LEFT_TO_RIGHT,
1317 build_string ("ISO 8859-9 (Latin-5)"),
1318 build_string ("iso8859-9"));
1319 Vcharset_thai_tis620 =
1320 make_charset (133, Qthai_tis620, LEADING_BYTE_THAI_TIS620, 2,
1321 CHARSET_TYPE_96, 1, 1, 'T',
1322 CHARSET_LEFT_TO_RIGHT,
1323 build_string ("TIS 620.2529 (Thai)"),
1324 build_string ("tis620"));
1327 Vcharset_katakana_jisx0201 =
1328 make_charset (137, Qkatakana_jisx0201,
1329 LEADING_BYTE_KATAKANA_JISX0201, 2,
1330 CHARSET_TYPE_94, 1, 1, 'I',
1331 CHARSET_LEFT_TO_RIGHT,
1332 build_string ("JIS X0201-Katakana"),
1333 build_string ("jisx0201.1976"));
1334 Vcharset_latin_jisx0201 =
1335 make_charset (138, Qlatin_jisx0201,
1336 LEADING_BYTE_LATIN_JISX0201, 2,
1337 CHARSET_TYPE_94, 1, 0, 'J',
1338 CHARSET_LEFT_TO_RIGHT,
1339 build_string ("JIS X0201-Latin"),
1340 build_string ("jisx0201.1976"));
1341 Vcharset_japanese_jisx0208_1978 =
1342 make_charset (144, Qjapanese_jisx0208_1978,
1343 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1344 CHARSET_TYPE_94X94, 2, 0, '@',
1345 CHARSET_LEFT_TO_RIGHT,
1347 ("JIS X0208-1978 (Japanese Kanji; Old Version)"),
1348 build_string ("\\(jisx0208\\|jisc6226\\).19"));
1349 Vcharset_japanese_jisx0208 =
1350 make_charset (146, Qjapanese_jisx0208,
1351 LEADING_BYTE_JAPANESE_JISX0208, 3,
1352 CHARSET_TYPE_94X94, 2, 0, 'B',
1353 CHARSET_LEFT_TO_RIGHT,
1354 build_string ("JIS X0208-1983 (Japanese Kanji)"),
1355 build_string ("jisx0208.19\\(83\\|90\\)"));
1356 Vcharset_japanese_jisx0212 =
1357 make_charset (148, Qjapanese_jisx0212,
1358 LEADING_BYTE_JAPANESE_JISX0212, 3,
1359 CHARSET_TYPE_94X94, 2, 0, 'D',
1360 CHARSET_LEFT_TO_RIGHT,
1361 build_string ("JIS X0212 (Japanese Supplement)"),
1362 build_string ("jisx0212"));
1365 Vcharset_chinese_gb2312 =
1366 make_charset (145, Qchinese_gb2312, LEADING_BYTE_CHINESE_GB2312, 3,
1367 CHARSET_TYPE_94X94, 2, 0, 'A',
1368 CHARSET_LEFT_TO_RIGHT,
1369 build_string ("GB 2312 (Simplified Chinese)"),
1370 build_string ("gb2312"));
1371 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1372 Vcharset_chinese_cns11643_1 =
1373 make_charset (149, Qchinese_cns11643_1,
1374 LEADING_BYTE_CHINESE_CNS11643_1, 3,
1375 CHARSET_TYPE_94X94, 2, 0, 'G',
1376 CHARSET_LEFT_TO_RIGHT,
1378 ("CNS 11643 Plane 1 (Traditional Chinese for daily use)"),
1379 build_string (CHINESE_CNS_PLANE_RE("1")));
1380 Vcharset_chinese_cns11643_2 =
1381 make_charset (150, Qchinese_cns11643_2,
1382 LEADING_BYTE_CHINESE_CNS11643_2, 3,
1383 CHARSET_TYPE_94X94, 2, 0, 'H',
1384 CHARSET_LEFT_TO_RIGHT,
1386 ("CNS 11643 Plane 2 (Traditional Chinese for daily use)"),
1387 build_string (CHINESE_CNS_PLANE_RE("2")));
1388 Vcharset_chinese_big5_1 =
1389 make_charset (152, Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
1390 CHARSET_TYPE_94X94, 2, 0, '0',
1391 CHARSET_LEFT_TO_RIGHT,
1393 ("Big5 Level 1 (Traditional Chinese for daily use)"),
1394 build_string ("big5"));
1395 Vcharset_chinese_big5_2 =
1396 make_charset (153, Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
1397 CHARSET_TYPE_94X94, 2, 0, '1',
1398 CHARSET_LEFT_TO_RIGHT,
1400 ("Big5 Level 2 (Traditional Chinese for daily use)"),
1401 build_string ("big5"));
1403 Vcharset_korean_ksc5601 =
1404 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1405 CHARSET_TYPE_94X94, 2, 0, 'C',
1406 CHARSET_LEFT_TO_RIGHT,
1407 build_string ("KS C5601 (Hangul and Korean Hanja)"),
1408 build_string ("ksc5601"));
1410 #ifdef ENABLE_COMPOSITE_CHARS
1411 /* #### For simplicity, we put composite chars into a 96x96 charset.
1412 This is going to lead to problems because you can run out of
1413 room, esp. as we don't yet recycle numbers. */
1414 Vcharset_composite =
1415 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1416 CHARSET_TYPE_96X96, 2, 0, 0,
1417 CHARSET_LEFT_TO_RIGHT,
1418 build_string ("Composite characters"),
1421 composite_char_row_next = 32;
1422 composite_char_col_next = 32;
1424 Vcomposite_char_string2char_hash_table =
1425 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1426 Vcomposite_char_char2string_hash_table =
1427 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1428 staticpro (&Vcomposite_char_string2char_hash_table);
1429 staticpro (&Vcomposite_char_char2string_hash_table);
1430 #endif /* ENABLE_COMPOSITE_CHARS */