1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof (struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97 const Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 /* Composite characters are characters constructed by overstriking two
154 or more regular characters.
156 1) The old Mule implementation involves storing composite characters
157 in a buffer as a tag followed by all of the actual characters
158 used to make up the composite character. I think this is a bad
159 idea; it greatly complicates code that wants to handle strings
160 one character at a time because it has to deal with the possibility
161 of great big ungainly characters. It's much more reasonable to
162 simply store an index into a table of composite characters.
164 2) The current implementation only allows for 16,384 separate
165 composite characters over the lifetime of the XEmacs process.
166 This could become a potential problem if the user
167 edited lots of different files that use composite characters.
168 Due to FSF bogosity, increasing the number of allowable
169 composite characters under Mule would decrease the number
170 of possible faces that can exist. Mule already has shrunk
171 this to 2048, and further shrinkage would become uncomfortable.
172 No such problems exist in XEmacs.
174 Composite characters could be represented as 0x80 C1 C2 C3,
175 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
176 for slightly under 2^20 (one million) composite characters
177 over the XEmacs process lifetime, and you only need to
178 increase the size of a Mule character from 19 to 21 bits.
179 Or you could use 0x80 C1 C2 C3 C4, allowing for about
180 85 million (slightly over 2^26) composite characters. */
183 /************************************************************************/
184 /* Basic Emchar functions */
185 /************************************************************************/
187 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
188 string in STR. Returns the number of bytes stored.
189 Do not call this directly. Use the macro set_charptr_emchar() instead.
193 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
201 BREAKUP_CHAR (c, charset, c1, c2);
202 lb = CHAR_LEADING_BYTE (c);
203 if (LEADING_BYTE_PRIVATE_P (lb))
204 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
206 if (EQ (charset, Vcharset_control_1))
215 /* Return the first character from a Mule-encoded string in STR,
216 assuming it's non-ASCII. Do not call this directly.
217 Use the macro charptr_emchar() instead. */
220 non_ascii_charptr_emchar (const Bufbyte *str)
222 Bufbyte i0 = *str, i1, i2 = 0;
225 if (i0 == LEADING_BYTE_CONTROL_1)
226 return (Emchar) (*++str - 0x20);
228 if (LEADING_BYTE_PREFIX_P (i0))
233 charset = CHARSET_BY_LEADING_BYTE (i0);
234 if (XCHARSET_DIMENSION (charset) == 2)
237 return MAKE_CHAR (charset, i1, i2);
240 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
241 Do not call this directly. Use the macro valid_char_p() instead. */
244 non_ascii_valid_char_p (Emchar ch)
248 /* Must have only lowest 19 bits set */
252 f1 = CHAR_FIELD1 (ch);
253 f2 = CHAR_FIELD2 (ch);
254 f3 = CHAR_FIELD3 (ch);
260 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
261 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
262 f2 > MAX_CHAR_FIELD2_PRIVATE)
267 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
268 f2 <= MAX_CHAR_FIELD2_PRIVATE))
272 NOTE: This takes advantage of the fact that
273 FIELD2_TO_OFFICIAL_LEADING_BYTE and
274 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
276 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
277 if (EQ (charset, Qnil))
279 return (XCHARSET_CHARS (charset) == 96);
285 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
286 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
287 f1 > MAX_CHAR_FIELD1_PRIVATE)
289 if (f2 < 0x20 || f3 < 0x20)
292 #ifdef ENABLE_COMPOSITE_CHARS
293 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
295 if (UNBOUNDP (Fgethash (make_int (ch),
296 Vcomposite_char_char2string_hash_table,
301 #endif /* ENABLE_COMPOSITE_CHARS */
303 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
304 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
307 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
309 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
314 if (EQ (charset, Qnil))
316 return (XCHARSET_CHARS (charset) == 96);
321 /************************************************************************/
322 /* Basic string functions */
323 /************************************************************************/
325 /* Copy the character pointed to by SRC into DST. Do not call this
326 directly. Use the macro charptr_copy_char() instead.
327 Return the number of bytes copied. */
330 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
332 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
334 for (i = bytes; i; i--, dst++, src++)
340 /************************************************************************/
341 /* streams of Emchars */
342 /************************************************************************/
344 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
345 The functions below are not meant to be called directly; use
346 the macros in insdel.h. */
349 Lstream_get_emchar_1 (Lstream *stream, int ch)
351 Bufbyte str[MAX_EMCHAR_LEN];
352 Bufbyte *strptr = str;
355 str[0] = (Bufbyte) ch;
357 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
359 int c = Lstream_getc (stream);
360 bufpos_checking_assert (c >= 0);
361 *++strptr = (Bufbyte) c;
363 return charptr_emchar (str);
367 Lstream_fput_emchar (Lstream *stream, Emchar ch)
369 Bufbyte str[MAX_EMCHAR_LEN];
370 Bytecount len = set_charptr_emchar (str, ch);
371 return Lstream_write (stream, str, len);
375 Lstream_funget_emchar (Lstream *stream, Emchar ch)
377 Bufbyte str[MAX_EMCHAR_LEN];
378 Bytecount len = set_charptr_emchar (str, ch);
379 Lstream_unread (stream, str, len);
383 /************************************************************************/
385 /************************************************************************/
388 mark_charset (Lisp_Object obj)
390 Lisp_Charset *cs = XCHARSET (obj);
392 mark_object (cs->short_name);
393 mark_object (cs->long_name);
394 mark_object (cs->doc_string);
395 mark_object (cs->registry);
396 mark_object (cs->ccl_program);
401 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
403 Lisp_Charset *cs = XCHARSET (obj);
407 error ("printing unreadable object #<charset %s 0x%x>",
408 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
411 write_c_string ("#<charset ", printcharfun);
412 print_internal (CHARSET_NAME (cs), printcharfun, 0);
413 write_c_string (" ", printcharfun);
414 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
415 write_c_string (" ", printcharfun);
416 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
417 write_c_string (" ", printcharfun);
418 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
419 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
420 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
421 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
422 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
424 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
425 CHARSET_COLUMNS (cs),
426 CHARSET_GRAPHIC (cs),
428 write_c_string (buf, printcharfun);
429 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
430 sprintf (buf, " 0x%x>", cs->header.uid);
431 write_c_string (buf, printcharfun);
434 static const struct lrecord_description charset_description[] = {
435 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
436 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
437 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
438 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
439 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
440 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
441 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
445 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
446 mark_charset, print_charset, 0, 0, 0, charset_description,
449 /* Make a new charset. */
450 /* #### SJT Should generic properties be allowed? */
452 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
453 unsigned char type, unsigned char columns, unsigned char graphic,
454 Bufbyte final, unsigned char direction, Lisp_Object short_name,
455 Lisp_Object long_name, Lisp_Object doc,
459 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
463 XSETCHARSET (obj, cs);
465 CHARSET_ID (cs) = id;
466 CHARSET_NAME (cs) = name;
467 CHARSET_SHORT_NAME (cs) = short_name;
468 CHARSET_LONG_NAME (cs) = long_name;
469 CHARSET_REP_BYTES (cs) = rep_bytes;
470 CHARSET_DIRECTION (cs) = direction;
471 CHARSET_TYPE (cs) = type;
472 CHARSET_COLUMNS (cs) = columns;
473 CHARSET_GRAPHIC (cs) = graphic;
474 CHARSET_FINAL (cs) = final;
475 CHARSET_DOC_STRING (cs) = doc;
476 CHARSET_REGISTRY (cs) = reg;
477 CHARSET_CCL_PROGRAM (cs) = Qnil;
478 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
480 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
481 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
482 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
483 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
487 /* some charsets do not have final characters. This includes
488 ASCII, Control-1, Composite, and the two faux private
490 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
491 chlook->charset_by_attributes[type][final][direction] = obj;
494 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
495 chlook->charset_by_leading_byte[id - 128] = obj;
497 /* Some charsets are "faux" and don't have names or really exist at
498 all except in the leading-byte table. */
500 Fputhash (name, obj, Vcharset_hash_table);
505 get_unallocated_leading_byte (int dimension)
511 if (chlook->next_allocated_1_byte_leading_byte >
512 MAX_LEADING_BYTE_PRIVATE_1)
515 lb = chlook->next_allocated_1_byte_leading_byte++;
519 /* awfully fragile, but correct */
520 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
521 if (chlook->next_allocated_2_byte_leading_byte == 0)
523 if (chlook->next_allocated_2_byte_leading_byte >
524 MAX_LEADING_BYTE_PRIVATE_2)
528 lb = chlook->next_allocated_2_byte_leading_byte++;
533 ("No more character sets free for this dimension",
534 make_int (dimension));
540 /************************************************************************/
541 /* Basic charset Lisp functions */
542 /************************************************************************/
544 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
545 Return non-nil if OBJECT is a charset.
549 return CHARSETP (object) ? Qt : Qnil;
552 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
553 Retrieve the charset of the given name.
554 If CHARSET-OR-NAME is a charset object, it is simply returned.
555 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
556 nil is returned. Otherwise the associated charset object is returned.
560 if (CHARSETP (charset_or_name))
561 return charset_or_name;
563 CHECK_SYMBOL (charset_or_name);
564 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
567 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
568 Retrieve the charset of the given name.
569 Same as `find-charset' except an error is signalled if there is no such
570 charset instead of returning nil.
574 Lisp_Object charset = Ffind_charset (name);
577 signal_simple_error ("No such charset", name);
581 /* We store the charsets in hash tables with the names as the key and the
582 actual charset object as the value. Occasionally we need to use them
583 in a list format. These routines provide us with that. */
584 struct charset_list_closure
586 Lisp_Object *charset_list;
590 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
591 void *charset_list_closure)
593 /* This function can GC */
594 struct charset_list_closure *chcl =
595 (struct charset_list_closure*) charset_list_closure;
596 Lisp_Object *charset_list = chcl->charset_list;
598 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
602 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
603 Return a list of the names of all defined charsets.
607 Lisp_Object charset_list = Qnil;
609 struct charset_list_closure charset_list_closure;
611 GCPRO1 (charset_list);
612 charset_list_closure.charset_list = &charset_list;
613 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
614 &charset_list_closure);
620 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
621 Return the name of charset CHARSET.
625 return XCHARSET_NAME (Fget_charset (charset));
628 /* #### SJT Should generic properties be allowed? */
629 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
630 Define a new character set.
631 This function is for use with Mule support.
632 NAME is a symbol, the name by which the character set is normally referred.
633 DOC-STRING is a string describing the character set.
634 PROPS is a property list, describing the specific nature of the
635 character set. Recognized properties are:
637 'short-name Short version of the charset name (ex: Latin-1)
638 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
639 'registry A regular expression matching the font registry field for
641 'dimension Number of octets used to index a character in this charset.
642 Either 1 or 2. Defaults to 1.
643 'columns Number of columns used to display a character in this charset.
644 Only used in TTY mode. (Under X, the actual width of a
645 character can be derived from the font used to display the
646 characters.) If unspecified, defaults to the dimension
647 (this is almost always the correct value).
648 'chars Number of characters in each dimension (94 or 96).
649 Defaults to 94. Note that if the dimension is 2, the
650 character set thus described is 94x94 or 96x96.
651 'final Final byte of ISO 2022 escape sequence. Must be
652 supplied. Each combination of (DIMENSION, CHARS) defines a
653 separate namespace for final bytes. Note that ISO
654 2022 restricts the final byte to the range
655 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
656 dimension == 2. Note also that final bytes in the range
657 0x30 - 0x3F are reserved for user-defined (not official)
659 'graphic 0 (use left half of font on output) or 1 (use right half
660 of font on output). Defaults to 0. For example, for
661 a font whose registry is ISO8859-1, the left half
662 (octets 0x20 - 0x7F) is the `ascii' character set, while
663 the right half (octets 0xA0 - 0xFF) is the `latin-1'
664 character set. With 'graphic set to 0, the octets
665 will have their high bit cleared; with it set to 1,
666 the octets will have their high bit set.
667 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
669 'ccl-program A compiled CCL program used to convert a character in
670 this charset into an index into the font. This is in
671 addition to the 'graphic property. The CCL program
672 is passed the octets of the character, with the high
673 bit cleared and set depending upon whether the value
674 of the 'graphic property is 0 or 1.
676 (name, doc_string, props))
678 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
679 int direction = CHARSET_LEFT_TO_RIGHT;
681 Lisp_Object registry = Qnil;
683 Lisp_Object ccl_program = Qnil;
684 Lisp_Object short_name = Qnil, long_name = Qnil;
687 if (!NILP (doc_string))
688 CHECK_STRING (doc_string);
690 charset = Ffind_charset (name);
692 signal_simple_error ("Cannot redefine existing charset", name);
695 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
697 if (EQ (keyword, Qshort_name))
699 CHECK_STRING (value);
703 else if (EQ (keyword, Qlong_name))
705 CHECK_STRING (value);
709 else if (EQ (keyword, Qdimension))
712 dimension = XINT (value);
713 if (dimension < 1 || dimension > 2)
714 signal_simple_error ("Invalid value for 'dimension", value);
717 else if (EQ (keyword, Qchars))
720 chars = XINT (value);
721 if (chars != 94 && chars != 96)
722 signal_simple_error ("Invalid value for 'chars", value);
725 else if (EQ (keyword, Qcolumns))
728 columns = XINT (value);
729 if (columns != 1 && columns != 2)
730 signal_simple_error ("Invalid value for 'columns", value);
733 else if (EQ (keyword, Qgraphic))
736 graphic = XINT (value);
737 if (graphic < 0 || graphic > 1)
738 signal_simple_error ("Invalid value for 'graphic", value);
741 else if (EQ (keyword, Qregistry))
743 CHECK_STRING (value);
747 else if (EQ (keyword, Qdirection))
749 if (EQ (value, Ql2r))
750 direction = CHARSET_LEFT_TO_RIGHT;
751 else if (EQ (value, Qr2l))
752 direction = CHARSET_RIGHT_TO_LEFT;
754 signal_simple_error ("Invalid value for 'direction", value);
757 else if (EQ (keyword, Qfinal))
759 CHECK_CHAR_COERCE_INT (value);
760 final = XCHAR (value);
761 if (final < '0' || final > '~')
762 signal_simple_error ("Invalid value for 'final", value);
765 else if (EQ (keyword, Qccl_program))
767 struct ccl_program test_ccl;
769 if (setup_ccl_program (&test_ccl, value) < 0)
770 signal_simple_error ("Invalid value for 'ccl-program", value);
775 signal_simple_error ("Unrecognized property", keyword);
780 error ("'final must be specified");
781 if (dimension == 2 && final > 0x5F)
783 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
787 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
789 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
791 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
792 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
794 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
796 id = get_unallocated_leading_byte (dimension);
798 if (NILP (doc_string))
799 doc_string = build_string ("");
802 registry = build_string ("");
804 if (NILP (short_name))
805 XSETSTRING (short_name, XSYMBOL (name)->name);
807 if (NILP (long_name))
808 long_name = doc_string;
812 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
813 final, direction, short_name, long_name, doc_string, registry);
814 if (!NILP (ccl_program))
815 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
819 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
821 Make a charset equivalent to CHARSET but which goes in the opposite direction.
822 NEW-NAME is the name of the new charset. Return the new charset.
826 Lisp_Object new_charset = Qnil;
827 int id, dimension, columns, graphic, final;
829 Lisp_Object registry, doc_string, short_name, long_name;
832 charset = Fget_charset (charset);
833 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
834 signal_simple_error ("Charset already has reverse-direction charset",
837 CHECK_SYMBOL (new_name);
838 if (!NILP (Ffind_charset (new_name)))
839 signal_simple_error ("Cannot redefine existing charset", new_name);
841 cs = XCHARSET (charset);
843 type = CHARSET_TYPE (cs);
844 columns = CHARSET_COLUMNS (cs);
845 dimension = CHARSET_DIMENSION (cs);
846 id = get_unallocated_leading_byte (dimension);
848 graphic = CHARSET_GRAPHIC (cs);
849 final = CHARSET_FINAL (cs);
850 direction = CHARSET_RIGHT_TO_LEFT;
851 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
852 direction = CHARSET_LEFT_TO_RIGHT;
853 doc_string = CHARSET_DOC_STRING (cs);
854 short_name = CHARSET_SHORT_NAME (cs);
855 long_name = CHARSET_LONG_NAME (cs);
856 registry = CHARSET_REGISTRY (cs);
858 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
859 graphic, final, direction, short_name, long_name,
860 doc_string, registry);
862 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
863 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
868 /* #### Reverse direction charsets not yet implemented. */
870 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
872 Return the reverse-direction charset parallel to CHARSET, if any.
873 This is the charset with the same properties (in particular, the same
874 dimension, number of characters per dimension, and final byte) as
875 CHARSET but whose characters are displayed in the opposite direction.
879 charset = Fget_charset (charset);
880 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
884 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
885 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
886 If DIRECTION is omitted, both directions will be checked (left-to-right
887 will be returned if character sets exist for both directions).
889 (dimension, chars, final, direction))
891 int dm, ch, fi, di = -1;
893 Lisp_Object obj = Qnil;
895 CHECK_INT (dimension);
896 dm = XINT (dimension);
897 if (dm < 1 || dm > 2)
898 signal_simple_error ("Invalid value for DIMENSION", dimension);
902 if (ch != 94 && ch != 96)
903 signal_simple_error ("Invalid value for CHARS", chars);
905 CHECK_CHAR_COERCE_INT (final);
907 if (fi < '0' || fi > '~')
908 signal_simple_error ("Invalid value for FINAL", final);
910 if (EQ (direction, Ql2r))
911 di = CHARSET_LEFT_TO_RIGHT;
912 else if (EQ (direction, Qr2l))
913 di = CHARSET_RIGHT_TO_LEFT;
914 else if (!NILP (direction))
915 signal_simple_error ("Invalid value for DIRECTION", direction);
917 if (dm == 2 && fi > 0x5F)
919 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
922 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
924 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
928 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
930 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
933 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
936 return XCHARSET_NAME (obj);
940 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
941 Return short name of CHARSET.
945 return XCHARSET_SHORT_NAME (Fget_charset (charset));
948 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
949 Return long name of CHARSET.
953 return XCHARSET_LONG_NAME (Fget_charset (charset));
956 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
957 Return description of CHARSET.
961 return XCHARSET_DOC_STRING (Fget_charset (charset));
964 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
965 Return dimension of CHARSET.
969 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
972 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
973 Return property PROP of CHARSET, a charset object or symbol naming a charset.
974 Recognized properties are those listed in `make-charset', as well as
975 'name and 'doc-string.
981 charset = Fget_charset (charset);
982 cs = XCHARSET (charset);
985 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
986 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
987 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
988 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
989 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
990 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
991 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
992 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
993 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
994 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
995 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
996 if (EQ (prop, Qdirection))
997 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
998 if (EQ (prop, Qreverse_direction_charset))
1000 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1001 /* #### Is this translation OK? If so, error checking sufficient? */
1002 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1004 signal_simple_error ("Unrecognized charset property name", prop);
1005 return Qnil; /* not reached */
1008 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1009 Return charset identification number of CHARSET.
1013 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1016 /* #### We need to figure out which properties we really want to
1019 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1020 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1022 (charset, ccl_program))
1024 struct ccl_program test_ccl;
1026 charset = Fget_charset (charset);
1027 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1028 signal_simple_error ("Invalid ccl-program", ccl_program);
1029 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1034 invalidate_charset_font_caches (Lisp_Object charset)
1036 /* Invalidate font cache entries for charset on all devices. */
1037 Lisp_Object devcons, concons, hash_table;
1038 DEVICE_LOOP_NO_BREAK (devcons, concons)
1040 struct device *d = XDEVICE (XCAR (devcons));
1041 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1042 if (!UNBOUNDP (hash_table))
1043 Fclrhash (hash_table);
1047 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1048 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1049 Set the 'registry property of CHARSET to REGISTRY.
1051 (charset, registry))
1053 charset = Fget_charset (charset);
1054 CHECK_STRING (registry);
1055 XCHARSET_REGISTRY (charset) = registry;
1056 invalidate_charset_font_caches (charset);
1057 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1062 /************************************************************************/
1063 /* Lisp primitives for working with characters */
1064 /************************************************************************/
1066 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1067 Make a character from CHARSET and octets ARG1 and ARG2.
1068 ARG2 is required only for characters from two-dimensional charsets.
1069 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1070 character s with caron.
1072 (charset, arg1, arg2))
1076 int lowlim, highlim;
1078 charset = Fget_charset (charset);
1079 cs = XCHARSET (charset);
1081 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1082 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1083 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1084 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1087 /* It is useful (and safe, according to Olivier Galibert) to strip
1088 the 8th bit off ARG1 and ARG2 because it allows programmers to
1089 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1090 Latin 2 code of the character. */
1091 a1 = XINT (arg1) & 0x7f;
1092 if (a1 < lowlim || a1 > highlim)
1093 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1095 if (CHARSET_DIMENSION (cs) == 1)
1099 ("Charset is of dimension one; second octet must be nil", arg2);
1100 return make_char (MAKE_CHAR (charset, a1, 0));
1104 a2 = XINT (arg2) & 0x7f;
1105 if (a2 < lowlim || a2 > highlim)
1106 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1108 return make_char (MAKE_CHAR (charset, a1, a2));
1111 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1112 Return the character set of CHARACTER.
1116 CHECK_CHAR_COERCE_INT (character);
1118 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1119 (CHAR_LEADING_BYTE (XCHAR (character))));
1122 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1123 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1124 N defaults to 0 if omitted.
1128 Lisp_Object charset;
1131 CHECK_CHAR_COERCE_INT (character);
1133 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
1135 if (NILP (n) || EQ (n, Qzero))
1136 return make_int (octet0);
1137 else if (EQ (n, make_int (1)))
1138 return make_int (octet1);
1140 signal_simple_error ("Octet number must be 0 or 1", n);
1143 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1144 Return list of charset and one or two position-codes of CHARACTER.
1148 /* This function can GC */
1149 struct gcpro gcpro1, gcpro2;
1150 Lisp_Object charset = Qnil;
1151 Lisp_Object rc = Qnil;
1154 GCPRO2 (charset, rc);
1155 CHECK_CHAR_COERCE_INT (character);
1157 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1159 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1161 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1165 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1173 #ifdef ENABLE_COMPOSITE_CHARS
1174 /************************************************************************/
1175 /* composite character functions */
1176 /************************************************************************/
1179 lookup_composite_char (Bufbyte *str, int len)
1181 Lisp_Object lispstr = make_string (str, len);
1182 Lisp_Object ch = Fgethash (lispstr,
1183 Vcomposite_char_string2char_hash_table,
1189 if (composite_char_row_next >= 128)
1190 signal_simple_error ("No more composite chars available", lispstr);
1191 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1192 composite_char_col_next);
1193 Fputhash (make_char (emch), lispstr,
1194 Vcomposite_char_char2string_hash_table);
1195 Fputhash (lispstr, make_char (emch),
1196 Vcomposite_char_string2char_hash_table);
1197 composite_char_col_next++;
1198 if (composite_char_col_next >= 128)
1200 composite_char_col_next = 32;
1201 composite_char_row_next++;
1210 composite_char_string (Emchar ch)
1212 Lisp_Object str = Fgethash (make_char (ch),
1213 Vcomposite_char_char2string_hash_table,
1215 assert (!UNBOUNDP (str));
1219 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1220 Convert a string into a single composite character.
1221 The character is the result of overstriking all the characters in
1226 CHECK_STRING (string);
1227 return make_char (lookup_composite_char (XSTRING_DATA (string),
1228 XSTRING_LENGTH (string)));
1231 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1232 Return a string of the characters comprising a composite character.
1240 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1241 signal_simple_error ("Must be composite char", ch);
1242 return composite_char_string (emch);
1244 #endif /* ENABLE_COMPOSITE_CHARS */
1247 /************************************************************************/
1248 /* initialization */
1249 /************************************************************************/
1252 syms_of_mule_charset (void)
1254 INIT_LRECORD_IMPLEMENTATION (charset);
1256 DEFSUBR (Fcharsetp);
1257 DEFSUBR (Ffind_charset);
1258 DEFSUBR (Fget_charset);
1259 DEFSUBR (Fcharset_list);
1260 DEFSUBR (Fcharset_name);
1261 DEFSUBR (Fmake_charset);
1262 DEFSUBR (Fmake_reverse_direction_charset);
1263 /* DEFSUBR (Freverse_direction_charset); */
1264 DEFSUBR (Fcharset_from_attributes);
1265 DEFSUBR (Fcharset_short_name);
1266 DEFSUBR (Fcharset_long_name);
1267 DEFSUBR (Fcharset_description);
1268 DEFSUBR (Fcharset_dimension);
1269 DEFSUBR (Fcharset_property);
1270 DEFSUBR (Fcharset_id);
1271 DEFSUBR (Fset_charset_ccl_program);
1272 DEFSUBR (Fset_charset_registry);
1274 DEFSUBR (Fmake_char);
1275 DEFSUBR (Fchar_charset);
1276 DEFSUBR (Fchar_octet);
1277 DEFSUBR (Fsplit_char);
1279 #ifdef ENABLE_COMPOSITE_CHARS
1280 DEFSUBR (Fmake_composite_char);
1281 DEFSUBR (Fcomposite_char_string);
1284 defsymbol (&Qcharsetp, "charsetp");
1285 defsymbol (&Qregistry, "registry");
1286 defsymbol (&Qfinal, "final");
1287 defsymbol (&Qgraphic, "graphic");
1288 defsymbol (&Qdirection, "direction");
1289 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1290 defsymbol (&Qshort_name, "short-name");
1291 defsymbol (&Qlong_name, "long-name");
1293 defsymbol (&Ql2r, "l2r");
1294 defsymbol (&Qr2l, "r2l");
1296 /* Charsets, compatible with FSF 20.3
1297 Naming convention is Script-Charset[-Edition] */
1298 defsymbol (&Qascii, "ascii");
1299 defsymbol (&Qcontrol_1, "control-1");
1300 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1301 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1302 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1303 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1304 defsymbol (&Qthai_tis620, "thai-tis620");
1305 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1306 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1307 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1308 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1309 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1310 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1311 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1312 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1313 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1314 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1315 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1316 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1317 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1318 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1319 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1320 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1322 defsymbol (&Qcomposite, "composite");
1326 vars_of_mule_charset (void)
1330 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
1331 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
1333 /* Table of charsets indexed by leading byte. */
1334 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1335 chlook->charset_by_leading_byte[i] = Qnil;
1337 /* Table of charsets indexed by type/final-byte/direction. */
1338 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1339 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1340 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1341 chlook->charset_by_attributes[i][j][k] = Qnil;
1343 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1344 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1348 complex_vars_of_mule_charset (void)
1350 staticpro (&Vcharset_hash_table);
1351 Vcharset_hash_table =
1352 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1354 /* Predefined character sets. We store them into variables for
1357 staticpro (&Vcharset_ascii);
1359 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1360 CHARSET_TYPE_94, 1, 0, 'B',
1361 CHARSET_LEFT_TO_RIGHT,
1362 build_string ("ASCII"),
1363 build_string ("ASCII)"),
1364 build_string ("ASCII (ISO646 IRV)"),
1365 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1366 staticpro (&Vcharset_control_1);
1367 Vcharset_control_1 =
1368 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1369 CHARSET_TYPE_94, 1, 1, 0,
1370 CHARSET_LEFT_TO_RIGHT,
1371 build_string ("C1"),
1372 build_string ("Control characters"),
1373 build_string ("Control characters 128-191"),
1375 staticpro (&Vcharset_latin_iso8859_1);
1376 Vcharset_latin_iso8859_1 =
1377 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1378 CHARSET_TYPE_96, 1, 1, 'A',
1379 CHARSET_LEFT_TO_RIGHT,
1380 build_string ("Latin-1"),
1381 build_string ("ISO8859-1 (Latin-1)"),
1382 build_string ("ISO8859-1 (Latin-1)"),
1383 build_string ("iso8859-1"));
1384 staticpro (&Vcharset_latin_iso8859_2);
1385 Vcharset_latin_iso8859_2 =
1386 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1387 CHARSET_TYPE_96, 1, 1, 'B',
1388 CHARSET_LEFT_TO_RIGHT,
1389 build_string ("Latin-2"),
1390 build_string ("ISO8859-2 (Latin-2)"),
1391 build_string ("ISO8859-2 (Latin-2)"),
1392 build_string ("iso8859-2"));
1393 staticpro (&Vcharset_latin_iso8859_3);
1394 Vcharset_latin_iso8859_3 =
1395 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1396 CHARSET_TYPE_96, 1, 1, 'C',
1397 CHARSET_LEFT_TO_RIGHT,
1398 build_string ("Latin-3"),
1399 build_string ("ISO8859-3 (Latin-3)"),
1400 build_string ("ISO8859-3 (Latin-3)"),
1401 build_string ("iso8859-3"));
1402 staticpro (&Vcharset_latin_iso8859_4);
1403 Vcharset_latin_iso8859_4 =
1404 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1405 CHARSET_TYPE_96, 1, 1, 'D',
1406 CHARSET_LEFT_TO_RIGHT,
1407 build_string ("Latin-4"),
1408 build_string ("ISO8859-4 (Latin-4)"),
1409 build_string ("ISO8859-4 (Latin-4)"),
1410 build_string ("iso8859-4"));
1411 staticpro (&Vcharset_thai_tis620);
1412 Vcharset_thai_tis620 =
1413 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1414 CHARSET_TYPE_96, 1, 1, 'T',
1415 CHARSET_LEFT_TO_RIGHT,
1416 build_string ("TIS620"),
1417 build_string ("TIS620 (Thai)"),
1418 build_string ("TIS620.2529 (Thai)"),
1419 build_string ("tis620"));
1420 staticpro (&Vcharset_greek_iso8859_7);
1421 Vcharset_greek_iso8859_7 =
1422 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1423 CHARSET_TYPE_96, 1, 1, 'F',
1424 CHARSET_LEFT_TO_RIGHT,
1425 build_string ("ISO8859-7"),
1426 build_string ("ISO8859-7 (Greek)"),
1427 build_string ("ISO8859-7 (Greek)"),
1428 build_string ("iso8859-7"));
1429 staticpro (&Vcharset_arabic_iso8859_6);
1430 Vcharset_arabic_iso8859_6 =
1431 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1432 CHARSET_TYPE_96, 1, 1, 'G',
1433 CHARSET_RIGHT_TO_LEFT,
1434 build_string ("ISO8859-6"),
1435 build_string ("ISO8859-6 (Arabic)"),
1436 build_string ("ISO8859-6 (Arabic)"),
1437 build_string ("iso8859-6"));
1438 staticpro (&Vcharset_hebrew_iso8859_8);
1439 Vcharset_hebrew_iso8859_8 =
1440 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1441 CHARSET_TYPE_96, 1, 1, 'H',
1442 CHARSET_RIGHT_TO_LEFT,
1443 build_string ("ISO8859-8"),
1444 build_string ("ISO8859-8 (Hebrew)"),
1445 build_string ("ISO8859-8 (Hebrew)"),
1446 build_string ("iso8859-8"));
1447 staticpro (&Vcharset_katakana_jisx0201);
1448 Vcharset_katakana_jisx0201 =
1449 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1450 CHARSET_TYPE_94, 1, 1, 'I',
1451 CHARSET_LEFT_TO_RIGHT,
1452 build_string ("JISX0201 Kana"),
1453 build_string ("JISX0201.1976 (Japanese Kana)"),
1454 build_string ("JISX0201.1976 Japanese Kana"),
1455 build_string ("jisx0201.1976"));
1456 staticpro (&Vcharset_latin_jisx0201);
1457 Vcharset_latin_jisx0201 =
1458 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1459 CHARSET_TYPE_94, 1, 0, 'J',
1460 CHARSET_LEFT_TO_RIGHT,
1461 build_string ("JISX0201 Roman"),
1462 build_string ("JISX0201.1976 (Japanese Roman)"),
1463 build_string ("JISX0201.1976 Japanese Roman"),
1464 build_string ("jisx0201.1976"));
1465 staticpro (&Vcharset_cyrillic_iso8859_5);
1466 Vcharset_cyrillic_iso8859_5 =
1467 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1468 CHARSET_TYPE_96, 1, 1, 'L',
1469 CHARSET_LEFT_TO_RIGHT,
1470 build_string ("ISO8859-5"),
1471 build_string ("ISO8859-5 (Cyrillic)"),
1472 build_string ("ISO8859-5 (Cyrillic)"),
1473 build_string ("iso8859-5"));
1474 staticpro (&Vcharset_latin_iso8859_9);
1475 Vcharset_latin_iso8859_9 =
1476 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1477 CHARSET_TYPE_96, 1, 1, 'M',
1478 CHARSET_LEFT_TO_RIGHT,
1479 build_string ("Latin-5"),
1480 build_string ("ISO8859-9 (Latin-5)"),
1481 build_string ("ISO8859-9 (Latin-5)"),
1482 build_string ("iso8859-9"));
1483 staticpro (&Vcharset_japanese_jisx0208_1978);
1484 Vcharset_japanese_jisx0208_1978 =
1485 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1486 CHARSET_TYPE_94X94, 2, 0, '@',
1487 CHARSET_LEFT_TO_RIGHT,
1488 build_string ("JISX0208.1978"),
1489 build_string ("JISX0208.1978 (Japanese)"),
1491 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1492 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1493 staticpro (&Vcharset_chinese_gb2312);
1494 Vcharset_chinese_gb2312 =
1495 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1496 CHARSET_TYPE_94X94, 2, 0, 'A',
1497 CHARSET_LEFT_TO_RIGHT,
1498 build_string ("GB2312"),
1499 build_string ("GB2312)"),
1500 build_string ("GB2312 Chinese simplified"),
1501 build_string ("gb2312"));
1502 staticpro (&Vcharset_japanese_jisx0208);
1503 Vcharset_japanese_jisx0208 =
1504 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1505 CHARSET_TYPE_94X94, 2, 0, 'B',
1506 CHARSET_LEFT_TO_RIGHT,
1507 build_string ("JISX0208"),
1508 build_string ("JISX0208.1983/1990 (Japanese)"),
1509 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1510 build_string ("jisx0208.19\\(83\\|90\\)"));
1511 staticpro (&Vcharset_korean_ksc5601);
1512 Vcharset_korean_ksc5601 =
1513 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1514 CHARSET_TYPE_94X94, 2, 0, 'C',
1515 CHARSET_LEFT_TO_RIGHT,
1516 build_string ("KSC5601"),
1517 build_string ("KSC5601 (Korean"),
1518 build_string ("KSC5601 Korean Hangul and Hanja"),
1519 build_string ("ksc5601"));
1520 staticpro (&Vcharset_japanese_jisx0212);
1521 Vcharset_japanese_jisx0212 =
1522 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1523 CHARSET_TYPE_94X94, 2, 0, 'D',
1524 CHARSET_LEFT_TO_RIGHT,
1525 build_string ("JISX0212"),
1526 build_string ("JISX0212 (Japanese)"),
1527 build_string ("JISX0212 Japanese Supplement"),
1528 build_string ("jisx0212"));
1530 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1531 staticpro (&Vcharset_chinese_cns11643_1);
1532 Vcharset_chinese_cns11643_1 =
1533 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1534 CHARSET_TYPE_94X94, 2, 0, 'G',
1535 CHARSET_LEFT_TO_RIGHT,
1536 build_string ("CNS11643-1"),
1537 build_string ("CNS11643-1 (Chinese traditional)"),
1539 ("CNS 11643 Plane 1 Chinese traditional"),
1540 build_string (CHINESE_CNS_PLANE_RE("1")));
1541 staticpro (&Vcharset_chinese_cns11643_2);
1542 Vcharset_chinese_cns11643_2 =
1543 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1544 CHARSET_TYPE_94X94, 2, 0, 'H',
1545 CHARSET_LEFT_TO_RIGHT,
1546 build_string ("CNS11643-2"),
1547 build_string ("CNS11643-2 (Chinese traditional)"),
1549 ("CNS 11643 Plane 2 Chinese traditional"),
1550 build_string (CHINESE_CNS_PLANE_RE("2")));
1551 staticpro (&Vcharset_chinese_big5_1);
1552 Vcharset_chinese_big5_1 =
1553 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1554 CHARSET_TYPE_94X94, 2, 0, '0',
1555 CHARSET_LEFT_TO_RIGHT,
1556 build_string ("Big5"),
1557 build_string ("Big5 (Level-1)"),
1559 ("Big5 Level-1 Chinese traditional"),
1560 build_string ("big5"));
1561 staticpro (&Vcharset_chinese_big5_2);
1562 Vcharset_chinese_big5_2 =
1563 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1564 CHARSET_TYPE_94X94, 2, 0, '1',
1565 CHARSET_LEFT_TO_RIGHT,
1566 build_string ("Big5"),
1567 build_string ("Big5 (Level-2)"),
1569 ("Big5 Level-2 Chinese traditional"),
1570 build_string ("big5"));
1573 #ifdef ENABLE_COMPOSITE_CHARS
1574 /* #### For simplicity, we put composite chars into a 96x96 charset.
1575 This is going to lead to problems because you can run out of
1576 room, esp. as we don't yet recycle numbers. */
1577 staticpro (&Vcharset_composite);
1578 Vcharset_composite =
1579 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1580 CHARSET_TYPE_96X96, 2, 0, 0,
1581 CHARSET_LEFT_TO_RIGHT,
1582 build_string ("Composite"),
1583 build_string ("Composite characters"),
1584 build_string ("Composite characters"),
1587 /* #### not dumped properly */
1588 composite_char_row_next = 32;
1589 composite_char_col_next = 32;
1591 Vcomposite_char_string2char_hash_table =
1592 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1593 Vcomposite_char_char2string_hash_table =
1594 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1595 staticpro (&Vcomposite_char_string2char_hash_table);
1596 staticpro (&Vcomposite_char_char2string_hash_table);
1597 #endif /* ENABLE_COMPOSITE_CHARS */