1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
72 static int composite_char_row_next;
73 static int composite_char_col_next;
75 #endif /* ENABLE_COMPOSITE_CHARS */
77 struct charset_lookup *chlook;
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
84 static const struct struct_description charset_lookup_description = {
85 sizeof(struct charset_lookup),
86 charset_lookup_description_1
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
95 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97 const Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
149 Lisp_Object Ql2r, Qr2l;
151 Lisp_Object Vcharset_hash_table;
153 static Bufbyte next_allocated_1_byte_leading_byte;
154 static Bufbyte next_allocated_2_byte_leading_byte;
156 /* Composite characters are characters constructed by overstriking two
157 or more regular characters.
159 1) The old Mule implementation involves storing composite characters
160 in a buffer as a tag followed by all of the actual characters
161 used to make up the composite character. I think this is a bad
162 idea; it greatly complicates code that wants to handle strings
163 one character at a time because it has to deal with the possibility
164 of great big ungainly characters. It's much more reasonable to
165 simply store an index into a table of composite characters.
167 2) The current implementation only allows for 16,384 separate
168 composite characters over the lifetime of the XEmacs process.
169 This could become a potential problem if the user
170 edited lots of different files that use composite characters.
171 Due to FSF bogosity, increasing the number of allowable
172 composite characters under Mule would decrease the number
173 of possible faces that can exist. Mule already has shrunk
174 this to 2048, and further shrinkage would become uncomfortable.
175 No such problems exist in XEmacs.
177 Composite characters could be represented as 0x80 C1 C2 C3,
178 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
179 for slightly under 2^20 (one million) composite characters
180 over the XEmacs process lifetime, and you only need to
181 increase the size of a Mule character from 19 to 21 bits.
182 Or you could use 0x80 C1 C2 C3 C4, allowing for about
183 85 million (slightly over 2^26) composite characters. */
186 /************************************************************************/
187 /* Basic Emchar functions */
188 /************************************************************************/
190 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
191 string in STR. Returns the number of bytes stored.
192 Do not call this directly. Use the macro set_charptr_emchar() instead.
196 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
204 BREAKUP_CHAR (c, charset, c1, c2);
205 lb = CHAR_LEADING_BYTE (c);
206 if (LEADING_BYTE_PRIVATE_P (lb))
207 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
209 if (EQ (charset, Vcharset_control_1))
218 /* Return the first character from a Mule-encoded string in STR,
219 assuming it's non-ASCII. Do not call this directly.
220 Use the macro charptr_emchar() instead. */
223 non_ascii_charptr_emchar (CONST Bufbyte *str)
225 Bufbyte i0 = *str, i1, i2 = 0;
228 if (i0 == LEADING_BYTE_CONTROL_1)
229 return (Emchar) (*++str - 0x20);
231 if (LEADING_BYTE_PREFIX_P (i0))
236 charset = CHARSET_BY_LEADING_BYTE (i0);
237 if (XCHARSET_DIMENSION (charset) == 2)
240 return MAKE_CHAR (charset, i1, i2);
243 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
244 Do not call this directly. Use the macro valid_char_p() instead. */
247 non_ascii_valid_char_p (Emchar ch)
251 /* Must have only lowest 19 bits set */
255 f1 = CHAR_FIELD1 (ch);
256 f2 = CHAR_FIELD2 (ch);
257 f3 = CHAR_FIELD3 (ch);
263 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
264 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
265 f2 > MAX_CHAR_FIELD2_PRIVATE)
270 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
271 f2 <= MAX_CHAR_FIELD2_PRIVATE))
275 NOTE: This takes advantage of the fact that
276 FIELD2_TO_OFFICIAL_LEADING_BYTE and
277 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
279 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
280 if (EQ (charset, Qnil))
282 return (XCHARSET_CHARS (charset) == 96);
288 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
289 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
290 f1 > MAX_CHAR_FIELD1_PRIVATE)
292 if (f2 < 0x20 || f3 < 0x20)
295 #ifdef ENABLE_COMPOSITE_CHARS
296 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
298 if (UNBOUNDP (Fgethash (make_int (ch),
299 Vcomposite_char_char2string_hash_table,
304 #endif /* ENABLE_COMPOSITE_CHARS */
306 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
307 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
310 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
315 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
317 if (EQ (charset, Qnil))
319 return (XCHARSET_CHARS (charset) == 96);
324 /************************************************************************/
325 /* Basic string functions */
326 /************************************************************************/
328 /* Copy the character pointed to by PTR into STR, assuming it's
329 non-ASCII. Do not call this directly. Use the macro
330 charptr_copy_char() instead. */
333 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
335 Bufbyte *strptr = str;
337 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
339 /* Notice fallthrough. */
340 case 4: *++strptr = *ptr++;
341 case 3: *++strptr = *ptr++;
342 case 2: *++strptr = *ptr;
347 return strptr + 1 - str;
351 /************************************************************************/
352 /* streams of Emchars */
353 /************************************************************************/
355 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
356 The functions below are not meant to be called directly; use
357 the macros in insdel.h. */
360 Lstream_get_emchar_1 (Lstream *stream, int ch)
362 Bufbyte str[MAX_EMCHAR_LEN];
363 Bufbyte *strptr = str;
365 str[0] = (Bufbyte) ch;
366 switch (REP_BYTES_BY_FIRST_BYTE (ch))
368 /* Notice fallthrough. */
370 ch = Lstream_getc (stream);
372 *++strptr = (Bufbyte) ch;
374 ch = Lstream_getc (stream);
376 *++strptr = (Bufbyte) ch;
378 ch = Lstream_getc (stream);
380 *++strptr = (Bufbyte) ch;
385 return charptr_emchar (str);
389 Lstream_fput_emchar (Lstream *stream, Emchar ch)
391 Bufbyte str[MAX_EMCHAR_LEN];
392 Bytecount len = set_charptr_emchar (str, ch);
393 return Lstream_write (stream, str, len);
397 Lstream_funget_emchar (Lstream *stream, Emchar ch)
399 Bufbyte str[MAX_EMCHAR_LEN];
400 Bytecount len = set_charptr_emchar (str, ch);
401 Lstream_unread (stream, str, len);
405 /************************************************************************/
407 /************************************************************************/
410 mark_charset (Lisp_Object obj)
412 struct Lisp_Charset *cs = XCHARSET (obj);
414 mark_object (cs->short_name);
415 mark_object (cs->long_name);
416 mark_object (cs->doc_string);
417 mark_object (cs->registry);
418 mark_object (cs->ccl_program);
423 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
425 struct Lisp_Charset *cs = XCHARSET (obj);
429 error ("printing unreadable object #<charset %s 0x%x>",
430 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
433 write_c_string ("#<charset ", printcharfun);
434 print_internal (CHARSET_NAME (cs), printcharfun, 0);
435 write_c_string (" ", printcharfun);
436 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
437 write_c_string (" ", printcharfun);
438 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
439 write_c_string (" ", printcharfun);
440 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
441 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
442 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
443 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
444 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
446 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
447 CHARSET_COLUMNS (cs),
448 CHARSET_GRAPHIC (cs),
450 write_c_string (buf, printcharfun);
451 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
452 sprintf (buf, " 0x%x>", cs->header.uid);
453 write_c_string (buf, printcharfun);
456 static const struct lrecord_description charset_description[] = {
457 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
461 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
462 mark_charset, print_charset, 0, 0, 0, charset_description,
463 struct Lisp_Charset);
464 /* Make a new charset. */
467 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
468 unsigned char type, unsigned char columns, unsigned char graphic,
469 Bufbyte final, unsigned char direction, Lisp_Object short_name,
470 Lisp_Object long_name, Lisp_Object doc,
474 struct Lisp_Charset *cs =
475 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
476 XSETCHARSET (obj, cs);
478 CHARSET_ID (cs) = id;
479 CHARSET_NAME (cs) = name;
480 CHARSET_SHORT_NAME (cs) = short_name;
481 CHARSET_LONG_NAME (cs) = long_name;
482 CHARSET_REP_BYTES (cs) = rep_bytes;
483 CHARSET_DIRECTION (cs) = direction;
484 CHARSET_TYPE (cs) = type;
485 CHARSET_COLUMNS (cs) = columns;
486 CHARSET_GRAPHIC (cs) = graphic;
487 CHARSET_FINAL (cs) = final;
488 CHARSET_DOC_STRING (cs) = doc;
489 CHARSET_REGISTRY (cs) = reg;
490 CHARSET_CCL_PROGRAM (cs) = Qnil;
491 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
493 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
494 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
495 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
496 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
500 /* some charsets do not have final characters. This includes
501 ASCII, Control-1, Composite, and the two faux private
503 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
504 chlook->charset_by_attributes[type][final][direction] = obj;
507 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
508 chlook->charset_by_leading_byte[id - 128] = obj;
510 /* Some charsets are "faux" and don't have names or really exist at
511 all except in the leading-byte table. */
513 Fputhash (name, obj, Vcharset_hash_table);
518 get_unallocated_leading_byte (int dimension)
524 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
527 lb = next_allocated_1_byte_leading_byte++;
531 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
534 lb = next_allocated_2_byte_leading_byte++;
539 ("No more character sets free for this dimension",
540 make_int (dimension));
546 /************************************************************************/
547 /* Basic charset Lisp functions */
548 /************************************************************************/
550 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
551 Return non-nil if OBJECT is a charset.
555 return CHARSETP (object) ? Qt : Qnil;
558 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
559 Retrieve the charset of the given name.
560 If CHARSET-OR-NAME is a charset object, it is simply returned.
561 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
562 nil is returned. Otherwise the associated charset object is returned.
566 if (CHARSETP (charset_or_name))
567 return charset_or_name;
569 CHECK_SYMBOL (charset_or_name);
570 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
573 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
574 Retrieve the charset of the given name.
575 Same as `find-charset' except an error is signalled if there is no such
576 charset instead of returning nil.
580 Lisp_Object charset = Ffind_charset (name);
583 signal_simple_error ("No such charset", name);
587 /* We store the charsets in hash tables with the names as the key and the
588 actual charset object as the value. Occasionally we need to use them
589 in a list format. These routines provide us with that. */
590 struct charset_list_closure
592 Lisp_Object *charset_list;
596 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
597 void *charset_list_closure)
599 /* This function can GC */
600 struct charset_list_closure *chcl =
601 (struct charset_list_closure*) charset_list_closure;
602 Lisp_Object *charset_list = chcl->charset_list;
604 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
608 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
609 Return a list of the names of all defined charsets.
613 Lisp_Object charset_list = Qnil;
615 struct charset_list_closure charset_list_closure;
617 GCPRO1 (charset_list);
618 charset_list_closure.charset_list = &charset_list;
619 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
620 &charset_list_closure);
626 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
627 Return the name of the given charset.
631 return XCHARSET_NAME (Fget_charset (charset));
634 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
635 Define a new character set.
636 This function is for use with Mule support.
637 NAME is a symbol, the name by which the character set is normally referred.
638 DOC-STRING is a string describing the character set.
639 PROPS is a property list, describing the specific nature of the
640 character set. Recognized properties are:
642 'short-name Short version of the charset name (ex: Latin-1)
643 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
644 'registry A regular expression matching the font registry field for
646 'dimension Number of octets used to index a character in this charset.
647 Either 1 or 2. Defaults to 1.
648 'columns Number of columns used to display a character in this charset.
649 Only used in TTY mode. (Under X, the actual width of a
650 character can be derived from the font used to display the
651 characters.) If unspecified, defaults to the dimension
652 (this is almost always the correct value).
653 'chars Number of characters in each dimension (94 or 96).
654 Defaults to 94. Note that if the dimension is 2, the
655 character set thus described is 94x94 or 96x96.
656 'final Final byte of ISO 2022 escape sequence. Must be
657 supplied. Each combination of (DIMENSION, CHARS) defines a
658 separate namespace for final bytes. Note that ISO
659 2022 restricts the final byte to the range
660 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
661 dimension == 2. Note also that final bytes in the range
662 0x30 - 0x3F are reserved for user-defined (not official)
664 'graphic 0 (use left half of font on output) or 1 (use right half
665 of font on output). Defaults to 0. For example, for
666 a font whose registry is ISO8859-1, the left half
667 (octets 0x20 - 0x7F) is the `ascii' character set, while
668 the right half (octets 0xA0 - 0xFF) is the `latin-1'
669 character set. With 'graphic set to 0, the octets
670 will have their high bit cleared; with it set to 1,
671 the octets will have their high bit set.
672 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
674 'ccl-program A compiled CCL program used to convert a character in
675 this charset into an index into the font. This is in
676 addition to the 'graphic property. The CCL program
677 is passed the octets of the character, with the high
678 bit cleared and set depending upon whether the value
679 of the 'graphic property is 0 or 1.
681 (name, doc_string, props))
683 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
684 int direction = CHARSET_LEFT_TO_RIGHT;
686 Lisp_Object registry = Qnil;
688 Lisp_Object rest, keyword, value;
689 Lisp_Object ccl_program = Qnil;
690 Lisp_Object short_name = Qnil, long_name = Qnil;
693 if (!NILP (doc_string))
694 CHECK_STRING (doc_string);
696 charset = Ffind_charset (name);
698 signal_simple_error ("Cannot redefine existing charset", name);
700 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
702 if (EQ (keyword, Qshort_name))
704 CHECK_STRING (value);
708 if (EQ (keyword, Qlong_name))
710 CHECK_STRING (value);
714 else if (EQ (keyword, Qdimension))
717 dimension = XINT (value);
718 if (dimension < 1 || dimension > 2)
719 signal_simple_error ("Invalid value for 'dimension", value);
722 else if (EQ (keyword, Qchars))
725 chars = XINT (value);
726 if (chars != 94 && chars != 96)
727 signal_simple_error ("Invalid value for 'chars", value);
730 else if (EQ (keyword, Qcolumns))
733 columns = XINT (value);
734 if (columns != 1 && columns != 2)
735 signal_simple_error ("Invalid value for 'columns", value);
738 else if (EQ (keyword, Qgraphic))
741 graphic = XINT (value);
742 if (graphic < 0 || graphic > 1)
743 signal_simple_error ("Invalid value for 'graphic", value);
746 else if (EQ (keyword, Qregistry))
748 CHECK_STRING (value);
752 else if (EQ (keyword, Qdirection))
754 if (EQ (value, Ql2r))
755 direction = CHARSET_LEFT_TO_RIGHT;
756 else if (EQ (value, Qr2l))
757 direction = CHARSET_RIGHT_TO_LEFT;
759 signal_simple_error ("Invalid value for 'direction", value);
762 else if (EQ (keyword, Qfinal))
764 CHECK_CHAR_COERCE_INT (value);
765 final = XCHAR (value);
766 if (final < '0' || final > '~')
767 signal_simple_error ("Invalid value for 'final", value);
770 else if (EQ (keyword, Qccl_program))
772 CHECK_VECTOR (value);
777 signal_simple_error ("Unrecognized property", keyword);
781 error ("'final must be specified");
782 if (dimension == 2 && final > 0x5F)
784 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
788 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
790 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
792 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
793 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
795 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
797 id = get_unallocated_leading_byte (dimension);
799 if (NILP (doc_string))
800 doc_string = build_string ("");
803 registry = build_string ("");
805 if (NILP (short_name))
806 XSETSTRING (short_name, XSYMBOL (name)->name);
808 if (NILP (long_name))
809 long_name = doc_string;
813 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
814 final, direction, short_name, long_name, doc_string, registry);
815 if (!NILP (ccl_program))
816 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
820 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
822 Make a charset equivalent to CHARSET but which goes in the opposite direction.
823 NEW-NAME is the name of the new charset. Return the new charset.
827 Lisp_Object new_charset = Qnil;
828 int id, dimension, columns, graphic, final;
830 Lisp_Object registry, doc_string, short_name, long_name;
831 struct Lisp_Charset *cs;
833 charset = Fget_charset (charset);
834 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
835 signal_simple_error ("Charset already has reverse-direction charset",
838 CHECK_SYMBOL (new_name);
839 if (!NILP (Ffind_charset (new_name)))
840 signal_simple_error ("Cannot redefine existing charset", new_name);
842 cs = XCHARSET (charset);
844 type = CHARSET_TYPE (cs);
845 columns = CHARSET_COLUMNS (cs);
846 dimension = CHARSET_DIMENSION (cs);
847 id = get_unallocated_leading_byte (dimension);
849 graphic = CHARSET_GRAPHIC (cs);
850 final = CHARSET_FINAL (cs);
851 direction = CHARSET_RIGHT_TO_LEFT;
852 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
853 direction = CHARSET_LEFT_TO_RIGHT;
854 doc_string = CHARSET_DOC_STRING (cs);
855 short_name = CHARSET_SHORT_NAME (cs);
856 long_name = CHARSET_LONG_NAME (cs);
857 registry = CHARSET_REGISTRY (cs);
859 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
860 graphic, final, direction, short_name, long_name,
861 doc_string, registry);
863 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
864 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
869 /* #### Reverse direction charsets not yet implemented. */
871 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
873 Return the reverse-direction charset parallel to CHARSET, if any.
874 This is the charset with the same properties (in particular, the same
875 dimension, number of characters per dimension, and final byte) as
876 CHARSET but whose characters are displayed in the opposite direction.
880 charset = Fget_charset (charset);
881 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
885 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
886 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
887 If DIRECTION is omitted, both directions will be checked (left-to-right
888 will be returned if character sets exist for both directions).
890 (dimension, chars, final, direction))
892 int dm, ch, fi, di = -1;
894 Lisp_Object obj = Qnil;
896 CHECK_INT (dimension);
897 dm = XINT (dimension);
898 if (dm < 1 || dm > 2)
899 signal_simple_error ("Invalid value for DIMENSION", dimension);
903 if (ch != 94 && ch != 96)
904 signal_simple_error ("Invalid value for CHARS", chars);
906 CHECK_CHAR_COERCE_INT (final);
908 if (fi < '0' || fi > '~')
909 signal_simple_error ("Invalid value for FINAL", final);
911 if (EQ (direction, Ql2r))
912 di = CHARSET_LEFT_TO_RIGHT;
913 else if (EQ (direction, Qr2l))
914 di = CHARSET_RIGHT_TO_LEFT;
915 else if (!NILP (direction))
916 signal_simple_error ("Invalid value for DIRECTION", direction);
918 if (dm == 2 && fi > 0x5F)
920 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
923 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
925 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
929 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
931 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
934 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
937 return XCHARSET_NAME (obj);
941 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
942 Return short name of CHARSET.
946 return XCHARSET_SHORT_NAME (Fget_charset (charset));
949 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
950 Return long name of CHARSET.
954 return XCHARSET_LONG_NAME (Fget_charset (charset));
957 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
958 Return description of CHARSET.
962 return XCHARSET_DOC_STRING (Fget_charset (charset));
965 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
966 Return dimension of CHARSET.
970 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
973 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
974 Return property PROP of CHARSET.
975 Recognized properties are those listed in `make-charset', as well as
976 'name and 'doc-string.
980 struct Lisp_Charset *cs;
982 charset = Fget_charset (charset);
983 cs = XCHARSET (charset);
986 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
987 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
988 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
989 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
990 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
991 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
992 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
993 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
994 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
995 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
996 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
997 if (EQ (prop, Qdirection))
998 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
999 if (EQ (prop, Qreverse_direction_charset))
1001 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1005 return XCHARSET_NAME (obj);
1007 signal_simple_error ("Unrecognized charset property name", prop);
1008 return Qnil; /* not reached */
1011 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1012 Return charset identification number of CHARSET.
1016 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1019 /* #### We need to figure out which properties we really want to
1022 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1023 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1025 (charset, ccl_program))
1027 charset = Fget_charset (charset);
1028 CHECK_VECTOR (ccl_program);
1029 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1034 invalidate_charset_font_caches (Lisp_Object charset)
1036 /* Invalidate font cache entries for charset on all devices. */
1037 Lisp_Object devcons, concons, hash_table;
1038 DEVICE_LOOP_NO_BREAK (devcons, concons)
1040 struct device *d = XDEVICE (XCAR (devcons));
1041 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1042 if (!UNBOUNDP (hash_table))
1043 Fclrhash (hash_table);
1047 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1048 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1049 Set the 'registry property of CHARSET to REGISTRY.
1051 (charset, registry))
1053 charset = Fget_charset (charset);
1054 CHECK_STRING (registry);
1055 XCHARSET_REGISTRY (charset) = registry;
1056 invalidate_charset_font_caches (charset);
1057 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1062 /************************************************************************/
1063 /* Lisp primitives for working with characters */
1064 /************************************************************************/
1066 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1067 Make a character from CHARSET and octets ARG1 and ARG2.
1068 ARG2 is required only for characters from two-dimensional charsets.
1069 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1070 character s with caron.
1072 (charset, arg1, arg2))
1074 struct Lisp_Charset *cs;
1076 int lowlim, highlim;
1078 charset = Fget_charset (charset);
1079 cs = XCHARSET (charset);
1081 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1082 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1083 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1084 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1087 /* It is useful (and safe, according to Olivier Galibert) to strip
1088 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1089 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1090 Latin 2 code of the character. */
1091 a1 = XINT (arg1) & 0x7f;
1092 if (a1 < lowlim || a1 > highlim)
1093 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1095 if (CHARSET_DIMENSION (cs) == 1)
1099 ("Charset is of dimension one; second octet must be nil", arg2);
1100 return make_char (MAKE_CHAR (charset, a1, 0));
1104 a2 = XINT (arg2) & 0x7f;
1105 if (a2 < lowlim || a2 > highlim)
1106 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1108 return make_char (MAKE_CHAR (charset, a1, a2));
1111 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1112 Return the character set of char CH.
1116 CHECK_CHAR_COERCE_INT (ch);
1118 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1119 (CHAR_LEADING_BYTE (XCHAR (ch))));
1122 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1123 Return the octet numbered N (should be 0 or 1) of char CH.
1124 N defaults to 0 if omitted.
1128 Lisp_Object charset;
1131 CHECK_CHAR_COERCE_INT (ch);
1133 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
1135 if (NILP (n) || EQ (n, Qzero))
1136 return make_int (octet0);
1137 else if (EQ (n, make_int (1)))
1138 return make_int (octet1);
1140 signal_simple_error ("Octet number must be 0 or 1", n);
1143 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1144 Return list of charset and one or two position-codes of CHAR.
1148 /* This function can GC */
1149 struct gcpro gcpro1, gcpro2;
1150 Lisp_Object charset = Qnil;
1151 Lisp_Object rc = Qnil;
1154 GCPRO2 (charset, rc);
1155 CHECK_CHAR_COERCE_INT (character);
1157 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1159 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1161 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1165 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1173 #ifdef ENABLE_COMPOSITE_CHARS
1174 /************************************************************************/
1175 /* composite character functions */
1176 /************************************************************************/
1179 lookup_composite_char (Bufbyte *str, int len)
1181 Lisp_Object lispstr = make_string (str, len);
1182 Lisp_Object ch = Fgethash (lispstr,
1183 Vcomposite_char_string2char_hash_table,
1189 if (composite_char_row_next >= 128)
1190 signal_simple_error ("No more composite chars available", lispstr);
1191 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1192 composite_char_col_next);
1193 Fputhash (make_char (emch), lispstr,
1194 Vcomposite_char_char2string_hash_table);
1195 Fputhash (lispstr, make_char (emch),
1196 Vcomposite_char_string2char_hash_table);
1197 composite_char_col_next++;
1198 if (composite_char_col_next >= 128)
1200 composite_char_col_next = 32;
1201 composite_char_row_next++;
1210 composite_char_string (Emchar ch)
1212 Lisp_Object str = Fgethash (make_char (ch),
1213 Vcomposite_char_char2string_hash_table,
1215 assert (!UNBOUNDP (str));
1219 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1220 Convert a string into a single composite character.
1221 The character is the result of overstriking all the characters in
1226 CHECK_STRING (string);
1227 return make_char (lookup_composite_char (XSTRING_DATA (string),
1228 XSTRING_LENGTH (string)));
1231 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1232 Return a string of the characters comprising a composite character.
1240 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1241 signal_simple_error ("Must be composite char", ch);
1242 return composite_char_string (emch);
1244 #endif /* ENABLE_COMPOSITE_CHARS */
1247 /************************************************************************/
1248 /* initialization */
1249 /************************************************************************/
1252 syms_of_mule_charset (void)
1254 DEFSUBR (Fcharsetp);
1255 DEFSUBR (Ffind_charset);
1256 DEFSUBR (Fget_charset);
1257 DEFSUBR (Fcharset_list);
1258 DEFSUBR (Fcharset_name);
1259 DEFSUBR (Fmake_charset);
1260 DEFSUBR (Fmake_reverse_direction_charset);
1261 /* DEFSUBR (Freverse_direction_charset); */
1262 DEFSUBR (Fcharset_from_attributes);
1263 DEFSUBR (Fcharset_short_name);
1264 DEFSUBR (Fcharset_long_name);
1265 DEFSUBR (Fcharset_description);
1266 DEFSUBR (Fcharset_dimension);
1267 DEFSUBR (Fcharset_property);
1268 DEFSUBR (Fcharset_id);
1269 DEFSUBR (Fset_charset_ccl_program);
1270 DEFSUBR (Fset_charset_registry);
1272 DEFSUBR (Fmake_char);
1273 DEFSUBR (Fchar_charset);
1274 DEFSUBR (Fchar_octet);
1275 DEFSUBR (Fsplit_char);
1277 #ifdef ENABLE_COMPOSITE_CHARS
1278 DEFSUBR (Fmake_composite_char);
1279 DEFSUBR (Fcomposite_char_string);
1282 defsymbol (&Qcharsetp, "charsetp");
1283 defsymbol (&Qregistry, "registry");
1284 defsymbol (&Qfinal, "final");
1285 defsymbol (&Qgraphic, "graphic");
1286 defsymbol (&Qdirection, "direction");
1287 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1288 defsymbol (&Qshort_name, "short-name");
1289 defsymbol (&Qlong_name, "long-name");
1291 defsymbol (&Ql2r, "l2r");
1292 defsymbol (&Qr2l, "r2l");
1294 /* Charsets, compatible with FSF 20.3
1295 Naming convention is Script-Charset[-Edition] */
1296 defsymbol (&Qascii, "ascii");
1297 defsymbol (&Qcontrol_1, "control-1");
1298 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1299 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1300 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1301 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1302 defsymbol (&Qthai_tis620, "thai-tis620");
1303 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1304 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1305 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1306 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1307 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1308 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1309 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1310 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1311 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1312 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1313 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1314 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1315 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1316 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1317 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1318 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1320 defsymbol (&Qcomposite, "composite");
1324 vars_of_mule_charset (void)
1328 chlook = xnew (struct charset_lookup);
1329 dumpstruct (&chlook, &charset_lookup_description);
1331 /* Table of charsets indexed by leading byte. */
1332 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1333 chlook->charset_by_leading_byte[i] = Qnil;
1335 /* Table of charsets indexed by type/final-byte/direction. */
1336 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1337 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1338 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1339 chlook->charset_by_attributes[i][j][k] = Qnil;
1341 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1342 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1346 complex_vars_of_mule_charset (void)
1348 staticpro (&Vcharset_hash_table);
1349 Vcharset_hash_table =
1350 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1352 /* Predefined character sets. We store them into variables for
1355 staticpro (&Vcharset_ascii);
1357 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1358 CHARSET_TYPE_94, 1, 0, 'B',
1359 CHARSET_LEFT_TO_RIGHT,
1360 build_string ("ASCII"),
1361 build_string ("ASCII)"),
1362 build_string ("ASCII (ISO646 IRV)"),
1363 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1364 staticpro (&Vcharset_control_1);
1365 Vcharset_control_1 =
1366 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1367 CHARSET_TYPE_94, 1, 1, 0,
1368 CHARSET_LEFT_TO_RIGHT,
1369 build_string ("C1"),
1370 build_string ("Control characters"),
1371 build_string ("Control characters 128-191"),
1373 staticpro (&Vcharset_latin_iso8859_1);
1374 Vcharset_latin_iso8859_1 =
1375 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1376 CHARSET_TYPE_96, 1, 1, 'A',
1377 CHARSET_LEFT_TO_RIGHT,
1378 build_string ("Latin-1"),
1379 build_string ("ISO8859-1 (Latin-1)"),
1380 build_string ("ISO8859-1 (Latin-1)"),
1381 build_string ("iso8859-1"));
1382 staticpro (&Vcharset_latin_iso8859_2);
1383 Vcharset_latin_iso8859_2 =
1384 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1385 CHARSET_TYPE_96, 1, 1, 'B',
1386 CHARSET_LEFT_TO_RIGHT,
1387 build_string ("Latin-2"),
1388 build_string ("ISO8859-2 (Latin-2)"),
1389 build_string ("ISO8859-2 (Latin-2)"),
1390 build_string ("iso8859-2"));
1391 staticpro (&Vcharset_latin_iso8859_3);
1392 Vcharset_latin_iso8859_3 =
1393 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1394 CHARSET_TYPE_96, 1, 1, 'C',
1395 CHARSET_LEFT_TO_RIGHT,
1396 build_string ("Latin-3"),
1397 build_string ("ISO8859-3 (Latin-3)"),
1398 build_string ("ISO8859-3 (Latin-3)"),
1399 build_string ("iso8859-3"));
1400 staticpro (&Vcharset_latin_iso8859_4);
1401 Vcharset_latin_iso8859_4 =
1402 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1403 CHARSET_TYPE_96, 1, 1, 'D',
1404 CHARSET_LEFT_TO_RIGHT,
1405 build_string ("Latin-4"),
1406 build_string ("ISO8859-4 (Latin-4)"),
1407 build_string ("ISO8859-4 (Latin-4)"),
1408 build_string ("iso8859-4"));
1409 staticpro (&Vcharset_thai_tis620);
1410 Vcharset_thai_tis620 =
1411 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1412 CHARSET_TYPE_96, 1, 1, 'T',
1413 CHARSET_LEFT_TO_RIGHT,
1414 build_string ("TIS620"),
1415 build_string ("TIS620 (Thai)"),
1416 build_string ("TIS620.2529 (Thai)"),
1417 build_string ("tis620"));
1418 staticpro (&Vcharset_greek_iso8859_7);
1419 Vcharset_greek_iso8859_7 =
1420 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1421 CHARSET_TYPE_96, 1, 1, 'F',
1422 CHARSET_LEFT_TO_RIGHT,
1423 build_string ("ISO8859-7"),
1424 build_string ("ISO8859-7 (Greek)"),
1425 build_string ("ISO8859-7 (Greek)"),
1426 build_string ("iso8859-7"));
1427 staticpro (&Vcharset_arabic_iso8859_6);
1428 Vcharset_arabic_iso8859_6 =
1429 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1430 CHARSET_TYPE_96, 1, 1, 'G',
1431 CHARSET_RIGHT_TO_LEFT,
1432 build_string ("ISO8859-6"),
1433 build_string ("ISO8859-6 (Arabic)"),
1434 build_string ("ISO8859-6 (Arabic)"),
1435 build_string ("iso8859-6"));
1436 staticpro (&Vcharset_hebrew_iso8859_8);
1437 Vcharset_hebrew_iso8859_8 =
1438 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1439 CHARSET_TYPE_96, 1, 1, 'H',
1440 CHARSET_RIGHT_TO_LEFT,
1441 build_string ("ISO8859-8"),
1442 build_string ("ISO8859-8 (Hebrew)"),
1443 build_string ("ISO8859-8 (Hebrew)"),
1444 build_string ("iso8859-8"));
1445 staticpro (&Vcharset_katakana_jisx0201);
1446 Vcharset_katakana_jisx0201 =
1447 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1448 CHARSET_TYPE_94, 1, 1, 'I',
1449 CHARSET_LEFT_TO_RIGHT,
1450 build_string ("JISX0201 Kana"),
1451 build_string ("JISX0201.1976 (Japanese Kana)"),
1452 build_string ("JISX0201.1976 Japanese Kana"),
1453 build_string ("jisx0201.1976"));
1454 staticpro (&Vcharset_latin_jisx0201);
1455 Vcharset_latin_jisx0201 =
1456 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1457 CHARSET_TYPE_94, 1, 0, 'J',
1458 CHARSET_LEFT_TO_RIGHT,
1459 build_string ("JISX0201 Roman"),
1460 build_string ("JISX0201.1976 (Japanese Roman)"),
1461 build_string ("JISX0201.1976 Japanese Roman"),
1462 build_string ("jisx0201.1976"));
1463 staticpro (&Vcharset_cyrillic_iso8859_5);
1464 Vcharset_cyrillic_iso8859_5 =
1465 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1466 CHARSET_TYPE_96, 1, 1, 'L',
1467 CHARSET_LEFT_TO_RIGHT,
1468 build_string ("ISO8859-5"),
1469 build_string ("ISO8859-5 (Cyrillic)"),
1470 build_string ("ISO8859-5 (Cyrillic)"),
1471 build_string ("iso8859-5"));
1472 staticpro (&Vcharset_latin_iso8859_9);
1473 Vcharset_latin_iso8859_9 =
1474 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1475 CHARSET_TYPE_96, 1, 1, 'M',
1476 CHARSET_LEFT_TO_RIGHT,
1477 build_string ("Latin-5"),
1478 build_string ("ISO8859-9 (Latin-5)"),
1479 build_string ("ISO8859-9 (Latin-5)"),
1480 build_string ("iso8859-9"));
1481 staticpro (&Vcharset_japanese_jisx0208_1978);
1482 Vcharset_japanese_jisx0208_1978 =
1483 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1484 CHARSET_TYPE_94X94, 2, 0, '@',
1485 CHARSET_LEFT_TO_RIGHT,
1486 build_string ("JISX0208.1978"),
1487 build_string ("JISX0208.1978 (Japanese)"),
1489 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1490 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1491 staticpro (&Vcharset_chinese_gb2312);
1492 Vcharset_chinese_gb2312 =
1493 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1494 CHARSET_TYPE_94X94, 2, 0, 'A',
1495 CHARSET_LEFT_TO_RIGHT,
1496 build_string ("GB2312"),
1497 build_string ("GB2312)"),
1498 build_string ("GB2312 Chinese simplified"),
1499 build_string ("gb2312"));
1500 staticpro (&Vcharset_japanese_jisx0208);
1501 Vcharset_japanese_jisx0208 =
1502 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1503 CHARSET_TYPE_94X94, 2, 0, 'B',
1504 CHARSET_LEFT_TO_RIGHT,
1505 build_string ("JISX0208"),
1506 build_string ("JISX0208.1983/1990 (Japanese)"),
1507 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1508 build_string ("jisx0208.19\\(83\\|90\\)"));
1509 staticpro (&Vcharset_korean_ksc5601);
1510 Vcharset_korean_ksc5601 =
1511 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1512 CHARSET_TYPE_94X94, 2, 0, 'C',
1513 CHARSET_LEFT_TO_RIGHT,
1514 build_string ("KSC5601"),
1515 build_string ("KSC5601 (Korean"),
1516 build_string ("KSC5601 Korean Hangul and Hanja"),
1517 build_string ("ksc5601"));
1518 staticpro (&Vcharset_japanese_jisx0212);
1519 Vcharset_japanese_jisx0212 =
1520 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1521 CHARSET_TYPE_94X94, 2, 0, 'D',
1522 CHARSET_LEFT_TO_RIGHT,
1523 build_string ("JISX0212"),
1524 build_string ("JISX0212 (Japanese)"),
1525 build_string ("JISX0212 Japanese Supplement"),
1526 build_string ("jisx0212"));
1528 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1529 staticpro (&Vcharset_chinese_cns11643_1);
1530 Vcharset_chinese_cns11643_1 =
1531 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1532 CHARSET_TYPE_94X94, 2, 0, 'G',
1533 CHARSET_LEFT_TO_RIGHT,
1534 build_string ("CNS11643-1"),
1535 build_string ("CNS11643-1 (Chinese traditional)"),
1537 ("CNS 11643 Plane 1 Chinese traditional"),
1538 build_string (CHINESE_CNS_PLANE_RE("1")));
1539 staticpro (&Vcharset_chinese_cns11643_2);
1540 Vcharset_chinese_cns11643_2 =
1541 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1542 CHARSET_TYPE_94X94, 2, 0, 'H',
1543 CHARSET_LEFT_TO_RIGHT,
1544 build_string ("CNS11643-2"),
1545 build_string ("CNS11643-2 (Chinese traditional)"),
1547 ("CNS 11643 Plane 2 Chinese traditional"),
1548 build_string (CHINESE_CNS_PLANE_RE("2")));
1549 staticpro (&Vcharset_chinese_big5_1);
1550 Vcharset_chinese_big5_1 =
1551 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1552 CHARSET_TYPE_94X94, 2, 0, '0',
1553 CHARSET_LEFT_TO_RIGHT,
1554 build_string ("Big5"),
1555 build_string ("Big5 (Level-1)"),
1557 ("Big5 Level-1 Chinese traditional"),
1558 build_string ("big5"));
1559 staticpro (&Vcharset_chinese_big5_2);
1560 Vcharset_chinese_big5_2 =
1561 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1562 CHARSET_TYPE_94X94, 2, 0, '1',
1563 CHARSET_LEFT_TO_RIGHT,
1564 build_string ("Big5"),
1565 build_string ("Big5 (Level-2)"),
1567 ("Big5 Level-2 Chinese traditional"),
1568 build_string ("big5"));
1571 #ifdef ENABLE_COMPOSITE_CHARS
1572 /* #### For simplicity, we put composite chars into a 96x96 charset.
1573 This is going to lead to problems because you can run out of
1574 room, esp. as we don't yet recycle numbers. */
1575 staticpro (&Vcharset_composite);
1576 Vcharset_composite =
1577 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1578 CHARSET_TYPE_96X96, 2, 0, 0,
1579 CHARSET_LEFT_TO_RIGHT,
1580 build_string ("Composite"),
1581 build_string ("Composite characters"),
1582 build_string ("Composite characters"),
1585 /* #### not dumped properly */
1586 composite_char_row_next = 32;
1587 composite_char_col_next = 32;
1589 Vcomposite_char_string2char_hash_table =
1590 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1591 Vcomposite_char_char2string_hash_table =
1592 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1593 staticpro (&Vcomposite_char_string2char_hash_table);
1594 staticpro (&Vcomposite_char_char2string_hash_table);
1595 #endif /* ENABLE_COMPOSITE_CHARS */