1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
64 Lisp_Object Vcharset_hiragana_jisx0208;
65 Lisp_Object Vcharset_katakana_jisx0208;
67 Lisp_Object Vcharset_chinese_big5_1;
68 Lisp_Object Vcharset_chinese_big5_2;
70 #ifdef ENABLE_COMPOSITE_CHARS
71 Lisp_Object Vcharset_composite;
73 /* Hash tables for composite chars. One maps string representing
74 composed chars to their equivalent chars; one goes the
76 Lisp_Object Vcomposite_char_char2string_hash_table;
77 Lisp_Object Vcomposite_char_string2char_hash_table;
79 static int composite_char_row_next;
80 static int composite_char_col_next;
82 #endif /* ENABLE_COMPOSITE_CHARS */
84 /* Table of charsets indexed by leading byte. */
85 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
87 /* Table of charsets indexed by type/final-byte/direction. */
89 Lisp_Object charset_by_attributes[4][128];
91 Lisp_Object charset_by_attributes[4][128][2];
95 /* Table of number of bytes in the string representation of a character
96 indexed by the first byte of that representation.
98 rep_bytes_by_first_byte(c) is more efficient than the equivalent
99 canonical computation:
101 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
103 Bytecount rep_bytes_by_first_byte[0xA0] =
104 { /* 0x00 - 0x7f are for straight ASCII */
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 /* 0x80 - 0x8f are for Dimension-1 official charsets */
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 /* 0x90 - 0x9d are for Dimension-2 official charsets */
120 /* 0x9e is for Dimension-1 private charsets */
121 /* 0x9f is for Dimension-2 private charsets */
122 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
128 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
130 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
133 for (i = 0; i < 256; i++)
135 markobj (cte->property[i]);
141 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
143 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
144 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
147 for (i = 0; i < 256; i++)
148 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
149 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
150 if (!char_byte_table_equal (cte1->property[i],
151 cte2->property[i], depth + 1))
156 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
162 char_byte_table_hash (Lisp_Object obj, int depth)
164 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 return internal_array_hash (cte->property, 256, depth);
169 static const struct lrecord_description char_byte_table_description[] = {
170 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
174 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
175 mark_char_byte_table,
176 internal_object_printer,
177 0, char_byte_table_equal,
178 char_byte_table_hash,
179 char_byte_table_description,
180 struct Lisp_Char_Byte_Table);
184 make_char_byte_table (Lisp_Object initval)
188 struct Lisp_Char_Byte_Table *cte =
189 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
190 &lrecord_char_byte_table);
192 for (i = 0; i < 256; i++)
193 cte->property[i] = initval;
195 XSETCHAR_BYTE_TABLE (obj, cte);
200 copy_char_byte_table (Lisp_Object entry)
202 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
205 struct Lisp_Char_Byte_Table *ctenew =
206 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
207 &lrecord_char_byte_table);
209 for (i = 0; i < 256; i++)
211 Lisp_Object new = cte->property[i];
212 if (CHAR_BYTE_TABLE_P (new))
213 ctenew->property[i] = copy_char_byte_table (new);
215 ctenew->property[i] = new;
218 XSETCHAR_BYTE_TABLE (obj, ctenew);
222 #define make_char_code_table(initval) make_char_byte_table(initval)
225 get_char_code_table (Emchar ch, Lisp_Object table)
227 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
228 Lisp_Object ret = cpt->property [ch >> 24];
230 if (CHAR_BYTE_TABLE_P (ret))
231 cpt = XCHAR_BYTE_TABLE (ret);
235 ret = cpt->property [(unsigned char) (ch >> 16)];
236 if (CHAR_BYTE_TABLE_P (ret))
237 cpt = XCHAR_BYTE_TABLE (ret);
241 ret = cpt->property [(unsigned char) (ch >> 8)];
242 if (CHAR_BYTE_TABLE_P (ret))
243 cpt = XCHAR_BYTE_TABLE (ret);
247 return cpt->property [(unsigned char) ch];
251 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
253 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
254 Lisp_Object ret = cpt1->property[ch >> 24];
256 if (CHAR_BYTE_TABLE_P (ret))
258 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
260 ret = cpt2->property[(unsigned char)(ch >> 16)];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt3->property[(unsigned char)(ch >> 8)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt4
269 = XCHAR_BYTE_TABLE (ret);
271 cpt4->property[(unsigned char)ch] = value;
273 else if (!EQ (ret, value))
275 Lisp_Object cpt4 = make_char_byte_table (ret);
277 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
278 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
281 else if (!EQ (ret, value))
283 Lisp_Object cpt3 = make_char_byte_table (ret);
284 Lisp_Object cpt4 = make_char_byte_table (ret);
286 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
287 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
289 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
292 else if (!EQ (ret, value))
294 Lisp_Object cpt2 = make_char_byte_table (ret);
295 Lisp_Object cpt3 = make_char_byte_table (ret);
296 Lisp_Object cpt4 = make_char_byte_table (ret);
298 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
299 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
300 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
301 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
306 Lisp_Object Vutf_2000_version;
310 int leading_code_private_11;
313 Lisp_Object Qcharsetp;
315 /* Qdoc_string, Qdimension, Qchars defined in general.c */
316 Lisp_Object Qregistry, Qfinal, Qgraphic;
317 Lisp_Object Qdirection;
318 Lisp_Object Qreverse_direction_charset;
319 Lisp_Object Qleading_byte;
320 Lisp_Object Qshort_name, Qlong_name;
336 Qjapanese_jisx0208_1978,
347 Qvietnamese_viscii_lower,
348 Qvietnamese_viscii_upper,
356 Lisp_Object Ql2r, Qr2l;
358 Lisp_Object Vcharset_hash_table;
360 static Charset_ID next_allocated_1_byte_leading_byte;
361 static Charset_ID next_allocated_2_byte_leading_byte;
363 /* Composite characters are characters constructed by overstriking two
364 or more regular characters.
366 1) The old Mule implementation involves storing composite characters
367 in a buffer as a tag followed by all of the actual characters
368 used to make up the composite character. I think this is a bad
369 idea; it greatly complicates code that wants to handle strings
370 one character at a time because it has to deal with the possibility
371 of great big ungainly characters. It's much more reasonable to
372 simply store an index into a table of composite characters.
374 2) The current implementation only allows for 16,384 separate
375 composite characters over the lifetime of the XEmacs process.
376 This could become a potential problem if the user
377 edited lots of different files that use composite characters.
378 Due to FSF bogosity, increasing the number of allowable
379 composite characters under Mule would decrease the number
380 of possible faces that can exist. Mule already has shrunk
381 this to 2048, and further shrinkage would become uncomfortable.
382 No such problems exist in XEmacs.
384 Composite characters could be represented as 0x80 C1 C2 C3,
385 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
386 for slightly under 2^20 (one million) composite characters
387 over the XEmacs process lifetime, and you only need to
388 increase the size of a Mule character from 19 to 21 bits.
389 Or you could use 0x80 C1 C2 C3 C4, allowing for about
390 85 million (slightly over 2^26) composite characters. */
393 /************************************************************************/
394 /* Basic Emchar functions */
395 /************************************************************************/
397 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
398 string in STR. Returns the number of bytes stored.
399 Do not call this directly. Use the macro set_charptr_emchar() instead.
403 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
418 else if ( c <= 0x7ff )
420 *p++ = (c >> 6) | 0xc0;
421 *p++ = (c & 0x3f) | 0x80;
423 else if ( c <= 0xffff )
425 *p++ = (c >> 12) | 0xe0;
426 *p++ = ((c >> 6) & 0x3f) | 0x80;
427 *p++ = (c & 0x3f) | 0x80;
429 else if ( c <= 0x1fffff )
431 *p++ = (c >> 18) | 0xf0;
432 *p++ = ((c >> 12) & 0x3f) | 0x80;
433 *p++ = ((c >> 6) & 0x3f) | 0x80;
434 *p++ = (c & 0x3f) | 0x80;
436 else if ( c <= 0x3ffffff )
438 *p++ = (c >> 24) | 0xf8;
439 *p++ = ((c >> 18) & 0x3f) | 0x80;
440 *p++ = ((c >> 12) & 0x3f) | 0x80;
441 *p++ = ((c >> 6) & 0x3f) | 0x80;
442 *p++ = (c & 0x3f) | 0x80;
446 *p++ = (c >> 30) | 0xfc;
447 *p++ = ((c >> 24) & 0x3f) | 0x80;
448 *p++ = ((c >> 18) & 0x3f) | 0x80;
449 *p++ = ((c >> 12) & 0x3f) | 0x80;
450 *p++ = ((c >> 6) & 0x3f) | 0x80;
451 *p++ = (c & 0x3f) | 0x80;
454 BREAKUP_CHAR (c, charset, c1, c2);
455 lb = CHAR_LEADING_BYTE (c);
456 if (LEADING_BYTE_PRIVATE_P (lb))
457 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
459 if (EQ (charset, Vcharset_control_1))
468 /* Return the first character from a Mule-encoded string in STR,
469 assuming it's non-ASCII. Do not call this directly.
470 Use the macro charptr_emchar() instead. */
473 non_ascii_charptr_emchar (CONST Bufbyte *str)
486 else if ( b >= 0xf8 )
491 else if ( b >= 0xf0 )
496 else if ( b >= 0xe0 )
501 else if ( b >= 0xc0 )
511 for( ; len > 0; len-- )
514 ch = ( ch << 6 ) | ( b & 0x3f );
518 Bufbyte i0 = *str, i1, i2 = 0;
521 if (i0 == LEADING_BYTE_CONTROL_1)
522 return (Emchar) (*++str - 0x20);
524 if (LEADING_BYTE_PREFIX_P (i0))
529 charset = CHARSET_BY_LEADING_BYTE (i0);
530 if (XCHARSET_DIMENSION (charset) == 2)
533 return MAKE_CHAR (charset, i1, i2);
537 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
538 Do not call this directly. Use the macro valid_char_p() instead. */
542 non_ascii_valid_char_p (Emchar ch)
546 /* Must have only lowest 19 bits set */
550 f1 = CHAR_FIELD1 (ch);
551 f2 = CHAR_FIELD2 (ch);
552 f3 = CHAR_FIELD3 (ch);
558 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
559 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
560 f2 > MAX_CHAR_FIELD2_PRIVATE)
565 if (f3 != 0x20 && f3 != 0x7F)
569 NOTE: This takes advantage of the fact that
570 FIELD2_TO_OFFICIAL_LEADING_BYTE and
571 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
573 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
574 return (XCHARSET_CHARS (charset) == 96);
580 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
581 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
582 f1 > MAX_CHAR_FIELD1_PRIVATE)
584 if (f2 < 0x20 || f3 < 0x20)
587 #ifdef ENABLE_COMPOSITE_CHARS
588 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
590 if (UNBOUNDP (Fgethash (make_int (ch),
591 Vcomposite_char_char2string_hash_table,
596 #endif /* ENABLE_COMPOSITE_CHARS */
598 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
601 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
603 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
606 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
608 return (XCHARSET_CHARS (charset) == 96);
614 /************************************************************************/
615 /* Basic string functions */
616 /************************************************************************/
618 /* Copy the character pointed to by PTR into STR, assuming it's
619 non-ASCII. Do not call this directly. Use the macro
620 charptr_copy_char() instead. */
623 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
625 Bufbyte *strptr = str;
627 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
629 /* Notice fallthrough. */
631 case 6: *++strptr = *ptr++;
632 case 5: *++strptr = *ptr++;
634 case 4: *++strptr = *ptr++;
635 case 3: *++strptr = *ptr++;
636 case 2: *++strptr = *ptr;
641 return strptr + 1 - str;
645 /************************************************************************/
646 /* streams of Emchars */
647 /************************************************************************/
649 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
650 The functions below are not meant to be called directly; use
651 the macros in insdel.h. */
654 Lstream_get_emchar_1 (Lstream *stream, int ch)
656 Bufbyte str[MAX_EMCHAR_LEN];
657 Bufbyte *strptr = str;
659 str[0] = (Bufbyte) ch;
660 switch (REP_BYTES_BY_FIRST_BYTE (ch))
662 /* Notice fallthrough. */
665 ch = Lstream_getc (stream);
667 *++strptr = (Bufbyte) ch;
669 ch = Lstream_getc (stream);
671 *++strptr = (Bufbyte) ch;
674 ch = Lstream_getc (stream);
676 *++strptr = (Bufbyte) ch;
678 ch = Lstream_getc (stream);
680 *++strptr = (Bufbyte) ch;
682 ch = Lstream_getc (stream);
684 *++strptr = (Bufbyte) ch;
689 return charptr_emchar (str);
693 Lstream_fput_emchar (Lstream *stream, Emchar ch)
695 Bufbyte str[MAX_EMCHAR_LEN];
696 Bytecount len = set_charptr_emchar (str, ch);
697 return Lstream_write (stream, str, len);
701 Lstream_funget_emchar (Lstream *stream, Emchar ch)
703 Bufbyte str[MAX_EMCHAR_LEN];
704 Bytecount len = set_charptr_emchar (str, ch);
705 Lstream_unread (stream, str, len);
709 /************************************************************************/
711 /************************************************************************/
714 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
716 struct Lisp_Charset *cs = XCHARSET (obj);
718 markobj (cs->short_name);
719 markobj (cs->long_name);
720 markobj (cs->doc_string);
721 markobj (cs->registry);
722 markobj (cs->ccl_program);
724 markobj (cs->decoding_table);
725 markobj (cs->encoding_table);
731 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
733 struct Lisp_Charset *cs = XCHARSET (obj);
737 error ("printing unreadable object #<charset %s 0x%x>",
738 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
741 write_c_string ("#<charset ", printcharfun);
742 print_internal (CHARSET_NAME (cs), printcharfun, 0);
743 write_c_string (" ", printcharfun);
744 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
749 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
750 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
751 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
752 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
767 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
772 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
773 mark_charset, print_charset, 0, 0, 0,
775 struct Lisp_Charset);
776 /* Make a new charset. */
779 make_charset (Charset_ID id, Lisp_Object name,
780 unsigned char type, unsigned char columns, unsigned char graphic,
781 Bufbyte final, unsigned char direction, Lisp_Object short_name,
782 Lisp_Object long_name, Lisp_Object doc,
784 Lisp_Object decoding_table,
785 Emchar ucs_min, Emchar ucs_max,
786 Emchar code_offset, unsigned char byte_offset)
789 struct Lisp_Charset *cs =
790 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
791 XSETCHARSET (obj, cs);
793 CHARSET_ID (cs) = id;
794 CHARSET_NAME (cs) = name;
795 CHARSET_SHORT_NAME (cs) = short_name;
796 CHARSET_LONG_NAME (cs) = long_name;
797 CHARSET_DIRECTION (cs) = direction;
798 CHARSET_TYPE (cs) = type;
799 CHARSET_COLUMNS (cs) = columns;
800 CHARSET_GRAPHIC (cs) = graphic;
801 CHARSET_FINAL (cs) = final;
802 CHARSET_DOC_STRING (cs) = doc;
803 CHARSET_REGISTRY (cs) = reg;
804 CHARSET_CCL_PROGRAM (cs) = Qnil;
805 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
807 CHARSET_DECODING_TABLE(cs) = decoding_table;
808 CHARSET_UCS_MIN(cs) = ucs_min;
809 CHARSET_UCS_MAX(cs) = ucs_max;
810 CHARSET_CODE_OFFSET(cs) = code_offset;
811 CHARSET_BYTE_OFFSET(cs) = byte_offset;
814 switch ( CHARSET_TYPE (cs) )
816 case CHARSET_TYPE_94:
817 CHARSET_DIMENSION (cs) = 1;
818 CHARSET_CHARS (cs) = 94;
820 if (!EQ (decoding_table, Qnil))
823 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
824 for (i = 0; i < 94; i++)
826 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
829 put_char_code_table (XCHAR (c), make_int (i + 33),
830 CHARSET_ENCODING_TABLE(cs));
834 CHARSET_ENCODING_TABLE(cs) = Qnil;
837 case CHARSET_TYPE_96:
838 CHARSET_DIMENSION (cs) = 1;
839 CHARSET_CHARS (cs) = 96;
841 if (!EQ (decoding_table, Qnil))
844 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
845 for (i = 0; i < 96; i++)
847 Lisp_Object c = XVECTOR_DATA(decoding_table)[i];
850 put_char_code_table (XCHAR (c), make_int (i + 32),
851 CHARSET_ENCODING_TABLE(cs));
855 CHARSET_ENCODING_TABLE(cs) = Qnil;
858 case CHARSET_TYPE_94X94:
859 CHARSET_DIMENSION (cs) = 2;
860 CHARSET_CHARS (cs) = 94;
862 CHARSET_ENCODING_TABLE(cs) = Qnil;
865 case CHARSET_TYPE_96X96:
866 CHARSET_DIMENSION (cs) = 2;
867 CHARSET_CHARS (cs) = 96;
869 CHARSET_ENCODING_TABLE(cs) = Qnil;
873 case CHARSET_TYPE_128X128:
874 CHARSET_DIMENSION (cs) = 2;
875 CHARSET_CHARS (cs) = 128;
877 CHARSET_ENCODING_TABLE(cs) = Qnil;
880 case CHARSET_TYPE_256X256:
881 CHARSET_DIMENSION (cs) = 2;
882 CHARSET_CHARS (cs) = 256;
884 CHARSET_ENCODING_TABLE(cs) = Qnil;
891 if (id == LEADING_BYTE_ASCII)
892 CHARSET_REP_BYTES (cs) = 1;
894 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
896 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
901 /* some charsets do not have final characters. This includes
902 ASCII, Control-1, Composite, and the two faux private
905 if (code_offset == 0)
907 assert (NILP (charset_by_attributes[type][final]));
908 charset_by_attributes[type][final] = obj;
911 assert (NILP (charset_by_attributes[type][final][direction]));
912 charset_by_attributes[type][final][direction] = obj;
916 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
917 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
920 /* official leading byte */
921 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
924 /* Some charsets are "faux" and don't have names or really exist at
925 all except in the leading-byte table. */
927 Fputhash (name, obj, Vcharset_hash_table);
932 get_unallocated_leading_byte (int dimension)
938 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
941 lb = next_allocated_1_byte_leading_byte++;
945 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
948 lb = next_allocated_2_byte_leading_byte++;
953 ("No more character sets free for this dimension",
954 make_int (dimension));
961 charset_get_byte1 (Lisp_Object charset, Emchar ch)
966 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
968 Lisp_Object value = get_char_code_table (ch, table);
972 Emchar code = XINT (value);
976 else if (code < (1 << 16))
978 else if (code < (1 << 24))
984 if ((XCHARSET_UCS_MIN (charset) <= ch)
985 && (ch <= XCHARSET_UCS_MAX (charset)))
986 return (ch - XCHARSET_UCS_MIN (charset)
987 + XCHARSET_CODE_OFFSET (charset))
988 / (XCHARSET_DIMENSION (charset) == 1 ?
991 XCHARSET_DIMENSION (charset) == 2 ?
992 XCHARSET_CHARS (charset)
994 XCHARSET_DIMENSION (charset) == 3 ?
995 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)
997 XCHARSET_CHARS (charset)
998 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
999 + XCHARSET_BYTE_OFFSET (charset);
1000 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1002 if (XCHARSET_DIMENSION (charset) == 1)
1004 if (XCHARSET_CHARS (charset) == 94)
1006 if (((d = ch - (MIN_CHAR_94
1007 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1011 else if (XCHARSET_CHARS (charset) == 96)
1013 if (((d = ch - (MIN_CHAR_96
1014 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1021 else if (XCHARSET_DIMENSION (charset) == 2)
1023 if (XCHARSET_CHARS (charset) == 94)
1025 if (((d = ch - (MIN_CHAR_94x94
1026 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1029 return (d / 94) + 33;
1031 else if (XCHARSET_CHARS (charset) == 96)
1033 if (((d = ch - (MIN_CHAR_96x96
1034 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1037 return (d / 96) + 32;
1045 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1047 if (XCHARSET_DIMENSION (charset) == 1)
1053 if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
1055 Lisp_Object value = get_char_code_table (ch, table);
1059 Emchar code = XINT (value);
1061 if (code < (1 << 16))
1062 return (unsigned char)code;
1063 else if (code < (1 << 24))
1064 return (unsigned char)(code >> 16);
1066 return (unsigned char)(code >> 24);
1069 if ((XCHARSET_UCS_MIN (charset) <= ch)
1070 && (ch <= XCHARSET_UCS_MAX (charset)))
1071 return ((ch - XCHARSET_UCS_MIN (charset)
1072 + XCHARSET_CODE_OFFSET (charset))
1073 / (XCHARSET_DIMENSION (charset) == 2 ?
1076 XCHARSET_DIMENSION (charset) == 3 ?
1077 XCHARSET_CHARS (charset)
1079 XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)))
1080 % XCHARSET_CHARS (charset)
1081 + XCHARSET_BYTE_OFFSET (charset);
1082 else if (XCHARSET_CHARS (charset) == 94)
1083 return (MIN_CHAR_94x94
1084 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1085 && (ch < MIN_CHAR_94x94
1086 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1087 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1088 else /* if (XCHARSET_CHARS (charset) == 96) */
1089 return (MIN_CHAR_96x96
1090 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1091 && (ch < MIN_CHAR_96x96
1092 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1093 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1097 Lisp_Object Vdefault_coded_charset_priority_list;
1101 /************************************************************************/
1102 /* Basic charset Lisp functions */
1103 /************************************************************************/
1105 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1106 Return non-nil if OBJECT is a charset.
1110 return CHARSETP (object) ? Qt : Qnil;
1113 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1114 Retrieve the charset of the given name.
1115 If CHARSET-OR-NAME is a charset object, it is simply returned.
1116 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1117 nil is returned. Otherwise the associated charset object is returned.
1121 if (CHARSETP (charset_or_name))
1122 return charset_or_name;
1124 CHECK_SYMBOL (charset_or_name);
1125 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1128 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1129 Retrieve the charset of the given name.
1130 Same as `find-charset' except an error is signalled if there is no such
1131 charset instead of returning nil.
1135 Lisp_Object charset = Ffind_charset (name);
1138 signal_simple_error ("No such charset", name);
1142 /* We store the charsets in hash tables with the names as the key and the
1143 actual charset object as the value. Occasionally we need to use them
1144 in a list format. These routines provide us with that. */
1145 struct charset_list_closure
1147 Lisp_Object *charset_list;
1151 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1152 void *charset_list_closure)
1154 /* This function can GC */
1155 struct charset_list_closure *chcl =
1156 (struct charset_list_closure*) charset_list_closure;
1157 Lisp_Object *charset_list = chcl->charset_list;
1159 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1163 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1164 Return a list of the names of all defined charsets.
1168 Lisp_Object charset_list = Qnil;
1169 struct gcpro gcpro1;
1170 struct charset_list_closure charset_list_closure;
1172 GCPRO1 (charset_list);
1173 charset_list_closure.charset_list = &charset_list;
1174 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1175 &charset_list_closure);
1178 return charset_list;
1181 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1182 Return the name of the given charset.
1186 return XCHARSET_NAME (Fget_charset (charset));
1189 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1190 Define a new character set.
1191 This function is for use with Mule support.
1192 NAME is a symbol, the name by which the character set is normally referred.
1193 DOC-STRING is a string describing the character set.
1194 PROPS is a property list, describing the specific nature of the
1195 character set. Recognized properties are:
1197 'short-name Short version of the charset name (ex: Latin-1)
1198 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1199 'registry A regular expression matching the font registry field for
1201 'dimension Number of octets used to index a character in this charset.
1202 Either 1 or 2. Defaults to 1.
1203 'columns Number of columns used to display a character in this charset.
1204 Only used in TTY mode. (Under X, the actual width of a
1205 character can be derived from the font used to display the
1206 characters.) If unspecified, defaults to the dimension
1207 (this is almost always the correct value).
1208 'chars Number of characters in each dimension (94 or 96).
1209 Defaults to 94. Note that if the dimension is 2, the
1210 character set thus described is 94x94 or 96x96.
1211 'final Final byte of ISO 2022 escape sequence. Must be
1212 supplied. Each combination of (DIMENSION, CHARS) defines a
1213 separate namespace for final bytes. Note that ISO
1214 2022 restricts the final byte to the range
1215 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1216 dimension == 2. Note also that final bytes in the range
1217 0x30 - 0x3F are reserved for user-defined (not official)
1219 'graphic 0 (use left half of font on output) or 1 (use right half
1220 of font on output). Defaults to 0. For example, for
1221 a font whose registry is ISO8859-1, the left half
1222 (octets 0x20 - 0x7F) is the `ascii' character set, while
1223 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1224 character set. With 'graphic set to 0, the octets
1225 will have their high bit cleared; with it set to 1,
1226 the octets will have their high bit set.
1227 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1229 'ccl-program A compiled CCL program used to convert a character in
1230 this charset into an index into the font. This is in
1231 addition to the 'graphic property. The CCL program
1232 is passed the octets of the character, with the high
1233 bit cleared and set depending upon whether the value
1234 of the 'graphic property is 0 or 1.
1236 (name, doc_string, props))
1238 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1239 int direction = CHARSET_LEFT_TO_RIGHT;
1241 Lisp_Object registry = Qnil;
1242 Lisp_Object charset;
1243 Lisp_Object rest, keyword, value;
1244 Lisp_Object ccl_program = Qnil;
1245 Lisp_Object short_name = Qnil, long_name = Qnil;
1247 CHECK_SYMBOL (name);
1248 if (!NILP (doc_string))
1249 CHECK_STRING (doc_string);
1251 charset = Ffind_charset (name);
1252 if (!NILP (charset))
1253 signal_simple_error ("Cannot redefine existing charset", name);
1255 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1257 if (EQ (keyword, Qshort_name))
1259 CHECK_STRING (value);
1263 if (EQ (keyword, Qlong_name))
1265 CHECK_STRING (value);
1269 else if (EQ (keyword, Qdimension))
1272 dimension = XINT (value);
1273 if (dimension < 1 || dimension > 2)
1274 signal_simple_error ("Invalid value for 'dimension", value);
1277 else if (EQ (keyword, Qchars))
1280 chars = XINT (value);
1281 if (chars != 94 && chars != 96)
1282 signal_simple_error ("Invalid value for 'chars", value);
1285 else if (EQ (keyword, Qcolumns))
1288 columns = XINT (value);
1289 if (columns != 1 && columns != 2)
1290 signal_simple_error ("Invalid value for 'columns", value);
1293 else if (EQ (keyword, Qgraphic))
1296 graphic = XINT (value);
1297 if (graphic < 0 || graphic > 1)
1298 signal_simple_error ("Invalid value for 'graphic", value);
1301 else if (EQ (keyword, Qregistry))
1303 CHECK_STRING (value);
1307 else if (EQ (keyword, Qdirection))
1309 if (EQ (value, Ql2r))
1310 direction = CHARSET_LEFT_TO_RIGHT;
1311 else if (EQ (value, Qr2l))
1312 direction = CHARSET_RIGHT_TO_LEFT;
1314 signal_simple_error ("Invalid value for 'direction", value);
1317 else if (EQ (keyword, Qfinal))
1319 CHECK_CHAR_COERCE_INT (value);
1320 final = XCHAR (value);
1321 if (final < '0' || final > '~')
1322 signal_simple_error ("Invalid value for 'final", value);
1325 else if (EQ (keyword, Qccl_program))
1327 CHECK_VECTOR (value);
1328 ccl_program = value;
1332 signal_simple_error ("Unrecognized property", keyword);
1336 error ("'final must be specified");
1337 if (dimension == 2 && final > 0x5F)
1339 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1343 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1345 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1347 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1348 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1350 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1357 /* id = CHARSET_ID_OFFSET_94 + final; */
1358 id = get_unallocated_leading_byte (dimension);
1360 else if (chars == 96)
1362 id = get_unallocated_leading_byte (dimension);
1369 else if (dimension == 2)
1373 id = get_unallocated_leading_byte (dimension);
1375 else if (chars == 96)
1377 id = get_unallocated_leading_byte (dimension);
1389 id = get_unallocated_leading_byte (dimension);
1392 if (NILP (doc_string))
1393 doc_string = build_string ("");
1395 if (NILP (registry))
1396 registry = build_string ("");
1398 if (NILP (short_name))
1399 XSETSTRING (short_name, XSYMBOL (name)->name);
1401 if (NILP (long_name))
1402 long_name = doc_string;
1405 columns = dimension;
1406 charset = make_charset (id, name, type, columns, graphic,
1407 final, direction, short_name, long_name,
1408 doc_string, registry,
1410 if (!NILP (ccl_program))
1411 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1415 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1417 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1418 NEW-NAME is the name of the new charset. Return the new charset.
1420 (charset, new_name))
1422 Lisp_Object new_charset = Qnil;
1423 int id, dimension, columns, graphic, final;
1424 int direction, type;
1425 Lisp_Object registry, doc_string, short_name, long_name;
1426 struct Lisp_Charset *cs;
1428 charset = Fget_charset (charset);
1429 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1430 signal_simple_error ("Charset already has reverse-direction charset",
1433 CHECK_SYMBOL (new_name);
1434 if (!NILP (Ffind_charset (new_name)))
1435 signal_simple_error ("Cannot redefine existing charset", new_name);
1437 cs = XCHARSET (charset);
1439 type = CHARSET_TYPE (cs);
1440 columns = CHARSET_COLUMNS (cs);
1441 dimension = CHARSET_DIMENSION (cs);
1442 id = get_unallocated_leading_byte (dimension);
1444 graphic = CHARSET_GRAPHIC (cs);
1445 final = CHARSET_FINAL (cs);
1446 direction = CHARSET_RIGHT_TO_LEFT;
1447 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1448 direction = CHARSET_LEFT_TO_RIGHT;
1449 doc_string = CHARSET_DOC_STRING (cs);
1450 short_name = CHARSET_SHORT_NAME (cs);
1451 long_name = CHARSET_LONG_NAME (cs);
1452 registry = CHARSET_REGISTRY (cs);
1454 new_charset = make_charset (id, new_name, type, columns,
1455 graphic, final, direction, short_name, long_name,
1456 doc_string, registry,
1458 CHARSET_DECODING_TABLE(cs),
1459 CHARSET_UCS_MIN(cs),
1460 CHARSET_UCS_MAX(cs),
1461 CHARSET_CODE_OFFSET(cs),
1462 CHARSET_BYTE_OFFSET(cs)
1468 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1469 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1474 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1475 Define symbol ALIAS as an alias for CHARSET.
1479 CHECK_SYMBOL (alias);
1480 charset = Fget_charset (charset);
1481 return Fputhash (alias, charset, Vcharset_hash_table);
1484 /* #### Reverse direction charsets not yet implemented. */
1486 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1488 Return the reverse-direction charset parallel to CHARSET, if any.
1489 This is the charset with the same properties (in particular, the same
1490 dimension, number of characters per dimension, and final byte) as
1491 CHARSET but whose characters are displayed in the opposite direction.
1495 charset = Fget_charset (charset);
1496 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1500 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1501 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1502 If DIRECTION is omitted, both directions will be checked (left-to-right
1503 will be returned if character sets exist for both directions).
1505 (dimension, chars, final, direction))
1507 int dm, ch, fi, di = -1;
1509 Lisp_Object obj = Qnil;
1511 CHECK_INT (dimension);
1512 dm = XINT (dimension);
1513 if (dm < 1 || dm > 2)
1514 signal_simple_error ("Invalid value for DIMENSION", dimension);
1518 if (ch != 94 && ch != 96)
1519 signal_simple_error ("Invalid value for CHARS", chars);
1521 CHECK_CHAR_COERCE_INT (final);
1523 if (fi < '0' || fi > '~')
1524 signal_simple_error ("Invalid value for FINAL", final);
1526 if (EQ (direction, Ql2r))
1527 di = CHARSET_LEFT_TO_RIGHT;
1528 else if (EQ (direction, Qr2l))
1529 di = CHARSET_RIGHT_TO_LEFT;
1530 else if (!NILP (direction))
1531 signal_simple_error ("Invalid value for DIRECTION", direction);
1533 if (dm == 2 && fi > 0x5F)
1535 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1538 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1540 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1544 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1546 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1549 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1552 return XCHARSET_NAME (obj);
1556 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1557 Return short name of CHARSET.
1561 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1564 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1565 Return long name of CHARSET.
1569 return XCHARSET_LONG_NAME (Fget_charset (charset));
1572 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1573 Return description of CHARSET.
1577 return XCHARSET_DOC_STRING (Fget_charset (charset));
1580 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1581 Return dimension of CHARSET.
1585 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1588 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1589 Return property PROP of CHARSET.
1590 Recognized properties are those listed in `make-charset', as well as
1591 'name and 'doc-string.
1595 struct Lisp_Charset *cs;
1597 charset = Fget_charset (charset);
1598 cs = XCHARSET (charset);
1600 CHECK_SYMBOL (prop);
1601 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1602 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1603 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1604 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1605 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1606 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1607 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1608 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1609 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1610 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1611 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1612 if (EQ (prop, Qdirection))
1613 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1614 if (EQ (prop, Qreverse_direction_charset))
1616 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1620 return XCHARSET_NAME (obj);
1622 signal_simple_error ("Unrecognized charset property name", prop);
1623 return Qnil; /* not reached */
1626 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1627 Return charset identification number of CHARSET.
1631 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1634 /* #### We need to figure out which properties we really want to
1637 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1638 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1640 (charset, ccl_program))
1642 charset = Fget_charset (charset);
1643 CHECK_VECTOR (ccl_program);
1644 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1649 invalidate_charset_font_caches (Lisp_Object charset)
1651 /* Invalidate font cache entries for charset on all devices. */
1652 Lisp_Object devcons, concons, hash_table;
1653 DEVICE_LOOP_NO_BREAK (devcons, concons)
1655 struct device *d = XDEVICE (XCAR (devcons));
1656 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1657 if (!UNBOUNDP (hash_table))
1658 Fclrhash (hash_table);
1662 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1663 Set the 'registry property of CHARSET to REGISTRY.
1665 (charset, registry))
1667 charset = Fget_charset (charset);
1668 CHECK_STRING (registry);
1669 XCHARSET_REGISTRY (charset) = registry;
1670 invalidate_charset_font_caches (charset);
1671 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1676 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1677 Return mapping-table of CHARSET.
1681 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1684 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1685 Set mapping-table of CHARSET to TABLE.
1689 struct Lisp_Charset *cs;
1690 Lisp_Object old_table;
1692 charset = Fget_charset (charset);
1693 CHECK_VECTOR (table);
1695 cs = XCHARSET (charset);
1696 CHARSET_DECODING_TABLE(cs) = table;
1697 old_table = CHARSET_ENCODING_TABLE(cs);
1698 switch (CHARSET_TYPE (cs))
1700 case CHARSET_TYPE_94:
1701 if (!EQ (table, Qnil))
1704 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1705 for (i = 0; i < 94; i++)
1707 Lisp_Object c = XVECTOR_DATA(table)[i];
1710 put_char_code_table (XCHAR (c), make_int (i + 33),
1711 CHARSET_ENCODING_TABLE(cs));
1715 CHARSET_ENCODING_TABLE(cs) = Qnil;
1717 case CHARSET_TYPE_96:
1718 if (!EQ (table, Qnil))
1721 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1722 for (i = 0; i < 96; i++)
1724 Lisp_Object c = XVECTOR_DATA(table)[i];
1727 put_char_code_table (XCHAR (c), make_int (i + 32),
1728 CHARSET_ENCODING_TABLE(cs));
1732 CHARSET_ENCODING_TABLE(cs) = Qnil;
1734 case CHARSET_TYPE_94X94:
1735 if (!EQ (table, Qnil))
1739 CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
1740 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1742 Lisp_Object v = XVECTOR_DATA(table)[i];
1748 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1750 Lisp_Object c = XVECTOR_DATA(v)[j];
1753 put_char_code_table (XCHAR (c),
1754 make_int (( (i + 33) << 8)
1756 CHARSET_ENCODING_TABLE(cs));
1760 put_char_code_table (XCHAR (v), make_int (i + 33),
1761 CHARSET_ENCODING_TABLE(cs));
1765 CHARSET_ENCODING_TABLE(cs) = Qnil;
1767 case CHARSET_TYPE_96X96:
1768 CHARSET_ENCODING_TABLE(cs) = Qnil;
1770 case CHARSET_TYPE_128X128:
1771 CHARSET_DIMENSION (cs) = 2;
1772 CHARSET_CHARS (cs) = 128;
1773 CHARSET_ENCODING_TABLE(cs) = Qnil;
1775 case CHARSET_TYPE_256X256:
1776 CHARSET_DIMENSION (cs) = 2;
1777 CHARSET_CHARS (cs) = 256;
1778 CHARSET_ENCODING_TABLE(cs) = Qnil;
1786 /************************************************************************/
1787 /* Lisp primitives for working with characters */
1788 /************************************************************************/
1790 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1791 Make a character from CHARSET and octets ARG1 and ARG2.
1792 ARG2 is required only for characters from two-dimensional charsets.
1793 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1794 character s with caron.
1796 (charset, arg1, arg2))
1798 struct Lisp_Charset *cs;
1800 int lowlim, highlim;
1802 charset = Fget_charset (charset);
1803 cs = XCHARSET (charset);
1805 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1806 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1808 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1810 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1811 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1814 /* It is useful (and safe, according to Olivier Galibert) to strip
1815 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1816 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1817 Latin 2 code of the character. */
1825 if (a1 < lowlim || a1 > highlim)
1826 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1828 if (CHARSET_DIMENSION (cs) == 1)
1832 ("Charset is of dimension one; second octet must be nil", arg2);
1833 return make_char (MAKE_CHAR (charset, a1, 0));
1842 a2 = XINT (arg2) & 0x7f;
1844 if (a2 < lowlim || a2 > highlim)
1845 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1847 return make_char (MAKE_CHAR (charset, a1, a2));
1850 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1851 Return the character set of char CH.
1855 CHECK_CHAR_COERCE_INT (ch);
1857 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1860 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1861 Return list of charset and one or two position-codes of CHAR.
1865 /* This function can GC */
1866 struct gcpro gcpro1, gcpro2;
1867 Lisp_Object charset = Qnil;
1868 Lisp_Object rc = Qnil;
1871 GCPRO2 (charset, rc);
1872 CHECK_CHAR_COERCE_INT (character);
1874 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1876 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1878 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1882 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1890 #ifdef ENABLE_COMPOSITE_CHARS
1891 /************************************************************************/
1892 /* composite character functions */
1893 /************************************************************************/
1896 lookup_composite_char (Bufbyte *str, int len)
1898 Lisp_Object lispstr = make_string (str, len);
1899 Lisp_Object ch = Fgethash (lispstr,
1900 Vcomposite_char_string2char_hash_table,
1906 if (composite_char_row_next >= 128)
1907 signal_simple_error ("No more composite chars available", lispstr);
1908 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1909 composite_char_col_next);
1910 Fputhash (make_char (emch), lispstr,
1911 Vcomposite_char_char2string_hash_table);
1912 Fputhash (lispstr, make_char (emch),
1913 Vcomposite_char_string2char_hash_table);
1914 composite_char_col_next++;
1915 if (composite_char_col_next >= 128)
1917 composite_char_col_next = 32;
1918 composite_char_row_next++;
1927 composite_char_string (Emchar ch)
1929 Lisp_Object str = Fgethash (make_char (ch),
1930 Vcomposite_char_char2string_hash_table,
1932 assert (!UNBOUNDP (str));
1936 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1937 Convert a string into a single composite character.
1938 The character is the result of overstriking all the characters in
1943 CHECK_STRING (string);
1944 return make_char (lookup_composite_char (XSTRING_DATA (string),
1945 XSTRING_LENGTH (string)));
1948 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1949 Return a string of the characters comprising a composite character.
1957 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1958 signal_simple_error ("Must be composite char", ch);
1959 return composite_char_string (emch);
1961 #endif /* ENABLE_COMPOSITE_CHARS */
1964 /************************************************************************/
1965 /* initialization */
1966 /************************************************************************/
1969 syms_of_mule_charset (void)
1971 DEFSUBR (Fcharsetp);
1972 DEFSUBR (Ffind_charset);
1973 DEFSUBR (Fget_charset);
1974 DEFSUBR (Fcharset_list);
1975 DEFSUBR (Fcharset_name);
1976 DEFSUBR (Fmake_charset);
1977 DEFSUBR (Fmake_reverse_direction_charset);
1978 /* DEFSUBR (Freverse_direction_charset); */
1979 DEFSUBR (Fdefine_charset_alias);
1980 DEFSUBR (Fcharset_from_attributes);
1981 DEFSUBR (Fcharset_short_name);
1982 DEFSUBR (Fcharset_long_name);
1983 DEFSUBR (Fcharset_description);
1984 DEFSUBR (Fcharset_dimension);
1985 DEFSUBR (Fcharset_property);
1986 DEFSUBR (Fcharset_id);
1987 DEFSUBR (Fset_charset_ccl_program);
1988 DEFSUBR (Fset_charset_registry);
1990 DEFSUBR (Fcharset_mapping_table);
1991 DEFSUBR (Fset_charset_mapping_table);
1994 DEFSUBR (Fmake_char);
1995 DEFSUBR (Fchar_charset);
1996 DEFSUBR (Fsplit_char);
1998 #ifdef ENABLE_COMPOSITE_CHARS
1999 DEFSUBR (Fmake_composite_char);
2000 DEFSUBR (Fcomposite_char_string);
2003 defsymbol (&Qcharsetp, "charsetp");
2004 defsymbol (&Qregistry, "registry");
2005 defsymbol (&Qfinal, "final");
2006 defsymbol (&Qgraphic, "graphic");
2007 defsymbol (&Qdirection, "direction");
2008 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2009 defsymbol (&Qshort_name, "short-name");
2010 defsymbol (&Qlong_name, "long-name");
2012 defsymbol (&Ql2r, "l2r");
2013 defsymbol (&Qr2l, "r2l");
2015 /* Charsets, compatible with FSF 20.3
2016 Naming convention is Script-Charset[-Edition] */
2017 defsymbol (&Qascii, "ascii");
2018 defsymbol (&Qcontrol_1, "control-1");
2019 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2020 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2021 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2022 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2023 defsymbol (&Qthai_tis620, "thai-tis620");
2024 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2025 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2026 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2027 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2028 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2029 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2030 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2031 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2032 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2033 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2034 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2035 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2036 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2037 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2039 defsymbol (&Qucs_bmp, "ucs-bmp");
2040 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2041 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2042 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2043 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2044 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2045 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2047 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2048 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2050 defsymbol (&Qcomposite, "composite");
2054 vars_of_mule_charset (void)
2061 /* Table of charsets indexed by leading byte. */
2062 for (i = 0; i < countof (charset_by_leading_byte); i++)
2063 charset_by_leading_byte[i] = Qnil;
2066 /* Table of charsets indexed by type/final-byte. */
2067 for (i = 0; i < countof (charset_by_attributes); i++)
2068 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2069 charset_by_attributes[i][j] = Qnil;
2071 /* Table of charsets indexed by type/final-byte/direction. */
2072 for (i = 0; i < countof (charset_by_attributes); i++)
2073 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2074 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2075 charset_by_attributes[i][j][k] = Qnil;
2078 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2080 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2082 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2086 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2087 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2088 Leading-code of private TYPE9N charset of column-width 1.
2090 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2094 Vutf_2000_version = build_string("0.8 (Kami)");
2095 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2096 Version number of UTF-2000.
2099 Vdefault_coded_charset_priority_list = Qnil;
2100 DEFVAR_LISP ("default-coded-charset-priority-list",
2101 &Vdefault_coded_charset_priority_list /*
2102 Default order of preferred coded-character-set.
2108 complex_vars_of_mule_charset (void)
2110 staticpro (&Vcharset_hash_table);
2111 Vcharset_hash_table =
2112 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2114 /* Predefined character sets. We store them into variables for
2119 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2120 CHARSET_TYPE_256X256, 1, 0, 0,
2121 CHARSET_LEFT_TO_RIGHT,
2122 build_string ("BMP"),
2123 build_string ("BMP"),
2124 build_string ("BMP"),
2126 Qnil, 0, 0xFFFF, 0, 0);
2128 # define MIN_CHAR_THAI 0
2129 # define MAX_CHAR_THAI 0
2130 # define MIN_CHAR_GREEK 0
2131 # define MAX_CHAR_GREEK 0
2132 # define MIN_CHAR_HEBREW 0
2133 # define MAX_CHAR_HEBREW 0
2134 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2135 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2136 # define MIN_CHAR_CYRILLIC 0
2137 # define MAX_CHAR_CYRILLIC 0
2140 make_charset (LEADING_BYTE_ASCII, Qascii,
2141 CHARSET_TYPE_94, 1, 0, 'B',
2142 CHARSET_LEFT_TO_RIGHT,
2143 build_string ("ASCII"),
2144 build_string ("ASCII)"),
2145 build_string ("ASCII (ISO646 IRV)"),
2146 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2147 Qnil, 0, 0x7F, 0, 0);
2148 Vcharset_control_1 =
2149 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2150 CHARSET_TYPE_94, 1, 1, 0,
2151 CHARSET_LEFT_TO_RIGHT,
2152 build_string ("C1"),
2153 build_string ("Control characters"),
2154 build_string ("Control characters 128-191"),
2156 Qnil, 0x80, 0x9F, 0, 0);
2157 Vcharset_latin_iso8859_1 =
2158 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2159 CHARSET_TYPE_96, 1, 1, 'A',
2160 CHARSET_LEFT_TO_RIGHT,
2161 build_string ("Latin-1"),
2162 build_string ("ISO8859-1 (Latin-1)"),
2163 build_string ("ISO8859-1 (Latin-1)"),
2164 build_string ("iso8859-1"),
2165 Qnil, 0xA0, 0xFF, 0, 32);
2166 Vcharset_latin_iso8859_2 =
2167 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2168 CHARSET_TYPE_96, 1, 1, 'B',
2169 CHARSET_LEFT_TO_RIGHT,
2170 build_string ("Latin-2"),
2171 build_string ("ISO8859-2 (Latin-2)"),
2172 build_string ("ISO8859-2 (Latin-2)"),
2173 build_string ("iso8859-2"),
2175 Vcharset_latin_iso8859_3 =
2176 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2177 CHARSET_TYPE_96, 1, 1, 'C',
2178 CHARSET_LEFT_TO_RIGHT,
2179 build_string ("Latin-3"),
2180 build_string ("ISO8859-3 (Latin-3)"),
2181 build_string ("ISO8859-3 (Latin-3)"),
2182 build_string ("iso8859-3"),
2184 Vcharset_latin_iso8859_4 =
2185 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2186 CHARSET_TYPE_96, 1, 1, 'D',
2187 CHARSET_LEFT_TO_RIGHT,
2188 build_string ("Latin-4"),
2189 build_string ("ISO8859-4 (Latin-4)"),
2190 build_string ("ISO8859-4 (Latin-4)"),
2191 build_string ("iso8859-4"),
2193 Vcharset_thai_tis620 =
2194 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2195 CHARSET_TYPE_96, 1, 1, 'T',
2196 CHARSET_LEFT_TO_RIGHT,
2197 build_string ("TIS620"),
2198 build_string ("TIS620 (Thai)"),
2199 build_string ("TIS620.2529 (Thai)"),
2200 build_string ("tis620"),
2201 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2202 Vcharset_greek_iso8859_7 =
2203 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2204 CHARSET_TYPE_96, 1, 1, 'F',
2205 CHARSET_LEFT_TO_RIGHT,
2206 build_string ("ISO8859-7"),
2207 build_string ("ISO8859-7 (Greek)"),
2208 build_string ("ISO8859-7 (Greek)"),
2209 build_string ("iso8859-7"),
2210 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2211 Vcharset_arabic_iso8859_6 =
2212 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2213 CHARSET_TYPE_96, 1, 1, 'G',
2214 CHARSET_RIGHT_TO_LEFT,
2215 build_string ("ISO8859-6"),
2216 build_string ("ISO8859-6 (Arabic)"),
2217 build_string ("ISO8859-6 (Arabic)"),
2218 build_string ("iso8859-6"),
2220 Vcharset_hebrew_iso8859_8 =
2221 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2222 CHARSET_TYPE_96, 1, 1, 'H',
2223 CHARSET_RIGHT_TO_LEFT,
2224 build_string ("ISO8859-8"),
2225 build_string ("ISO8859-8 (Hebrew)"),
2226 build_string ("ISO8859-8 (Hebrew)"),
2227 build_string ("iso8859-8"),
2228 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2229 Vcharset_katakana_jisx0201 =
2230 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2231 CHARSET_TYPE_94, 1, 1, 'I',
2232 CHARSET_LEFT_TO_RIGHT,
2233 build_string ("JISX0201 Kana"),
2234 build_string ("JISX0201.1976 (Japanese Kana)"),
2235 build_string ("JISX0201.1976 Japanese Kana"),
2236 build_string ("jisx0201\\.1976"),
2238 MIN_CHAR_HALFWIDTH_KATAKANA,
2239 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2240 Vcharset_latin_jisx0201 =
2241 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2242 CHARSET_TYPE_94, 1, 0, 'J',
2243 CHARSET_LEFT_TO_RIGHT,
2244 build_string ("JISX0201 Roman"),
2245 build_string ("JISX0201.1976 (Japanese Roman)"),
2246 build_string ("JISX0201.1976 Japanese Roman"),
2247 build_string ("jisx0201\\.1976"),
2249 Vcharset_cyrillic_iso8859_5 =
2250 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2251 CHARSET_TYPE_96, 1, 1, 'L',
2252 CHARSET_LEFT_TO_RIGHT,
2253 build_string ("ISO8859-5"),
2254 build_string ("ISO8859-5 (Cyrillic)"),
2255 build_string ("ISO8859-5 (Cyrillic)"),
2256 build_string ("iso8859-5"),
2257 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2258 Vcharset_latin_iso8859_9 =
2259 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2260 CHARSET_TYPE_96, 1, 1, 'M',
2261 CHARSET_LEFT_TO_RIGHT,
2262 build_string ("Latin-5"),
2263 build_string ("ISO8859-9 (Latin-5)"),
2264 build_string ("ISO8859-9 (Latin-5)"),
2265 build_string ("iso8859-9"),
2267 Vcharset_japanese_jisx0208_1978 =
2268 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2269 CHARSET_TYPE_94X94, 2, 0, '@',
2270 CHARSET_LEFT_TO_RIGHT,
2271 build_string ("JIS X0208:1978"),
2272 build_string ("JIS X0208:1978 (Japanese)"),
2274 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2275 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2277 Vcharset_chinese_gb2312 =
2278 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2279 CHARSET_TYPE_94X94, 2, 0, 'A',
2280 CHARSET_LEFT_TO_RIGHT,
2281 build_string ("GB2312"),
2282 build_string ("GB2312)"),
2283 build_string ("GB2312 Chinese simplified"),
2284 build_string ("gb2312"),
2286 Vcharset_japanese_jisx0208 =
2287 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2288 CHARSET_TYPE_94X94, 2, 0, 'B',
2289 CHARSET_LEFT_TO_RIGHT,
2290 build_string ("JISX0208"),
2291 build_string ("JIS X0208:1983 (Japanese)"),
2292 build_string ("JIS X0208:1983 Japanese Kanji"),
2293 build_string ("jisx0208\\.1983"),
2295 Vcharset_korean_ksc5601 =
2296 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2297 CHARSET_TYPE_94X94, 2, 0, 'C',
2298 CHARSET_LEFT_TO_RIGHT,
2299 build_string ("KSC5601"),
2300 build_string ("KSC5601 (Korean"),
2301 build_string ("KSC5601 Korean Hangul and Hanja"),
2302 build_string ("ksc5601"),
2304 Vcharset_japanese_jisx0212 =
2305 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2306 CHARSET_TYPE_94X94, 2, 0, 'D',
2307 CHARSET_LEFT_TO_RIGHT,
2308 build_string ("JISX0212"),
2309 build_string ("JISX0212 (Japanese)"),
2310 build_string ("JISX0212 Japanese Supplement"),
2311 build_string ("jisx0212"),
2314 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2315 Vcharset_chinese_cns11643_1 =
2316 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2317 CHARSET_TYPE_94X94, 2, 0, 'G',
2318 CHARSET_LEFT_TO_RIGHT,
2319 build_string ("CNS11643-1"),
2320 build_string ("CNS11643-1 (Chinese traditional)"),
2322 ("CNS 11643 Plane 1 Chinese traditional"),
2323 build_string (CHINESE_CNS_PLANE_RE("1")),
2325 Vcharset_chinese_cns11643_2 =
2326 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2327 CHARSET_TYPE_94X94, 2, 0, 'H',
2328 CHARSET_LEFT_TO_RIGHT,
2329 build_string ("CNS11643-2"),
2330 build_string ("CNS11643-2 (Chinese traditional)"),
2332 ("CNS 11643 Plane 2 Chinese traditional"),
2333 build_string (CHINESE_CNS_PLANE_RE("2")),
2336 Vcharset_latin_viscii_lower =
2337 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2338 CHARSET_TYPE_96, 1, 1, '1',
2339 CHARSET_LEFT_TO_RIGHT,
2340 build_string ("VISCII lower"),
2341 build_string ("VISCII lower (Vietnamese)"),
2342 build_string ("VISCII lower (Vietnamese)"),
2343 build_string ("VISCII1\\.1"),
2345 Vcharset_latin_viscii_upper =
2346 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2347 CHARSET_TYPE_96, 1, 1, '2',
2348 CHARSET_LEFT_TO_RIGHT,
2349 build_string ("VISCII upper"),
2350 build_string ("VISCII upper (Vietnamese)"),
2351 build_string ("VISCII upper (Vietnamese)"),
2352 build_string ("VISCII1\\.1"),
2354 Vcharset_hiragana_jisx0208 =
2355 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2356 CHARSET_TYPE_94X94, 2, 0, 'B',
2357 CHARSET_LEFT_TO_RIGHT,
2358 build_string ("Hiragana"),
2359 build_string ("Hiragana of JIS X0208"),
2360 build_string ("Japanese Hiragana of JIS X0208"),
2361 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2362 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2363 (0x24 - 33) * 94 + (0x21 - 33), 33);
2364 Vcharset_katakana_jisx0208 =
2365 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2366 CHARSET_TYPE_94X94, 2, 0, 'B',
2367 CHARSET_LEFT_TO_RIGHT,
2368 build_string ("Katakana"),
2369 build_string ("Katakana of JIS X0208"),
2370 build_string ("Japanese Katakana of JIS X0208"),
2371 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2372 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2373 (0x25 - 33) * 94 + (0x21 - 33), 33);
2375 Vcharset_chinese_big5_1 =
2376 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2377 CHARSET_TYPE_94X94, 2, 0, '0',
2378 CHARSET_LEFT_TO_RIGHT,
2379 build_string ("Big5"),
2380 build_string ("Big5 (Level-1)"),
2382 ("Big5 Level-1 Chinese traditional"),
2383 build_string ("big5"),
2385 Vcharset_chinese_big5_2 =
2386 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2387 CHARSET_TYPE_94X94, 2, 0, '1',
2388 CHARSET_LEFT_TO_RIGHT,
2389 build_string ("Big5"),
2390 build_string ("Big5 (Level-2)"),
2392 ("Big5 Level-2 Chinese traditional"),
2393 build_string ("big5"),
2396 #ifdef ENABLE_COMPOSITE_CHARS
2397 /* #### For simplicity, we put composite chars into a 96x96 charset.
2398 This is going to lead to problems because you can run out of
2399 room, esp. as we don't yet recycle numbers. */
2400 Vcharset_composite =
2401 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2402 CHARSET_TYPE_96X96, 2, 0, 0,
2403 CHARSET_LEFT_TO_RIGHT,
2404 build_string ("Composite"),
2405 build_string ("Composite characters"),
2406 build_string ("Composite characters"),
2409 composite_char_row_next = 32;
2410 composite_char_col_next = 32;
2412 Vcomposite_char_string2char_hash_table =
2413 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2414 Vcomposite_char_char2string_hash_table =
2415 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2416 staticpro (&Vcomposite_char_string2char_hash_table);
2417 staticpro (&Vcomposite_char_char2string_hash_table);
2418 #endif /* ENABLE_COMPOSITE_CHARS */