1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
180 put_char_ccs_code_point (Lisp_Object character,
181 Lisp_Object ccs, Lisp_Object value)
183 if ( !( EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 && INTP (value) && (XINT (value) < 0xF0000)
185 && XCHAR (character) == XINT (value) )
188 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
192 { /* obsolete representation: value must be a list of bytes */
193 Lisp_Object ret = Fcar (value);
197 signal_simple_error ("Invalid value for coded-charset", value);
198 code_point = XINT (ret);
199 if (XCHARSET_GRAPHIC (ccs) == 1)
207 signal_simple_error ("Invalid value for coded-charset",
211 signal_simple_error ("Invalid value for coded-charset",
214 if (XCHARSET_GRAPHIC (ccs) == 1)
216 code_point = (code_point << 8) | j;
219 value = make_int (code_point);
221 else if (INTP (value))
223 code_point = XINT (value);
224 if (XCHARSET_GRAPHIC (ccs) == 1)
226 code_point &= 0x7F7F7F7F;
227 value = make_int (code_point);
231 signal_simple_error ("Invalid value for coded-charset", value);
235 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
238 decoding_table_remove_char (ccs, XINT (cpos));
241 decoding_table_put_char (ccs, code_point, character);
247 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
249 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
250 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
252 if (VECTORP (decoding_table))
254 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
258 decoding_table_remove_char (ccs, XINT (cpos));
261 if (CHAR_TABLEP (encoding_table))
263 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
271 int leading_code_private_11;
274 Lisp_Object Qcharsetp;
276 /* Qdoc_string, Qdimension, Qchars defined in general.c */
277 Lisp_Object Qregistry, Qfinal, Qgraphic;
278 Lisp_Object Qdirection;
279 Lisp_Object Qreverse_direction_charset;
280 Lisp_Object Qleading_byte;
281 Lisp_Object Qshort_name, Qlong_name;
284 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
285 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
320 Qvietnamese_viscii_lower,
321 Qvietnamese_viscii_upper,
331 Lisp_Object Ql2r, Qr2l;
333 Lisp_Object Vcharset_hash_table;
335 /* Composite characters are characters constructed by overstriking two
336 or more regular characters.
338 1) The old Mule implementation involves storing composite characters
339 in a buffer as a tag followed by all of the actual characters
340 used to make up the composite character. I think this is a bad
341 idea; it greatly complicates code that wants to handle strings
342 one character at a time because it has to deal with the possibility
343 of great big ungainly characters. It's much more reasonable to
344 simply store an index into a table of composite characters.
346 2) The current implementation only allows for 16,384 separate
347 composite characters over the lifetime of the XEmacs process.
348 This could become a potential problem if the user
349 edited lots of different files that use composite characters.
350 Due to FSF bogosity, increasing the number of allowable
351 composite characters under Mule would decrease the number
352 of possible faces that can exist. Mule already has shrunk
353 this to 2048, and further shrinkage would become uncomfortable.
354 No such problems exist in XEmacs.
356 Composite characters could be represented as 0x80 C1 C2 C3,
357 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
358 for slightly under 2^20 (one million) composite characters
359 over the XEmacs process lifetime, and you only need to
360 increase the size of a Mule character from 19 to 21 bits.
361 Or you could use 0x80 C1 C2 C3 C4, allowing for about
362 85 million (slightly over 2^26) composite characters. */
365 /************************************************************************/
366 /* Basic Emchar functions */
367 /************************************************************************/
369 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
370 string in STR. Returns the number of bytes stored.
371 Do not call this directly. Use the macro set_charptr_emchar() instead.
375 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
390 else if ( c <= 0x7ff )
392 *p++ = (c >> 6) | 0xc0;
393 *p++ = (c & 0x3f) | 0x80;
395 else if ( c <= 0xffff )
397 *p++ = (c >> 12) | 0xe0;
398 *p++ = ((c >> 6) & 0x3f) | 0x80;
399 *p++ = (c & 0x3f) | 0x80;
401 else if ( c <= 0x1fffff )
403 *p++ = (c >> 18) | 0xf0;
404 *p++ = ((c >> 12) & 0x3f) | 0x80;
405 *p++ = ((c >> 6) & 0x3f) | 0x80;
406 *p++ = (c & 0x3f) | 0x80;
408 else if ( c <= 0x3ffffff )
410 *p++ = (c >> 24) | 0xf8;
411 *p++ = ((c >> 18) & 0x3f) | 0x80;
412 *p++ = ((c >> 12) & 0x3f) | 0x80;
413 *p++ = ((c >> 6) & 0x3f) | 0x80;
414 *p++ = (c & 0x3f) | 0x80;
418 *p++ = (c >> 30) | 0xfc;
419 *p++ = ((c >> 24) & 0x3f) | 0x80;
420 *p++ = ((c >> 18) & 0x3f) | 0x80;
421 *p++ = ((c >> 12) & 0x3f) | 0x80;
422 *p++ = ((c >> 6) & 0x3f) | 0x80;
423 *p++ = (c & 0x3f) | 0x80;
426 BREAKUP_CHAR (c, charset, c1, c2);
427 lb = CHAR_LEADING_BYTE (c);
428 if (LEADING_BYTE_PRIVATE_P (lb))
429 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
431 if (EQ (charset, Vcharset_control_1))
440 /* Return the first character from a Mule-encoded string in STR,
441 assuming it's non-ASCII. Do not call this directly.
442 Use the macro charptr_emchar() instead. */
445 non_ascii_charptr_emchar (const Bufbyte *str)
458 else if ( b >= 0xf8 )
463 else if ( b >= 0xf0 )
468 else if ( b >= 0xe0 )
473 else if ( b >= 0xc0 )
483 for( ; len > 0; len-- )
486 ch = ( ch << 6 ) | ( b & 0x3f );
490 Bufbyte i0 = *str, i1, i2 = 0;
493 if (i0 == LEADING_BYTE_CONTROL_1)
494 return (Emchar) (*++str - 0x20);
496 if (LEADING_BYTE_PREFIX_P (i0))
501 charset = CHARSET_BY_LEADING_BYTE (i0);
502 if (XCHARSET_DIMENSION (charset) == 2)
505 return MAKE_CHAR (charset, i1, i2);
509 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
510 Do not call this directly. Use the macro valid_char_p() instead. */
514 non_ascii_valid_char_p (Emchar ch)
518 /* Must have only lowest 19 bits set */
522 f1 = CHAR_FIELD1 (ch);
523 f2 = CHAR_FIELD2 (ch);
524 f3 = CHAR_FIELD3 (ch);
530 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
531 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
532 f2 > MAX_CHAR_FIELD2_PRIVATE)
537 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
538 f2 <= MAX_CHAR_FIELD2_PRIVATE))
542 NOTE: This takes advantage of the fact that
543 FIELD2_TO_OFFICIAL_LEADING_BYTE and
544 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
546 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
547 if (EQ (charset, Qnil))
549 return (XCHARSET_CHARS (charset) == 96);
555 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
556 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
557 f1 > MAX_CHAR_FIELD1_PRIVATE)
559 if (f2 < 0x20 || f3 < 0x20)
562 #ifdef ENABLE_COMPOSITE_CHARS
563 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
565 if (UNBOUNDP (Fgethash (make_int (ch),
566 Vcomposite_char_char2string_hash_table,
571 #endif /* ENABLE_COMPOSITE_CHARS */
573 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
574 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
577 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
582 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
584 if (EQ (charset, Qnil))
586 return (XCHARSET_CHARS (charset) == 96);
592 /************************************************************************/
593 /* Basic string functions */
594 /************************************************************************/
596 /* Copy the character pointed to by SRC into DST. Do not call this
597 directly. Use the macro charptr_copy_char() instead.
598 Return the number of bytes copied. */
601 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
603 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
605 for (i = bytes; i; i--, dst++, src++)
611 /************************************************************************/
612 /* streams of Emchars */
613 /************************************************************************/
615 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
616 The functions below are not meant to be called directly; use
617 the macros in insdel.h. */
620 Lstream_get_emchar_1 (Lstream *stream, int ch)
622 Bufbyte str[MAX_EMCHAR_LEN];
623 Bufbyte *strptr = str;
626 str[0] = (Bufbyte) ch;
628 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
630 int c = Lstream_getc (stream);
631 bufpos_checking_assert (c >= 0);
632 *++strptr = (Bufbyte) c;
634 return charptr_emchar (str);
638 Lstream_fput_emchar (Lstream *stream, Emchar ch)
640 Bufbyte str[MAX_EMCHAR_LEN];
641 Bytecount len = set_charptr_emchar (str, ch);
642 return Lstream_write (stream, str, len);
646 Lstream_funget_emchar (Lstream *stream, Emchar ch)
648 Bufbyte str[MAX_EMCHAR_LEN];
649 Bytecount len = set_charptr_emchar (str, ch);
650 Lstream_unread (stream, str, len);
654 /************************************************************************/
656 /************************************************************************/
659 mark_charset (Lisp_Object obj)
661 Lisp_Charset *cs = XCHARSET (obj);
663 mark_object (cs->short_name);
664 mark_object (cs->long_name);
665 mark_object (cs->doc_string);
666 mark_object (cs->registry);
667 mark_object (cs->ccl_program);
669 mark_object (cs->decoding_table);
670 mark_object (cs->mother);
676 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
678 Lisp_Charset *cs = XCHARSET (obj);
682 error ("printing unreadable object #<charset %s 0x%x>",
683 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
686 write_c_string ("#<charset ", printcharfun);
687 print_internal (CHARSET_NAME (cs), printcharfun, 0);
688 write_c_string (" ", printcharfun);
689 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
690 write_c_string (" ", printcharfun);
691 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
692 write_c_string (" ", printcharfun);
693 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
694 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
696 CHARSET_DIMENSION (cs),
697 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
698 CHARSET_COLUMNS (cs),
699 CHARSET_GRAPHIC (cs),
701 write_c_string (buf, printcharfun);
702 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
703 sprintf (buf, " 0x%x>", cs->header.uid);
704 write_c_string (buf, printcharfun);
707 static const struct lrecord_description charset_description[] = {
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
717 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
722 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
723 mark_charset, print_charset, 0, 0, 0,
727 /* Make a new charset. */
728 /* #### SJT Should generic properties be allowed? */
730 make_charset (Charset_ID id, Lisp_Object name,
731 unsigned short chars, unsigned char dimension,
732 unsigned char columns, unsigned char graphic,
733 Bufbyte final, unsigned char direction, Lisp_Object short_name,
734 Lisp_Object long_name, Lisp_Object doc,
736 Lisp_Object decoding_table,
737 Emchar min_code, Emchar max_code,
738 Emchar code_offset, unsigned char byte_offset,
739 Lisp_Object mother, unsigned char conversion)
742 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
746 XSETCHARSET (obj, cs);
748 CHARSET_ID (cs) = id;
749 CHARSET_NAME (cs) = name;
750 CHARSET_SHORT_NAME (cs) = short_name;
751 CHARSET_LONG_NAME (cs) = long_name;
752 CHARSET_CHARS (cs) = chars;
753 CHARSET_DIMENSION (cs) = dimension;
754 CHARSET_DIRECTION (cs) = direction;
755 CHARSET_COLUMNS (cs) = columns;
756 CHARSET_GRAPHIC (cs) = graphic;
757 CHARSET_FINAL (cs) = final;
758 CHARSET_DOC_STRING (cs) = doc;
759 CHARSET_REGISTRY (cs) = reg;
760 CHARSET_CCL_PROGRAM (cs) = Qnil;
761 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
763 CHARSET_DECODING_TABLE(cs) = Qunbound;
764 CHARSET_MIN_CODE (cs) = min_code;
765 CHARSET_MAX_CODE (cs) = max_code;
766 CHARSET_CODE_OFFSET (cs) = code_offset;
767 CHARSET_BYTE_OFFSET (cs) = byte_offset;
768 CHARSET_MOTHER (cs) = mother;
769 CHARSET_CONVERSION (cs) = conversion;
773 if (id == LEADING_BYTE_ASCII)
774 CHARSET_REP_BYTES (cs) = 1;
776 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
778 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
783 /* some charsets do not have final characters. This includes
784 ASCII, Control-1, Composite, and the two faux private
786 unsigned char iso2022_type
787 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
789 if (code_offset == 0)
791 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
792 chlook->charset_by_attributes[iso2022_type][final] = obj;
796 (chlook->charset_by_attributes[iso2022_type][final][direction]));
797 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
801 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
802 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
804 /* Some charsets are "faux" and don't have names or really exist at
805 all except in the leading-byte table. */
807 Fputhash (name, obj, Vcharset_hash_table);
812 get_unallocated_leading_byte (int dimension)
817 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
820 lb = chlook->next_allocated_leading_byte++;
824 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
827 lb = chlook->next_allocated_1_byte_leading_byte++;
831 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
834 lb = chlook->next_allocated_2_byte_leading_byte++;
840 ("No more character sets free for this dimension",
841 make_int (dimension));
847 /* Number of Big5 characters which have the same code in 1st byte. */
849 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
852 decode_ccs_conversion (int conv_type, int code_point)
854 if ( conv_type == CONVERSION_IDENTICAL )
858 if ( conv_type == CONVERSION_94x60 )
860 int row = code_point >> 8;
861 int cell = code_point & 255;
865 else if (row < 16 + 32 + 30)
866 return (row - (16 + 32)) * 94 + cell - 33;
867 else if (row < 18 + 32 + 30)
869 else if (row < 18 + 32 + 60)
870 return (row - (18 + 32)) * 94 + cell - 33;
872 else if ( conv_type == CONVERSION_94x94x60 )
874 int plane = code_point >> 16;
875 int row = (code_point >> 8) & 255;
876 int cell = code_point & 255;
880 else if (row < 16 + 32 + 30)
882 (plane - 33) * 94 * 60
883 + (row - (16 + 32)) * 94
885 else if (row < 18 + 32 + 30)
887 else if (row < 18 + 32 + 60)
889 (plane - 33) * 94 * 60
890 + (row - (18 + 32)) * 94
893 else if ( conv_type == CONVERSION_BIG5_1 )
896 = (((code_point >> 8) & 0x7F) - 33) * 94
897 + (( code_point & 0x7F) - 33);
898 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
899 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
901 b2 += b2 < 0x3F ? 0x40 : 0x62;
902 return (b1 << 8) | b2;
904 else if ( conv_type == CONVERSION_BIG5_2 )
907 = (((code_point >> 8) & 0x7F) - 33) * 94
908 + (( code_point & 0x7F) - 33)
909 + BIG5_SAME_ROW * (0xC9 - 0xA1);
910 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
911 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
913 b2 += b2 < 0x3F ? 0x40 : 0x62;
914 return (b1 << 8) | b2;
920 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
922 int dim = XCHARSET_DIMENSION (ccs);
923 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
931 = get_ccs_octet_table (decoding_table, ccs,
932 (code_point >> (dim * 8)) & 255);
934 if (CHARP (decoding_table))
935 return XCHAR (decoding_table);
937 if (EQ (decoding_table, Qunloaded))
939 char_id = load_char_decoding_entry_maybe (ccs, code_point);
941 #endif /* HAVE_CHISE */
944 else if ( !without_inheritance
945 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
948 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
952 code += XCHARSET_CODE_OFFSET(ccs);
953 if ( EQ (mother, Vcharset_ucs) )
954 return DECODE_CHAR (mother, code, without_inheritance);
956 return decode_defined_char (mother, code,
957 without_inheritance);
964 decode_builtin_char (Lisp_Object charset, int code_point)
966 Lisp_Object mother = XCHARSET_MOTHER (charset);
969 if ( XCHARSET_MAX_CODE (charset) > 0 )
971 if ( CHARSETP (mother) )
974 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
979 decode_builtin_char (mother,
980 code + XCHARSET_CODE_OFFSET(charset));
987 = (XCHARSET_DIMENSION (charset) == 1
989 code_point - XCHARSET_BYTE_OFFSET (charset)
991 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
992 * XCHARSET_CHARS (charset)
993 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
994 + XCHARSET_CODE_OFFSET (charset);
995 if ((cid < XCHARSET_MIN_CODE (charset))
996 || (XCHARSET_MAX_CODE (charset) < cid))
1001 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1003 if (XCHARSET_DIMENSION (charset) == 1)
1005 switch (XCHARSET_CHARS (charset))
1009 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1012 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1020 switch (XCHARSET_CHARS (charset))
1023 return MIN_CHAR_94x94
1024 + (final - '0') * 94 * 94
1025 + (((code_point >> 8) & 0x7F) - 33) * 94
1026 + ((code_point & 0x7F) - 33);
1028 return MIN_CHAR_96x96
1029 + (final - '0') * 96 * 96
1030 + (((code_point >> 8) & 0x7F) - 32) * 96
1031 + ((code_point & 0x7F) - 32);
1043 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1045 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1048 if ( CHAR_TABLEP (encoding_table)
1049 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1054 Lisp_Object mother = XCHARSET_MOTHER (charset);
1055 int min = XCHARSET_MIN_CODE (charset);
1056 int max = XCHARSET_MAX_CODE (charset);
1059 if ( CHARSETP (mother) )
1061 if (XCHARSET_FINAL (charset) >= '0')
1062 code = charset_code_point (mother, ch, 1);
1064 code = charset_code_point (mother, ch, defined_only);
1066 else if (defined_only)
1068 else if ( ((max == 0) && CHARSETP (mother)
1069 && (XCHARSET_FINAL (charset) == 0))
1070 || ((min <= ch) && (ch <= max)) )
1072 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1073 || ((min <= code) && (code <= max)) )
1075 int d = code - XCHARSET_CODE_OFFSET (charset);
1077 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1083 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1086 int cell = d % 94 + 33;
1092 return (row << 8) | cell;
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1096 int B1 = d >> 8, B2 = d & 0xFF;
1098 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1099 - (B2 < 0x7F ? 0x40 : 0x62);
1103 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1106 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1108 int B1 = d >> 8, B2 = d & 0xFF;
1110 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1111 - (B2 < 0x7F ? 0x40 : 0x62);
1115 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1116 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1120 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1122 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1125 int plane = d / (94 * 60) + 33;
1126 int row = (d % (94 * 60)) / 94;
1127 int cell = d % 94 + 33;
1133 return (plane << 16) | (row << 8) | cell;
1135 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1137 ( (d / (94 * 94) + 33) << 16)
1138 | ((d / 94 % 94 + 33) << 8)
1140 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1142 ( (d / (96 * 96) + 32) << 16)
1143 | ((d / 96 % 96 + 32) << 8)
1145 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1147 ( (d / (94 * 94 * 94) + 33) << 24)
1148 | ((d / (94 * 94) % 94 + 33) << 16)
1149 | ((d / 94 % 94 + 33) << 8)
1151 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1153 ( (d / (96 * 96 * 96) + 32) << 24)
1154 | ((d / (96 * 96) % 96 + 32) << 16)
1155 | ((d / 96 % 96 + 32) << 8)
1159 printf ("Unknown CCS-conversion %d is specified!",
1160 XCHARSET_CONVERSION (charset));
1164 else if (defined_only)
1166 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1167 ( XCHARSET_MIN_CODE (charset) == 0 )
1169 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1170 (XCHARSET_CODE_OFFSET (charset)
1171 == XCHARSET_MIN_CODE (charset))
1176 if (XCHARSET_DIMENSION (charset) == 1)
1178 if (XCHARSET_CHARS (charset) == 94)
1180 if (((d = ch - (MIN_CHAR_94
1181 + (XCHARSET_FINAL (charset) - '0') * 94))
1186 else if (XCHARSET_CHARS (charset) == 96)
1188 if (((d = ch - (MIN_CHAR_96
1189 + (XCHARSET_FINAL (charset) - '0') * 96))
1197 else if (XCHARSET_DIMENSION (charset) == 2)
1199 if (XCHARSET_CHARS (charset) == 94)
1201 if (((d = ch - (MIN_CHAR_94x94
1203 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1206 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1208 else if (XCHARSET_CHARS (charset) == 96)
1210 if (((d = ch - (MIN_CHAR_96x96
1212 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1215 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1226 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1228 if (c <= MAX_CHAR_BASIC_LATIN)
1230 *charset = Vcharset_ascii;
1235 *charset = Vcharset_control_1;
1240 *charset = Vcharset_latin_iso8859_1;
1244 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1246 *charset = Vcharset_hebrew_iso8859_8;
1247 return c - MIN_CHAR_HEBREW + 0x20;
1250 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1252 *charset = Vcharset_thai_tis620;
1253 return c - MIN_CHAR_THAI + 0x20;
1256 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1257 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1259 return list2 (Vcharset_katakana_jisx0201,
1260 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1263 else if (c <= MAX_CHAR_BMP)
1265 *charset = Vcharset_ucs_bmp;
1268 else if (c <= MAX_CHAR_SMP)
1270 *charset = Vcharset_ucs_smp;
1271 return c - MIN_CHAR_SMP;
1273 else if (c <= MAX_CHAR_SIP)
1275 *charset = Vcharset_ucs_sip;
1276 return c - MIN_CHAR_SIP;
1278 else if (c < MIN_CHAR_94)
1280 *charset = Vcharset_ucs;
1283 else if (c <= MAX_CHAR_94)
1285 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1286 ((c - MIN_CHAR_94) / 94) + '0',
1287 CHARSET_LEFT_TO_RIGHT);
1288 if (!NILP (*charset))
1289 return ((c - MIN_CHAR_94) % 94) + 33;
1292 *charset = Vcharset_ucs;
1296 else if (c <= MAX_CHAR_96)
1298 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1299 ((c - MIN_CHAR_96) / 96) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return ((c - MIN_CHAR_96) % 96) + 32;
1305 *charset = Vcharset_ucs;
1309 else if (c <= MAX_CHAR_94x94)
1312 = CHARSET_BY_ATTRIBUTES (94, 2,
1313 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1314 CHARSET_LEFT_TO_RIGHT);
1315 if (!NILP (*charset))
1316 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1317 | (((c - MIN_CHAR_94x94) % 94) + 33);
1320 *charset = Vcharset_ucs;
1324 else if (c <= MAX_CHAR_96x96)
1327 = CHARSET_BY_ATTRIBUTES (96, 2,
1328 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1329 CHARSET_LEFT_TO_RIGHT);
1330 if (!NILP (*charset))
1331 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1332 | (((c - MIN_CHAR_96x96) % 96) + 32);
1335 *charset = Vcharset_ucs;
1341 *charset = Vcharset_ucs;
1346 Lisp_Object Vdefault_coded_charset_priority_list;
1347 Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
1348 Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
1352 /************************************************************************/
1353 /* Basic charset Lisp functions */
1354 /************************************************************************/
1356 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1357 Return non-nil if OBJECT is a charset.
1361 return CHARSETP (object) ? Qt : Qnil;
1364 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1365 Retrieve the charset of the given name.
1366 If CHARSET-OR-NAME is a charset object, it is simply returned.
1367 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1368 nil is returned. Otherwise the associated charset object is returned.
1372 if (CHARSETP (charset_or_name))
1373 return charset_or_name;
1375 CHECK_SYMBOL (charset_or_name);
1376 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1379 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1380 Retrieve the charset of the given name.
1381 Same as `find-charset' except an error is signalled if there is no such
1382 charset instead of returning nil.
1386 Lisp_Object charset = Ffind_charset (name);
1389 signal_simple_error ("No such charset", name);
1393 /* We store the charsets in hash tables with the names as the key and the
1394 actual charset object as the value. Occasionally we need to use them
1395 in a list format. These routines provide us with that. */
1396 struct charset_list_closure
1398 Lisp_Object *charset_list;
1402 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1403 void *charset_list_closure)
1405 /* This function can GC */
1406 struct charset_list_closure *chcl =
1407 (struct charset_list_closure*) charset_list_closure;
1408 Lisp_Object *charset_list = chcl->charset_list;
1410 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1414 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1415 Return a list of the names of all defined charsets.
1419 Lisp_Object charset_list = Qnil;
1420 struct gcpro gcpro1;
1421 struct charset_list_closure charset_list_closure;
1423 GCPRO1 (charset_list);
1424 charset_list_closure.charset_list = &charset_list;
1425 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1426 &charset_list_closure);
1429 return charset_list;
1432 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1433 Return the name of charset CHARSET.
1437 return XCHARSET_NAME (Fget_charset (charset));
1440 /* #### SJT Should generic properties be allowed? */
1441 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1442 Define a new character set.
1443 This function is for use with Mule support.
1444 NAME is a symbol, the name by which the character set is normally referred.
1445 DOC-STRING is a string describing the character set.
1446 PROPS is a property list, describing the specific nature of the
1447 character set. Recognized properties are:
1449 'short-name Short version of the charset name (ex: Latin-1)
1450 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1451 'registry A regular expression matching the font registry field for
1453 'dimension Number of octets used to index a character in this charset.
1454 Either 1 or 2. Defaults to 1.
1455 If UTF-2000 feature is enabled, 3 or 4 are also available.
1456 'columns Number of columns used to display a character in this charset.
1457 Only used in TTY mode. (Under X, the actual width of a
1458 character can be derived from the font used to display the
1459 characters.) If unspecified, defaults to the dimension
1460 (this is almost always the correct value).
1461 'chars Number of characters in each dimension (94 or 96).
1462 Defaults to 94. Note that if the dimension is 2, the
1463 character set thus described is 94x94 or 96x96.
1464 If UTF-2000 feature is enabled, 128 or 256 are also available.
1465 'final Final byte of ISO 2022 escape sequence. Must be
1466 supplied. Each combination of (DIMENSION, CHARS) defines a
1467 separate namespace for final bytes. Note that ISO
1468 2022 restricts the final byte to the range
1469 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1470 dimension == 2. Note also that final bytes in the range
1471 0x30 - 0x3F are reserved for user-defined (not official)
1473 'graphic 0 (use left half of font on output) or 1 (use right half
1474 of font on output). Defaults to 0. For example, for
1475 a font whose registry is ISO8859-1, the left half
1476 (octets 0x20 - 0x7F) is the `ascii' character set, while
1477 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1478 character set. With 'graphic set to 0, the octets
1479 will have their high bit cleared; with it set to 1,
1480 the octets will have their high bit set.
1481 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1483 'ccl-program A compiled CCL program used to convert a character in
1484 this charset into an index into the font. This is in
1485 addition to the 'graphic property. The CCL program
1486 is passed the octets of the character, with the high
1487 bit cleared and set depending upon whether the value
1488 of the 'graphic property is 0 or 1.
1489 'mother [UTF-2000 only] Base coded-charset.
1490 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1491 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1492 'code-offset [UTF-2000 only] Offset for a code-point of a base
1494 'conversion [UTF-2000 only] Conversion for a code-point of a base
1495 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1497 (name, doc_string, props))
1499 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1500 int direction = CHARSET_LEFT_TO_RIGHT;
1501 Lisp_Object registry = Qnil;
1502 Lisp_Object charset;
1503 Lisp_Object ccl_program = Qnil;
1504 Lisp_Object short_name = Qnil, long_name = Qnil;
1505 Lisp_Object mother = Qnil;
1506 int min_code = 0, max_code = 0, code_offset = 0;
1507 int byte_offset = -1;
1510 CHECK_SYMBOL (name);
1511 if (!NILP (doc_string))
1512 CHECK_STRING (doc_string);
1514 charset = Ffind_charset (name);
1515 if (!NILP (charset))
1516 signal_simple_error ("Cannot redefine existing charset", name);
1519 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1521 if (EQ (keyword, Qshort_name))
1523 CHECK_STRING (value);
1527 else if (EQ (keyword, Qlong_name))
1529 CHECK_STRING (value);
1533 else if (EQ (keyword, Qiso_ir))
1537 id = - XINT (value);
1541 else if (EQ (keyword, Qdimension))
1544 dimension = XINT (value);
1545 if (dimension < 1 ||
1552 signal_simple_error ("Invalid value for 'dimension", value);
1555 else if (EQ (keyword, Qchars))
1558 chars = XINT (value);
1559 if (chars != 94 && chars != 96
1561 && chars != 128 && chars != 256
1564 signal_simple_error ("Invalid value for 'chars", value);
1567 else if (EQ (keyword, Qcolumns))
1570 columns = XINT (value);
1571 if (columns != 1 && columns != 2)
1572 signal_simple_error ("Invalid value for 'columns", value);
1575 else if (EQ (keyword, Qgraphic))
1578 graphic = XINT (value);
1586 signal_simple_error ("Invalid value for 'graphic", value);
1589 else if (EQ (keyword, Qregistry))
1591 CHECK_STRING (value);
1595 else if (EQ (keyword, Qdirection))
1597 if (EQ (value, Ql2r))
1598 direction = CHARSET_LEFT_TO_RIGHT;
1599 else if (EQ (value, Qr2l))
1600 direction = CHARSET_RIGHT_TO_LEFT;
1602 signal_simple_error ("Invalid value for 'direction", value);
1605 else if (EQ (keyword, Qfinal))
1607 CHECK_CHAR_COERCE_INT (value);
1608 final = XCHAR (value);
1609 if (final < '0' || final > '~')
1610 signal_simple_error ("Invalid value for 'final", value);
1614 else if (EQ (keyword, Qmother))
1616 mother = Fget_charset (value);
1619 else if (EQ (keyword, Qmin_code))
1622 min_code = XUINT (value);
1625 else if (EQ (keyword, Qmax_code))
1628 max_code = XUINT (value);
1631 else if (EQ (keyword, Qcode_offset))
1634 code_offset = XUINT (value);
1637 else if (EQ (keyword, Qconversion))
1639 if (EQ (value, Q94x60))
1640 conversion = CONVERSION_94x60;
1641 else if (EQ (value, Q94x94x60))
1642 conversion = CONVERSION_94x94x60;
1643 else if (EQ (value, Qbig5_1))
1644 conversion = CONVERSION_BIG5_1;
1645 else if (EQ (value, Qbig5_2))
1646 conversion = CONVERSION_BIG5_2;
1648 signal_simple_error ("Unrecognized conversion", value);
1652 else if (EQ (keyword, Qccl_program))
1654 struct ccl_program test_ccl;
1656 if (setup_ccl_program (&test_ccl, value) < 0)
1657 signal_simple_error ("Invalid value for 'ccl-program", value);
1658 ccl_program = value;
1662 signal_simple_error ("Unrecognized property", keyword);
1668 error ("'final must be specified");
1670 if (dimension == 2 && final > 0x5F)
1672 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1675 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1676 CHARSET_LEFT_TO_RIGHT)) ||
1677 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1678 CHARSET_RIGHT_TO_LEFT)))
1680 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1683 id = get_unallocated_leading_byte (dimension);
1685 if (NILP (doc_string))
1686 doc_string = build_string ("");
1688 if (NILP (registry))
1689 registry = build_string ("");
1691 if (NILP (short_name))
1692 XSETSTRING (short_name, XSYMBOL (name)->name);
1694 if (NILP (long_name))
1695 long_name = doc_string;
1698 columns = dimension;
1700 if (byte_offset < 0)
1704 else if (chars == 96)
1710 charset = make_charset (id, name, chars, dimension, columns, graphic,
1711 final, direction, short_name, long_name,
1712 doc_string, registry,
1713 Qnil, min_code, max_code, code_offset, byte_offset,
1714 mother, conversion);
1715 if (!NILP (ccl_program))
1716 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1720 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1722 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1723 NEW-NAME is the name of the new charset. Return the new charset.
1725 (charset, new_name))
1727 Lisp_Object new_charset = Qnil;
1728 int id, chars, dimension, columns, graphic, final;
1730 Lisp_Object registry, doc_string, short_name, long_name;
1733 charset = Fget_charset (charset);
1734 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1735 signal_simple_error ("Charset already has reverse-direction charset",
1738 CHECK_SYMBOL (new_name);
1739 if (!NILP (Ffind_charset (new_name)))
1740 signal_simple_error ("Cannot redefine existing charset", new_name);
1742 cs = XCHARSET (charset);
1744 chars = CHARSET_CHARS (cs);
1745 dimension = CHARSET_DIMENSION (cs);
1746 columns = CHARSET_COLUMNS (cs);
1747 id = get_unallocated_leading_byte (dimension);
1749 graphic = CHARSET_GRAPHIC (cs);
1750 final = CHARSET_FINAL (cs);
1751 direction = CHARSET_RIGHT_TO_LEFT;
1752 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1753 direction = CHARSET_LEFT_TO_RIGHT;
1754 doc_string = CHARSET_DOC_STRING (cs);
1755 short_name = CHARSET_SHORT_NAME (cs);
1756 long_name = CHARSET_LONG_NAME (cs);
1757 registry = CHARSET_REGISTRY (cs);
1759 new_charset = make_charset (id, new_name, chars, dimension, columns,
1760 graphic, final, direction, short_name, long_name,
1761 doc_string, registry,
1763 CHARSET_DECODING_TABLE(cs),
1764 CHARSET_MIN_CODE(cs),
1765 CHARSET_MAX_CODE(cs),
1766 CHARSET_CODE_OFFSET(cs),
1767 CHARSET_BYTE_OFFSET(cs),
1769 CHARSET_CONVERSION (cs)
1771 Qnil, 0, 0, 0, 0, Qnil, 0
1775 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1776 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1781 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1782 Define symbol ALIAS as an alias for CHARSET.
1786 CHECK_SYMBOL (alias);
1787 charset = Fget_charset (charset);
1788 return Fputhash (alias, charset, Vcharset_hash_table);
1791 /* #### Reverse direction charsets not yet implemented. */
1793 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1795 Return the reverse-direction charset parallel to CHARSET, if any.
1796 This is the charset with the same properties (in particular, the same
1797 dimension, number of characters per dimension, and final byte) as
1798 CHARSET but whose characters are displayed in the opposite direction.
1802 charset = Fget_charset (charset);
1803 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1807 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1808 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1809 If DIRECTION is omitted, both directions will be checked (left-to-right
1810 will be returned if character sets exist for both directions).
1812 (dimension, chars, final, direction))
1814 int dm, ch, fi, di = -1;
1815 Lisp_Object obj = Qnil;
1817 CHECK_INT (dimension);
1818 dm = XINT (dimension);
1819 if (dm < 1 || dm > 2)
1820 signal_simple_error ("Invalid value for DIMENSION", dimension);
1824 if (ch != 94 && ch != 96)
1825 signal_simple_error ("Invalid value for CHARS", chars);
1827 CHECK_CHAR_COERCE_INT (final);
1829 if (fi < '0' || fi > '~')
1830 signal_simple_error ("Invalid value for FINAL", final);
1832 if (EQ (direction, Ql2r))
1833 di = CHARSET_LEFT_TO_RIGHT;
1834 else if (EQ (direction, Qr2l))
1835 di = CHARSET_RIGHT_TO_LEFT;
1836 else if (!NILP (direction))
1837 signal_simple_error ("Invalid value for DIRECTION", direction);
1839 if (dm == 2 && fi > 0x5F)
1841 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1845 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1847 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1850 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1853 return XCHARSET_NAME (obj);
1857 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1858 Return short name of CHARSET.
1862 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1865 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1866 Return long name of CHARSET.
1870 return XCHARSET_LONG_NAME (Fget_charset (charset));
1873 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1874 Return description of CHARSET.
1878 return XCHARSET_DOC_STRING (Fget_charset (charset));
1881 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1882 Return dimension of CHARSET.
1886 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1889 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1890 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1891 Recognized properties are those listed in `make-charset', as well as
1892 'name and 'doc-string.
1898 charset = Fget_charset (charset);
1899 cs = XCHARSET (charset);
1901 CHECK_SYMBOL (prop);
1902 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1903 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1904 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1905 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1906 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1907 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1908 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1909 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1910 Qnil : make_char (CHARSET_FINAL (cs));
1911 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1912 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1913 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1914 if (EQ (prop, Qdirection))
1915 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1916 if (EQ (prop, Qreverse_direction_charset))
1918 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1919 /* #### Is this translation OK? If so, error checking sufficient? */
1920 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1923 if (EQ (prop, Qmother))
1924 return CHARSET_MOTHER (cs);
1925 if (EQ (prop, Qmin_code))
1926 return make_int (CHARSET_MIN_CODE (cs));
1927 if (EQ (prop, Qmax_code))
1928 return make_int (CHARSET_MAX_CODE (cs));
1930 signal_simple_error ("Unrecognized charset property name", prop);
1931 return Qnil; /* not reached */
1934 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1935 Return charset identification number of CHARSET.
1939 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1942 /* #### We need to figure out which properties we really want to
1945 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1946 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1948 (charset, ccl_program))
1950 struct ccl_program test_ccl;
1952 charset = Fget_charset (charset);
1953 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1954 signal_simple_error ("Invalid ccl-program", ccl_program);
1955 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1960 invalidate_charset_font_caches (Lisp_Object charset)
1962 /* Invalidate font cache entries for charset on all devices. */
1963 Lisp_Object devcons, concons, hash_table;
1964 DEVICE_LOOP_NO_BREAK (devcons, concons)
1966 struct device *d = XDEVICE (XCAR (devcons));
1967 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1968 if (!UNBOUNDP (hash_table))
1969 Fclrhash (hash_table);
1973 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1974 Set the 'registry property of CHARSET to REGISTRY.
1976 (charset, registry))
1978 charset = Fget_charset (charset);
1979 CHECK_STRING (registry);
1980 XCHARSET_REGISTRY (charset) = registry;
1981 invalidate_charset_font_caches (charset);
1982 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1987 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1988 Return mapping-table of CHARSET.
1992 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1995 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1996 Set mapping-table of CHARSET to TABLE.
2000 struct Lisp_Charset *cs;
2004 charset = Fget_charset (charset);
2005 cs = XCHARSET (charset);
2009 CHARSET_DECODING_TABLE(cs) = Qnil;
2012 else if (VECTORP (table))
2014 int ccs_len = CHARSET_BYTE_SIZE (cs);
2015 int ret = decoding_table_check_elements (table,
2016 CHARSET_DIMENSION (cs),
2021 signal_simple_error ("Too big table", table);
2023 signal_simple_error ("Invalid element is found", table);
2025 signal_simple_error ("Something wrong", table);
2027 CHARSET_DECODING_TABLE(cs) = Qnil;
2030 signal_error (Qwrong_type_argument,
2031 list2 (build_translated_string ("vector-or-nil-p"),
2034 byte_offset = CHARSET_BYTE_OFFSET (cs);
2035 switch (CHARSET_DIMENSION (cs))
2038 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2040 Lisp_Object c = XVECTOR_DATA(table)[i];
2043 Fput_char_attribute (c, XCHARSET_NAME (charset),
2044 make_int (i + byte_offset));
2048 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2050 Lisp_Object v = XVECTOR_DATA(table)[i];
2056 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2058 Lisp_Object c = XVECTOR_DATA(v)[j];
2062 (c, XCHARSET_NAME (charset),
2063 make_int ( ( (i + byte_offset) << 8 )
2069 Fput_char_attribute (v, XCHARSET_NAME (charset),
2070 make_int (i + byte_offset));
2078 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2079 Save mapping-table of CHARSET.
2083 struct Lisp_Charset *cs;
2084 int byte_min, byte_max;
2085 #ifdef HAVE_LIBCHISE
2087 #else /* HAVE_LIBCHISE */
2089 Lisp_Object db_file;
2090 #endif /* not HAVE_LIBCHISE */
2092 charset = Fget_charset (charset);
2093 cs = XCHARSET (charset);
2095 #ifdef HAVE_LIBCHISE
2096 if ( open_chise_data_source_maybe () )
2100 = chise_ds_get_ccs (default_chise_data_source,
2101 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2104 printf ("Can't open decoding-table %s\n",
2105 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2108 #else /* HAVE_LIBCHISE */
2109 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2110 Qsystem_char_id, 1);
2111 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2112 #endif /* not HAVE_LIBCHISE */
2114 byte_min = CHARSET_BYTE_OFFSET (cs);
2115 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2116 switch (CHARSET_DIMENSION (cs))
2120 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2123 for (cell = byte_min; cell < byte_max; cell++)
2125 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2129 #ifdef HAVE_LIBCHISE
2130 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2131 #else /* HAVE_LIBCHISE */
2132 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2133 Fprin1_to_string (c, Qnil),
2135 #endif /* not HAVE_LIBCHISE */
2142 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2145 for (row = byte_min; row < byte_max; row++)
2147 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2150 for (cell = byte_min; cell < byte_max; cell++)
2152 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2156 #ifdef HAVE_LIBCHISE
2157 chise_ccs_set_decoded_char
2159 (row << 8) | cell, XCHAR (c));
2160 #else /* HAVE_LIBCHISE */
2161 Fput_database (Fprin1_to_string (make_int ((row << 8)
2164 Fprin1_to_string (c, Qnil),
2166 #endif /* not HAVE_LIBCHISE */
2174 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2177 for (plane = byte_min; plane < byte_max; plane++)
2180 = get_ccs_octet_table (table_p, charset, plane);
2183 for (row = byte_min; row < byte_max; row++)
2186 = get_ccs_octet_table (table_r, charset, row);
2189 for (cell = byte_min; cell < byte_max; cell++)
2191 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2196 #ifdef HAVE_LIBCHISE
2197 chise_ccs_set_decoded_char
2202 #else /* HAVE_LIBCHISE */
2203 Fput_database (Fprin1_to_string
2204 (make_int ((plane << 16)
2208 Fprin1_to_string (c, Qnil),
2210 #endif /* not HAVE_LIBCHISE */
2219 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2222 for (group = byte_min; group < byte_max; group++)
2225 = get_ccs_octet_table (table_g, charset, group);
2228 for (plane = byte_min; plane < byte_max; plane++)
2231 = get_ccs_octet_table (table_p, charset, plane);
2234 for (row = byte_min; row < byte_max; row++)
2237 = get_ccs_octet_table (table_r, charset, row);
2240 for (cell = byte_min; cell < byte_max; cell++)
2243 = get_ccs_octet_table (table_c, charset, cell);
2247 #ifdef HAVE_LIBCHISE
2248 chise_ccs_set_decoded_char
2254 #else /* HAVE_LIBCHISE */
2255 Fput_database (Fprin1_to_string
2256 (make_int (( group << 24)
2261 Fprin1_to_string (c, Qnil),
2263 #endif /* not HAVE_LIBCHISE */
2271 #ifdef HAVE_LIBCHISE
2272 chise_ccs_sync (dt_ccs);
2274 #else /* HAVE_LIBCHISE */
2275 return Fclose_database (db);
2276 #endif /* not HAVE_LIBCHISE */
2279 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2280 Reset mapping-table of CCS with database file.
2284 #ifdef HAVE_LIBCHISE
2285 CHISE_CCS chise_ccs;
2287 Lisp_Object db_file;
2290 ccs = Fget_charset (ccs);
2292 #ifdef HAVE_LIBCHISE
2293 if ( open_chise_data_source_maybe () )
2296 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2297 XSTRING_DATA (Fsymbol_name
2298 (XCHARSET_NAME(ccs))));
2299 if (chise_ccs == NULL)
2302 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2303 Qsystem_char_id, 0);
2307 #ifdef HAVE_LIBCHISE
2308 chise_ccs_setup_db (chise_ccs, 0) == 0
2310 !NILP (Ffile_exists_p (db_file))
2314 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2321 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2323 #ifdef HAVE_LIBCHISE
2324 CHISE_Char_ID char_id;
2326 if ( open_chise_data_source_maybe () )
2330 = chise_ds_decode_char (default_chise_data_source,
2331 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2334 decoding_table_put_char (ccs, code_point, make_char (char_id));
2336 decoding_table_put_char (ccs, code_point, Qnil);
2338 /* chise_ccst_close (dt_ccs); */
2340 #else /* HAVE_LIBCHISE */
2343 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2346 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2350 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2357 decoding_table_put_char (ccs, code_point, ret);
2358 Fclose_database (db);
2362 decoding_table_put_char (ccs, code_point, Qnil);
2363 Fclose_database (db);
2366 #endif /* not HAVE_LIBCHISE */
2369 #ifdef HAVE_LIBCHISE
2370 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2371 Save properties of CHARSET.
2375 struct Lisp_Charset *cs;
2376 CHISE_Property property;
2378 unsigned char* feature_name;
2380 ccs = Fget_charset (charset);
2381 cs = XCHARSET (ccs);
2383 if ( open_chise_data_source_maybe () )
2386 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2388 property = chise_ds_get_property (default_chise_data_source,
2390 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2391 chise_feature_set_property_value
2392 (chise_ds_get_feature (default_chise_data_source, feature_name),
2393 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2395 chise_property_sync (property);
2397 charset = XCHARSET_NAME (ccs);
2398 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2400 property = chise_ds_get_property (default_chise_data_source,
2402 chise_feature_set_property_value
2403 (chise_ds_get_feature (default_chise_data_source, feature_name),
2404 property, XSTRING_DATA (Fprin1_to_string
2405 (CHARSET_DOC_STRING (cs), Qnil)));
2406 chise_property_sync (property);
2408 property = chise_ds_get_property (default_chise_data_source, "type");
2409 chise_feature_set_property_value
2410 (chise_ds_get_feature (default_chise_data_source, feature_name),
2412 chise_property_sync (property);
2414 property = chise_ds_get_property (default_chise_data_source, "chars");
2415 chise_feature_set_property_value
2416 (chise_ds_get_feature (default_chise_data_source, feature_name),
2417 property, XSTRING_DATA (Fprin1_to_string (make_int
2418 (CHARSET_CHARS (cs)),
2420 chise_property_sync (property);
2422 property = chise_ds_get_property (default_chise_data_source, "dimension");
2423 chise_feature_set_property_value
2424 (chise_ds_get_feature (default_chise_data_source, feature_name),
2425 property, XSTRING_DATA (Fprin1_to_string (make_int
2426 (CHARSET_DIMENSION (cs)),
2428 chise_property_sync (property);
2430 if ( CHARSET_FINAL (cs) != 0 )
2432 property = chise_ds_get_property (default_chise_data_source,
2434 chise_feature_set_property_value
2435 (chise_ds_get_feature (default_chise_data_source, feature_name),
2436 property, XSTRING_DATA (Fprin1_to_string (make_int
2437 (CHARSET_FINAL (cs)),
2439 chise_property_sync (property);
2442 if ( !NILP (CHARSET_MOTHER (cs)) )
2444 Lisp_Object mother = CHARSET_MOTHER (cs);
2446 if ( CHARSETP (mother) )
2447 mother = XCHARSET_NAME (mother);
2449 property = chise_ds_get_property (default_chise_data_source,
2451 chise_feature_set_property_value
2452 (chise_ds_get_feature (default_chise_data_source, feature_name),
2453 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2454 chise_property_sync (property);
2457 if ( CHARSET_MAX_CODE (cs) != 0 )
2461 property = chise_ds_get_property (default_chise_data_source,
2463 if ( CHARSET_MIN_CODE (cs) == 0 )
2464 chise_feature_set_property_value
2465 (chise_ds_get_feature (default_chise_data_source, feature_name),
2469 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2470 chise_feature_set_property_value
2471 (chise_ds_get_feature (default_chise_data_source, feature_name),
2474 chise_property_sync (property);
2476 property = chise_ds_get_property (default_chise_data_source,
2478 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2479 chise_feature_set_property_value
2480 (chise_ds_get_feature (default_chise_data_source, feature_name),
2482 chise_property_sync (property);
2484 property = chise_ds_get_property (default_chise_data_source,
2485 "mother-code-offset");
2486 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2487 chise_feature_set_property_value
2488 (chise_ds_get_feature (default_chise_data_source, feature_name),
2492 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2493 chise_feature_set_property_value
2494 (chise_ds_get_feature (default_chise_data_source, feature_name),
2497 chise_property_sync (property);
2499 property = chise_ds_get_property (default_chise_data_source,
2500 "mother-code-conversion");
2501 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2502 chise_feature_set_property_value
2503 (chise_ds_get_feature (default_chise_data_source, feature_name),
2504 property, "identical");
2507 Lisp_Object sym = Qnil;
2509 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2511 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2513 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2515 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2518 chise_feature_set_property_value
2519 (chise_ds_get_feature (default_chise_data_source, feature_name),
2520 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2522 chise_feature_set_property_value
2523 (chise_ds_get_feature (default_chise_data_source, feature_name),
2524 property, "unknown");
2526 chise_property_sync (property);
2530 #endif /* HAVE_LIBCHISE */
2532 #endif /* HAVE_CHISE */
2533 #endif /* UTF2000 */
2536 /************************************************************************/
2537 /* Lisp primitives for working with characters */
2538 /************************************************************************/
2541 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2542 Make a character from CHARSET and code-point CODE.
2543 If DEFINED_ONLY is non-nil, builtin character is not returned.
2544 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2545 If corresponding character is not found, nil is returned.
2547 (charset, code, defined_only, without_inheritance))
2551 charset = Fget_charset (charset);
2554 if (XCHARSET_GRAPHIC (charset) == 1)
2556 if (NILP (defined_only))
2557 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2559 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2560 return c >= 0 ? make_char (c) : Qnil;
2563 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2564 Make a builtin character from CHARSET and code-point CODE.
2571 charset = Fget_charset (charset);
2573 if (EQ (charset, Vcharset_latin_viscii))
2575 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2581 (ret = Fget_char_attribute (chr,
2582 Vcharset_latin_viscii_lower,
2585 charset = Vcharset_latin_viscii_lower;
2589 (ret = Fget_char_attribute (chr,
2590 Vcharset_latin_viscii_upper,
2593 charset = Vcharset_latin_viscii_upper;
2600 if (XCHARSET_GRAPHIC (charset) == 1)
2603 ch = decode_builtin_char (charset, c);
2605 ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
2609 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2610 Make a character from CHARSET and octets ARG1 and ARG2.
2611 ARG2 is required only for characters from two-dimensional charsets.
2612 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2613 character s with caron.
2615 (charset, arg1, arg2))
2619 int lowlim, highlim;
2621 charset = Fget_charset (charset);
2622 cs = XCHARSET (charset);
2624 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2625 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2627 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2629 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2630 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2633 /* It is useful (and safe, according to Olivier Galibert) to strip
2634 the 8th bit off ARG1 and ARG2 because it allows programmers to
2635 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2636 Latin 2 code of the character. */
2644 if (a1 < lowlim || a1 > highlim)
2645 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2647 if (CHARSET_DIMENSION (cs) == 1)
2651 ("Charset is of dimension one; second octet must be nil", arg2);
2652 return make_char (MAKE_CHAR (charset, a1, 0));
2661 a2 = XINT (arg2) & 0x7f;
2663 if (a2 < lowlim || a2 > highlim)
2664 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2666 return make_char (MAKE_CHAR (charset, a1, a2));
2669 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2670 Return the character set of CHARACTER.
2674 CHECK_CHAR_COERCE_INT (character);
2676 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2679 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2680 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2681 N defaults to 0 if omitted.
2685 Lisp_Object charset;
2688 CHECK_CHAR_COERCE_INT (character);
2690 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2692 if (NILP (n) || EQ (n, Qzero))
2693 return make_int (octet0);
2694 else if (EQ (n, make_int (1)))
2695 return make_int (octet1);
2697 signal_simple_error ("Octet number must be 0 or 1", n);
2701 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2702 Return code-point of CHARACTER in specified CHARSET.
2704 (character, charset, defined_only))
2708 CHECK_CHAR_COERCE_INT (character);
2709 charset = Fget_charset (charset);
2710 code_point = charset_code_point (charset, XCHAR (character),
2711 !NILP (defined_only));
2712 if (code_point >= 0)
2713 return make_int (code_point);
2719 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2720 Return list of charset and one or two position-codes of CHARACTER.
2724 /* This function can GC */
2725 struct gcpro gcpro1, gcpro2;
2726 Lisp_Object charset = Qnil;
2727 Lisp_Object rc = Qnil;
2735 GCPRO2 (charset, rc);
2736 CHECK_CHAR_COERCE_INT (character);
2739 code_point = ENCODE_CHAR (XCHAR (character), charset);
2740 dimension = XCHARSET_DIMENSION (charset);
2741 while (dimension > 0)
2743 rc = Fcons (make_int (code_point & 255), rc);
2747 rc = Fcons (XCHARSET_NAME (charset), rc);
2749 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2751 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2753 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2757 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2766 #ifdef ENABLE_COMPOSITE_CHARS
2767 /************************************************************************/
2768 /* composite character functions */
2769 /************************************************************************/
2772 lookup_composite_char (Bufbyte *str, int len)
2774 Lisp_Object lispstr = make_string (str, len);
2775 Lisp_Object ch = Fgethash (lispstr,
2776 Vcomposite_char_string2char_hash_table,
2782 if (composite_char_row_next >= 128)
2783 signal_simple_error ("No more composite chars available", lispstr);
2784 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2785 composite_char_col_next);
2786 Fputhash (make_char (emch), lispstr,
2787 Vcomposite_char_char2string_hash_table);
2788 Fputhash (lispstr, make_char (emch),
2789 Vcomposite_char_string2char_hash_table);
2790 composite_char_col_next++;
2791 if (composite_char_col_next >= 128)
2793 composite_char_col_next = 32;
2794 composite_char_row_next++;
2803 composite_char_string (Emchar ch)
2805 Lisp_Object str = Fgethash (make_char (ch),
2806 Vcomposite_char_char2string_hash_table,
2808 assert (!UNBOUNDP (str));
2812 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2813 Convert a string into a single composite character.
2814 The character is the result of overstriking all the characters in
2819 CHECK_STRING (string);
2820 return make_char (lookup_composite_char (XSTRING_DATA (string),
2821 XSTRING_LENGTH (string)));
2824 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2825 Return a string of the characters comprising a composite character.
2833 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2834 signal_simple_error ("Must be composite char", ch);
2835 return composite_char_string (emch);
2837 #endif /* ENABLE_COMPOSITE_CHARS */
2840 /************************************************************************/
2841 /* initialization */
2842 /************************************************************************/
2845 syms_of_mule_charset (void)
2847 INIT_LRECORD_IMPLEMENTATION (charset);
2849 DEFSUBR (Fcharsetp);
2850 DEFSUBR (Ffind_charset);
2851 DEFSUBR (Fget_charset);
2852 DEFSUBR (Fcharset_list);
2853 DEFSUBR (Fcharset_name);
2854 DEFSUBR (Fmake_charset);
2855 DEFSUBR (Fmake_reverse_direction_charset);
2856 /* DEFSUBR (Freverse_direction_charset); */
2857 DEFSUBR (Fdefine_charset_alias);
2858 DEFSUBR (Fcharset_from_attributes);
2859 DEFSUBR (Fcharset_short_name);
2860 DEFSUBR (Fcharset_long_name);
2861 DEFSUBR (Fcharset_description);
2862 DEFSUBR (Fcharset_dimension);
2863 DEFSUBR (Fcharset_property);
2864 DEFSUBR (Fcharset_id);
2865 DEFSUBR (Fset_charset_ccl_program);
2866 DEFSUBR (Fset_charset_registry);
2869 DEFSUBR (Fcharset_mapping_table);
2870 DEFSUBR (Fset_charset_mapping_table);
2872 DEFSUBR (Fsave_charset_mapping_table);
2873 DEFSUBR (Freset_charset_mapping_table);
2874 #ifdef HAVE_LIBCHISE
2875 DEFSUBR (Fsave_charset_properties);
2876 #endif /* HAVE_LIBCHISE */
2877 #endif /* HAVE_CHISE */
2878 DEFSUBR (Fdecode_char);
2879 DEFSUBR (Fdecode_builtin_char);
2880 DEFSUBR (Fencode_char);
2883 DEFSUBR (Fmake_char);
2884 DEFSUBR (Fchar_charset);
2885 DEFSUBR (Fchar_octet);
2886 DEFSUBR (Fsplit_char);
2888 #ifdef ENABLE_COMPOSITE_CHARS
2889 DEFSUBR (Fmake_composite_char);
2890 DEFSUBR (Fcomposite_char_string);
2893 defsymbol (&Qcharsetp, "charsetp");
2894 defsymbol (&Qregistry, "registry");
2895 defsymbol (&Qfinal, "final");
2896 defsymbol (&Qgraphic, "graphic");
2897 defsymbol (&Qdirection, "direction");
2898 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2899 defsymbol (&Qshort_name, "short-name");
2900 defsymbol (&Qlong_name, "long-name");
2901 defsymbol (&Qiso_ir, "iso-ir");
2903 defsymbol (&Qmother, "mother");
2904 defsymbol (&Qmin_code, "min-code");
2905 defsymbol (&Qmax_code, "max-code");
2906 defsymbol (&Qcode_offset, "code-offset");
2907 defsymbol (&Qconversion, "conversion");
2908 defsymbol (&Q94x60, "94x60");
2909 defsymbol (&Q94x94x60, "94x94x60");
2910 defsymbol (&Qbig5_1, "big5-1");
2911 defsymbol (&Qbig5_2, "big5-2");
2914 defsymbol (&Ql2r, "l2r");
2915 defsymbol (&Qr2l, "r2l");
2917 /* Charsets, compatible with FSF 20.3
2918 Naming convention is Script-Charset[-Edition] */
2919 defsymbol (&Qascii, "ascii");
2920 defsymbol (&Qcontrol_1, "control-1");
2921 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2922 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2923 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2924 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2925 defsymbol (&Qthai_tis620, "thai-tis620");
2926 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2927 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2928 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2929 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2930 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2931 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2932 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2933 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208@1978");
2934 defsymbol (&Qmap_gb2312, "=gb2312");
2935 defsymbol (&Qmap_gb12345, "=gb12345");
2936 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208@1983");
2937 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2938 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2939 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2940 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2942 defsymbol (&Qsystem_char_id, "system-char-id");
2943 defsymbol (&Qmap_ucs, "=ucs");
2944 defsymbol (&Qucs, "ucs");
2945 defsymbol (&Qucs_bmp, "ucs-bmp");
2946 defsymbol (&Qucs_smp, "ucs-smp");
2947 defsymbol (&Qucs_sip, "ucs-sip");
2948 defsymbol (&Qlatin_viscii, "latin-viscii");
2949 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2950 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2951 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2952 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2953 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2954 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2955 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208@1990");
2956 defsymbol (&Qmap_big5, "=big5");
2957 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2959 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2960 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2962 defsymbol (&Qcomposite, "composite");
2966 vars_of_mule_charset (void)
2973 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2974 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2976 /* Table of charsets indexed by leading byte. */
2977 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2978 chlook->charset_by_leading_byte[i] = Qnil;
2981 /* Table of charsets indexed by type/final-byte. */
2982 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2983 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2984 chlook->charset_by_attributes[i][j] = Qnil;
2986 /* Table of charsets indexed by type/final-byte/direction. */
2987 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2988 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2989 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2990 chlook->charset_by_attributes[i][j][k] = Qnil;
2994 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2996 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2997 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3001 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3002 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3003 Leading-code of private TYPE9N charset of column-width 1.
3005 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3009 Vdefault_coded_charset_priority_list = Qnil;
3010 DEFVAR_LISP ("default-coded-charset-priority-list",
3011 &Vdefault_coded_charset_priority_list /*
3012 Default order of preferred coded-character-sets.
3014 Vdisplay_coded_charset_priority_use_inheritance = Qt;
3015 DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
3016 &Vdisplay_coded_charset_priority_use_inheritance /*
3017 If non-nil, use character inheritance.
3019 Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
3020 DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
3021 &Vdisplay_coded_charset_priority_use_hierarchy_order /*
3022 If non-nil, prefer nearest character in hierarchy order.
3028 complex_vars_of_mule_charset (void)
3030 staticpro (&Vcharset_hash_table);
3031 Vcharset_hash_table =
3032 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3034 /* Predefined character sets. We store them into variables for
3038 staticpro (&Vcharset_system_char_id);
3039 Vcharset_system_char_id =
3040 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3041 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3042 build_string ("SCID"),
3043 build_string ("CHAR-ID"),
3044 build_string ("System char-id"),
3046 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3047 staticpro (&Vcharset_ucs);
3049 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
3050 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3051 build_string ("UCS"),
3052 build_string ("UCS"),
3053 build_string ("ISO/IEC 10646"),
3055 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3056 staticpro (&Vcharset_ucs_bmp);
3058 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3059 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3060 build_string ("BMP"),
3061 build_string ("UCS-BMP"),
3062 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3064 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3065 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3066 staticpro (&Vcharset_ucs_smp);
3068 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3069 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3070 build_string ("SMP"),
3071 build_string ("UCS-SMP"),
3072 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3073 build_string ("UCS00-1"),
3074 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3075 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
3076 staticpro (&Vcharset_ucs_sip);
3078 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3079 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3080 build_string ("SIP"),
3081 build_string ("UCS-SIP"),
3082 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3083 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3084 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3085 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
3087 # define MIN_CHAR_THAI 0
3088 # define MAX_CHAR_THAI 0
3089 /* # define MIN_CHAR_HEBREW 0 */
3090 /* # define MAX_CHAR_HEBREW 0 */
3091 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3092 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3094 staticpro (&Vcharset_ascii);
3096 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3097 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3098 build_string ("ASCII"),
3099 build_string ("ASCII)"),
3100 build_string ("ASCII (ISO646 IRV)"),
3101 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3102 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
3103 staticpro (&Vcharset_control_1);
3104 Vcharset_control_1 =
3105 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3106 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3107 build_string ("C1"),
3108 build_string ("Control characters"),
3109 build_string ("Control characters 128-191"),
3111 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
3112 staticpro (&Vcharset_latin_iso8859_1);
3113 Vcharset_latin_iso8859_1 =
3114 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3115 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3116 build_string ("Latin-1"),
3117 build_string ("ISO8859-1 (Latin-1)"),
3118 build_string ("ISO8859-1 (Latin-1)"),
3119 build_string ("iso8859-1"),
3120 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3121 staticpro (&Vcharset_latin_iso8859_2);
3122 Vcharset_latin_iso8859_2 =
3123 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3124 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3125 build_string ("Latin-2"),
3126 build_string ("ISO8859-2 (Latin-2)"),
3127 build_string ("ISO8859-2 (Latin-2)"),
3128 build_string ("iso8859-2"),
3129 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3130 staticpro (&Vcharset_latin_iso8859_3);
3131 Vcharset_latin_iso8859_3 =
3132 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3133 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3134 build_string ("Latin-3"),
3135 build_string ("ISO8859-3 (Latin-3)"),
3136 build_string ("ISO8859-3 (Latin-3)"),
3137 build_string ("iso8859-3"),
3138 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3139 staticpro (&Vcharset_latin_iso8859_4);
3140 Vcharset_latin_iso8859_4 =
3141 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3142 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3143 build_string ("Latin-4"),
3144 build_string ("ISO8859-4 (Latin-4)"),
3145 build_string ("ISO8859-4 (Latin-4)"),
3146 build_string ("iso8859-4"),
3147 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3148 staticpro (&Vcharset_thai_tis620);
3149 Vcharset_thai_tis620 =
3150 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3151 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3152 build_string ("TIS620"),
3153 build_string ("TIS620 (Thai)"),
3154 build_string ("TIS620.2529 (Thai)"),
3155 build_string ("tis620"),
3156 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3157 staticpro (&Vcharset_greek_iso8859_7);
3158 Vcharset_greek_iso8859_7 =
3159 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3160 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3161 build_string ("ISO8859-7"),
3162 build_string ("ISO8859-7 (Greek)"),
3163 build_string ("ISO8859-7 (Greek)"),
3164 build_string ("iso8859-7"),
3165 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3166 staticpro (&Vcharset_arabic_iso8859_6);
3167 Vcharset_arabic_iso8859_6 =
3168 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3169 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3170 build_string ("ISO8859-6"),
3171 build_string ("ISO8859-6 (Arabic)"),
3172 build_string ("ISO8859-6 (Arabic)"),
3173 build_string ("iso8859-6"),
3174 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3175 staticpro (&Vcharset_hebrew_iso8859_8);
3176 Vcharset_hebrew_iso8859_8 =
3177 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3178 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3179 build_string ("ISO8859-8"),
3180 build_string ("ISO8859-8 (Hebrew)"),
3181 build_string ("ISO8859-8 (Hebrew)"),
3182 build_string ("iso8859-8"),
3184 0 /* MIN_CHAR_HEBREW */,
3185 0 /* MAX_CHAR_HEBREW */, 0, 32,
3186 Qnil, CONVERSION_IDENTICAL);
3187 staticpro (&Vcharset_katakana_jisx0201);
3188 Vcharset_katakana_jisx0201 =
3189 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3190 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3191 build_string ("JISX0201 Kana"),
3192 build_string ("JISX0201.1976 (Japanese Kana)"),
3193 build_string ("JISX0201.1976 Japanese Kana"),
3194 build_string ("jisx0201\\.1976"),
3195 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3196 staticpro (&Vcharset_latin_jisx0201);
3197 Vcharset_latin_jisx0201 =
3198 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3199 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3200 build_string ("JISX0201 Roman"),
3201 build_string ("JISX0201.1976 (Japanese Roman)"),
3202 build_string ("JISX0201.1976 Japanese Roman"),
3203 build_string ("jisx0201\\.1976"),
3204 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3205 staticpro (&Vcharset_cyrillic_iso8859_5);
3206 Vcharset_cyrillic_iso8859_5 =
3207 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3208 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3209 build_string ("ISO8859-5"),
3210 build_string ("ISO8859-5 (Cyrillic)"),
3211 build_string ("ISO8859-5 (Cyrillic)"),
3212 build_string ("iso8859-5"),
3213 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3214 staticpro (&Vcharset_latin_iso8859_9);
3215 Vcharset_latin_iso8859_9 =
3216 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3217 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3218 build_string ("Latin-5"),
3219 build_string ("ISO8859-9 (Latin-5)"),
3220 build_string ("ISO8859-9 (Latin-5)"),
3221 build_string ("iso8859-9"),
3222 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3224 staticpro (&Vcharset_jis_x0208);
3225 Vcharset_jis_x0208 =
3226 make_charset (LEADING_BYTE_JIS_X0208,
3227 Qmap_jis_x0208, 94, 2,
3228 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3229 build_string ("JIS X0208"),
3230 build_string ("JIS X0208 Common"),
3231 build_string ("JIS X0208 Common part"),
3232 build_string ("jisx0208\\.1990"),
3234 MIN_CHAR_JIS_X0208_1990,
3235 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3236 Qnil, CONVERSION_94x94);
3238 staticpro (&Vcharset_japanese_jisx0208_1978);
3239 Vcharset_japanese_jisx0208_1978 =
3240 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3241 Qmap_jis_x0208_1978, 94, 2,
3242 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3243 build_string ("JIS X0208:1978"),
3244 build_string ("JIS X0208:1978 (Japanese)"),
3246 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3247 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3254 CONVERSION_IDENTICAL);
3255 staticpro (&Vcharset_chinese_gb2312);
3256 Vcharset_chinese_gb2312 =
3257 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3258 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3259 build_string ("GB2312"),
3260 build_string ("GB2312)"),
3261 build_string ("GB2312 Chinese simplified"),
3262 build_string ("gb2312"),
3263 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3264 staticpro (&Vcharset_chinese_gb12345);
3265 Vcharset_chinese_gb12345 =
3266 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3267 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3268 build_string ("G1"),
3269 build_string ("GB 12345)"),
3270 build_string ("GB 12345-1990"),
3271 build_string ("GB12345\\(\\.1990\\)?-0"),
3272 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3273 staticpro (&Vcharset_japanese_jisx0208);
3274 Vcharset_japanese_jisx0208 =
3275 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3276 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3277 build_string ("JISX0208"),
3278 build_string ("JIS X0208:1983 (Japanese)"),
3279 build_string ("JIS X0208:1983 Japanese Kanji"),
3280 build_string ("jisx0208\\.1983"),
3287 CONVERSION_IDENTICAL);
3289 staticpro (&Vcharset_japanese_jisx0208_1990);
3290 Vcharset_japanese_jisx0208_1990 =
3291 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3292 Qmap_jis_x0208_1990, 94, 2,
3293 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3294 build_string ("JISX0208-1990"),
3295 build_string ("JIS X0208:1990 (Japanese)"),
3296 build_string ("JIS X0208:1990 Japanese Kanji"),
3297 build_string ("jisx0208\\.1990"),
3299 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3300 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3301 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3302 Vcharset_jis_x0208 /* Qnil */,
3303 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3305 staticpro (&Vcharset_korean_ksc5601);
3306 Vcharset_korean_ksc5601 =
3307 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3308 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3309 build_string ("KSC5601"),
3310 build_string ("KSC5601 (Korean"),
3311 build_string ("KSC5601 Korean Hangul and Hanja"),
3312 build_string ("ksc5601"),
3313 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3314 staticpro (&Vcharset_japanese_jisx0212);
3315 Vcharset_japanese_jisx0212 =
3316 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3317 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3318 build_string ("JISX0212"),
3319 build_string ("JISX0212 (Japanese)"),
3320 build_string ("JISX0212 Japanese Supplement"),
3321 build_string ("jisx0212"),
3322 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3324 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3325 staticpro (&Vcharset_chinese_cns11643_1);
3326 Vcharset_chinese_cns11643_1 =
3327 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3328 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3329 build_string ("CNS11643-1"),
3330 build_string ("CNS11643-1 (Chinese traditional)"),
3332 ("CNS 11643 Plane 1 Chinese traditional"),
3333 build_string (CHINESE_CNS_PLANE_RE("1")),
3334 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3335 staticpro (&Vcharset_chinese_cns11643_2);
3336 Vcharset_chinese_cns11643_2 =
3337 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3338 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3339 build_string ("CNS11643-2"),
3340 build_string ("CNS11643-2 (Chinese traditional)"),
3342 ("CNS 11643 Plane 2 Chinese traditional"),
3343 build_string (CHINESE_CNS_PLANE_RE("2")),
3344 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3346 staticpro (&Vcharset_latin_tcvn5712);
3347 Vcharset_latin_tcvn5712 =
3348 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3349 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3350 build_string ("TCVN 5712"),
3351 build_string ("TCVN 5712 (VSCII-2)"),
3352 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3353 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3354 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3355 staticpro (&Vcharset_latin_viscii_lower);
3356 Vcharset_latin_viscii_lower =
3357 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3358 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3359 build_string ("VISCII lower"),
3360 build_string ("VISCII lower (Vietnamese)"),
3361 build_string ("VISCII lower (Vietnamese)"),
3362 build_string ("MULEVISCII-LOWER"),
3363 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3364 staticpro (&Vcharset_latin_viscii_upper);
3365 Vcharset_latin_viscii_upper =
3366 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3367 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3368 build_string ("VISCII upper"),
3369 build_string ("VISCII upper (Vietnamese)"),
3370 build_string ("VISCII upper (Vietnamese)"),
3371 build_string ("MULEVISCII-UPPER"),
3372 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3373 staticpro (&Vcharset_latin_viscii);
3374 Vcharset_latin_viscii =
3375 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3376 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3377 build_string ("VISCII"),
3378 build_string ("VISCII 1.1 (Vietnamese)"),
3379 build_string ("VISCII 1.1 (Vietnamese)"),
3380 build_string ("VISCII1\\.1"),
3381 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3382 staticpro (&Vcharset_chinese_big5);
3383 Vcharset_chinese_big5 =
3384 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3385 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3386 build_string ("Big5"),
3387 build_string ("Big5"),
3388 build_string ("Big5 Chinese traditional"),
3389 build_string ("big5-0"),
3391 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3392 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3394 staticpro (&Vcharset_ethiopic_ucs);
3395 Vcharset_ethiopic_ucs =
3396 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3397 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3398 build_string ("Ethiopic (UCS)"),
3399 build_string ("Ethiopic (UCS)"),
3400 build_string ("Ethiopic of UCS"),
3401 build_string ("Ethiopic-Unicode"),
3402 Qnil, 0x1200, 0x137F, 0, 0,
3403 Qnil, CONVERSION_IDENTICAL);
3405 staticpro (&Vcharset_chinese_big5_1);
3406 Vcharset_chinese_big5_1 =
3407 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3408 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3409 build_string ("Big5"),
3410 build_string ("Big5 (Level-1)"),
3412 ("Big5 Level-1 Chinese traditional"),
3413 build_string ("big5"),
3414 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3415 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3416 staticpro (&Vcharset_chinese_big5_2);
3417 Vcharset_chinese_big5_2 =
3418 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3419 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3420 build_string ("Big5"),
3421 build_string ("Big5 (Level-2)"),
3423 ("Big5 Level-2 Chinese traditional"),
3424 build_string ("big5"),
3425 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3426 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3428 #ifdef ENABLE_COMPOSITE_CHARS
3429 /* #### For simplicity, we put composite chars into a 96x96 charset.
3430 This is going to lead to problems because you can run out of
3431 room, esp. as we don't yet recycle numbers. */
3432 staticpro (&Vcharset_composite);
3433 Vcharset_composite =
3434 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3435 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3436 build_string ("Composite"),
3437 build_string ("Composite characters"),
3438 build_string ("Composite characters"),
3441 /* #### not dumped properly */
3442 composite_char_row_next = 32;
3443 composite_char_col_next = 32;
3445 Vcomposite_char_string2char_hash_table =
3446 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3447 Vcomposite_char_char2string_hash_table =
3448 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3449 staticpro (&Vcomposite_char_string2char_hash_table);
3450 staticpro (&Vcomposite_char_char2string_hash_table);
3451 #endif /* ENABLE_COMPOSITE_CHARS */