1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
44 /* The various pre-defined charsets. */
/* One global Lisp_Object per predefined charset.  Several of them are
   consulted further down in this file, e.g. Vcharset_control_1 in
   non_ascii_set_charptr_emchar and Vcharset_ascii, Vcharset_ucs,
   Vcharset_ucs_bmp, Vcharset_ucs_smp and Vcharset_ucs_sip in
   encode_builtin_char_1.
   NOTE(review): the embedded line numbering has gaps inside this run
   (68 -> 70, 81 -> 83), so blank lines or preprocessor conditionals may
   sit between some of these declarations -- confirm against the full
   file. */
46 Lisp_Object Vcharset_ascii;
47 Lisp_Object Vcharset_control_1;
48 Lisp_Object Vcharset_latin_iso8859_1;
49 Lisp_Object Vcharset_latin_iso8859_2;
50 Lisp_Object Vcharset_latin_iso8859_3;
51 Lisp_Object Vcharset_latin_iso8859_4;
52 Lisp_Object Vcharset_thai_tis620;
53 Lisp_Object Vcharset_greek_iso8859_7;
54 Lisp_Object Vcharset_arabic_iso8859_6;
55 Lisp_Object Vcharset_hebrew_iso8859_8;
56 Lisp_Object Vcharset_katakana_jisx0201;
57 Lisp_Object Vcharset_latin_jisx0201;
58 Lisp_Object Vcharset_cyrillic_iso8859_5;
59 Lisp_Object Vcharset_latin_iso8859_9;
60 Lisp_Object Vcharset_japanese_jisx0208_1978;
61 Lisp_Object Vcharset_chinese_gb2312;
62 Lisp_Object Vcharset_chinese_gb12345;
63 Lisp_Object Vcharset_japanese_jisx0208;
64 Lisp_Object Vcharset_japanese_jisx0208_1990;
65 Lisp_Object Vcharset_korean_ksc5601;
66 Lisp_Object Vcharset_japanese_jisx0212;
67 Lisp_Object Vcharset_chinese_cns11643_1;
68 Lisp_Object Vcharset_chinese_cns11643_2;
70 Lisp_Object Vcharset_system_char_id;
71 Lisp_Object Vcharset_ucs;
72 Lisp_Object Vcharset_ucs_bmp;
73 Lisp_Object Vcharset_ucs_smp;
74 Lisp_Object Vcharset_ucs_sip;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
/* Composite-character state, present only when ENABLE_COMPOSITE_CHARS
   is defined: the composite charset object, two hash tables (the
   char2string table is queried with Fgethash in
   non_ascii_valid_char_p), and two file-local row/column counters.
   NOTE(review): in this listing the comment that introduces the hash
   tables is not terminated (the line carrying its end appears to be
   missing) -- confirm against the full file. */
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at the charset lookup tables (struct charset_lookup);
   later code indexes chlook->charset_by_leading_byte and
   chlook->charset_by_attributes and bumps its next_allocated_*
   counters.
   The two descriptions below describe that structure: the first lists
   the charset_by_leading_byte member as an array of
   NUM_LEADING_BYTES+4*128 Lisp objects, the second pairs
   sizeof (struct charset_lookup) with the first.
   NOTE(review): presumably these feed the object dumper; the
   initializers' closing lines are not visible here -- confirm. */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* The table has 0xA0 entries.  Entries 0x00-0x7f are 1; the final row
   (0x90-0x9f) is 3 except for the last entry, which is 4.
   NOTE(review): two 16-entry rows appear below for 0x80-0x8f (one
   ending in 3, one all 2).  Together with the other rows that would be
   176 initializers for a 160-element array, so the two rows are
   presumably alternatives selected by a preprocessor conditional whose
   lines are missing from this listing -- confirm. */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of decoding-table vector V for a charset of
   dimension DIM whose per-level table length is CCS_LEN.
   Visible checks: the length of V is compared against CCS_LEN, each
   element is examined, and an element that is neither nil nor a
   character is apparently handed to a recursive call with DIM - 1.
   NOTE(review): the return statements (and therefore the meaning of
   the result) are on lines missing from this listing -- confirm. */
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record that CHARACTER has code point VALUE in coded charset CCS.
   VALUE is accepted in two forms on the visible lines:
     - a list of bytes (marked obsolete below), folded into one integer
       with `(code_point << 8) | j';
     - an integer, which is masked with 0x7F7F7F7F when
       XCHARSET_GRAPHIC (ccs) is 1.
   Anything else signals "Invalid value for coded-charset".
   Afterwards the character's previous position (looked up with
   Fget_char_attribute) is removed from the decoding table and the new
   code point is stored with decoding_table_put_char.
   NOTE(review): the return type, the local declarations and the exact
   nesting of these steps are on lines missing from this listing. */
180 put_char_ccs_code_point (Lisp_Object character,
181 Lisp_Object ccs, Lisp_Object value)
183 if ( !(EQ (XCHARSET_NAME (ccs), Qmap_ucs)
184 && INTP (value) && (XINT (value) < 0xF0000))
186 /* || (XCHAR (character) != XINT (value)) */ )
188 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
192 { /* obsolete representation: value must be a list of bytes */
193 Lisp_Object ret = Fcar (value);
197 signal_simple_error ("Invalid value for coded-charset", value);
198 code_point = XINT (ret);
199 if (XCHARSET_GRAPHIC (ccs) == 1)
207 signal_simple_error ("Invalid value for coded-charset",
211 signal_simple_error ("Invalid value for coded-charset",
214 if (XCHARSET_GRAPHIC (ccs) == 1)
216 code_point = (code_point << 8) | j;
219 value = make_int (code_point);
221 else if (INTP (value))
223 code_point = XINT (value);
224 if (XCHARSET_GRAPHIC (ccs) == 1)
226 code_point &= 0x7F7F7F7F;
227 value = make_int (code_point);
231 signal_simple_error ("Invalid value for coded-charset", value);
235 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
238 decoding_table_remove_char (ccs, XINT (cpos));
241 decoding_table_put_char (ccs, code_point, character);
/* Remove the mapping between CHARACTER and coded charset CCS.
   When the charset's decoding table is a vector, the character's
   current code position is fetched with Fget_char_attribute and dropped
   via decoding_table_remove_char.  When the encoding table is a char
   table, the character's entry there is set to Qunbound.
   NOTE(review): the return type, the return value and the guard around
   XINT (cpos) are on lines missing from this listing. */
247 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
249 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
250 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
252 if (VECTORP (decoding_table))
254 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
258 decoding_table_remove_char (ccs, XINT (cpos));
261 if (CHAR_TABLEP (encoding_table))
263 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
/* File-level globals: one int (leading_code_private_11), Lisp_Object
   variables named Q* (Qmap_ucs, for example, is compared with a charset
   name in put_char_ccs_code_point), and Vcharset_hash_table, into which
   make_charset stores name -> charset with Fputhash.
   NOTE(review): the two Qvietnamese_* lines are the middle of a longer
   declaration whose first and last lines are missing from this
   listing; many neighbouring declarations are missing as well. */
271 int leading_code_private_11;
274 Lisp_Object Qcharsetp;
276 /* Qdoc_string, Qdimension, Qchars defined in general.c */
277 Lisp_Object Qregistry, Qfinal, Qgraphic;
278 Lisp_Object Qdirection;
279 Lisp_Object Qreverse_direction_charset;
280 Lisp_Object Qleading_byte;
281 Lisp_Object Qshort_name, Qlong_name;
284 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
285 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
320 Qvietnamese_viscii_lower,
321 Qvietnamese_viscii_upper,
331 Lisp_Object Ql2r, Qr2l;
333 Lisp_Object Vcharset_hash_table;
335 /* Composite characters are characters constructed by overstriking two
336 or more regular characters.
338 1) The old Mule implementation involves storing composite characters
339 in a buffer as a tag followed by all of the actual characters
340 used to make up the composite character. I think this is a bad
341 idea; it greatly complicates code that wants to handle strings
342 one character at a time because it has to deal with the possibility
343 of great big ungainly characters. It's much more reasonable to
344 simply store an index into a table of composite characters.
346 2) The current implementation only allows for 16,384 separate
347 composite characters over the lifetime of the XEmacs process.
348 This could become a potential problem if the user
349 edited lots of different files that use composite characters.
350 Due to FSF bogosity, increasing the number of allowable
351 composite characters under Mule would decrease the number
352 of possible faces that can exist. Mule already has shrunk
353 this to 2048, and further shrinkage would become uncomfortable.
354 No such problems exist in XEmacs.
356 Composite characters could be represented as 0x80 C1 C2 C3,
357 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
358 for slightly under 2^20 (one million) composite characters
359 over the XEmacs process lifetime, and you only need to
360 increase the size of a Mule character from 19 to 21 bits.
361 Or you could use 0x80 C1 C2 C3 C4, allowing for about
362 85 million (slightly over 2^26) composite characters. */
365 /************************************************************************/
366 /* Basic Emchar functions */
367 /************************************************************************/
/* Review notes for non_ascii_set_charptr_emchar (STR, C):
   - The range-tested branches (c <= 0x7ff, 0xffff, 0x1fffff,
     0x3ffffff, and the fall-through) write a lead byte built from
     0xc0 / 0xe0 / 0xf0 / 0xf8 / 0xfc followed by continuation bytes of
     the form (6 bits | 0x80), i.e. a UTF-8-style sequence of 2 to 6
     bytes through the cursor `p'.
   - The trailing lines instead split C with BREAKUP_CHAR, take its
     leading byte with CHAR_LEADING_BYTE, and emit
     PRIVATE_LEADING_BYTE_PREFIX (lb) for private leading bytes.
     Presumably the two forms are alternatives under a preprocessor
     conditional whose lines are missing from this listing -- confirm.
   - The original header comment that follows is not terminated in this
     listing (its closing line appears to be missing). */
369 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
370 string in STR. Returns the number of bytes stored.
371 Do not call this directly. Use the macro set_charptr_emchar() instead.
375 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
390 else if ( c <= 0x7ff )
392 *p++ = (c >> 6) | 0xc0;
393 *p++ = (c & 0x3f) | 0x80;
395 else if ( c <= 0xffff )
397 *p++ = (c >> 12) | 0xe0;
398 *p++ = ((c >> 6) & 0x3f) | 0x80;
399 *p++ = (c & 0x3f) | 0x80;
401 else if ( c <= 0x1fffff )
403 *p++ = (c >> 18) | 0xf0;
404 *p++ = ((c >> 12) & 0x3f) | 0x80;
405 *p++ = ((c >> 6) & 0x3f) | 0x80;
406 *p++ = (c & 0x3f) | 0x80;
408 else if ( c <= 0x3ffffff )
410 *p++ = (c >> 24) | 0xf8;
411 *p++ = ((c >> 18) & 0x3f) | 0x80;
412 *p++ = ((c >> 12) & 0x3f) | 0x80;
413 *p++ = ((c >> 6) & 0x3f) | 0x80;
414 *p++ = (c & 0x3f) | 0x80;
418 *p++ = (c >> 30) | 0xfc;
419 *p++ = ((c >> 24) & 0x3f) | 0x80;
420 *p++ = ((c >> 18) & 0x3f) | 0x80;
421 *p++ = ((c >> 12) & 0x3f) | 0x80;
422 *p++ = ((c >> 6) & 0x3f) | 0x80;
423 *p++ = (c & 0x3f) | 0x80;
426 BREAKUP_CHAR (c, charset, c1, c2);
427 lb = CHAR_LEADING_BYTE (c);
428 if (LEADING_BYTE_PRIVATE_P (lb))
429 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
431 if (EQ (charset, Vcharset_control_1))
440 /* Return the first character from a Mule-encoded string in STR,
441 assuming it's non-ASCII. Do not call this directly.
442 Use the macro charptr_emchar() instead. */
/* Review notes:
   - The first visible form classifies the lead byte `b' against 0xf8,
     0xf0, 0xe0 and 0xc0, then loops `len' times folding the low 6 bits
     of each following byte into `ch' (ch = (ch << 6) | (b & 0x3f)).
   - The second visible form reads leading byte i0: for
     LEADING_BYTE_CONTROL_1 it returns the next byte minus 0x20;
     otherwise it looks the charset up with CHARSET_BY_LEADING_BYTE and
     builds the result with MAKE_CHAR (charset, i1, i2), reading a
     second position byte when the charset's dimension is 2.
   - Presumably the two forms are alternatives under a preprocessor
     conditional whose lines are missing from this listing -- confirm. */
445 non_ascii_charptr_emchar (const Bufbyte *str)
458 else if ( b >= 0xf8 )
463 else if ( b >= 0xf0 )
468 else if ( b >= 0xe0 )
473 else if ( b >= 0xc0 )
483 for( ; len > 0; len-- )
486 ch = ( ch << 6 ) | ( b & 0x3f );
490 Bufbyte i0 = *str, i1, i2 = 0;
493 if (i0 == LEADING_BYTE_CONTROL_1)
494 return (Emchar) (*++str - 0x20);
496 if (LEADING_BYTE_PREFIX_P (i0))
501 charset = CHARSET_BY_LEADING_BYTE (i0);
502 if (XCHARSET_DIMENSION (charset) == 2)
505 return MAKE_CHAR (charset, i1, i2);
509 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
510 Do not call this directly. Use the macro valid_char_p() instead. */
/* Review notes:
   - CH is split into three fields with CHAR_FIELD1/2/3.
   - One visible path range-checks f2 against the official and private
     FIELD2 bounds, looks the charset up by leading byte, and finishes
     with `XCHARSET_CHARS (charset) == 96'.
   - The other visible path range-checks f1 against the FIELD1 bounds,
     tests f2 and f3 against 0x20, optionally consults the composite
     char hash table, and likewise finishes with
     `XCHARSET_CHARS (charset) == 96'.
   - The results returned by the failing checks, and which test selects
     between the two paths, are on lines missing from this listing.
   - The three "NOTE: ..." lines below are the interior of a comment
     whose delimiters are missing from this listing. */
514 non_ascii_valid_char_p (Emchar ch)
518 /* Must have only lowest 19 bits set */
522 f1 = CHAR_FIELD1 (ch);
523 f2 = CHAR_FIELD2 (ch);
524 f3 = CHAR_FIELD3 (ch);
530 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
531 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
532 f2 > MAX_CHAR_FIELD2_PRIVATE)
537 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
538 f2 <= MAX_CHAR_FIELD2_PRIVATE))
542 NOTE: This takes advantage of the fact that
543 FIELD2_TO_OFFICIAL_LEADING_BYTE and
544 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
546 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
547 if (EQ (charset, Qnil))
549 return (XCHARSET_CHARS (charset) == 96);
555 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
556 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
557 f1 > MAX_CHAR_FIELD1_PRIVATE)
559 if (f2 < 0x20 || f3 < 0x20)
562 #ifdef ENABLE_COMPOSITE_CHARS
563 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
565 if (UNBOUNDP (Fgethash (make_int (ch),
566 Vcomposite_char_char2string_hash_table,
571 #endif /* ENABLE_COMPOSITE_CHARS */
573 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
574 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
577 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
582 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
584 if (EQ (charset, Qnil))
586 return (XCHARSET_CHARS (charset) == 96);
592 /************************************************************************/
593 /* Basic string functions */
594 /************************************************************************/
596 /* Copy the character pointed to by SRC into DST. Do not call this
597 directly. Use the macro charptr_copy_char() instead.
598 Return the number of bytes copied. */
/* The byte count comes from REP_BYTES_BY_FIRST_BYTE applied to the
   first source byte; the loop then advances SRC and DST together that
   many times.
   NOTE(review): the loop body and the return statement are on lines
   missing from this listing. */
601 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
603 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
605 for (i = bytes; i; i--, dst++, src++)
611 /************************************************************************/
612 /* streams of Emchars */
613 /************************************************************************/
615 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
616 The functions below are not meant to be called directly; use
617 the macros in insdel.h. */
/* Lstream_get_emchar_1 (STREAM, CH): CH is the first byte of a
   character, already obtained by the caller.  It is stored in str[0];
   the remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are read with
   Lstream_getc (each asserted to be non-negative) and appended; the
   assembled buffer is then decoded with charptr_emchar.
   NOTE(review): the return type and the declaration of `bytes' are on
   lines missing from this listing. */
620 Lstream_get_emchar_1 (Lstream *stream, int ch)
622 Bufbyte str[MAX_EMCHAR_LEN];
623 Bufbyte *strptr = str;
626 str[0] = (Bufbyte) ch;
628 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
630 int c = Lstream_getc (stream);
631 bufpos_checking_assert (c >= 0);
632 *++strptr = (Bufbyte) c;
634 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, then pass the encoded
   bytes to Lstream_write and return that call's result.
   NOTE(review): the return type line is missing from this listing. */
638 Lstream_fput_emchar (Lstream *stream, Emchar ch)
640 Bufbyte str[MAX_EMCHAR_LEN];
641 Bytecount len = set_charptr_emchar (str, ch);
642 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, then hand the encoded
   bytes to Lstream_unread.  No result of Lstream_unread is used on the
   visible lines.
   NOTE(review): the return type line is missing from this listing. */
646 Lstream_funget_emchar (Lstream *stream, Emchar ch)
648 Bufbyte str[MAX_EMCHAR_LEN];
649 Bytecount len = set_charptr_emchar (str, ch);
650 Lstream_unread (stream, str, len);
654 /************************************************************************/
656 /************************************************************************/
/* Mark method for charset objects (passed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Calls mark_object on the
   Lisp_Object members short_name, long_name, doc_string, registry,
   ccl_program, decoding_table and mother.
   NOTE(review): the return statement, and possibly further mark_object
   calls, are on lines missing from this listing (numbering gaps at
   667 -> 669 and after 670). */
659 mark_charset (Lisp_Object obj)
661 Lisp_Charset *cs = XCHARSET (obj);
663 mark_object (cs->short_name);
664 mark_object (cs->long_name);
665 mark_object (cs->doc_string);
666 mark_object (cs->registry);
667 mark_object (cs->ccl_program);
669 mark_object (cs->decoding_table);
670 mark_object (cs->mother);
/* Print method for charset objects (passed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Output has the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING
       CHARS^DIM DIRECTION cols=N gN final='C' reg=REGISTRY 0xUID>
   written to PRINTCHARFUN with write_c_string / print_internal.
   The error () call reports "printing unreadable object"; its guarding
   condition is on a line missing from this listing.
   NOTE(review): both sprintf calls write into `buf', whose declaration
   (and size) is not visible here.  sprintf is unbounded, so confirm
   that the buffer is large enough for the formatted text. */
676 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
678 Lisp_Charset *cs = XCHARSET (obj);
682 error ("printing unreadable object #<charset %s 0x%x>",
683 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
686 write_c_string ("#<charset ", printcharfun);
687 print_internal (CHARSET_NAME (cs), printcharfun, 0);
688 write_c_string (" ", printcharfun);
689 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
690 write_c_string (" ", printcharfun);
691 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
692 write_c_string (" ", printcharfun);
693 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
694 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
696 CHARSET_DIMENSION (cs),
697 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
698 CHARSET_COLUMNS (cs),
699 CHARSET_GRAPHIC (cs),
701 write_c_string (buf, printcharfun);
702 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
703 sprintf (buf, " 0x%x>", cs->header.uid);
704 write_c_string (buf, printcharfun);
/* Description table listing the Lisp_Object members of Lisp_Charset:
   name, doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table and mother.
   NOTE(review): the terminating entry and closing brace, and possibly
   further entries (numbering gap 714 -> 716), are on lines missing
   from this listing. */
707 static const struct lrecord_description charset_description[] = {
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
717 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Declares the "charset" lrecord type, wiring in mark_charset and
   print_charset; the three 0 arguments leave the following slots unset.
   NOTE(review): the remaining macro arguments are on lines missing
   from this listing. */
722 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
723 mark_charset, print_charset, 0, 0, 0,
727 /* Make a new charset. */
728 /* #### SJT Should generic properties be allowed? */
/* Review notes for make_charset:
   - Allocates a Lisp_Charset with alloc_lcrecord_type, wraps it in
     `obj' with XSETCHARSET, and copies the arguments into the record.
     CCL_PROGRAM and REVERSE_DIRECTION_CHARSET start out as Qnil.
   - NOTE(review): on the visible lines CHARSET_DECODING_TABLE is set to
     Qunbound and the `decoding_table' parameter is never read --
     confirm whether that is intended.
   - CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII, otherwise
     dimension + 1 or dimension + 2; the condition choosing between the
     latter two is on a line missing from this listing.
   - The new object is registered in chlook->charset_by_attributes
     (indexed by an ISO 2022 type derived from DIMENSION and CHARS, and
     by FINAL) and in chlook->charset_by_leading_byte; each slot is
     asserted to be nil first.  Two indexing forms appear (with and
     without [direction]); presumably they are preprocessor
     alternatives -- confirm.
   - Finally NAME -> obj is stored in Vcharset_hash_table.
   - `reg' is assigned to CHARSET_REGISTRY but its parameter declaration
     is on a line missing from this listing (gap 734 -> 736). */
730 make_charset (Charset_ID id, Lisp_Object name,
731 unsigned short chars, unsigned char dimension,
732 unsigned char columns, unsigned char graphic,
733 Bufbyte final, unsigned char direction, Lisp_Object short_name,
734 Lisp_Object long_name, Lisp_Object doc,
736 Lisp_Object decoding_table,
737 Emchar min_code, Emchar max_code,
738 Emchar code_offset, unsigned char byte_offset,
739 Lisp_Object mother, unsigned char conversion)
742 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
746 XSETCHARSET (obj, cs);
748 CHARSET_ID (cs) = id;
749 CHARSET_NAME (cs) = name;
750 CHARSET_SHORT_NAME (cs) = short_name;
751 CHARSET_LONG_NAME (cs) = long_name;
752 CHARSET_CHARS (cs) = chars;
753 CHARSET_DIMENSION (cs) = dimension;
754 CHARSET_DIRECTION (cs) = direction;
755 CHARSET_COLUMNS (cs) = columns;
756 CHARSET_GRAPHIC (cs) = graphic;
757 CHARSET_FINAL (cs) = final;
758 CHARSET_DOC_STRING (cs) = doc;
759 CHARSET_REGISTRY (cs) = reg;
760 CHARSET_CCL_PROGRAM (cs) = Qnil;
761 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
763 CHARSET_DECODING_TABLE(cs) = Qunbound;
764 CHARSET_MIN_CODE (cs) = min_code;
765 CHARSET_MAX_CODE (cs) = max_code;
766 CHARSET_CODE_OFFSET (cs) = code_offset;
767 CHARSET_BYTE_OFFSET (cs) = byte_offset;
768 CHARSET_MOTHER (cs) = mother;
769 CHARSET_CONVERSION (cs) = conversion;
773 if (id == LEADING_BYTE_ASCII)
774 CHARSET_REP_BYTES (cs) = 1;
776 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
778 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
783 /* some charsets do not have final characters. This includes
784 ASCII, Control-1, Composite, and the two faux private
786 unsigned char iso2022_type
787 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
789 if (code_offset == 0)
791 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
792 chlook->charset_by_attributes[iso2022_type][final] = obj;
796 (chlook->charset_by_attributes[iso2022_type][final][direction]));
797 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
801 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
802 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
804 /* Some charsets are "faux" and don't have names or really exist at
805 all except in the leading-byte table. */
807 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION by post-incrementing one of the counters kept in
   chlook.  Each counter is first compared against its upper bound
   (MAX_LEADING_BYTE_PRIVATE, MAX_LEADING_BYTE_PRIVATE_1,
   MAX_LEADING_BYTE_PRIVATE_2).  The message
   "No more character sets free for this dimension" is raised with
   DIMENSION as its datum.
   NOTE(review): three counters appear (a generic one plus 1-byte and
   2-byte ones); presumably the generic one is a preprocessor
   alternative.  The branch structure and the return statement are on
   lines missing from this listing -- confirm. */
812 get_unallocated_leading_byte (int dimension)
817 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
820 lb = chlook->next_allocated_leading_byte++;
824 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
827 lb = chlook->next_allocated_1_byte_leading_byte++;
831 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
834 lb = chlook->next_allocated_2_byte_leading_byte++;
840 ("No more character sets free for this dimension",
841 make_int (dimension));
847 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157.  The same
   expression is spelled out literally in decode_ccs_conversion and used
   via this macro in charset_code_point. */
849 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Translate CODE_POINT of a derived coded charset into the code of its
   mother charset, according to CONV_TYPE (callers add the charset's
   code offset afterwards; see decode_defined_char).
   Visible cases:
     CONVERSION_IDENTICAL - result is on a line missing here.
     CONVERSION_94x60     - row = high byte, cell = low byte; rows in
                            [16+32, 16+32+30) and [18+32, 18+32+60) map
                            to (row - base) * 94 + cell - 33.
     CONVERSION_94x94x60  - same with an extra plane byte contributing
                            (plane - 33) * 94 * 60.
     CONVERSION_BIG5_1/_2 - a 94x94 index I is computed from the two
                            7-bit bytes (BIG5_2 adds
                            BIG5_SAME_ROW * (0xC9 - 0xA1)), then split
                            into Big5 bytes b1, b2; b2 is shifted by
                            0x40 when below 0x3F and by 0x62 otherwise.
   NOTE(review): the divisor (0xFF - 0xA1 + 0x7F - 0x40) written out in
   the Big5 cases is the same value as BIG5_SAME_ROW.  The return type,
   several returns and the fall-through result are on lines missing
   from this listing. */
852 decode_ccs_conversion (int conv_type, int code_point)
854 if ( conv_type == CONVERSION_IDENTICAL )
858 if ( conv_type == CONVERSION_94x60 )
860 int row = code_point >> 8;
861 int cell = code_point & 255;
865 else if (row < 16 + 32 + 30)
866 return (row - (16 + 32)) * 94 + cell - 33;
867 else if (row < 18 + 32 + 30)
869 else if (row < 18 + 32 + 60)
870 return (row - (18 + 32)) * 94 + cell - 33;
872 else if ( conv_type == CONVERSION_94x94x60 )
874 int plane = code_point >> 16;
875 int row = (code_point >> 8) & 255;
876 int cell = code_point & 255;
880 else if (row < 16 + 32 + 30)
882 (plane - 33) * 94 * 60
883 + (row - (16 + 32)) * 94
885 else if (row < 18 + 32 + 30)
887 else if (row < 18 + 32 + 60)
889 (plane - 33) * 94 * 60
890 + (row - (18 + 32)) * 94
893 else if ( conv_type == CONVERSION_BIG5_1 )
896 = (((code_point >> 8) & 0x7F) - 33) * 94
897 + (( code_point & 0x7F) - 33);
898 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
899 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
901 b2 += b2 < 0x3F ? 0x40 : 0x62;
902 return (b1 << 8) | b2;
904 else if ( conv_type == CONVERSION_BIG5_2 )
907 = (((code_point >> 8) & 0x7F) - 33) * 94
908 + (( code_point & 0x7F) - 33)
909 + BIG5_SAME_ROW * (0xC9 - 0xA1);
910 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
911 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
913 b2 += b2 < 0x3F ? 0x40 : 0x62;
914 return (b1 << 8) | b2;
/* Look up the character explicitly defined for CODE_POINT in coded
   charset CCS.
   - The decoding table is walked with get_ccs_octet_table using
     successive octets of CODE_POINT (selected by shifting dim * 8
     bits); a character found there is returned via XCHAR.
   - A table entry equal to Qunloaded triggers
     load_char_decoding_entry_maybe (that part closes with
     #endif HAVE_CHISE).
   - Otherwise, unless WITHOUT_INHERITANCE is non-zero, a charset-valued
     XCHARSET_MOTHER is consulted: CODE_POINT is converted with
     decode_ccs_conversion, XCHARSET_CODE_OFFSET is added, and the
     lookup continues in the mother (through DECODE_CHAR when the mother
     is Vcharset_ucs, recursively otherwise).
   NOTE(review): the return type, the loop over dimensions and the
   not-found result are on lines missing from this listing. */
920 decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
922 int dim = XCHARSET_DIMENSION (ccs);
923 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
931 = get_ccs_octet_table (decoding_table, ccs,
932 (code_point >> (dim * 8)) & 255);
934 if (CHARP (decoding_table))
935 return XCHAR (decoding_table);
937 if (EQ (decoding_table, Qunloaded))
939 char_id = load_char_decoding_entry_maybe (ccs, code_point);
941 #endif /* HAVE_CHISE */
944 else if ( !without_inheritance
945 && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
948 = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
952 code += XCHARSET_CODE_OFFSET(ccs);
953 if ( EQ (mother, Vcharset_ucs) )
954 return DECODE_CHAR (mother, code, without_inheritance);
956 return decode_defined_char (mother, code,
957 without_inheritance);
/* Compute the character for CODE_POINT in CHARSET arithmetically,
   without consulting a decoding table.
   - When XCHARSET_MAX_CODE is positive: with a charset-valued mother,
     the code is converted with decode_ccs_conversion and the
     computation recurses into the mother with XCHARSET_CODE_OFFSET
     added; otherwise a character id `cid' is computed from the byte(s)
     of CODE_POINT, XCHARSET_BYTE_OFFSET, XCHARSET_CHARS and
     XCHARSET_CODE_OFFSET and checked against [MIN_CODE, MAX_CODE].
   - Otherwise, when the final byte is >= '0', the result is derived
     from (final - '0') and the 7-bit position bytes, using base 33 for
     94-sets and base 32 for 96-sets, relative to MIN_CHAR_94x94 /
     MIN_CHAR_96x96 (the dimension-1 base constants are on lines missing
     from this listing).
   NOTE(review): the return type and the failure results are on lines
   missing from this listing. */
964 decode_builtin_char (Lisp_Object charset, int code_point)
966 Lisp_Object mother = XCHARSET_MOTHER (charset);
969 if ( XCHARSET_MAX_CODE (charset) > 0 )
971 if ( CHARSETP (mother) )
974 = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
979 decode_builtin_char (mother,
980 code + XCHARSET_CODE_OFFSET(charset));
987 = (XCHARSET_DIMENSION (charset) == 1
989 code_point - XCHARSET_BYTE_OFFSET (charset)
991 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
992 * XCHARSET_CHARS (charset)
993 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
994 + XCHARSET_CODE_OFFSET (charset);
995 if ((cid < XCHARSET_MIN_CODE (charset))
996 || (XCHARSET_MAX_CODE (charset) < cid))
1001 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1003 if (XCHARSET_DIMENSION (charset) == 1)
1005 switch (XCHARSET_CHARS (charset))
1009 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1012 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1020 switch (XCHARSET_CHARS (charset))
1023 return MIN_CHAR_94x94
1024 + (final - '0') * 94 * 94
1025 + (((code_point >> 8) & 0x7F) - 33) * 94
1026 + ((code_point & 0x7F) - 33);
1028 return MIN_CHAR_96x96
1029 + (final - '0') * 96 * 96
1030 + (((code_point >> 8) & 0x7F) - 32) * 96
1031 + ((code_point & 0x7F) - 32);
/* Return the code point of character CH in CHARSET.
   Visible stages:
   1. An integer entry for CH in the charset's encoding table (a char
      table) is consulted first.
   2. Otherwise a `code' is obtained -- from the mother charset via a
      recursive call when the mother is a charset (forcing
      defined_only = 1 when this charset's final byte is >= '0'), or
      under the min/max-code range test further down.  When `code' is
      acceptable, d = code - XCHARSET_CODE_OFFSET is re-expressed
      according to XCHARSET_CONVERSION: 94/96-based positional splits
      with bases 33/32, the 94x60 and 94x94x60 row layouts, and the two
      Big5 halves (which invert the byte shifts applied in
      decode_ccs_conversion).
   3. Otherwise, for charsets with final >= '0' and min code 0, the
      position is computed relative to MIN_CHAR_94 / MIN_CHAR_96 /
      MIN_CHAR_94x94 / MIN_CHAR_96x96.
   DEFINED_ONLY: the `else if (defined_only)' branches cut the search
   short; their results, the not-found result and the return type are on
   lines missing from this listing. */
1043 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1045 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1048 if ( CHAR_TABLEP (encoding_table)
1049 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1054 Lisp_Object mother = XCHARSET_MOTHER (charset);
1055 int min = XCHARSET_MIN_CODE (charset);
1056 int max = XCHARSET_MAX_CODE (charset);
1059 if ( CHARSETP (mother) )
1061 if (XCHARSET_FINAL (charset) >= '0')
1062 code = charset_code_point (mother, ch, 1);
1064 code = charset_code_point (mother, ch, defined_only);
1066 else if (defined_only)
1068 else if ( ((max == 0) && CHARSETP (mother)
1069 && (XCHARSET_FINAL (charset) == 0))
1070 || ((min <= ch) && (ch <= max)) )
1072 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1073 || ((min <= code) && (code <= max)) )
1075 int d = code - XCHARSET_CODE_OFFSET (charset);
1077 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1079 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1081 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1083 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1086 int cell = d % 94 + 33;
1092 return (row << 8) | cell;
1094 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1096 int B1 = d >> 8, B2 = d & 0xFF;
1098 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1099 - (B2 < 0x7F ? 0x40 : 0x62);
1103 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1106 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1108 int B1 = d >> 8, B2 = d & 0xFF;
1110 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1111 - (B2 < 0x7F ? 0x40 : 0x62);
1115 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1116 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1119 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1120 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1121 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1122 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1123 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1125 int plane = d / (94 * 60) + 33;
1126 int row = (d % (94 * 60)) / 94;
1127 int cell = d % 94 + 33;
1133 return (plane << 16) | (row << 8) | cell;
1135 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1137 ( (d / (94 * 94) + 33) << 16)
1138 | ((d / 94 % 94 + 33) << 8)
1140 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1142 ( (d / (96 * 96) + 32) << 16)
1143 | ((d / 96 % 96 + 32) << 8)
1145 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1147 ( (d / (94 * 94 * 94) + 33) << 24)
1148 | ((d / (94 * 94) % 94 + 33) << 16)
1149 | ((d / 94 % 94 + 33) << 8)
1151 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1153 ( (d / (96 * 96 * 96) + 32) << 24)
1154 | ((d / (96 * 96) % 96 + 32) << 16)
1155 | ((d / 96 % 96 + 32) << 8)
/* NOTE(review): this diagnostic goes to stdout through printf and has
   no trailing newline; what the function returns afterwards is on a
   line missing from this listing. */
1159 printf ("Unknown CCS-conversion %d is specified!",
1160 XCHARSET_CONVERSION (charset));
1164 else if (defined_only)
1166 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1167 ( XCHARSET_MIN_CODE (charset) == 0 )
1169 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1170 (XCHARSET_CODE_OFFSET (charset)
1171 == XCHARSET_MIN_CODE (charset))
1176 if (XCHARSET_DIMENSION (charset) == 1)
1178 if (XCHARSET_CHARS (charset) == 94)
1180 if (((d = ch - (MIN_CHAR_94
1181 + (XCHARSET_FINAL (charset) - '0') * 94))
1186 else if (XCHARSET_CHARS (charset) == 96)
1188 if (((d = ch - (MIN_CHAR_96
1189 + (XCHARSET_FINAL (charset) - '0') * 96))
1197 else if (XCHARSET_DIMENSION (charset) == 2)
1199 if (XCHARSET_CHARS (charset) == 94)
1201 if (((d = ch - (MIN_CHAR_94x94
1203 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1206 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1208 else if (XCHARSET_CHARS (charset) == 96)
1210 if (((d = ch - (MIN_CHAR_96x96
1212 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1215 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C, store it through *CHARSET,
   and return C's code point within that charset.
   The choice is a cascade of range tests on C: Basic Latin ->
   Vcharset_ascii, then control-1 and Latin-1, the Hebrew and Thai
   blocks (offset by 0x20 from the block start), BMP / SMP / SIP ->
   the Vcharset_ucs_* charsets, and finally the MIN_CHAR_94 ..
   MAX_CHAR_96x96 ranges, which are resolved with CHARSET_BY_ATTRIBUTES
   from the computed final byte (always CHARSET_LEFT_TO_RIGHT).  When
   that lookup yields nil, *CHARSET is set to Vcharset_ucs instead.
   NOTE(review): the return type, several range tests and most
   fallback return values are on lines missing from this listing. */
1226 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1228 if (c <= MAX_CHAR_BASIC_LATIN)
1230 *charset = Vcharset_ascii;
1235 *charset = Vcharset_control_1;
1240 *charset = Vcharset_latin_iso8859_1;
1244 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1246 *charset = Vcharset_hebrew_iso8859_8;
1247 return c - MIN_CHAR_HEBREW + 0x20;
1250 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1252 *charset = Vcharset_thai_tis620;
1253 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): the next branch returns the result of list2 (...),
   while every other visible branch returns an integer expression and
   sets *charset.  Presumably it sits inside a disabled preprocessor
   block whose lines are missing from this listing -- confirm. */
1256 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1257 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1259 return list2 (Vcharset_katakana_jisx0201,
1260 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1263 else if (c <= MAX_CHAR_BMP)
1265 *charset = Vcharset_ucs_bmp;
1268 else if (c <= MAX_CHAR_SMP)
1270 *charset = Vcharset_ucs_smp;
1271 return c - MIN_CHAR_SMP;
1273 else if (c <= MAX_CHAR_SIP)
1275 *charset = Vcharset_ucs_sip;
1276 return c - MIN_CHAR_SIP;
1278 else if (c < MIN_CHAR_94)
1280 *charset = Vcharset_ucs;
1283 else if (c <= MAX_CHAR_94)
1285 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1286 ((c - MIN_CHAR_94) / 94) + '0',
1287 CHARSET_LEFT_TO_RIGHT);
1288 if (!NILP (*charset))
1289 return ((c - MIN_CHAR_94) % 94) + 33;
1292 *charset = Vcharset_ucs;
1296 else if (c <= MAX_CHAR_96)
1298 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1299 ((c - MIN_CHAR_96) / 96) + '0',
1300 CHARSET_LEFT_TO_RIGHT);
1301 if (!NILP (*charset))
1302 return ((c - MIN_CHAR_96) % 96) + 32;
1305 *charset = Vcharset_ucs;
1309 else if (c <= MAX_CHAR_94x94)
1312 = CHARSET_BY_ATTRIBUTES (94, 2,
1313 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1314 CHARSET_LEFT_TO_RIGHT);
1315 if (!NILP (*charset))
1316 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1317 | (((c - MIN_CHAR_94x94) % 94) + 33);
1320 *charset = Vcharset_ucs;
1324 else if (c <= MAX_CHAR_96x96)
1327 = CHARSET_BY_ATTRIBUTES (96, 2,
1328 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1329 CHARSET_LEFT_TO_RIGHT);
1330 if (!NILP (*charset))
1331 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1332 | (((c - MIN_CHAR_96x96) % 96) + 32);
1335 *charset = Vcharset_ucs;
1341 *charset = Vcharset_ucs;
/* Global Lisp_Object; not referenced on any other visible line of this
   listing.  NOTE(review): presumably a Lisp-visible ordered list of
   coded charsets, judging by the name only -- confirm at its
   DEFVAR_LISP site. */
1346 Lisp_Object Vdefault_coded_charset_priority_list;
1350 /************************************************************************/
1351 /* Basic charset Lisp functions */
1352 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): yields Qt when
   CHARSETP holds for OBJECT, Qnil otherwise.
   NOTE(review): the line that ends the doc comment and names the
   argument is missing from this listing. */
1354 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1355 Return non-nil if OBJECT is a charset.
1359 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument): a charset
   object is returned unchanged; anything else must be a symbol
   (CHECK_SYMBOL) and is looked up in Vcharset_hash_table, with Qnil as
   the default when there is no entry.
   NOTE(review): the line that ends the doc comment and names the
   argument is missing from this listing. */
1362 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1363 Retrieve the charset of the given name.
1364 If CHARSET-OR-NAME is a charset object, it is simply returned.
1365 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1366 nil is returned. Otherwise the associated charset object is returned.
1370 if (CHARSETP (charset_or_name))
1371 return charset_or_name;
1373 CHECK_SYMBOL (charset_or_name);
1374 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument): delegates to
   Ffind_charset and signals "No such charset" with NAME as datum.
   NOTE(review): the test that guards the error and the final return
   are on lines missing from this listing. */
1377 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1378 Retrieve the charset of the given name.
1379 Same as `find-charset' except an error is signalled if there is no such
1380 charset instead of returning nil.
1384 Lisp_Object charset = Ffind_charset (name);
1387 signal_simple_error ("No such charset", name);
1391 /* We store the charsets in hash tables with the names as the key and the
1392 actual charset object as the value. Occasionally we need to use them
1393 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash by Fcharset_list: it carries a
   pointer to the list being accumulated, which
   add_charset_to_list_mapper extends in place. */
1394 struct charset_list_closure
1396 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  The void * argument is
   a struct charset_list_closure; KEY (the hash key, i.e. the charset
   name according to the comment on the closure) is consed onto the
   front of the list the closure points to.  VALUE is not used on the
   visible lines.
   NOTE(review): the return type and return value are on lines missing
   from this listing. */
1400 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1401 void *charset_list_closure)
1403 /* This function can GC */
1404 struct charset_list_closure *chcl =
1405 (struct charset_list_closure*) charset_list_closure;
1406 Lisp_Object *charset_list = chcl->charset_list;
1408 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments): starts from Qnil,
   protects the accumulating list with GCPRO1, and maps
   add_charset_to_list_mapper over Vcharset_hash_table so that every
   key is consed onto the list, which is then returned.
   NOTE(review): the matching UNGCPRO and the end of the doc comment
   are on lines missing from this listing. */
1412 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1413 Return a list of the names of all defined charsets.
1417 Lisp_Object charset_list = Qnil;
1418 struct gcpro gcpro1;
1419 struct charset_list_closure charset_list_closure;
1421 GCPRO1 (charset_list);
1422 charset_list_closure.charset_list = &charset_list;
1423 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1424 &charset_list_closure);
1427 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument): resolves
   CHARSET with Fget_charset (which signals an error when there is no
   such charset, per its doc string) and returns its XCHARSET_NAME.
   NOTE(review): the line that ends the doc comment and names the
   argument is missing from this listing. */
1430 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1431 Return the name of charset CHARSET.
1435 return XCHARSET_NAME (Fget_charset (charset));
1438 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset' (exactly three arguments: NAME,
   DOC-STRING, PROPS).  Outline of the visible code:

   - NAME must be a symbol; DOC-STRING is checked to be a string only
     when it is non-nil.  If Ffind_charset already finds a charset for
     NAME, "Cannot redefine existing charset" is signalled.
   - PROPS is walked with EXTERNAL_PROPERTY_LIST_LOOP_3.  Each recognized
     keyword is validated and copied into a local variable:
     'chars accepts 94 or 96 (128 and 256 also appear in the test),
     'columns accepts 1 or 2, 'final must lie between '0' and '~',
     'direction must be l2r or r2l, 'mother is resolved with
     Fget_charset, 'conversion must be one of 94x60, 94x94x60, big5-1 or
     big5-2, and 'ccl-program must be accepted by setup_ccl_program.
     An 'iso-ir value is stored negated in `id'.  Any other keyword
     signals "Unrecognized property".
   - After the loop the function reports a missing 'final, rejects a
     final byte above 0x5F for dimension 2, and refuses to create a
     charset when CHARSET_BY_ATTRIBUTES already has one (in either
     direction) for the same CHARS/DIMENSION/FINAL combination.
   - Defaults: empty strings for a nil DOC-STRING and registry, the
     symbol's name for the short name, DOC-STRING for the long name, and
     DIMENSION for the column count.
   - The charset is built by make_charset; a non-nil CCL program is then
     stored with XCHARSET_CCL_PROGRAM.

   NOTE(review): several lines of this function are not present in this
   listing, including the conditions that guard some of the steps above
   (for example when get_unallocated_leading_byte is called and how
   byte_offset is derived).  Presumably some of them are build-time
   conditionals for the UTF-2000 feature; confirm against the full
   source before relying on the exact conditions. */
1439 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1440 Define a new character set.
1441 This function is for use with Mule support.
1442 NAME is a symbol, the name by which the character set is normally referred.
1443 DOC-STRING is a string describing the character set.
1444 PROPS is a property list, describing the specific nature of the
1445 character set. Recognized properties are:
1447 'short-name Short version of the charset name (ex: Latin-1)
1448 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1449 'registry A regular expression matching the font registry field for
1451 'dimension Number of octets used to index a character in this charset.
1452 Either 1 or 2. Defaults to 1.
1453 If UTF-2000 feature is enabled, 3 or 4 are also available.
1454 'columns Number of columns used to display a character in this charset.
1455 Only used in TTY mode. (Under X, the actual width of a
1456 character can be derived from the font used to display the
1457 characters.) If unspecified, defaults to the dimension
1458 (this is almost always the correct value).
1459 'chars Number of characters in each dimension (94 or 96).
1460 Defaults to 94. Note that if the dimension is 2, the
1461 character set thus described is 94x94 or 96x96.
1462 If UTF-2000 feature is enabled, 128 or 256 are also available.
1463 'final Final byte of ISO 2022 escape sequence. Must be
1464 supplied. Each combination of (DIMENSION, CHARS) defines a
1465 separate namespace for final bytes. Note that ISO
1466 2022 restricts the final byte to the range
1467 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1468 dimension == 2. Note also that final bytes in the range
1469 0x30 - 0x3F are reserved for user-defined (not official)
1471 'graphic 0 (use left half of font on output) or 1 (use right half
1472 of font on output). Defaults to 0. For example, for
1473 a font whose registry is ISO8859-1, the left half
1474 (octets 0x20 - 0x7F) is the `ascii' character set, while
1475 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1476 character set. With 'graphic set to 0, the octets
1477 will have their high bit cleared; with it set to 1,
1478 the octets will have their high bit set.
1479 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1481 'ccl-program A compiled CCL program used to convert a character in
1482 this charset into an index into the font. This is in
1483 addition to the 'graphic property. The CCL program
1484 is passed the octets of the character, with the high
1485 bit cleared and set depending upon whether the value
1486 of the 'graphic property is 0 or 1.
1487 'mother [UTF-2000 only] Base coded-charset.
1488 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1489 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1490 'code-offset [UTF-2000 only] Offset for a code-point of a base
1492 'conversion [UTF-2000 only] Conversion for a code-point of a base
1493 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1495 (name, doc_string, props))
1497 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1498 int direction = CHARSET_LEFT_TO_RIGHT;
1499 Lisp_Object registry = Qnil;
1500 Lisp_Object charset;
1501 Lisp_Object ccl_program = Qnil;
1502 Lisp_Object short_name = Qnil, long_name = Qnil;
1503 Lisp_Object mother = Qnil;
1504 int min_code = 0, max_code = 0, code_offset = 0;
1505 int byte_offset = -1;
1508 CHECK_SYMBOL (name);
1509 if (!NILP (doc_string))
1510 CHECK_STRING (doc_string);
1512 charset = Ffind_charset (name);
1513 if (!NILP (charset))
1514 signal_simple_error ("Cannot redefine existing charset", name);
1517 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1519 if (EQ (keyword, Qshort_name))
1521 CHECK_STRING (value);
1525 else if (EQ (keyword, Qlong_name))
1527 CHECK_STRING (value);
1531 else if (EQ (keyword, Qiso_ir))
1535 id = - XINT (value);
1539 else if (EQ (keyword, Qdimension))
1542 dimension = XINT (value);
1543 if (dimension < 1 ||
1550 signal_simple_error ("Invalid value for 'dimension", value);
1553 else if (EQ (keyword, Qchars))
1556 chars = XINT (value);
1557 if (chars != 94 && chars != 96
1559 && chars != 128 && chars != 256
1562 signal_simple_error ("Invalid value for 'chars", value);
1565 else if (EQ (keyword, Qcolumns))
1568 columns = XINT (value);
1569 if (columns != 1 && columns != 2)
1570 signal_simple_error ("Invalid value for 'columns", value);
1573 else if (EQ (keyword, Qgraphic))
1576 graphic = XINT (value);
1584 signal_simple_error ("Invalid value for 'graphic", value);
1587 else if (EQ (keyword, Qregistry))
1589 CHECK_STRING (value);
1593 else if (EQ (keyword, Qdirection))
1595 if (EQ (value, Ql2r))
1596 direction = CHARSET_LEFT_TO_RIGHT;
1597 else if (EQ (value, Qr2l))
1598 direction = CHARSET_RIGHT_TO_LEFT;
1600 signal_simple_error ("Invalid value for 'direction", value);
1603 else if (EQ (keyword, Qfinal))
1605 CHECK_CHAR_COERCE_INT (value);
1606 final = XCHAR (value);
1607 if (final < '0' || final > '~')
1608 signal_simple_error ("Invalid value for 'final", value);
1612 else if (EQ (keyword, Qmother))
1614 mother = Fget_charset (value);
1617 else if (EQ (keyword, Qmin_code))
1620 min_code = XUINT (value);
1623 else if (EQ (keyword, Qmax_code))
1626 max_code = XUINT (value);
1629 else if (EQ (keyword, Qcode_offset))
1632 code_offset = XUINT (value);
1635 else if (EQ (keyword, Qconversion))
1637 if (EQ (value, Q94x60))
1638 conversion = CONVERSION_94x60;
1639 else if (EQ (value, Q94x94x60))
1640 conversion = CONVERSION_94x94x60;
1641 else if (EQ (value, Qbig5_1))
1642 conversion = CONVERSION_BIG5_1;
1643 else if (EQ (value, Qbig5_2))
1644 conversion = CONVERSION_BIG5_2;
1646 signal_simple_error ("Unrecognized conversion", value);
1650 else if (EQ (keyword, Qccl_program))
1652 struct ccl_program test_ccl;
1654 if (setup_ccl_program (&test_ccl, value) < 0)
1655 signal_simple_error ("Invalid value for 'ccl-program", value);
1656 ccl_program = value;
1660 signal_simple_error ("Unrecognized property", keyword);
1666 error ("'final must be specified");
1668 if (dimension == 2 && final > 0x5F)
1670 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1673 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1674 CHARSET_LEFT_TO_RIGHT)) ||
1675 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1676 CHARSET_RIGHT_TO_LEFT)))
1678 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1681 id = get_unallocated_leading_byte (dimension);
1683 if (NILP (doc_string))
1684 doc_string = build_string ("");
1686 if (NILP (registry))
1687 registry = build_string ("");
1689 if (NILP (short_name))
1690 XSETSTRING (short_name, XSYMBOL (name)->name);
1692 if (NILP (long_name))
1693 long_name = doc_string;
1696 columns = dimension;
1698 if (byte_offset < 0)
1702 else if (chars == 96)
1708 charset = make_charset (id, name, chars, dimension, columns, graphic,
1709 final, direction, short_name, long_name,
1710 doc_string, registry,
1711 Qnil, min_code, max_code, code_offset, byte_offset,
1712 mother, conversion);
1713 if (!NILP (ccl_program))
1714 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset' (arguments CHARSET and
   NEW-NAME).
   CHARSET is resolved with Fget_charset and must not already have a
   reverse-direction charset; NEW-NAME must be a symbol for which
   Ffind_charset finds nothing.  The new charset copies CHARS, DIMENSION,
   COLUMNS, GRAPHIC, FINAL, the doc string, both names and the registry
   from the original, takes a fresh id from get_unallocated_leading_byte,
   and gets the opposite direction (right-to-left unless the original is
   already right-to-left).  Finally the two charsets are linked to each
   other through their REVERSE_DIRECTION_CHARSET slots.

   NOTE(review): two alternative tails for the make_charset argument list
   are visible below (one copying the decoding table and code-range
   fields from the original, one passing Qnil/0 placeholders).
   Presumably a preprocessor conditional that is missing from this
   listing selects between them; confirm against the full source. */
1718 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1720 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1721 NEW-NAME is the name of the new charset. Return the new charset.
1723 (charset, new_name))
1725 Lisp_Object new_charset = Qnil;
1726 int id, chars, dimension, columns, graphic, final;
1728 Lisp_Object registry, doc_string, short_name, long_name;
1731 charset = Fget_charset (charset);
1732 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1733 signal_simple_error ("Charset already has reverse-direction charset",
1736 CHECK_SYMBOL (new_name);
1737 if (!NILP (Ffind_charset (new_name)))
1738 signal_simple_error ("Cannot redefine existing charset", new_name);
1740 cs = XCHARSET (charset);
1742 chars = CHARSET_CHARS (cs);
1743 dimension = CHARSET_DIMENSION (cs);
1744 columns = CHARSET_COLUMNS (cs);
1745 id = get_unallocated_leading_byte (dimension);
1747 graphic = CHARSET_GRAPHIC (cs);
1748 final = CHARSET_FINAL (cs);
1749 direction = CHARSET_RIGHT_TO_LEFT;
1750 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1751 direction = CHARSET_LEFT_TO_RIGHT;
1752 doc_string = CHARSET_DOC_STRING (cs);
1753 short_name = CHARSET_SHORT_NAME (cs);
1754 long_name = CHARSET_LONG_NAME (cs);
1755 registry = CHARSET_REGISTRY (cs);
1757 new_charset = make_charset (id, new_name, chars, dimension, columns,
1758 graphic, final, direction, short_name, long_name,
1759 doc_string, registry,
1761 CHARSET_DECODING_TABLE(cs),
1762 CHARSET_MIN_CODE(cs),
1763 CHARSET_MAX_CODE(cs),
1764 CHARSET_CODE_OFFSET(cs),
1765 CHARSET_BYTE_OFFSET(cs),
1767 CHARSET_CONVERSION (cs)
1769 Qnil, 0, 0, 0, 0, Qnil, 0
1773 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1774 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias' (two arguments).
   ALIAS must be a symbol and CHARSET is resolved with Fget_charset.
   The charset object is then stored in Vcharset_hash_table under ALIAS,
   and the value of that Fputhash call is returned.  No check that ALIAS
   is unused is visible here, so an existing entry with the same key
   would presumably be replaced -- confirm against Fputhash. */
1779 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1780 Define symbol ALIAS as an alias for CHARSET.
1784 CHECK_SYMBOL (alias);
1785 charset = Fget_charset (charset);
1786 return Fputhash (alias, charset, Vcharset_hash_table);
1789 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and returns whatever is stored in
   its XCHARSET_REVERSE_DIRECTION_CHARSET slot.
   NOTE(review): no DEFSUBR for this function is visible in the part of
   syms_of_mule_charset shown in this listing; it may be compiled out or
   registered on a line that is missing here. */
1791 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1793 Return the reverse-direction charset parallel to CHARSET, if any.
1794 This is the charset with the same properties (in particular, the same
1795 dimension, number of characters per dimension, and final byte) as
1796 CHARSET but whose characters are displayed in the opposite direction.
1800 charset = Fget_charset (charset);
1801 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes' (three required arguments
   plus the optional DIRECTION).
   Validation done by the visible code: DIMENSION must be an integer equal
   to 1 or 2; CHARS must be 94 or 96; FINAL must be a character between
   '0' and '~', and no greater than 0x5F when DIMENSION is 2; DIRECTION
   must be l2r, r2l or nil.  `di' starts at -1 and is only assigned for
   an explicit l2r/r2l, so -1 stands for "direction not given".
   The lookup itself goes through CHARSET_BY_ATTRIBUTES, trying
   left-to-right and then right-to-left when no direction was given, and
   the XCHARSET_NAME of the charset found is returned.
   NOTE(review): the conditions around the three lookups and the handling
   of a failed lookup are on lines missing from this listing. */
1805 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1806 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1807 If DIRECTION is omitted, both directions will be checked (left-to-right
1808 will be returned if character sets exist for both directions).
1810 (dimension, chars, final, direction))
1812 int dm, ch, fi, di = -1;
1813 Lisp_Object obj = Qnil;
1815 CHECK_INT (dimension);
1816 dm = XINT (dimension);
1817 if (dm < 1 || dm > 2)
1818 signal_simple_error ("Invalid value for DIMENSION", dimension);
1822 if (ch != 94 && ch != 96)
1823 signal_simple_error ("Invalid value for CHARS", chars);
1825 CHECK_CHAR_COERCE_INT (final);
1827 if (fi < '0' || fi > '~')
1828 signal_simple_error ("Invalid value for FINAL", final);
1830 if (EQ (direction, Ql2r))
1831 di = CHARSET_LEFT_TO_RIGHT;
1832 else if (EQ (direction, Qr2l))
1833 di = CHARSET_RIGHT_TO_LEFT;
1834 else if (!NILP (direction))
1835 signal_simple_error ("Invalid value for DIRECTION", direction);
1837 if (dm == 2 && fi > 0x5F)
1839 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1843 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1845 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1848 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1851 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name' (one argument).
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_SHORT_NAME. */
1855 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1856 Return short name of CHARSET.
1860 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name' (one argument).
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_LONG_NAME. */
1863 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1864 Return long name of CHARSET.
1868 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description' (one argument).
   Resolves CHARSET with Fget_charset and returns its doc string
   (XCHARSET_DOC_STRING). */
1871 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1872 Return description of CHARSET.
1876 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension' (one argument).
   Resolves CHARSET with Fget_charset and returns XCHARSET_DIMENSION
   boxed as a Lisp integer. */
1879 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1880 Return dimension of CHARSET.
1884 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property' (two arguments).
   CHARSET is resolved with Fget_charset and PROP must be a symbol.
   PROP is compared with EQ against the known property symbols in turn
   and the matching field of the charset is returned; integer fields are
   boxed with make_int.  Special cases visible below:
   - `final' yields nil when CHARSET_FINAL is 0, otherwise the final byte
     as a character;
   - `direction' yields the symbol l2r or r2l;
   - `reverse-direction-charset' yields the NAME of the linked charset
     when the slot holds a charset object, and the raw slot value
     otherwise;
   - `mother', `min-code' and `max-code' are also handled (presumably
     only in UTF-2000 builds; the conditional is missing from this
     listing).
   Any other symbol signals "Unrecognized charset property name". */
1887 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1888 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1889 Recognized properties are those listed in `make-charset', as well as
1890 'name and 'doc-string.
1896 charset = Fget_charset (charset);
1897 cs = XCHARSET (charset);
1899 CHECK_SYMBOL (prop);
1900 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1901 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1902 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1903 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1904 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1905 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1906 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1907 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1908 Qnil : make_char (CHARSET_FINAL (cs));
1909 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1910 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1911 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1912 if (EQ (prop, Qdirection))
1913 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1914 if (EQ (prop, Qreverse_direction_charset))
1916 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1917 /* #### Is this translation OK? If so, error checking sufficient? */
1918 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1921 if (EQ (prop, Qmother))
1922 return CHARSET_MOTHER (cs);
1923 if (EQ (prop, Qmin_code))
1924 return make_int (CHARSET_MIN_CODE (cs));
1925 if (EQ (prop, Qmax_code))
1926 return make_int (CHARSET_MAX_CODE (cs));
1928 signal_simple_error ("Unrecognized charset property name", prop);
1929 return Qnil; /* not reached */
/* Lisp primitive `charset-id' (one argument).
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_LEADING_BYTE boxed as a Lisp integer; i.e. the
   "identification number" reported here is the leading-byte field. */
1932 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1933 Return charset identification number of CHARSET.
1937 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1940 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program' (two arguments).
   CHARSET is resolved with Fget_charset.  CCL-PROGRAM is validated by
   setting it up into a scratch `struct ccl_program' that is otherwise
   unused; a negative result from setup_ccl_program signals
   "Invalid ccl-program".  On success the program is stored in the
   charset's XCHARSET_CCL_PROGRAM slot. */
1943 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1944 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1946 (charset, ccl_program))
1948 struct ccl_program test_ccl;
1950 charset = Fget_charset (charset);
1951 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1952 signal_simple_error ("Invalid ccl-program", ccl_program);
1953 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the per-device font cache kept for CHARSET.
   Each device's `charset_font_cache' is looked up with CHARSET as the
   key; when a value is found it is treated as a hash table and cleared
   with Fclrhash.  Called from Fset_charset_registry after the registry
   of a charset has been changed. */
1958 invalidate_charset_font_caches (Lisp_Object charset)
1960 /* Invalidate font cache entries for charset on all devices. */
1961 Lisp_Object devcons, concons, hash_table;
1962 DEVICE_LOOP_NO_BREAK (devcons, concons)
1964 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the "not found" default, so a device that has no cache
   entry for CHARSET is simply skipped. */
1965 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1966 if (!UNBOUNDP (hash_table))
1967 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry' (two arguments).
   CHARSET is resolved with Fget_charset and REGISTRY must be a string.
   After storing the new registry the function clears the charset's font
   caches via invalidate_charset_font_caches and then calls
   face_property_was_changed for the `font' property of Vdefault_face
   with the global locale (presumably so fonts are re-resolved against
   the new registry -- confirm against face_property_was_changed). */
1971 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1972 Set the 'registry property of CHARSET to REGISTRY.
1974 (charset, registry))
1976 charset = Fget_charset (charset);
1977 CHECK_STRING (registry);
1978 XCHARSET_REGISTRY (charset) = registry;
1979 invalidate_charset_font_caches (charset);
1980 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table' (one argument).
   Resolves CHARSET with Fget_charset and returns the object stored in
   its XCHARSET_DECODING_TABLE slot as is. */
1985 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1986 Return mapping-table of CHARSET.
1990 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table' (two arguments).
   Outline of the visible code:
   - CHARSET is resolved with Fget_charset.
   - TABLE may be a vector; anything that is neither accepted case
     signals wrong-type-argument with the predicate name
     "vector-or-nil-p" (so nil is presumably the other accepted value).
   - A vector is validated with decoding_table_check_elements against the
     charset's dimension and byte size; failures are reported as
     "Too big table", "Invalid element is found" or "Something wrong".
   - The charset's decoding table slot is reset to Qnil, and the mapping
     is then recorded per character: for every element, Fput_char_attribute
     is called with the charset NAME as attribute and the code point as
     value.  For dimension 1 the code point is I + BYTE_OFFSET; for the
     two-level case the row (I + BYTE_OFFSET) is shifted left by 8 and
     combined with the cell index, while a non-vector row element is
     registered with the one-octet code.
   NOTE(review): many lines (case labels, nil/emptiness tests, the return
   statement) are missing from this listing, so the exact branch
   conditions are not documented here. */
1993 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1994 Set mapping-table of CHARSET to TABLE.
1998 struct Lisp_Charset *cs;
2002 charset = Fget_charset (charset);
2003 cs = XCHARSET (charset);
2007 CHARSET_DECODING_TABLE(cs) = Qnil;
2010 else if (VECTORP (table))
2012 int ccs_len = CHARSET_BYTE_SIZE (cs);
2013 int ret = decoding_table_check_elements (table,
2014 CHARSET_DIMENSION (cs),
2019 signal_simple_error ("Too big table", table);
2021 signal_simple_error ("Invalid element is found", table);
2023 signal_simple_error ("Something wrong", table);
2025 CHARSET_DECODING_TABLE(cs) = Qnil;
2028 signal_error (Qwrong_type_argument,
2029 list2 (build_translated_string ("vector-or-nil-p"),
2032 byte_offset = CHARSET_BYTE_OFFSET (cs);
2033 switch (CHARSET_DIMENSION (cs))
2036 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2038 Lisp_Object c = XVECTOR_DATA(table)[i];
2041 Fput_char_attribute (c, XCHARSET_NAME (charset),
2042 make_int (i + byte_offset));
2046 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2048 Lisp_Object v = XVECTOR_DATA(table)[i];
2054 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2056 Lisp_Object c = XVECTOR_DATA(v)[j];
2060 (c, XCHARSET_NAME (charset),
2061 make_int ( ( (i + byte_offset) << 8 )
2067 Fput_char_attribute (v, XCHARSET_NAME (charset),
2068 make_int (i + byte_offset));
/* Lisp primitive `save-charset-mapping-table' (one argument).
   Writes the decoding table of CHARSET to external storage.

   Destination:
   - with HAVE_LIBCHISE, a CCS handle obtained from
     default_chise_data_source via chise_ds_get_ccs, keyed by the charset
     name; a failure is reported on stdout with printf
     ("Can't open decoding-table ...");
   - otherwise, the database file named by char_attribute_system_db_file
     for the charset name and Qsystem_char_id, opened with Fopen_database
     in "w+" mode.

   Traversal: every octet of a code point ranges over
   [CHARSET_BYTE_OFFSET, CHARSET_BYTE_OFFSET + CHARSET_BYTE_SIZE), and the
   switch on CHARSET_DIMENSION selects one to four nested loops
   (cell / row / plane / group).  Each level is fetched with
   get_ccs_octet_table, and the code point written is the octets packed
   8 bits apiece (row << 8 | cell, plane << 16 | ..., group << 24 | ...).

   Each entry is stored with chise_ccs_set_decoded_char, or with
   Fput_database using the prin1 representations of code point and
   character.  At the end the CCS is synced with chise_ccs_sync, or the
   database is closed and the result of Fclose_database returned.

   NOTE(review): the tests that skip missing sub-tables or characters are
   on lines not present in this listing. */
2076 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2077 Save mapping-table of CHARSET.
2081 struct Lisp_Charset *cs;
2082 int byte_min, byte_max;
2083 #ifdef HAVE_LIBCHISE
2085 #else /* HAVE_LIBCHISE */
2087 Lisp_Object db_file;
2088 #endif /* not HAVE_LIBCHISE */
2090 charset = Fget_charset (charset);
2091 cs = XCHARSET (charset);
2093 #ifdef HAVE_LIBCHISE
2094 if ( open_chise_data_source_maybe () )
2098 = chise_ds_get_ccs (default_chise_data_source,
2099 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2102 printf ("Can't open decoding-table %s\n",
2103 XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
2106 #else /* HAVE_LIBCHISE */
2107 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2108 Qsystem_char_id, 1);
2109 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2110 #endif /* not HAVE_LIBCHISE */
2112 byte_min = CHARSET_BYTE_OFFSET (cs);
2113 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2114 switch (CHARSET_DIMENSION (cs))
2118 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2121 for (cell = byte_min; cell < byte_max; cell++)
2123 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2127 #ifdef HAVE_LIBCHISE
2128 chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
2129 #else /* HAVE_LIBCHISE */
2130 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2131 Fprin1_to_string (c, Qnil),
2133 #endif /* not HAVE_LIBCHISE */
2140 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2143 for (row = byte_min; row < byte_max; row++)
2145 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2148 for (cell = byte_min; cell < byte_max; cell++)
2150 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2154 #ifdef HAVE_LIBCHISE
2155 chise_ccs_set_decoded_char
2157 (row << 8) | cell, XCHAR (c));
2158 #else /* HAVE_LIBCHISE */
2159 Fput_database (Fprin1_to_string (make_int ((row << 8)
2162 Fprin1_to_string (c, Qnil),
2164 #endif /* not HAVE_LIBCHISE */
2172 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2175 for (plane = byte_min; plane < byte_max; plane++)
2178 = get_ccs_octet_table (table_p, charset, plane);
2181 for (row = byte_min; row < byte_max; row++)
2184 = get_ccs_octet_table (table_r, charset, row);
2187 for (cell = byte_min; cell < byte_max; cell++)
2189 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2194 #ifdef HAVE_LIBCHISE
2195 chise_ccs_set_decoded_char
2200 #else /* HAVE_LIBCHISE */
2201 Fput_database (Fprin1_to_string
2202 (make_int ((plane << 16)
2206 Fprin1_to_string (c, Qnil),
2208 #endif /* not HAVE_LIBCHISE */
2217 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2220 for (group = byte_min; group < byte_max; group++)
2223 = get_ccs_octet_table (table_g, charset, group);
2226 for (plane = byte_min; plane < byte_max; plane++)
2229 = get_ccs_octet_table (table_p, charset, plane);
2232 for (row = byte_min; row < byte_max; row++)
2235 = get_ccs_octet_table (table_r, charset, row);
2238 for (cell = byte_min; cell < byte_max; cell++)
2241 = get_ccs_octet_table (table_c, charset, cell);
2245 #ifdef HAVE_LIBCHISE
2246 chise_ccs_set_decoded_char
2252 #else /* HAVE_LIBCHISE */
2253 Fput_database (Fprin1_to_string
2254 (make_int (( group << 24)
2259 Fprin1_to_string (c, Qnil),
2261 #endif /* not HAVE_LIBCHISE */
2269 #ifdef HAVE_LIBCHISE
2270 chise_ccs_sync (dt_ccs);
2272 #else /* HAVE_LIBCHISE */
2273 return Fclose_database (db);
2274 #endif /* not HAVE_LIBCHISE */
/* Lisp primitive `reset-charset-mapping-table' (one argument, CCS).
   CCS is resolved with Fget_charset.  The function then checks that a
   backing store for the charset exists: with HAVE_LIBCHISE by fetching
   the CCS from default_chise_data_source and testing
   chise_ccs_setup_db (...) == 0, otherwise by testing Ffile_exists_p on
   the file named by char_attribute_system_db_file.  When that test
   succeeds, the in-memory decoding table is replaced by the marker
   Qunloaded (presumably so entries are reloaded on demand by
   load_char_decoding_entry_maybe -- TODO confirm).
   NOTE(review): the `if' lines and the return values are missing from
   this listing. */
2277 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2278 Reset mapping-table of CCS with database file.
2282 #ifdef HAVE_LIBCHISE
2283 CHISE_CCS chise_ccs;
2285 Lisp_Object db_file;
2288 ccs = Fget_charset (ccs);
2290 #ifdef HAVE_LIBCHISE
2291 if ( open_chise_data_source_maybe () )
2294 chise_ccs = chise_ds_get_ccs (default_chise_data_source,
2295 XSTRING_DATA (Fsymbol_name
2296 (XCHARSET_NAME(ccs))));
2297 if (chise_ccs == NULL)
2300 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2301 Qsystem_char_id, 0);
2305 #ifdef HAVE_LIBCHISE
2306 chise_ccs_setup_db (chise_ccs, 0) == 0
2308 !NILP (Ffile_exists_p (db_file))
2312 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Look up CODE_POINT of charset CCS in external storage and record the
   outcome in the in-memory decoding table with decoding_table_put_char:
   the decoded character when one is found, Qnil otherwise (so a miss is
   recorded as well).

   With HAVE_LIBCHISE the lookup is chise_ds_decode_char on
   default_chise_data_source, keyed by the charset name.  Otherwise the
   database file given by char_attribute_system_db_file is opened
   read-only ("r"), queried with Fget_database using the prin1
   representation of CODE_POINT as key, and closed again on both the hit
   and the miss path.

   NOTE(review): the conditions and return statements are on lines
   missing from this listing. */
2319 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2321 #ifdef HAVE_LIBCHISE
2322 CHISE_Char_ID char_id;
2324 if ( open_chise_data_source_maybe () )
2328 = chise_ds_decode_char (default_chise_data_source,
2329 XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
2332 decoding_table_put_char (ccs, code_point, make_char (char_id));
2334 decoding_table_put_char (ccs, code_point, Qnil);
2336 /* chise_ccst_close (dt_ccs); */
2338 #else /* HAVE_LIBCHISE */
2341 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2344 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2348 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2355 decoding_table_put_char (ccs, code_point, ret);
2356 Fclose_database (db);
2360 decoding_table_put_char (ccs, code_point, Qnil);
2361 Fclose_database (db);
2364 #endif /* not HAVE_LIBCHISE */
2367 #ifdef HAVE_LIBCHISE
/* Lisp primitive `save-charset-properties' (one argument; only built
   with HAVE_LIBCHISE).
   Writes descriptive properties of CHARSET into default_chise_data_source.
   Every property follows the same three steps: fetch the property handle
   with chise_ds_get_property, set its value on the feature named after
   the charset with chise_feature_set_property_value, then flush it with
   chise_property_sync.

   Visible behaviour:
   - If CHARSET was given as a symbol that differs from the charset's real
     name (i.e. an alias), a property is first written on the feature
     named after the alias, with the printed real name as its value.
     After that, the real name is used as the feature name.
   - The doc string, "type", "chars" and "dimension" are written.
   - The final byte is written only when CHARSET_FINAL is non-zero.
   - The mother charset is written only when non-nil, by name if it is a
     charset object.
   - When CHARSET_MAX_CODE is non-zero, the minimum code, maximum code,
     "mother-code-offset" and "mother-code-conversion" are written.  Codes
     are formatted with sprintf as "#x%X"; a conversion equal to
     CONVERSION_IDENTICAL is written as "identical", one of the four known
     conversions by its printed symbol, anything else as "unknown".

   Values built with Fprin1_to_string are passed on as raw XSTRING_DATA
   pointers.
   NOTE(review): several property-name string literals, the `str' buffer
   declaration and the return statement are on lines missing from this
   listing. */
2368 DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
2369 Save properties of CHARSET.
2373 struct Lisp_Charset *cs;
2374 CHISE_Property property;
2376 unsigned char* feature_name;
2378 ccs = Fget_charset (charset);
2379 cs = XCHARSET (ccs);
2381 if ( open_chise_data_source_maybe () )
2384 if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
2386 property = chise_ds_get_property (default_chise_data_source,
2388 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2389 chise_feature_set_property_value
2390 (chise_ds_get_feature (default_chise_data_source, feature_name),
2391 property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
2393 chise_property_sync (property);
2395 charset = XCHARSET_NAME (ccs);
2396 feature_name = XSTRING_DATA (Fsymbol_name (charset));
2398 property = chise_ds_get_property (default_chise_data_source,
2400 chise_feature_set_property_value
2401 (chise_ds_get_feature (default_chise_data_source, feature_name),
2402 property, XSTRING_DATA (Fprin1_to_string
2403 (CHARSET_DOC_STRING (cs), Qnil)));
2404 chise_property_sync (property);
2406 property = chise_ds_get_property (default_chise_data_source, "type");
2407 chise_feature_set_property_value
2408 (chise_ds_get_feature (default_chise_data_source, feature_name),
2410 chise_property_sync (property);
2412 property = chise_ds_get_property (default_chise_data_source, "chars");
2413 chise_feature_set_property_value
2414 (chise_ds_get_feature (default_chise_data_source, feature_name),
2415 property, XSTRING_DATA (Fprin1_to_string (make_int
2416 (CHARSET_CHARS (cs)),
2418 chise_property_sync (property);
2420 property = chise_ds_get_property (default_chise_data_source, "dimension");
2421 chise_feature_set_property_value
2422 (chise_ds_get_feature (default_chise_data_source, feature_name),
2423 property, XSTRING_DATA (Fprin1_to_string (make_int
2424 (CHARSET_DIMENSION (cs)),
2426 chise_property_sync (property);
2428 if ( CHARSET_FINAL (cs) != 0 )
2430 property = chise_ds_get_property (default_chise_data_source,
2432 chise_feature_set_property_value
2433 (chise_ds_get_feature (default_chise_data_source, feature_name),
2434 property, XSTRING_DATA (Fprin1_to_string (make_int
2435 (CHARSET_FINAL (cs)),
2437 chise_property_sync (property);
2440 if ( !NILP (CHARSET_MOTHER (cs)) )
2442 Lisp_Object mother = CHARSET_MOTHER (cs);
2444 if ( CHARSETP (mother) )
2445 mother = XCHARSET_NAME (mother);
2447 property = chise_ds_get_property (default_chise_data_source,
2449 chise_feature_set_property_value
2450 (chise_ds_get_feature (default_chise_data_source, feature_name),
2451 property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
2452 chise_property_sync (property);
2455 if ( CHARSET_MAX_CODE (cs) != 0 )
2459 property = chise_ds_get_property (default_chise_data_source,
2461 if ( CHARSET_MIN_CODE (cs) == 0 )
2462 chise_feature_set_property_value
2463 (chise_ds_get_feature (default_chise_data_source, feature_name),
2467 sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
2468 chise_feature_set_property_value
2469 (chise_ds_get_feature (default_chise_data_source, feature_name),
2472 chise_property_sync (property);
2474 property = chise_ds_get_property (default_chise_data_source,
2476 sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
2477 chise_feature_set_property_value
2478 (chise_ds_get_feature (default_chise_data_source, feature_name),
2480 chise_property_sync (property);
2482 property = chise_ds_get_property (default_chise_data_source,
2483 "mother-code-offset");
2484 if ( CHARSET_CODE_OFFSET (cs) == 0 )
2485 chise_feature_set_property_value
2486 (chise_ds_get_feature (default_chise_data_source, feature_name),
2490 sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
2491 chise_feature_set_property_value
2492 (chise_ds_get_feature (default_chise_data_source, feature_name),
2495 chise_property_sync (property);
2497 property = chise_ds_get_property (default_chise_data_source,
2498 "mother-code-conversion");
2499 if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
2500 chise_feature_set_property_value
2501 (chise_ds_get_feature (default_chise_data_source, feature_name),
2502 property, "identical");
2505 Lisp_Object sym = Qnil;
2507 if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
2509 else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
2511 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
2513 else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
2516 chise_feature_set_property_value
2517 (chise_ds_get_feature (default_chise_data_source, feature_name),
2518 property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
2520 chise_feature_set_property_value
2521 (chise_ds_get_feature (default_chise_data_source, feature_name),
2522 property, "unknown");
2524 chise_property_sync (property);
2528 #endif /* HAVE_LIBCHISE */
2530 #endif /* HAVE_CHISE */
2531 #endif /* UTF2000 */
2534 /************************************************************************/
2535 /* Lisp primitives for working with characters */
2536 /************************************************************************/
/* Lisp primitive `decode-char' (two required arguments, two optional).
   CHARSET is resolved with Fget_charset.  The code point is decoded with
   DECODE_CHAR when DEFINED_ONLY is nil and with decode_defined_char
   otherwise; in both cases the third argument is true when
   WITHOUT_INHERITANCE is non-nil.  A non-negative result is returned as a
   character, a negative one as nil.
   NOTE(review): the extraction of the integer from CODE and the
   adjustment applied when XCHARSET_GRAPHIC is 1 are on lines missing
   from this listing. */
2539 DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
2540 Make a character from CHARSET and code-point CODE.
2541 If DEFINED_ONLY is non-nil, builtin character is not returned.
2542 If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
2543 If corresponding character is not found, nil is returned.
2545 (charset, code, defined_only, without_inheritance))
2549 charset = Fget_charset (charset);
2552 if (XCHARSET_GRAPHIC (charset) == 1)
2554 if (NILP (defined_only))
2555 c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
2557 c = decode_defined_char (charset, c, !NILP (without_inheritance));
2558 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char' (two arguments).
   CHARSET is resolved with Fget_charset.  Vcharset_latin_viscii is
   special-cased: the character is first decoded with Fdecode_char, its
   attributes for Vcharset_latin_viscii_lower and
   Vcharset_latin_viscii_upper are consulted with Fget_char_attribute, and
   CHARSET is switched to whichever of the two applies.
   The code point is then passed to decode_builtin_char; a non-negative
   result is returned as a character, otherwise the function falls back
   to Fdecode_char (charset, code, nil, nil).
   NOTE(review): the tests inside the viscii branch, the extraction of the
   integer from CODE and the XCHARSET_GRAPHIC == 1 adjustment are on
   lines missing from this listing. */
2561 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2562 Make a builtin character from CHARSET and code-point CODE.
2568 charset = Fget_charset (charset);
2570 if (EQ (charset, Vcharset_latin_viscii))
2572 Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
2578 (ret = Fget_char_attribute (chr,
2579 Vcharset_latin_viscii_lower,
2582 charset = Vcharset_latin_viscii_lower;
2586 (ret = Fget_char_attribute (chr,
2587 Vcharset_latin_viscii_upper,
2590 charset = Vcharset_latin_viscii_upper;
2597 if (XCHARSET_GRAPHIC (charset) == 1)
2600 c = decode_builtin_char (charset, c);
2602 c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
/* Lisp primitive `make-char' (CHARSET, ARG1 and the optional ARG2).
   CHARSET is resolved with Fget_charset.  The permitted octet range
   depends on the charset:
     ascii                  0 .. 127
     control-1              0 .. 31
     256-character charsets 0 .. 255
     94-character charsets  33 .. 126
     anything else (96)     32 .. 127
   ARG1, and ARG2 for a charset of more than one dimension, must fall
   inside that range, otherwise args_out_of_range_3 is signalled with the
   offending argument and both limits.  ARG2 is masked with 0x7f before
   the check.  For a charset of dimension one, the result is
   MAKE_CHAR (charset, a1, 0), and the visible error message states that
   the second octet must be nil; otherwise the result is
   MAKE_CHAR (charset, a1, a2).
   NOTE(review): the lines that extract and mask ARG1 and that test ARG2
   are missing from this listing. */
2606 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2607 Make a character from CHARSET and octets ARG1 and ARG2.
2608 ARG2 is required only for characters from two-dimensional charsets.
2609 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2610 character s with caron.
2612 (charset, arg1, arg2))
2616 int lowlim, highlim;
2618 charset = Fget_charset (charset);
2619 cs = XCHARSET (charset);
2621 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2622 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2624 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2626 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2627 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2630 /* It is useful (and safe, according to Olivier Galibert) to strip
2631 the 8th bit off ARG1 and ARG2 because it allows programmers to
2632 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2633 Latin 2 code of the character. */
2641 if (a1 < lowlim || a1 > highlim)
2642 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2644 if (CHARSET_DIMENSION (cs) == 1)
2648 ("Charset is of dimension one; second octet must be nil", arg2);
2649 return make_char (MAKE_CHAR (charset, a1, 0));
2658 a2 = XINT (arg2) & 0x7f;
2660 if (a2 < lowlim || a2 > highlim)
2661 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2663 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset' (one argument).
   CHARACTER is checked with CHECK_CHAR_COERCE_INT, which by its name also
   accepts an integer and coerces it to a character.  The charset is found
   with CHAR_CHARSET and its NAME, not the charset object, is returned. */
2666 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2667 Return the character set of CHARACTER.
2671 CHECK_CHAR_COERCE_INT (character);
2673 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet' (CHARACTER and the optional N).
   CHARACTER is split with BREAKUP_CHAR into its charset and two octets.
   N of nil or 0 selects the first octet and N of 1 the second; any other
   N signals "Octet number must be 0 or 1".  The charset obtained from
   the split is not used further in the visible code. */
2676 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2677 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2678 N defaults to 0 if omitted.
2682 Lisp_Object charset;
2685 CHECK_CHAR_COERCE_INT (character);
2687 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2689 if (NILP (n) || EQ (n, Qzero))
2690 return make_int (octet0);
2691 else if (EQ (n, make_int (1)))
2692 return make_int (octet1);
2694 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char' (CHARACTER, CHARSET and the optional
   DEFINED-ONLY).
   CHARSET is resolved with Fget_charset and the code point is computed by
   charset_code_point, whose last argument is true when DEFINED-ONLY is
   non-nil.  A non-negative code point is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is on a
   line missing from this listing (presumably nil -- confirm). */
2698 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2699 Return code-point of CHARACTER in specified CHARSET.
2701 (character, charset, defined_only))
2705 CHECK_CHAR_COERCE_INT (character);
2706 charset = Fget_charset (charset);
2707 code_point = charset_code_point (charset, XCHAR (character),
2708 !NILP (defined_only));
2709 if (code_point >= 0)
2710 return make_int (code_point);
/* Lisp primitive `split-char' (one argument).
   Returns a list whose head is the NAME of the character's charset,
   followed by its position codes.  Both CHARSET and the result list RC
   are GC-protected with GCPRO2 while the list is being built.

   Two implementations are visible below:
   - one obtains a packed code point with ENCODE_CHAR and, once per
     charset dimension, conses the low 8 bits (code_point & 255) onto the
     front of RC, before consing the charset name in front;
   - the other splits the character with BREAKUP_CHAR and builds a
     three-element list for a charset of dimension 2 and a two-element
     list otherwise.
   NOTE(review): presumably a preprocessor conditional that is missing
   from this listing selects between them, and the loop's shift/decrement
   steps are missing as well; confirm against the full source. */
2716 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2717 Return list of charset and one or two position-codes of CHARACTER.
2721 /* This function can GC */
2722 struct gcpro gcpro1, gcpro2;
2723 Lisp_Object charset = Qnil;
2724 Lisp_Object rc = Qnil;
2732 GCPRO2 (charset, rc);
2733 CHECK_CHAR_COERCE_INT (character);
2736 code_point = ENCODE_CHAR (XCHAR (character), charset);
2737 dimension = XCHARSET_DIMENSION (charset);
2738 while (dimension > 0)
2740 rc = Fcons (make_int (code_point & 255), rc);
2744 rc = Fcons (XCHARSET_NAME (charset), rc);
2746 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2748 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2750 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2754 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2763 #ifdef ENABLE_COMPOSITE_CHARS
2764 /************************************************************************/
2765 /* composite character functions */
2766 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below builds a new character in Vcharset_composite from the
   composite_char_row_next / composite_char_col_next counters, records it
   in both directions (char -> string and string -> char) and advances the
   counters.  When the row counter has reached 128,
   "No more composite chars available" is signalled instead.
   NOTE(review): the test that separates the cache-hit path from the
   allocation path and the return statement are on lines missing from
   this listing. */
2769 lookup_composite_char (Bufbyte *str, int len)
2771 Lisp_Object lispstr = make_string (str, len);
2772 Lisp_Object ch = Fgethash (lispstr,
2773 Vcomposite_char_string2char_hash_table,
2779 if (composite_char_row_next >= 128)
2780 signal_simple_error ("No more composite chars available", lispstr);
2781 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2782 composite_char_col_next);
/* Register the new character in both lookup directions. */
2783 Fputhash (make_char (emch), lispstr,
2784 Vcomposite_char_char2string_hash_table);
2785 Fputhash (lispstr, make_char (emch),
2786 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell; columns wrap from 128 back to 32 and
   move on to the next row. */
2787 composite_char_col_next++;
2788 if (composite_char_col_next >= 128)
2790 composite_char_col_next = 32;
2791 composite_char_row_next++;
/* Return the string registered for composite character CH by looking it
   up in Vcomposite_char_char2string_hash_table.  The assert requires the
   lookup to succeed, i.e. CH must be a character that was registered in
   that table (as lookup_composite_char does). */
2800 composite_char_string (Emchar ch)
2802 Lisp_Object str = Fgethash (make_char (ch),
2803 Vcomposite_char_char2string_hash_table,
2805 assert (!UNBOUNDP (str));
/* Lisp primitive `make-composite-char' (one argument), declared with
   xxDEFUN rather than DEFUN.
   STRING must be a Lisp string; its data and length are handed to
   lookup_composite_char and the resulting character is returned. */
2809 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2810 Convert a string into a single composite character.
2811 The character is the result of overstriking all the characters in
2816 CHECK_STRING (string);
2817 return make_char (lookup_composite_char (XSTRING_DATA (string),
2818 XSTRING_LENGTH (string)));
/* Lisp primitive `composite-char-string' (one argument), declared with
   xxDEFUN rather than DEFUN.
   The character must belong to the composite charset: if its leading
   byte is not LEADING_BYTE_COMPOSITE, "Must be composite char" is
   signalled.  Otherwise the string recorded for it is returned via
   composite_char_string.
   NOTE(review): the argument check and the extraction of `emch' are on
   lines missing from this listing. */
2821 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2822 Return a string of the characters comprising a composite character.
2830 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2831 signal_simple_error ("Must be composite char", ch);
2832 return composite_char_string (emch);
2834 #endif /* ENABLE_COMPOSITE_CHARS */
2837 /************************************************************************/
2838 /* initialization */
2839 /************************************************************************/
/* Symbol-level initialization for this file: initializes the charset
   lrecord implementation, registers the primitives via DEFSUBR, and binds
   the Q... C variables to their Lisp symbol names via defsymbol.
   Some groups are conditional on HAVE_CHISE, HAVE_LIBCHISE or
   ENABLE_COMPOSITE_CHARS.
   NOTE(review): several of the matching #if/#endif lines are not visible
   in this excerpt. */
2842 syms_of_mule_charset (void)
2844 INIT_LRECORD_IMPLEMENTATION (charset);
/* Primitives operating on charset objects. */
2846 DEFSUBR (Fcharsetp);
2847 DEFSUBR (Ffind_charset);
2848 DEFSUBR (Fget_charset);
2849 DEFSUBR (Fcharset_list);
2850 DEFSUBR (Fcharset_name);
2851 DEFSUBR (Fmake_charset);
2852 DEFSUBR (Fmake_reverse_direction_charset);
2853 /* DEFSUBR (Freverse_direction_charset); */
2854 DEFSUBR (Fdefine_charset_alias);
2855 DEFSUBR (Fcharset_from_attributes);
2856 DEFSUBR (Fcharset_short_name);
2857 DEFSUBR (Fcharset_long_name);
2858 DEFSUBR (Fcharset_description);
2859 DEFSUBR (Fcharset_dimension);
2860 DEFSUBR (Fcharset_property);
2861 DEFSUBR (Fcharset_id);
2862 DEFSUBR (Fset_charset_ccl_program);
2863 DEFSUBR (Fset_charset_registry);
/* Mapping-table primitives; the save/reset ones sit inside a HAVE_CHISE
   conditional (see the #endif below). */
2866 DEFSUBR (Fcharset_mapping_table);
2867 DEFSUBR (Fset_charset_mapping_table);
2869 DEFSUBR (Fsave_charset_mapping_table);
2870 DEFSUBR (Freset_charset_mapping_table);
2871 #ifdef HAVE_LIBCHISE
2872 DEFSUBR (Fsave_charset_properties);
2873 #endif /* HAVE_LIBCHISE */
2874 #endif /* HAVE_CHISE */
2875 DEFSUBR (Fdecode_char);
2876 DEFSUBR (Fdecode_builtin_char);
2877 DEFSUBR (Fencode_char);
/* Primitives operating on characters. */
2880 DEFSUBR (Fmake_char);
2881 DEFSUBR (Fchar_charset);
2882 DEFSUBR (Fchar_octet);
2883 DEFSUBR (Fsplit_char);
/* Only registered when composite characters are compiled in. */
2885 #ifdef ENABLE_COMPOSITE_CHARS
2886 DEFSUBR (Fmake_composite_char);
2887 DEFSUBR (Fcomposite_char_string);
/* General-purpose symbols used by this file. */
2890 defsymbol (&Qcharsetp, "charsetp");
2891 defsymbol (&Qregistry, "registry");
2892 defsymbol (&Qfinal, "final");
2893 defsymbol (&Qgraphic, "graphic");
2894 defsymbol (&Qdirection, "direction");
2895 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2896 defsymbol (&Qshort_name, "short-name");
2897 defsymbol (&Qlong_name, "long-name");
2898 defsymbol (&Qiso_ir, "iso-ir");
2900 defsymbol (&Qmother, "mother");
2901 defsymbol (&Qmin_code, "min-code");
2902 defsymbol (&Qmax_code, "max-code");
2903 defsymbol (&Qcode_offset, "code-offset");
2904 defsymbol (&Qconversion, "conversion");
2905 defsymbol (&Q94x60, "94x60");
2906 defsymbol (&Q94x94x60, "94x94x60");
2907 defsymbol (&Qbig5_1, "big5-1");
2908 defsymbol (&Qbig5_2, "big5-2");
/* Direction symbols. */
2911 defsymbol (&Ql2r, "l2r");
2912 defsymbol (&Qr2l, "r2l");
2914 /* Charsets, compatible with FSF 20.3
2915 Naming convention is Script-Charset[-Edition] */
2916 defsymbol (&Qascii, "ascii");
2917 defsymbol (&Qcontrol_1, "control-1");
2918 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2919 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2920 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2921 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2922 defsymbol (&Qthai_tis620, "thai-tis620");
2923 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2924 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2925 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2926 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2927 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2928 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2929 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
/* The Qmap_... variables are bound to symbol names that start with '='. */
2930 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2931 defsymbol (&Qmap_gb2312, "=gb2312");
2932 defsymbol (&Qmap_gb12345, "=gb12345");
2933 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2934 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2935 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2936 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2937 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2939 defsymbol (&Qsystem_char_id, "system-char-id");
2940 defsymbol (&Qmap_ucs, "=ucs");
2941 defsymbol (&Qucs, "ucs");
2942 defsymbol (&Qucs_bmp, "ucs-bmp");
2943 defsymbol (&Qucs_smp, "ucs-smp");
2944 defsymbol (&Qucs_sip, "ucs-sip");
2945 defsymbol (&Qlatin_viscii, "latin-viscii");
2946 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2947 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2948 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2949 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2950 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2951 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2952 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2953 defsymbol (&Qmap_big5, "=big5");
2954 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2956 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2957 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2959 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the zeroed
   charset lookup structure, registers it as a dump root, fills its lookup
   tables with Qnil, seeds the next-free leading-byte counters, and defines
   the Lisp variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): the two charset_by_attributes loops below index the table
   with different ranks, so they are presumably alternatives selected by
   preprocessor conditionals that are not visible in this excerpt. */
2963 vars_of_mule_charset (void)
2970 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2971 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2973 /* Table of charsets indexed by leading byte. */
2974 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2975 chlook->charset_by_leading_byte[i] = Qnil;
2978 /* Table of charsets indexed by type/final-byte. */
2979 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2980 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2981 chlook->charset_by_attributes[i][j] = Qnil;
2983 /* Table of charsets indexed by type/final-byte/direction. */
2984 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2985 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2986 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2987 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Start handing out leading bytes from the beginning of the private
   ranges. */
2991 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2993 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2994 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* The same value is assigned both before and after the DEFVAR_INT. */
2998 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2999 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3000 Leading-code of private TYPE9N charset of column-width 1.
3002 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3006 Vdefault_coded_charset_priority_list = Qnil;
3007 DEFVAR_LISP ("default-coded-charset-priority-list",
3008 &Vdefault_coded_charset_priority_list /*
3009 Default order of preferred coded-character-sets.
3015 complex_vars_of_mule_charset (void)
3017 staticpro (&Vcharset_hash_table);
3018 Vcharset_hash_table =
3019 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3021 /* Predefined character sets. We store them into variables for
3025 staticpro (&Vcharset_system_char_id);
3026 Vcharset_system_char_id =
3027 make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
3028 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3029 build_string ("SCID"),
3030 build_string ("CHAR-ID"),
3031 build_string ("System char-id"),
3033 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3034 staticpro (&Vcharset_ucs);
3036 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
3037 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3038 build_string ("UCS"),
3039 build_string ("UCS"),
3040 build_string ("ISO/IEC 10646"),
3042 Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3043 staticpro (&Vcharset_ucs_bmp);
3045 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3046 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3047 build_string ("BMP"),
3048 build_string ("UCS-BMP"),
3049 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3051 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
3052 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
3053 staticpro (&Vcharset_ucs_smp);
3055 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
3056 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3057 build_string ("SMP"),
3058 build_string ("UCS-SMP"),
3059 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
3060 build_string ("UCS00-1"),
3061 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
3062 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
3063 staticpro (&Vcharset_ucs_sip);
3065 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
3066 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3067 build_string ("SIP"),
3068 build_string ("UCS-SIP"),
3069 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
3070 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
3071 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
3072 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
3074 # define MIN_CHAR_THAI 0
3075 # define MAX_CHAR_THAI 0
3076 /* # define MIN_CHAR_HEBREW 0 */
3077 /* # define MAX_CHAR_HEBREW 0 */
3078 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3079 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* Built-in ASCII charset (final byte 'B', code points 0..0x7F).
   The three descriptive strings follow the pattern used by the neighboring
   charsets (short name, long name, description); the long name is plain
   "ASCII" with no stray closing parenthesis. */
3081 staticpro (&Vcharset_ascii);
3083 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3084 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3085 build_string ("ASCII"),
3086 build_string ("ASCII"),
3087 build_string ("ASCII (ISO646 IRV)"),
3088 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3089 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Built-in C1 control charset.  The code range passed below is
   0x80..0x9F, i.e. 128-159, so the description string states that range
   (it used to claim 128-191, contradicting the range arguments). */
3090 staticpro (&Vcharset_control_1);
3091 Vcharset_control_1 =
3092 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3093 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3094 build_string ("C1"),
3095 build_string ("Control characters"),
3096 build_string ("Control characters 128-159"),
3098 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
3099 staticpro (&Vcharset_latin_iso8859_1);
3100 Vcharset_latin_iso8859_1 =
3101 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3102 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3103 build_string ("Latin-1"),
3104 build_string ("ISO8859-1 (Latin-1)"),
3105 build_string ("ISO8859-1 (Latin-1)"),
3106 build_string ("iso8859-1"),
3107 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3108 staticpro (&Vcharset_latin_iso8859_2);
3109 Vcharset_latin_iso8859_2 =
3110 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3111 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3112 build_string ("Latin-2"),
3113 build_string ("ISO8859-2 (Latin-2)"),
3114 build_string ("ISO8859-2 (Latin-2)"),
3115 build_string ("iso8859-2"),
3116 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3117 staticpro (&Vcharset_latin_iso8859_3);
3118 Vcharset_latin_iso8859_3 =
3119 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3120 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3121 build_string ("Latin-3"),
3122 build_string ("ISO8859-3 (Latin-3)"),
3123 build_string ("ISO8859-3 (Latin-3)"),
3124 build_string ("iso8859-3"),
3125 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3126 staticpro (&Vcharset_latin_iso8859_4);
3127 Vcharset_latin_iso8859_4 =
3128 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3129 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3130 build_string ("Latin-4"),
3131 build_string ("ISO8859-4 (Latin-4)"),
3132 build_string ("ISO8859-4 (Latin-4)"),
3133 build_string ("iso8859-4"),
3134 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3135 staticpro (&Vcharset_thai_tis620);
3136 Vcharset_thai_tis620 =
3137 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3138 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3139 build_string ("TIS620"),
3140 build_string ("TIS620 (Thai)"),
3141 build_string ("TIS620.2529 (Thai)"),
3142 build_string ("tis620"),
3143 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3144 staticpro (&Vcharset_greek_iso8859_7);
3145 Vcharset_greek_iso8859_7 =
3146 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3147 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3148 build_string ("ISO8859-7"),
3149 build_string ("ISO8859-7 (Greek)"),
3150 build_string ("ISO8859-7 (Greek)"),
3151 build_string ("iso8859-7"),
3152 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3153 staticpro (&Vcharset_arabic_iso8859_6);
3154 Vcharset_arabic_iso8859_6 =
3155 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3156 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3157 build_string ("ISO8859-6"),
3158 build_string ("ISO8859-6 (Arabic)"),
3159 build_string ("ISO8859-6 (Arabic)"),
3160 build_string ("iso8859-6"),
3161 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3162 staticpro (&Vcharset_hebrew_iso8859_8);
3163 Vcharset_hebrew_iso8859_8 =
3164 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3165 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3166 build_string ("ISO8859-8"),
3167 build_string ("ISO8859-8 (Hebrew)"),
3168 build_string ("ISO8859-8 (Hebrew)"),
3169 build_string ("iso8859-8"),
3171 0 /* MIN_CHAR_HEBREW */,
3172 0 /* MAX_CHAR_HEBREW */, 0, 32,
3173 Qnil, CONVERSION_IDENTICAL);
3174 staticpro (&Vcharset_katakana_jisx0201);
3175 Vcharset_katakana_jisx0201 =
3176 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3177 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3178 build_string ("JISX0201 Kana"),
3179 build_string ("JISX0201.1976 (Japanese Kana)"),
3180 build_string ("JISX0201.1976 Japanese Kana"),
3181 build_string ("jisx0201\\.1976"),
3182 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3183 staticpro (&Vcharset_latin_jisx0201);
3184 Vcharset_latin_jisx0201 =
3185 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3186 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3187 build_string ("JISX0201 Roman"),
3188 build_string ("JISX0201.1976 (Japanese Roman)"),
3189 build_string ("JISX0201.1976 Japanese Roman"),
3190 build_string ("jisx0201\\.1976"),
3191 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3192 staticpro (&Vcharset_cyrillic_iso8859_5);
3193 Vcharset_cyrillic_iso8859_5 =
3194 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3195 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3196 build_string ("ISO8859-5"),
3197 build_string ("ISO8859-5 (Cyrillic)"),
3198 build_string ("ISO8859-5 (Cyrillic)"),
3199 build_string ("iso8859-5"),
3200 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3201 staticpro (&Vcharset_latin_iso8859_9);
3202 Vcharset_latin_iso8859_9 =
3203 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3204 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3205 build_string ("Latin-5"),
3206 build_string ("ISO8859-9 (Latin-5)"),
3207 build_string ("ISO8859-9 (Latin-5)"),
3208 build_string ("iso8859-9"),
3209 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3211 staticpro (&Vcharset_jis_x0208);
3212 Vcharset_jis_x0208 =
3213 make_charset (LEADING_BYTE_JIS_X0208,
3214 Qmap_jis_x0208, 94, 2,
3215 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3216 build_string ("JIS X0208"),
3217 build_string ("JIS X0208 Common"),
3218 build_string ("JIS X0208 Common part"),
3219 build_string ("jisx0208\\.1990"),
3221 MIN_CHAR_JIS_X0208_1990,
3222 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3223 Qnil, CONVERSION_94x94);
3225 staticpro (&Vcharset_japanese_jisx0208_1978);
3226 Vcharset_japanese_jisx0208_1978 =
3227 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3228 Qmap_jis_x0208_1978, 94, 2,
3229 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3230 build_string ("JIS X0208:1978"),
3231 build_string ("JIS X0208:1978 (Japanese)"),
3233 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3234 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3241 CONVERSION_IDENTICAL);
/* Built-in GB2312 charset (final byte 'A').  The long name is plain
   "GB2312"; the stray closing parenthesis has been dropped. */
3242 staticpro (&Vcharset_chinese_gb2312);
3243 Vcharset_chinese_gb2312 =
3244 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
3245 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3246 build_string ("GB2312"),
3247 build_string ("GB2312"),
3248 build_string ("GB2312 Chinese simplified"),
3249 build_string ("gb2312"),
3250 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Built-in GB 12345 charset (no final byte assigned).  The long name is
   plain "GB 12345"; the stray closing parenthesis has been dropped. */
3251 staticpro (&Vcharset_chinese_gb12345);
3252 Vcharset_chinese_gb12345 =
3253 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
3254 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3255 build_string ("G1"),
3256 build_string ("GB 12345"),
3257 build_string ("GB 12345-1990"),
3258 build_string ("GB12345\\(\\.1990\\)?-0"),
3259 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3260 staticpro (&Vcharset_japanese_jisx0208);
3261 Vcharset_japanese_jisx0208 =
3262 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
3263 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3264 build_string ("JISX0208"),
3265 build_string ("JIS X0208:1983 (Japanese)"),
3266 build_string ("JIS X0208:1983 Japanese Kanji"),
3267 build_string ("jisx0208\\.1983"),
3274 CONVERSION_IDENTICAL);
3276 staticpro (&Vcharset_japanese_jisx0208_1990);
3277 Vcharset_japanese_jisx0208_1990 =
3278 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3279 Qmap_jis_x0208_1990, 94, 2,
3280 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3281 build_string ("JISX0208-1990"),
3282 build_string ("JIS X0208:1990 (Japanese)"),
3283 build_string ("JIS X0208:1990 Japanese Kanji"),
3284 build_string ("jisx0208\\.1990"),
3286 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3287 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3288 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3289 Vcharset_jis_x0208 /* Qnil */,
3290 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* Built-in KS C 5601 charset (final byte 'C').  The long name follows the
   "NAME (Language)" pattern of the neighboring charsets, with the closing
   parenthesis that was missing. */
3292 staticpro (&Vcharset_korean_ksc5601);
3293 Vcharset_korean_ksc5601 =
3294 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
3295 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3296 build_string ("KSC5601"),
3297 build_string ("KSC5601 (Korean)"),
3298 build_string ("KSC5601 Korean Hangul and Hanja"),
3299 build_string ("ksc5601"),
3300 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3301 staticpro (&Vcharset_japanese_jisx0212);
3302 Vcharset_japanese_jisx0212 =
3303 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
3304 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3305 build_string ("JISX0212"),
3306 build_string ("JISX0212 (Japanese)"),
3307 build_string ("JISX0212 Japanese Supplement"),
3308 build_string ("jisx0212"),
3309 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3311 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3312 staticpro (&Vcharset_chinese_cns11643_1);
3313 Vcharset_chinese_cns11643_1 =
3314 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
3315 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3316 build_string ("CNS11643-1"),
3317 build_string ("CNS11643-1 (Chinese traditional)"),
3319 ("CNS 11643 Plane 1 Chinese traditional"),
3320 build_string (CHINESE_CNS_PLANE_RE("1")),
3321 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3322 staticpro (&Vcharset_chinese_cns11643_2);
3323 Vcharset_chinese_cns11643_2 =
3324 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
3325 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3326 build_string ("CNS11643-2"),
3327 build_string ("CNS11643-2 (Chinese traditional)"),
3329 ("CNS 11643 Plane 2 Chinese traditional"),
3330 build_string (CHINESE_CNS_PLANE_RE("2")),
3331 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Built-in TCVN 5712 (VSCII-2) charset (final byte 'Z').  The description
   names the 1993 edition, in agreement with the registry pattern below,
   which matches "tcvn5712.1993-1" (it used to say 1983). */
3333 staticpro (&Vcharset_latin_tcvn5712);
3334 Vcharset_latin_tcvn5712 =
3335 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3336 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3337 build_string ("TCVN 5712"),
3338 build_string ("TCVN 5712 (VSCII-2)"),
3339 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
3340 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3341 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3342 staticpro (&Vcharset_latin_viscii_lower);
3343 Vcharset_latin_viscii_lower =
3344 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3345 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3346 build_string ("VISCII lower"),
3347 build_string ("VISCII lower (Vietnamese)"),
3348 build_string ("VISCII lower (Vietnamese)"),
3349 build_string ("MULEVISCII-LOWER"),
3350 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3351 staticpro (&Vcharset_latin_viscii_upper);
3352 Vcharset_latin_viscii_upper =
3353 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3354 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3355 build_string ("VISCII upper"),
3356 build_string ("VISCII upper (Vietnamese)"),
3357 build_string ("VISCII upper (Vietnamese)"),
3358 build_string ("MULEVISCII-UPPER"),
3359 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3360 staticpro (&Vcharset_latin_viscii);
3361 Vcharset_latin_viscii =
3362 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3363 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3364 build_string ("VISCII"),
3365 build_string ("VISCII 1.1 (Vietnamese)"),
3366 build_string ("VISCII 1.1 (Vietnamese)"),
3367 build_string ("VISCII1\\.1"),
3368 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3369 staticpro (&Vcharset_chinese_big5);
3370 Vcharset_chinese_big5 =
3371 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
3372 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3373 build_string ("Big5"),
3374 build_string ("Big5"),
3375 build_string ("Big5 Chinese traditional"),
3376 build_string ("big5-0"),
3378 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3379 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3381 staticpro (&Vcharset_ethiopic_ucs);
3382 Vcharset_ethiopic_ucs =
3383 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3384 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3385 build_string ("Ethiopic (UCS)"),
3386 build_string ("Ethiopic (UCS)"),
3387 build_string ("Ethiopic of UCS"),
3388 build_string ("Ethiopic-Unicode"),
3389 Qnil, 0x1200, 0x137F, 0, 0,
3390 Qnil, CONVERSION_IDENTICAL);
3392 staticpro (&Vcharset_chinese_big5_1);
3393 Vcharset_chinese_big5_1 =
3394 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3395 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3396 build_string ("Big5"),
3397 build_string ("Big5 (Level-1)"),
3399 ("Big5 Level-1 Chinese traditional"),
3400 build_string ("big5"),
3401 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3402 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3403 staticpro (&Vcharset_chinese_big5_2);
3404 Vcharset_chinese_big5_2 =
3405 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3406 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3407 build_string ("Big5"),
3408 build_string ("Big5 (Level-2)"),
3410 ("Big5 Level-2 Chinese traditional"),
3411 build_string ("big5"),
3412 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3413 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3415 #ifdef ENABLE_COMPOSITE_CHARS
3416 /* #### For simplicity, we put composite chars into a 96x96 charset.
3417 This is going to lead to problems because you can run out of
3418 room, esp. as we don't yet recycle numbers. */
3419 staticpro (&Vcharset_composite);
3420 Vcharset_composite =
3421 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3422 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3423 build_string ("Composite"),
3424 build_string ("Composite characters"),
3425 build_string ("Composite characters"),
3428 /* #### not dumped properly */
3429 composite_char_row_next = 32;
3430 composite_char_col_next = 32;
3432 Vcomposite_char_string2char_hash_table =
3433 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3434 Vcomposite_char_char2string_hash_table =
3435 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3436 staticpro (&Vcomposite_char_string2char_hash_table);
3437 staticpro (&Vcomposite_char_char2string_hash_table);
3438 #endif /* ENABLE_COMPOSITE_CHARS */