1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_ideograph_gt_pj_1;
94 Lisp_Object Vcharset_ideograph_gt_pj_2;
95 Lisp_Object Vcharset_ideograph_gt_pj_3;
96 Lisp_Object Vcharset_ideograph_gt_pj_4;
97 Lisp_Object Vcharset_ideograph_gt_pj_5;
98 Lisp_Object Vcharset_ideograph_gt_pj_6;
99 Lisp_Object Vcharset_ideograph_gt_pj_7;
100 Lisp_Object Vcharset_ideograph_gt_pj_8;
101 Lisp_Object Vcharset_ideograph_gt_pj_9;
102 Lisp_Object Vcharset_ideograph_gt_pj_10;
103 Lisp_Object Vcharset_ideograph_gt_pj_11;
104 Lisp_Object Vcharset_ideograph_daikanwa_2;
105 Lisp_Object Vcharset_ideograph_daikanwa;
106 Lisp_Object Vcharset_ethiopic_ucs;
108 Lisp_Object Vcharset_chinese_big5_1;
109 Lisp_Object Vcharset_chinese_big5_2;
111 #ifdef ENABLE_COMPOSITE_CHARS
112 Lisp_Object Vcharset_composite;
114 /* Hash tables for composite chars. One maps string representing
115 composed chars to their equivalent chars; one goes the
117 Lisp_Object Vcomposite_char_char2string_hash_table;
118 Lisp_Object Vcomposite_char_string2char_hash_table;
120 static int composite_char_row_next;
121 static int composite_char_col_next;
123 #endif /* ENABLE_COMPOSITE_CHARS */
125 struct charset_lookup *chlook;
127 static const struct lrecord_description charset_lookup_description_1[] = {
128 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
130 NUM_LEADING_BYTES+4*128
137 static const struct struct_description charset_lookup_description = {
138 sizeof (struct charset_lookup),
139 charset_lookup_description_1
143 /* Table of number of bytes in the string representation of a character
144 indexed by the first byte of that representation.
146 rep_bytes_by_first_byte(c) is more efficient than the equivalent
147 canonical computation:
149 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
151 const Bytecount rep_bytes_by_first_byte[0xA0] =
152 { /* 0x00 - 0x7f are for straight ASCII */
153 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 /* 0x80 - 0x8f are for Dimension-1 official charsets */
163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
167 /* 0x90 - 0x9d are for Dimension-2 official charsets */
168 /* 0x9e is for Dimension-1 private charsets */
169 /* 0x9f is for Dimension-2 private charsets */
170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
176 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
178 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
182 if (XVECTOR_LENGTH (v) > ccs_len)
185 for (i = 0; i < XVECTOR_LENGTH (v); i++)
187 Lisp_Object c = XVECTOR_DATA(v)[i];
189 if (!NILP (c) && !CHARP (c))
193 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
205 put_char_ccs_code_point (Lisp_Object character,
206 Lisp_Object ccs, Lisp_Object value)
208 if (!EQ (XCHARSET_NAME (ccs), Qucs)
210 || (XCHAR (character) != XINT (value)))
212 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
216 { /* obsolete representation: value must be a list of bytes */
217 Lisp_Object ret = Fcar (value);
221 signal_simple_error ("Invalid value for coded-charset", value);
222 code_point = XINT (ret);
223 if (XCHARSET_GRAPHIC (ccs) == 1)
231 signal_simple_error ("Invalid value for coded-charset",
235 signal_simple_error ("Invalid value for coded-charset",
238 if (XCHARSET_GRAPHIC (ccs) == 1)
240 code_point = (code_point << 8) | j;
243 value = make_int (code_point);
245 else if (INTP (value))
247 code_point = XINT (value);
248 if (XCHARSET_GRAPHIC (ccs) == 1)
250 code_point &= 0x7F7F7F7F;
251 value = make_int (code_point);
255 signal_simple_error ("Invalid value for coded-charset", value);
259 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
262 decoding_table_remove_char (ccs, XINT (cpos));
265 decoding_table_put_char (ccs, code_point, character);
271 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
273 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
274 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
276 if (VECTORP (decoding_table))
278 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
282 decoding_table_remove_char (ccs, XINT (cpos));
285 if (CHAR_TABLEP (encoding_table))
287 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
295 int leading_code_private_11;
298 Lisp_Object Qcharsetp;
300 /* Qdoc_string, Qdimension, Qchars defined in general.c */
301 Lisp_Object Qregistry, Qfinal, Qgraphic;
302 Lisp_Object Qdirection;
303 Lisp_Object Qreverse_direction_charset;
304 Lisp_Object Qleading_byte;
305 Lisp_Object Qshort_name, Qlong_name;
307 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
308 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
325 Qjapanese_jisx0208_1978,
329 Qjapanese_jisx0208_1990,
347 Qvietnamese_viscii_lower,
348 Qvietnamese_viscii_upper,
351 /* Qchinese_big5_cdp, */
352 Qideograph_hanziku_1,
353 Qideograph_hanziku_2,
354 Qideograph_hanziku_3,
355 Qideograph_hanziku_4,
356 Qideograph_hanziku_5,
357 Qideograph_hanziku_6,
358 Qideograph_hanziku_7,
359 Qideograph_hanziku_8,
360 Qideograph_hanziku_9,
361 Qideograph_hanziku_10,
362 Qideograph_hanziku_11,
363 Qideograph_hanziku_12,
364 Qideograph_daikanwa_2,
383 Lisp_Object Ql2r, Qr2l;
385 Lisp_Object Vcharset_hash_table;
387 /* Composite characters are characters constructed by overstriking two
388 or more regular characters.
390 1) The old Mule implementation involves storing composite characters
391 in a buffer as a tag followed by all of the actual characters
392 used to make up the composite character. I think this is a bad
393 idea; it greatly complicates code that wants to handle strings
394 one character at a time because it has to deal with the possibility
395 of great big ungainly characters. It's much more reasonable to
396 simply store an index into a table of composite characters.
398 2) The current implementation only allows for 16,384 separate
399 composite characters over the lifetime of the XEmacs process.
400 This could become a potential problem if the user
401 edited lots of different files that use composite characters.
402 Due to FSF bogosity, increasing the number of allowable
403 composite characters under Mule would decrease the number
404 of possible faces that can exist. Mule already has shrunk
405 this to 2048, and further shrinkage would become uncomfortable.
406 No such problems exist in XEmacs.
408 Composite characters could be represented as 0x80 C1 C2 C3,
409 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
410 for slightly under 2^20 (one million) composite characters
411 over the XEmacs process lifetime, and you only need to
412 increase the size of a Mule character from 19 to 21 bits.
413 Or you could use 0x80 C1 C2 C3 C4, allowing for about
414 85 million (slightly over 2^26) composite characters. */
417 /************************************************************************/
418 /* Basic Emchar functions */
419 /************************************************************************/
421 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
422 string in STR. Returns the number of bytes stored.
423 Do not call this directly. Use the macro set_charptr_emchar() instead.
427 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
442 else if ( c <= 0x7ff )
444 *p++ = (c >> 6) | 0xc0;
445 *p++ = (c & 0x3f) | 0x80;
447 else if ( c <= 0xffff )
449 *p++ = (c >> 12) | 0xe0;
450 *p++ = ((c >> 6) & 0x3f) | 0x80;
451 *p++ = (c & 0x3f) | 0x80;
453 else if ( c <= 0x1fffff )
455 *p++ = (c >> 18) | 0xf0;
456 *p++ = ((c >> 12) & 0x3f) | 0x80;
457 *p++ = ((c >> 6) & 0x3f) | 0x80;
458 *p++ = (c & 0x3f) | 0x80;
460 else if ( c <= 0x3ffffff )
462 *p++ = (c >> 24) | 0xf8;
463 *p++ = ((c >> 18) & 0x3f) | 0x80;
464 *p++ = ((c >> 12) & 0x3f) | 0x80;
465 *p++ = ((c >> 6) & 0x3f) | 0x80;
466 *p++ = (c & 0x3f) | 0x80;
470 *p++ = (c >> 30) | 0xfc;
471 *p++ = ((c >> 24) & 0x3f) | 0x80;
472 *p++ = ((c >> 18) & 0x3f) | 0x80;
473 *p++ = ((c >> 12) & 0x3f) | 0x80;
474 *p++ = ((c >> 6) & 0x3f) | 0x80;
475 *p++ = (c & 0x3f) | 0x80;
478 BREAKUP_CHAR (c, charset, c1, c2);
479 lb = CHAR_LEADING_BYTE (c);
480 if (LEADING_BYTE_PRIVATE_P (lb))
481 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
483 if (EQ (charset, Vcharset_control_1))
492 /* Return the first character from a Mule-encoded string in STR,
493 assuming it's non-ASCII. Do not call this directly.
494 Use the macro charptr_emchar() instead. */
497 non_ascii_charptr_emchar (const Bufbyte *str)
510 else if ( b >= 0xf8 )
515 else if ( b >= 0xf0 )
520 else if ( b >= 0xe0 )
525 else if ( b >= 0xc0 )
535 for( ; len > 0; len-- )
538 ch = ( ch << 6 ) | ( b & 0x3f );
542 Bufbyte i0 = *str, i1, i2 = 0;
545 if (i0 == LEADING_BYTE_CONTROL_1)
546 return (Emchar) (*++str - 0x20);
548 if (LEADING_BYTE_PREFIX_P (i0))
553 charset = CHARSET_BY_LEADING_BYTE (i0);
554 if (XCHARSET_DIMENSION (charset) == 2)
557 return MAKE_CHAR (charset, i1, i2);
561 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
562 Do not call this directly. Use the macro valid_char_p() instead. */
566 non_ascii_valid_char_p (Emchar ch)
570 /* Must have only lowest 19 bits set */
574 f1 = CHAR_FIELD1 (ch);
575 f2 = CHAR_FIELD2 (ch);
576 f3 = CHAR_FIELD3 (ch);
582 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
583 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
584 f2 > MAX_CHAR_FIELD2_PRIVATE)
589 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
590 f2 <= MAX_CHAR_FIELD2_PRIVATE))
594 NOTE: This takes advantage of the fact that
595 FIELD2_TO_OFFICIAL_LEADING_BYTE and
596 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
598 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
599 if (EQ (charset, Qnil))
601 return (XCHARSET_CHARS (charset) == 96);
607 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
608 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
609 f1 > MAX_CHAR_FIELD1_PRIVATE)
611 if (f2 < 0x20 || f3 < 0x20)
614 #ifdef ENABLE_COMPOSITE_CHARS
615 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
617 if (UNBOUNDP (Fgethash (make_int (ch),
618 Vcomposite_char_char2string_hash_table,
623 #endif /* ENABLE_COMPOSITE_CHARS */
625 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
626 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
629 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
631 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
634 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
636 if (EQ (charset, Qnil))
638 return (XCHARSET_CHARS (charset) == 96);
644 /************************************************************************/
645 /* Basic string functions */
646 /************************************************************************/
648 /* Copy the character pointed to by SRC into DST. Do not call this
649 directly. Use the macro charptr_copy_char() instead.
650 Return the number of bytes copied. */
653 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
655 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
657 for (i = bytes; i; i--, dst++, src++)
663 /************************************************************************/
664 /* streams of Emchars */
665 /************************************************************************/
667 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
668 The functions below are not meant to be called directly; use
669 the macros in insdel.h. */
672 Lstream_get_emchar_1 (Lstream *stream, int ch)
674 Bufbyte str[MAX_EMCHAR_LEN];
675 Bufbyte *strptr = str;
678 str[0] = (Bufbyte) ch;
680 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
682 int c = Lstream_getc (stream);
683 bufpos_checking_assert (c >= 0);
684 *++strptr = (Bufbyte) c;
686 return charptr_emchar (str);
690 Lstream_fput_emchar (Lstream *stream, Emchar ch)
692 Bufbyte str[MAX_EMCHAR_LEN];
693 Bytecount len = set_charptr_emchar (str, ch);
694 return Lstream_write (stream, str, len);
698 Lstream_funget_emchar (Lstream *stream, Emchar ch)
700 Bufbyte str[MAX_EMCHAR_LEN];
701 Bytecount len = set_charptr_emchar (str, ch);
702 Lstream_unread (stream, str, len);
706 /************************************************************************/
708 /************************************************************************/
711 mark_charset (Lisp_Object obj)
713 Lisp_Charset *cs = XCHARSET (obj);
715 mark_object (cs->short_name);
716 mark_object (cs->long_name);
717 mark_object (cs->doc_string);
718 mark_object (cs->registry);
719 mark_object (cs->ccl_program);
721 mark_object (cs->decoding_table);
722 mark_object (cs->mother);
728 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
730 Lisp_Charset *cs = XCHARSET (obj);
734 error ("printing unreadable object #<charset %s 0x%x>",
735 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
738 write_c_string ("#<charset ", printcharfun);
739 print_internal (CHARSET_NAME (cs), printcharfun, 0);
740 write_c_string (" ", printcharfun);
741 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
742 write_c_string (" ", printcharfun);
743 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
744 write_c_string (" ", printcharfun);
745 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
746 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
748 CHARSET_DIMENSION (cs),
749 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
750 CHARSET_COLUMNS (cs),
751 CHARSET_GRAPHIC (cs),
753 write_c_string (buf, printcharfun);
754 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
755 sprintf (buf, " 0x%x>", cs->header.uid);
756 write_c_string (buf, printcharfun);
759 static const struct lrecord_description charset_description[] = {
760 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
761 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
762 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
763 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
764 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
774 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
775 mark_charset, print_charset, 0, 0, 0,
779 /* Make a new charset. */
780 /* #### SJT Should generic properties be allowed? */
782 make_charset (Charset_ID id, Lisp_Object name,
783 unsigned short chars, unsigned char dimension,
784 unsigned char columns, unsigned char graphic,
785 Bufbyte final, unsigned char direction, Lisp_Object short_name,
786 Lisp_Object long_name, Lisp_Object doc,
788 Lisp_Object decoding_table,
789 Emchar min_code, Emchar max_code,
790 Emchar code_offset, unsigned char byte_offset,
791 Lisp_Object mother, unsigned char conversion)
794 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
798 XSETCHARSET (obj, cs);
800 CHARSET_ID (cs) = id;
801 CHARSET_NAME (cs) = name;
802 CHARSET_SHORT_NAME (cs) = short_name;
803 CHARSET_LONG_NAME (cs) = long_name;
804 CHARSET_CHARS (cs) = chars;
805 CHARSET_DIMENSION (cs) = dimension;
806 CHARSET_DIRECTION (cs) = direction;
807 CHARSET_COLUMNS (cs) = columns;
808 CHARSET_GRAPHIC (cs) = graphic;
809 CHARSET_FINAL (cs) = final;
810 CHARSET_DOC_STRING (cs) = doc;
811 CHARSET_REGISTRY (cs) = reg;
812 CHARSET_CCL_PROGRAM (cs) = Qnil;
813 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
815 CHARSET_DECODING_TABLE(cs) = Qunbound;
816 CHARSET_MIN_CODE (cs) = min_code;
817 CHARSET_MAX_CODE (cs) = max_code;
818 CHARSET_CODE_OFFSET (cs) = code_offset;
819 CHARSET_BYTE_OFFSET (cs) = byte_offset;
820 CHARSET_MOTHER (cs) = mother;
821 CHARSET_CONVERSION (cs) = conversion;
825 if (id == LEADING_BYTE_ASCII)
826 CHARSET_REP_BYTES (cs) = 1;
828 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
830 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
835 /* some charsets do not have final characters. This includes
836 ASCII, Control-1, Composite, and the two faux private
838 unsigned char iso2022_type
839 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
841 if (code_offset == 0)
843 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
844 chlook->charset_by_attributes[iso2022_type][final] = obj;
848 (chlook->charset_by_attributes[iso2022_type][final][direction]));
849 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
853 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
854 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
856 /* Some charsets are "faux" and don't have names or really exist at
857 all except in the leading-byte table. */
859 Fputhash (name, obj, Vcharset_hash_table);
864 get_unallocated_leading_byte (int dimension)
869 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
872 lb = chlook->next_allocated_leading_byte++;
876 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
879 lb = chlook->next_allocated_1_byte_leading_byte++;
883 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
886 lb = chlook->next_allocated_2_byte_leading_byte++;
892 ("No more character sets free for this dimension",
893 make_int (dimension));
899 /* Number of Big5 characters which have the same code in 1st byte. */
901 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
904 decode_defined_char (Lisp_Object ccs, int code_point)
906 int dim = XCHARSET_DIMENSION (ccs);
907 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
915 = get_ccs_octet_table (decoding_table, ccs,
916 (code_point >> (dim * 8)) & 255);
918 if (CHARP (decoding_table))
919 return XCHAR (decoding_table);
920 #ifdef HAVE_CHISE_CLIENT
921 if (EQ (decoding_table, Qunloaded))
923 char_id = load_char_decoding_entry_maybe (ccs, code_point);
928 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
930 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
932 if ( EQ (mother, Vcharset_ucs) )
933 return DECODE_CHAR (mother, code_point);
935 return decode_defined_char (mother, code_point);
937 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
940 = (((code_point >> 8) & 0x7F) - 33) * 94
941 + (( code_point & 0x7F) - 33);
942 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
943 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
945 b2 += b2 < 0x3F ? 0x40 : 0x62;
946 return decode_defined_char (mother, (b1 << 8) | b2);
948 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
951 = (((code_point >> 8) & 0x7F) - 33) * 94
952 + (( code_point & 0x7F) - 33)
953 + BIG5_SAME_ROW * (0xC9 - 0xA1);
954 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
955 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
957 b2 += b2 < 0x3F ? 0x40 : 0x62;
958 return decode_defined_char (mother, (b1 << 8) | b2);
965 decode_builtin_char (Lisp_Object charset, int code_point)
967 Lisp_Object mother = XCHARSET_MOTHER (charset);
970 if ( XCHARSET_MAX_CODE (charset) > 0 )
972 if ( CHARSETP (mother) )
974 int code = code_point;
976 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
978 int row = code_point >> 8;
979 int cell = code_point & 255;
983 else if (row < 16 + 32 + 30)
984 code = (row - (16 + 32)) * 94 + cell - 33;
985 else if (row < 18 + 32 + 30)
987 else if (row < 18 + 32 + 60)
988 code = (row - (18 + 32)) * 94 + cell - 33;
990 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
992 int plane = code_point >> 16;
993 int row = (code_point >> 8) & 255;
994 int cell = code_point & 255;
998 else if (row < 16 + 32 + 30)
1000 = (plane - 33) * 94 * 60
1001 + (row - (16 + 32)) * 94
1003 else if (row < 18 + 32 + 30)
1005 else if (row < 18 + 32 + 60)
1007 = (plane - 33) * 94 * 60
1008 + (row - (18 + 32)) * 94
1011 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1014 = (((code_point >> 8) & 0x7F) - 33) * 94
1015 + (( code_point & 0x7F) - 33);
1016 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1017 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1019 b2 += b2 < 0x3F ? 0x40 : 0x62;
1020 code = (b1 << 8) | b2;
1022 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1025 = (((code_point >> 8) & 0x7F) - 33) * 94
1026 + (( code_point & 0x7F) - 33)
1027 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1028 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1029 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1031 b2 += b2 < 0x3F ? 0x40 : 0x62;
1032 code = (b1 << 8) | b2;
1035 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
1040 = (XCHARSET_DIMENSION (charset) == 1
1042 code_point - XCHARSET_BYTE_OFFSET (charset)
1044 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1045 * XCHARSET_CHARS (charset)
1046 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1047 + XCHARSET_CODE_OFFSET (charset);
1048 if ((cid < XCHARSET_MIN_CODE (charset))
1049 || (XCHARSET_MAX_CODE (charset) < cid))
1054 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1056 if (XCHARSET_DIMENSION (charset) == 1)
1058 switch (XCHARSET_CHARS (charset))
1062 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1065 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1073 switch (XCHARSET_CHARS (charset))
1076 return MIN_CHAR_94x94
1077 + (final - '0') * 94 * 94
1078 + (((code_point >> 8) & 0x7F) - 33) * 94
1079 + ((code_point & 0x7F) - 33);
1081 return MIN_CHAR_96x96
1082 + (final - '0') * 96 * 96
1083 + (((code_point >> 8) & 0x7F) - 32) * 96
1084 + ((code_point & 0x7F) - 32);
1096 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1098 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1101 if ( CHAR_TABLEP (encoding_table)
1102 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1107 Lisp_Object mother = XCHARSET_MOTHER (charset);
1108 int min = XCHARSET_MIN_CODE (charset);
1109 int max = XCHARSET_MAX_CODE (charset);
1112 if ( CHARSETP (mother) )
1114 if (XCHARSET_FINAL (charset) >= '0')
1115 code = charset_code_point (mother, ch, 1);
1117 code = charset_code_point (mother, ch, defined_only);
1119 else if (defined_only)
1121 else if ( ((max == 0) && CHARSETP (mother)
1122 && (XCHARSET_FINAL (charset) == 0))
1123 || ((min <= ch) && (ch <= max)) )
1125 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1126 || ((min <= code) && (code <= max)) )
1128 int d = code - XCHARSET_CODE_OFFSET (charset);
1130 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1132 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1134 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1136 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1139 int cell = d % 94 + 33;
1145 return (row << 8) | cell;
1147 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1149 int B1 = d >> 8, B2 = d & 0xFF;
1151 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1152 - (B2 < 0x7F ? 0x40 : 0x62);
1156 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1159 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1161 int B1 = d >> 8, B2 = d & 0xFF;
1163 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1164 - (B2 < 0x7F ? 0x40 : 0x62);
1168 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1169 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1172 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1173 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1174 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1175 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1176 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1178 int plane = d / (94 * 60) + 33;
1179 int row = (d % (94 * 60)) / 94;
1180 int cell = d % 94 + 33;
1186 return (plane << 16) | (row << 8) | cell;
1188 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1190 ( (d / (94 * 94) + 33) << 16)
1191 | ((d / 94 % 94 + 33) << 8)
1193 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1195 ( (d / (96 * 96) + 32) << 16)
1196 | ((d / 96 % 96 + 32) << 8)
1198 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1200 ( (d / (94 * 94 * 94) + 33) << 24)
1201 | ((d / (94 * 94) % 94 + 33) << 16)
1202 | ((d / 94 % 94 + 33) << 8)
1204 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1206 ( (d / (96 * 96 * 96) + 32) << 24)
1207 | ((d / (96 * 96) % 96 + 32) << 16)
1208 | ((d / 96 % 96 + 32) << 8)
1212 printf ("Unknown CCS-conversion %d is specified!",
1213 XCHARSET_CONVERSION (charset));
1217 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1218 ( XCHARSET_MIN_CODE (charset) == 0 )
1220 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1221 (XCHARSET_CODE_OFFSET (charset)
1222 == XCHARSET_MIN_CODE (charset))
1227 if (XCHARSET_DIMENSION (charset) == 1)
1229 if (XCHARSET_CHARS (charset) == 94)
1231 if (((d = ch - (MIN_CHAR_94
1232 + (XCHARSET_FINAL (charset) - '0') * 94))
1237 else if (XCHARSET_CHARS (charset) == 96)
1239 if (((d = ch - (MIN_CHAR_96
1240 + (XCHARSET_FINAL (charset) - '0') * 96))
1248 else if (XCHARSET_DIMENSION (charset) == 2)
1250 if (XCHARSET_CHARS (charset) == 94)
1252 if (((d = ch - (MIN_CHAR_94x94
1254 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1257 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1259 else if (XCHARSET_CHARS (charset) == 96)
1261 if (((d = ch - (MIN_CHAR_96x96
1263 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1266 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1277 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1279 if (c <= MAX_CHAR_BASIC_LATIN)
1281 *charset = Vcharset_ascii;
1286 *charset = Vcharset_control_1;
1291 *charset = Vcharset_latin_iso8859_1;
1295 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1297 *charset = Vcharset_hebrew_iso8859_8;
1298 return c - MIN_CHAR_HEBREW + 0x20;
1301 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1303 *charset = Vcharset_thai_tis620;
1304 return c - MIN_CHAR_THAI + 0x20;
1307 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1308 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1310 return list2 (Vcharset_katakana_jisx0201,
1311 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1314 else if (c <= MAX_CHAR_BMP)
1316 *charset = Vcharset_ucs_bmp;
1319 else if (c <= MAX_CHAR_SMP)
1321 *charset = Vcharset_ucs_smp;
1322 return c - MIN_CHAR_SMP;
1324 else if (c <= MAX_CHAR_SIP)
1326 *charset = Vcharset_ucs_sip;
1327 return c - MIN_CHAR_SIP;
1329 else if (c < MIN_CHAR_DAIKANWA)
1331 *charset = Vcharset_ucs;
1334 else if (c <= MAX_CHAR_DAIKANWA)
1336 *charset = Vcharset_ideograph_daikanwa;
1337 return c - MIN_CHAR_DAIKANWA;
1339 else if (c < MIN_CHAR_94)
1341 *charset = Vcharset_ucs;
1344 else if (c <= MAX_CHAR_94)
1346 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1347 ((c - MIN_CHAR_94) / 94) + '0',
1348 CHARSET_LEFT_TO_RIGHT);
1349 if (!NILP (*charset))
1350 return ((c - MIN_CHAR_94) % 94) + 33;
1353 *charset = Vcharset_ucs;
1357 else if (c <= MAX_CHAR_96)
1359 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1360 ((c - MIN_CHAR_96) / 96) + '0',
1361 CHARSET_LEFT_TO_RIGHT);
1362 if (!NILP (*charset))
1363 return ((c - MIN_CHAR_96) % 96) + 32;
1366 *charset = Vcharset_ucs;
1370 else if (c <= MAX_CHAR_94x94)
1373 = CHARSET_BY_ATTRIBUTES (94, 2,
1374 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1375 CHARSET_LEFT_TO_RIGHT);
1376 if (!NILP (*charset))
1377 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1378 | (((c - MIN_CHAR_94x94) % 94) + 33);
1381 *charset = Vcharset_ucs;
1385 else if (c <= MAX_CHAR_96x96)
1388 = CHARSET_BY_ATTRIBUTES (96, 2,
1389 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1390 CHARSET_LEFT_TO_RIGHT);
1391 if (!NILP (*charset))
1392 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1393 | (((c - MIN_CHAR_96x96) % 96) + 32);
1396 *charset = Vcharset_ucs;
1402 *charset = Vcharset_ucs;
1407 Lisp_Object Vdefault_coded_charset_priority_list;
1411 /************************************************************************/
1412 /* Basic charset Lisp functions */
1413 /************************************************************************/
1415 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1416 Return non-nil if OBJECT is a charset.
1420 return CHARSETP (object) ? Qt : Qnil;
1423 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1424 Retrieve the charset of the given name.
1425 If CHARSET-OR-NAME is a charset object, it is simply returned.
1426 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1427 nil is returned. Otherwise the associated charset object is returned.
1431 if (CHARSETP (charset_or_name))
1432 return charset_or_name;
1434 CHECK_SYMBOL (charset_or_name);
1435 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1438 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1439 Retrieve the charset of the given name.
1440 Same as `find-charset' except an error is signalled if there is no such
1441 charset instead of returning nil.
1445 Lisp_Object charset = Ffind_charset (name);
1448 signal_simple_error ("No such charset", name);
1452 /* We store the charsets in hash tables with the names as the key and the
1453 actual charset object as the value. Occasionally we need to use them
1454 in a list format. These routines provide us with that. */
1455 struct charset_list_closure
1457 Lisp_Object *charset_list;
1461 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1462 void *charset_list_closure)
1464 /* This function can GC */
1465 struct charset_list_closure *chcl =
1466 (struct charset_list_closure*) charset_list_closure;
1467 Lisp_Object *charset_list = chcl->charset_list;
1469 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1473 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1474 Return a list of the names of all defined charsets.
1478 Lisp_Object charset_list = Qnil;
1479 struct gcpro gcpro1;
1480 struct charset_list_closure charset_list_closure;
1482 GCPRO1 (charset_list);
1483 charset_list_closure.charset_list = &charset_list;
1484 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1485 &charset_list_closure);
1488 return charset_list;
1491 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1492 Return the name of charset CHARSET.
1496 return XCHARSET_NAME (Fget_charset (charset));
1499 /* #### SJT Should generic properties be allowed? */
1500 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1501 Define a new character set.
1502 This function is for use with Mule support.
1503 NAME is a symbol, the name by which the character set is normally referred.
1504 DOC-STRING is a string describing the character set.
1505 PROPS is a property list, describing the specific nature of the
1506 character set. Recognized properties are:
1508 'short-name Short version of the charset name (ex: Latin-1)
1509 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1510 'registry A regular expression matching the font registry field for
1512 'dimension Number of octets used to index a character in this charset.
1513 Either 1 or 2. Defaults to 1.
1514 If UTF-2000 feature is enabled, 3 or 4 are also available.
1515 'columns Number of columns used to display a character in this charset.
1516 Only used in TTY mode. (Under X, the actual width of a
1517 character can be derived from the font used to display the
1518 characters.) If unspecified, defaults to the dimension
1519 (this is almost always the correct value).
1520 'chars Number of characters in each dimension (94 or 96).
1521 Defaults to 94. Note that if the dimension is 2, the
1522 character set thus described is 94x94 or 96x96.
1523 If UTF-2000 feature is enabled, 128 or 256 are also available.
1524 'final Final byte of ISO 2022 escape sequence. Must be
1525 supplied. Each combination of (DIMENSION, CHARS) defines a
1526 separate namespace for final bytes. Note that ISO
1527 2022 restricts the final byte to the range
1528 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1529 dimension == 2. Note also that final bytes in the range
1530 0x30 - 0x3F are reserved for user-defined (not official)
1532 'graphic 0 (use left half of font on output) or 1 (use right half
1533 of font on output). Defaults to 0. For example, for
1534 a font whose registry is ISO8859-1, the left half
1535 (octets 0x20 - 0x7F) is the `ascii' character set, while
1536 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1537 character set. With 'graphic set to 0, the octets
1538 will have their high bit cleared; with it set to 1,
1539 the octets will have their high bit set.
1540 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1542 'ccl-program A compiled CCL program used to convert a character in
1543 this charset into an index into the font. This is in
1544 addition to the 'graphic property. The CCL program
1545 is passed the octets of the character, with the high
1546 bit cleared and set depending upon whether the value
1547 of the 'graphic property is 0 or 1.
1548 'mother [UTF-2000 only] Base coded-charset.
1549 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1550 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1551 'code-offset [UTF-2000 only] Offset for a code-point of a base
1553 'conversion [UTF-2000 only] Conversion for a code-point of a base
1554 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1556 (name, doc_string, props))
1558 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1559 int direction = CHARSET_LEFT_TO_RIGHT;
1560 Lisp_Object registry = Qnil;
1561 Lisp_Object charset;
1562 Lisp_Object ccl_program = Qnil;
1563 Lisp_Object short_name = Qnil, long_name = Qnil;
1564 Lisp_Object mother = Qnil;
1565 int min_code = 0, max_code = 0, code_offset = 0;
1566 int byte_offset = -1;
1569 CHECK_SYMBOL (name);
1570 if (!NILP (doc_string))
1571 CHECK_STRING (doc_string);
1573 charset = Ffind_charset (name);
1574 if (!NILP (charset))
1575 signal_simple_error ("Cannot redefine existing charset", name);
1578 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1580 if (EQ (keyword, Qshort_name))
1582 CHECK_STRING (value);
1586 else if (EQ (keyword, Qlong_name))
1588 CHECK_STRING (value);
1592 else if (EQ (keyword, Qdimension))
1595 dimension = XINT (value);
1596 if (dimension < 1 ||
1603 signal_simple_error ("Invalid value for 'dimension", value);
1606 else if (EQ (keyword, Qchars))
1609 chars = XINT (value);
1610 if (chars != 94 && chars != 96
1612 && chars != 128 && chars != 256
1615 signal_simple_error ("Invalid value for 'chars", value);
1618 else if (EQ (keyword, Qcolumns))
1621 columns = XINT (value);
1622 if (columns != 1 && columns != 2)
1623 signal_simple_error ("Invalid value for 'columns", value);
1626 else if (EQ (keyword, Qgraphic))
1629 graphic = XINT (value);
1637 signal_simple_error ("Invalid value for 'graphic", value);
1640 else if (EQ (keyword, Qregistry))
1642 CHECK_STRING (value);
1646 else if (EQ (keyword, Qdirection))
1648 if (EQ (value, Ql2r))
1649 direction = CHARSET_LEFT_TO_RIGHT;
1650 else if (EQ (value, Qr2l))
1651 direction = CHARSET_RIGHT_TO_LEFT;
1653 signal_simple_error ("Invalid value for 'direction", value);
1656 else if (EQ (keyword, Qfinal))
1658 CHECK_CHAR_COERCE_INT (value);
1659 final = XCHAR (value);
1660 if (final < '0' || final > '~')
1661 signal_simple_error ("Invalid value for 'final", value);
1665 else if (EQ (keyword, Qmother))
1667 mother = Fget_charset (value);
1670 else if (EQ (keyword, Qmin_code))
1673 min_code = XUINT (value);
1676 else if (EQ (keyword, Qmax_code))
1679 max_code = XUINT (value);
1682 else if (EQ (keyword, Qcode_offset))
1685 code_offset = XUINT (value);
1688 else if (EQ (keyword, Qconversion))
1690 if (EQ (value, Q94x60))
1691 conversion = CONVERSION_94x60;
1692 else if (EQ (value, Q94x94x60))
1693 conversion = CONVERSION_94x94x60;
1694 else if (EQ (value, Qbig5_1))
1695 conversion = CONVERSION_BIG5_1;
1696 else if (EQ (value, Qbig5_2))
1697 conversion = CONVERSION_BIG5_2;
1699 signal_simple_error ("Unrecognized conversion", value);
1703 else if (EQ (keyword, Qccl_program))
1705 struct ccl_program test_ccl;
1707 if (setup_ccl_program (&test_ccl, value) < 0)
1708 signal_simple_error ("Invalid value for 'ccl-program", value);
1709 ccl_program = value;
1713 signal_simple_error ("Unrecognized property", keyword);
1719 error ("'final must be specified");
1721 if (dimension == 2 && final > 0x5F)
1723 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1726 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1727 CHARSET_LEFT_TO_RIGHT)) ||
1728 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1729 CHARSET_RIGHT_TO_LEFT)))
1731 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1733 id = get_unallocated_leading_byte (dimension);
1735 if (NILP (doc_string))
1736 doc_string = build_string ("");
1738 if (NILP (registry))
1739 registry = build_string ("");
1741 if (NILP (short_name))
1742 XSETSTRING (short_name, XSYMBOL (name)->name);
1744 if (NILP (long_name))
1745 long_name = doc_string;
1748 columns = dimension;
1750 if (byte_offset < 0)
1754 else if (chars == 96)
1760 charset = make_charset (id, name, chars, dimension, columns, graphic,
1761 final, direction, short_name, long_name,
1762 doc_string, registry,
1763 Qnil, min_code, max_code, code_offset, byte_offset,
1764 mother, conversion);
1765 if (!NILP (ccl_program))
1766 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1770 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1772 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1773 NEW-NAME is the name of the new charset. Return the new charset.
1775 (charset, new_name))
1777 Lisp_Object new_charset = Qnil;
1778 int id, chars, dimension, columns, graphic, final;
1780 Lisp_Object registry, doc_string, short_name, long_name;
1783 charset = Fget_charset (charset);
1784 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1785 signal_simple_error ("Charset already has reverse-direction charset",
1788 CHECK_SYMBOL (new_name);
1789 if (!NILP (Ffind_charset (new_name)))
1790 signal_simple_error ("Cannot redefine existing charset", new_name);
1792 cs = XCHARSET (charset);
1794 chars = CHARSET_CHARS (cs);
1795 dimension = CHARSET_DIMENSION (cs);
1796 columns = CHARSET_COLUMNS (cs);
1797 id = get_unallocated_leading_byte (dimension);
1799 graphic = CHARSET_GRAPHIC (cs);
1800 final = CHARSET_FINAL (cs);
1801 direction = CHARSET_RIGHT_TO_LEFT;
1802 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1803 direction = CHARSET_LEFT_TO_RIGHT;
1804 doc_string = CHARSET_DOC_STRING (cs);
1805 short_name = CHARSET_SHORT_NAME (cs);
1806 long_name = CHARSET_LONG_NAME (cs);
1807 registry = CHARSET_REGISTRY (cs);
1809 new_charset = make_charset (id, new_name, chars, dimension, columns,
1810 graphic, final, direction, short_name, long_name,
1811 doc_string, registry,
1813 CHARSET_DECODING_TABLE(cs),
1814 CHARSET_MIN_CODE(cs),
1815 CHARSET_MAX_CODE(cs),
1816 CHARSET_CODE_OFFSET(cs),
1817 CHARSET_BYTE_OFFSET(cs),
1819 CHARSET_CONVERSION (cs)
1821 Qnil, 0, 0, 0, 0, Qnil, 0
1825 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1826 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1831 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1832 Define symbol ALIAS as an alias for CHARSET.
1836 CHECK_SYMBOL (alias);
1837 charset = Fget_charset (charset);
1838 return Fputhash (alias, charset, Vcharset_hash_table);
1841 /* #### Reverse direction charsets not yet implemented. */
1843 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1845 Return the reverse-direction charset parallel to CHARSET, if any.
1846 This is the charset with the same properties (in particular, the same
1847 dimension, number of characters per dimension, and final byte) as
1848 CHARSET but whose characters are displayed in the opposite direction.
1852 charset = Fget_charset (charset);
1853 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1857 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1858 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1859 If DIRECTION is omitted, both directions will be checked (left-to-right
1860 will be returned if character sets exist for both directions).
1862 (dimension, chars, final, direction))
1864 int dm, ch, fi, di = -1;
1865 Lisp_Object obj = Qnil;
1867 CHECK_INT (dimension);
1868 dm = XINT (dimension);
1869 if (dm < 1 || dm > 2)
1870 signal_simple_error ("Invalid value for DIMENSION", dimension);
1874 if (ch != 94 && ch != 96)
1875 signal_simple_error ("Invalid value for CHARS", chars);
1877 CHECK_CHAR_COERCE_INT (final);
1879 if (fi < '0' || fi > '~')
1880 signal_simple_error ("Invalid value for FINAL", final);
1882 if (EQ (direction, Ql2r))
1883 di = CHARSET_LEFT_TO_RIGHT;
1884 else if (EQ (direction, Qr2l))
1885 di = CHARSET_RIGHT_TO_LEFT;
1886 else if (!NILP (direction))
1887 signal_simple_error ("Invalid value for DIRECTION", direction);
1889 if (dm == 2 && fi > 0x5F)
1891 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1895 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1897 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1900 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1903 return XCHARSET_NAME (obj);
1907 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1908 Return short name of CHARSET.
1912 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1915 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1916 Return long name of CHARSET.
1920 return XCHARSET_LONG_NAME (Fget_charset (charset));
1923 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1924 Return description of CHARSET.
1928 return XCHARSET_DOC_STRING (Fget_charset (charset));
1931 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1932 Return dimension of CHARSET.
1936 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1939 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1940 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1941 Recognized properties are those listed in `make-charset', as well as
1942 'name and 'doc-string.
1948 charset = Fget_charset (charset);
1949 cs = XCHARSET (charset);
1951 CHECK_SYMBOL (prop);
1952 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1953 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1954 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1955 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1956 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1957 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1958 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1959 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1960 Qnil : make_char (CHARSET_FINAL (cs));
1961 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1962 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1963 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1964 if (EQ (prop, Qdirection))
1965 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1966 if (EQ (prop, Qreverse_direction_charset))
1968 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1969 /* #### Is this translation OK? If so, error checking sufficient? */
1970 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1973 if (EQ (prop, Qmother))
1974 return CHARSET_MOTHER (cs);
1975 if (EQ (prop, Qmin_code))
1976 return make_int (CHARSET_MIN_CODE (cs));
1977 if (EQ (prop, Qmax_code))
1978 return make_int (CHARSET_MAX_CODE (cs));
1980 signal_simple_error ("Unrecognized charset property name", prop);
1981 return Qnil; /* not reached */
1984 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1985 Return charset identification number of CHARSET.
1989 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1992 /* #### We need to figure out which properties we really want to
1995 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1996 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1998 (charset, ccl_program))
2000 struct ccl_program test_ccl;
2002 charset = Fget_charset (charset);
2003 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2004 signal_simple_error ("Invalid ccl-program", ccl_program);
2005 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2010 invalidate_charset_font_caches (Lisp_Object charset)
2012 /* Invalidate font cache entries for charset on all devices. */
2013 Lisp_Object devcons, concons, hash_table;
2014 DEVICE_LOOP_NO_BREAK (devcons, concons)
2016 struct device *d = XDEVICE (XCAR (devcons));
2017 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2018 if (!UNBOUNDP (hash_table))
2019 Fclrhash (hash_table);
2023 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2024 Set the 'registry property of CHARSET to REGISTRY.
2026 (charset, registry))
2028 charset = Fget_charset (charset);
2029 CHECK_STRING (registry);
2030 XCHARSET_REGISTRY (charset) = registry;
2031 invalidate_charset_font_caches (charset);
2032 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2037 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2038 Return mapping-table of CHARSET.
2042 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2045 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2046 Set mapping-table of CHARSET to TABLE.
2050 struct Lisp_Charset *cs;
2054 charset = Fget_charset (charset);
2055 cs = XCHARSET (charset);
2059 CHARSET_DECODING_TABLE(cs) = Qnil;
2062 else if (VECTORP (table))
2064 int ccs_len = CHARSET_BYTE_SIZE (cs);
2065 int ret = decoding_table_check_elements (table,
2066 CHARSET_DIMENSION (cs),
2071 signal_simple_error ("Too big table", table);
2073 signal_simple_error ("Invalid element is found", table);
2075 signal_simple_error ("Something wrong", table);
2077 CHARSET_DECODING_TABLE(cs) = Qnil;
2080 signal_error (Qwrong_type_argument,
2081 list2 (build_translated_string ("vector-or-nil-p"),
2084 byte_offset = CHARSET_BYTE_OFFSET (cs);
2085 switch (CHARSET_DIMENSION (cs))
2088 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2090 Lisp_Object c = XVECTOR_DATA(table)[i];
2093 Fput_char_attribute (c, XCHARSET_NAME (charset),
2094 make_int (i + byte_offset));
2098 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2100 Lisp_Object v = XVECTOR_DATA(table)[i];
2106 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2108 Lisp_Object c = XVECTOR_DATA(v)[j];
2112 (c, XCHARSET_NAME (charset),
2113 make_int ( ( (i + byte_offset) << 8 )
2119 Fput_char_attribute (v, XCHARSET_NAME (charset),
2120 make_int (i + byte_offset));
2127 #ifdef HAVE_CHISE_CLIENT
2128 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2129 Save mapping-table of CHARSET.
2133 struct Lisp_Charset *cs;
2134 int byte_min, byte_max;
2136 Lisp_Object db_file;
2138 charset = Fget_charset (charset);
2139 cs = XCHARSET (charset);
2141 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2142 Qsystem_char_id, 1);
2143 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2145 byte_min = CHARSET_BYTE_OFFSET (cs);
2146 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2147 switch (CHARSET_DIMENSION (cs))
2151 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2154 for (cell = byte_min; cell < byte_max; cell++)
2156 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2159 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2160 Fprin1_to_string (c, Qnil),
2167 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2170 for (row = byte_min; row < byte_max; row++)
2172 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2175 for (cell = byte_min; cell < byte_max; cell++)
2177 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2180 Fput_database (Fprin1_to_string (make_int ((row << 8)
2183 Fprin1_to_string (c, Qnil),
2191 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2194 for (plane = byte_min; plane < byte_max; plane++)
2197 = get_ccs_octet_table (table_p, charset, plane);
2200 for (row = byte_min; row < byte_max; row++)
2203 = get_ccs_octet_table (table_r, charset, row);
2206 for (cell = byte_min; cell < byte_max; cell++)
2208 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2212 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2216 Fprin1_to_string (c, Qnil),
2225 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2228 for (group = byte_min; group < byte_max; group++)
2231 = get_ccs_octet_table (table_g, charset, group);
2234 for (plane = byte_min; plane < byte_max; plane++)
2237 = get_ccs_octet_table (table_p, charset, plane);
2240 for (row = byte_min; row < byte_max; row++)
2243 = get_ccs_octet_table (table_r, charset, row);
2246 for (cell = byte_min; cell < byte_max; cell++)
2249 = get_ccs_octet_table (table_c, charset, cell);
2252 Fput_database (Fprin1_to_string
2253 (make_int (( group << 24)
2258 Fprin1_to_string (c, Qnil),
2266 return Fclose_database (db);
2269 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2270 Reset mapping-table of CCS with database file.
2274 Lisp_Object db_file;
2276 ccs = Fget_charset (ccs);
2277 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2278 Qsystem_char_id, 0);
2280 if (!NILP (Ffile_exists_p (db_file)))
2282 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
2289 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2293 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2296 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2300 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2307 decoding_table_put_char (ccs, code_point, ret);
2308 Fclose_database (db);
2312 decoding_table_put_char (ccs, code_point, Qnil);
2313 Fclose_database (db);
2317 #endif /* HAVE_CHISE_CLIENT */
2318 #endif /* UTF2000 */
2321 /************************************************************************/
2322 /* Lisp primitives for working with characters */
2323 /************************************************************************/
2326 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2327 Make a character from CHARSET and code-point CODE.
2328 If DEFINED_ONLY is non-nil, builtin character is not returned.
2329 If corresponding character is not found, nil is returned.
2331 (charset, code, defined_only))
2335 charset = Fget_charset (charset);
2338 if (XCHARSET_GRAPHIC (charset) == 1)
2340 if (NILP (defined_only))
2341 c = DECODE_CHAR (charset, c);
2343 c = decode_defined_char (charset, c);
2344 return c >= 0 ? make_char (c) : Qnil;
2347 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2348 Make a builtin character from CHARSET and code-point CODE.
2354 charset = Fget_charset (charset);
2356 if (EQ (charset, Vcharset_latin_viscii))
2358 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2364 (ret = Fget_char_attribute (chr,
2365 Vcharset_latin_viscii_lower,
2368 charset = Vcharset_latin_viscii_lower;
2372 (ret = Fget_char_attribute (chr,
2373 Vcharset_latin_viscii_upper,
2376 charset = Vcharset_latin_viscii_upper;
2383 if (XCHARSET_GRAPHIC (charset) == 1)
2386 c = decode_builtin_char (charset, c);
2387 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2391 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2392 Make a character from CHARSET and octets ARG1 and ARG2.
2393 ARG2 is required only for characters from two-dimensional charsets.
2394 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2395 character s with caron.
2397 (charset, arg1, arg2))
2401 int lowlim, highlim;
2403 charset = Fget_charset (charset);
2404 cs = XCHARSET (charset);
2406 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2407 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2409 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2411 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2412 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2415 /* It is useful (and safe, according to Olivier Galibert) to strip
2416 the 8th bit off ARG1 and ARG2 because it allows programmers to
2417 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2418 Latin 2 code of the character. */
2426 if (a1 < lowlim || a1 > highlim)
2427 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2429 if (CHARSET_DIMENSION (cs) == 1)
2433 ("Charset is of dimension one; second octet must be nil", arg2);
2434 return make_char (MAKE_CHAR (charset, a1, 0));
2443 a2 = XINT (arg2) & 0x7f;
2445 if (a2 < lowlim || a2 > highlim)
2446 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2448 return make_char (MAKE_CHAR (charset, a1, a2));
2451 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2452 Return the character set of CHARACTER.
2456 CHECK_CHAR_COERCE_INT (character);
2458 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2461 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2462 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2463 N defaults to 0 if omitted.
2467 Lisp_Object charset;
2470 CHECK_CHAR_COERCE_INT (character);
2472 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2474 if (NILP (n) || EQ (n, Qzero))
2475 return make_int (octet0);
2476 else if (EQ (n, make_int (1)))
2477 return make_int (octet1);
2479 signal_simple_error ("Octet number must be 0 or 1", n);
2483 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2484 Return code-point of CHARACTER in specified CHARSET.
2486 (character, charset, defined_only))
2490 CHECK_CHAR_COERCE_INT (character);
2491 charset = Fget_charset (charset);
2492 code_point = charset_code_point (charset, XCHAR (character),
2493 !NILP (defined_only));
2494 if (code_point >= 0)
2495 return make_int (code_point);
2501 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2502 Return list of charset and one or two position-codes of CHARACTER.
2506 /* This function can GC */
2507 struct gcpro gcpro1, gcpro2;
2508 Lisp_Object charset = Qnil;
2509 Lisp_Object rc = Qnil;
2517 GCPRO2 (charset, rc);
2518 CHECK_CHAR_COERCE_INT (character);
2521 code_point = ENCODE_CHAR (XCHAR (character), charset);
2522 dimension = XCHARSET_DIMENSION (charset);
2523 while (dimension > 0)
2525 rc = Fcons (make_int (code_point & 255), rc);
2529 rc = Fcons (XCHARSET_NAME (charset), rc);
2531 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2533 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2535 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2539 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2548 #ifdef ENABLE_COMPOSITE_CHARS
2549 /************************************************************************/
2550 /* composite character functions */
2551 /************************************************************************/
2554 lookup_composite_char (Bufbyte *str, int len)
2556 Lisp_Object lispstr = make_string (str, len);
2557 Lisp_Object ch = Fgethash (lispstr,
2558 Vcomposite_char_string2char_hash_table,
2564 if (composite_char_row_next >= 128)
2565 signal_simple_error ("No more composite chars available", lispstr);
2566 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2567 composite_char_col_next);
2568 Fputhash (make_char (emch), lispstr,
2569 Vcomposite_char_char2string_hash_table);
2570 Fputhash (lispstr, make_char (emch),
2571 Vcomposite_char_string2char_hash_table);
2572 composite_char_col_next++;
2573 if (composite_char_col_next >= 128)
2575 composite_char_col_next = 32;
2576 composite_char_row_next++;
2585 composite_char_string (Emchar ch)
2587 Lisp_Object str = Fgethash (make_char (ch),
2588 Vcomposite_char_char2string_hash_table,
2590 assert (!UNBOUNDP (str));
2594 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2595 Convert a string into a single composite character.
2596 The character is the result of overstriking all the characters in
2601 CHECK_STRING (string);
2602 return make_char (lookup_composite_char (XSTRING_DATA (string),
2603 XSTRING_LENGTH (string)));
2606 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2607 Return a string of the characters comprising a composite character.
2615 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2616 signal_simple_error ("Must be composite char", ch);
2617 return composite_char_string (emch);
2619 #endif /* ENABLE_COMPOSITE_CHARS */
2622 /************************************************************************/
2623 /* initialization */
2624 /************************************************************************/
2627 syms_of_mule_charset (void)
2629 INIT_LRECORD_IMPLEMENTATION (charset);
2631 DEFSUBR (Fcharsetp);
2632 DEFSUBR (Ffind_charset);
2633 DEFSUBR (Fget_charset);
2634 DEFSUBR (Fcharset_list);
2635 DEFSUBR (Fcharset_name);
2636 DEFSUBR (Fmake_charset);
2637 DEFSUBR (Fmake_reverse_direction_charset);
2638 /* DEFSUBR (Freverse_direction_charset); */
2639 DEFSUBR (Fdefine_charset_alias);
2640 DEFSUBR (Fcharset_from_attributes);
2641 DEFSUBR (Fcharset_short_name);
2642 DEFSUBR (Fcharset_long_name);
2643 DEFSUBR (Fcharset_description);
2644 DEFSUBR (Fcharset_dimension);
2645 DEFSUBR (Fcharset_property);
2646 DEFSUBR (Fcharset_id);
2647 DEFSUBR (Fset_charset_ccl_program);
2648 DEFSUBR (Fset_charset_registry);
2650 DEFSUBR (Fcharset_mapping_table);
2651 DEFSUBR (Fset_charset_mapping_table);
2652 #ifdef HAVE_CHISE_CLIENT
2653 DEFSUBR (Fsave_charset_mapping_table);
2654 DEFSUBR (Freset_charset_mapping_table);
2657 DEFSUBR (Fdecode_char);
2658 DEFSUBR (Fdecode_builtin_char);
2659 DEFSUBR (Fencode_char);
2661 DEFSUBR (Fmake_char);
2662 DEFSUBR (Fchar_charset);
2663 DEFSUBR (Fchar_octet);
2664 DEFSUBR (Fsplit_char);
2666 #ifdef ENABLE_COMPOSITE_CHARS
2667 DEFSUBR (Fmake_composite_char);
2668 DEFSUBR (Fcomposite_char_string);
2671 defsymbol (&Qcharsetp, "charsetp");
2672 defsymbol (&Qregistry, "registry");
2673 defsymbol (&Qfinal, "final");
2674 defsymbol (&Qgraphic, "graphic");
2675 defsymbol (&Qdirection, "direction");
2676 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2677 defsymbol (&Qshort_name, "short-name");
2678 defsymbol (&Qlong_name, "long-name");
2680 defsymbol (&Qmother, "mother");
2681 defsymbol (&Qmin_code, "min-code");
2682 defsymbol (&Qmax_code, "max-code");
2683 defsymbol (&Qcode_offset, "code-offset");
2684 defsymbol (&Qconversion, "conversion");
2685 defsymbol (&Q94x60, "94x60");
2686 defsymbol (&Q94x94x60, "94x94x60");
2687 defsymbol (&Qbig5_1, "big5-1");
2688 defsymbol (&Qbig5_2, "big5-2");
2691 defsymbol (&Ql2r, "l2r");
2692 defsymbol (&Qr2l, "r2l");
2694 /* Charsets, compatible with FSF 20.3
2695 Naming convention is Script-Charset[-Edition] */
2696 defsymbol (&Qascii, "ascii");
2697 defsymbol (&Qcontrol_1, "control-1");
2698 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2699 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2700 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2701 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2702 defsymbol (&Qthai_tis620, "thai-tis620");
2703 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2704 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2705 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2706 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2707 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2708 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2709 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2710 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2711 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2712 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2713 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2714 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2715 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2716 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2717 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2718 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2720 defsymbol (&Qucs, "ucs");
2721 defsymbol (&Qucs_bmp, "ucs-bmp");
2722 defsymbol (&Qucs_smp, "ucs-smp");
2723 defsymbol (&Qucs_sip, "ucs-sip");
2724 defsymbol (&Qucs_gb, "ucs-gb");
2725 defsymbol (&Qucs_cns, "ucs-cns");
2726 defsymbol (&Qucs_jis, "ucs-jis");
2727 defsymbol (&Qucs_ks, "ucs-ks");
2728 defsymbol (&Qlatin_viscii, "latin-viscii");
2729 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2730 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2731 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2732 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2733 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2734 defsymbol (&Qjis_x0208, "=jis-x0208");
2735 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2736 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2737 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2738 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2739 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2740 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2741 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2742 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2743 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2744 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2745 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2746 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2747 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2748 defsymbol (&Qchinese_big5, "chinese-big5");
2749 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2750 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2751 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2752 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2753 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2754 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2755 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2756 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2757 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2758 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2759 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2760 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2761 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2762 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2764 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2765 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2767 defsymbol (&Qcomposite, "composite");
2771 vars_of_mule_charset (void)
2778 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2779 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2781 /* Table of charsets indexed by leading byte. */
2782 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2783 chlook->charset_by_leading_byte[i] = Qnil;
2786 /* Table of charsets indexed by type/final-byte. */
2787 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2788 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2789 chlook->charset_by_attributes[i][j] = Qnil;
2791 /* Table of charsets indexed by type/final-byte/direction. */
2792 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2793 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2794 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2795 chlook->charset_by_attributes[i][j][k] = Qnil;
2799 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2801 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2802 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2806 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2807 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2808 Leading-code of private TYPE9N charset of column-width 1.
2810 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2814 Vdefault_coded_charset_priority_list = Qnil;
2815 DEFVAR_LISP ("default-coded-charset-priority-list",
2816 &Vdefault_coded_charset_priority_list /*
2817 Default order of preferred coded-character-sets.
2823 complex_vars_of_mule_charset (void)
2825 staticpro (&Vcharset_hash_table);
2826 Vcharset_hash_table =
2827 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2829 /* Predefined character sets. We store them into variables for
2833 staticpro (&Vcharset_ucs);
2835 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2836 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2837 build_string ("UCS"),
2838 build_string ("UCS"),
2839 build_string ("ISO/IEC 10646"),
2841 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2842 staticpro (&Vcharset_ucs_bmp);
2844 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2845 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2846 build_string ("BMP"),
2847 build_string ("UCS-BMP"),
2848 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2850 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2851 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2852 staticpro (&Vcharset_ucs_smp);
2854 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2855 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2856 build_string ("SMP"),
2857 build_string ("UCS-SMP"),
2858 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2859 build_string ("UCS00-1"),
2860 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2861 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_ucs_sip);
2864 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2865 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2866 build_string ("SIP"),
2867 build_string ("UCS-SIP"),
2868 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2869 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2870 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2871 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2872 staticpro (&Vcharset_ucs_gb);
2874 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2875 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2876 build_string ("UCS for GB"),
2877 build_string ("UCS for GB"),
2878 build_string ("ISO/IEC 10646 for GB"),
2880 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_ucs_cns);
2883 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2884 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2885 build_string ("UCS for CNS"),
2886 build_string ("UCS for CNS 11643"),
2887 build_string ("ISO/IEC 10646 for CNS 11643"),
2889 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2890 staticpro (&Vcharset_ucs_jis);
2892 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2893 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2894 build_string ("UCS for JIS"),
2895 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2897 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2899 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2900 staticpro (&Vcharset_ucs_ks);
2902 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2903 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2904 build_string ("UCS for KS"),
2905 build_string ("UCS for CCS defined by KS"),
2906 build_string ("ISO/IEC 10646 for Korean Standards"),
2908 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2910 # define MIN_CHAR_THAI 0
2911 # define MAX_CHAR_THAI 0
2912 /* # define MIN_CHAR_HEBREW 0 */
2913 /* # define MAX_CHAR_HEBREW 0 */
2914 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2915 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2917 staticpro (&Vcharset_ascii);
2919 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2920 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2921 build_string ("ASCII"),
2922 build_string ("ASCII)"),
2923 build_string ("ASCII (ISO646 IRV)"),
2924 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2925 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2926 staticpro (&Vcharset_control_1);
2927 Vcharset_control_1 =
2928 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2929 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2930 build_string ("C1"),
2931 build_string ("Control characters"),
2932 build_string ("Control characters 128-191"),
2934 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2935 staticpro (&Vcharset_latin_iso8859_1);
2936 Vcharset_latin_iso8859_1 =
2937 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2938 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2939 build_string ("Latin-1"),
2940 build_string ("ISO8859-1 (Latin-1)"),
2941 build_string ("ISO8859-1 (Latin-1)"),
2942 build_string ("iso8859-1"),
2943 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2944 staticpro (&Vcharset_latin_iso8859_2);
2945 Vcharset_latin_iso8859_2 =
2946 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2947 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2948 build_string ("Latin-2"),
2949 build_string ("ISO8859-2 (Latin-2)"),
2950 build_string ("ISO8859-2 (Latin-2)"),
2951 build_string ("iso8859-2"),
2952 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2953 staticpro (&Vcharset_latin_iso8859_3);
2954 Vcharset_latin_iso8859_3 =
2955 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2956 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2957 build_string ("Latin-3"),
2958 build_string ("ISO8859-3 (Latin-3)"),
2959 build_string ("ISO8859-3 (Latin-3)"),
2960 build_string ("iso8859-3"),
2961 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2962 staticpro (&Vcharset_latin_iso8859_4);
2963 Vcharset_latin_iso8859_4 =
2964 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2965 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2966 build_string ("Latin-4"),
2967 build_string ("ISO8859-4 (Latin-4)"),
2968 build_string ("ISO8859-4 (Latin-4)"),
2969 build_string ("iso8859-4"),
2970 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2971 staticpro (&Vcharset_thai_tis620);
2972 Vcharset_thai_tis620 =
2973 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2974 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2975 build_string ("TIS620"),
2976 build_string ("TIS620 (Thai)"),
2977 build_string ("TIS620.2529 (Thai)"),
2978 build_string ("tis620"),
2979 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2980 staticpro (&Vcharset_greek_iso8859_7);
2981 Vcharset_greek_iso8859_7 =
2982 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2983 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2984 build_string ("ISO8859-7"),
2985 build_string ("ISO8859-7 (Greek)"),
2986 build_string ("ISO8859-7 (Greek)"),
2987 build_string ("iso8859-7"),
2988 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2989 staticpro (&Vcharset_arabic_iso8859_6);
2990 Vcharset_arabic_iso8859_6 =
2991 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2992 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2993 build_string ("ISO8859-6"),
2994 build_string ("ISO8859-6 (Arabic)"),
2995 build_string ("ISO8859-6 (Arabic)"),
2996 build_string ("iso8859-6"),
2997 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2998 staticpro (&Vcharset_hebrew_iso8859_8);
2999 Vcharset_hebrew_iso8859_8 =
3000 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3001 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3002 build_string ("ISO8859-8"),
3003 build_string ("ISO8859-8 (Hebrew)"),
3004 build_string ("ISO8859-8 (Hebrew)"),
3005 build_string ("iso8859-8"),
3007 0 /* MIN_CHAR_HEBREW */,
3008 0 /* MAX_CHAR_HEBREW */, 0, 32,
3009 Qnil, CONVERSION_IDENTICAL);
3010 staticpro (&Vcharset_katakana_jisx0201);
3011 Vcharset_katakana_jisx0201 =
3012 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3013 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3014 build_string ("JISX0201 Kana"),
3015 build_string ("JISX0201.1976 (Japanese Kana)"),
3016 build_string ("JISX0201.1976 Japanese Kana"),
3017 build_string ("jisx0201\\.1976"),
3018 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3019 staticpro (&Vcharset_latin_jisx0201);
3020 Vcharset_latin_jisx0201 =
3021 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3022 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3023 build_string ("JISX0201 Roman"),
3024 build_string ("JISX0201.1976 (Japanese Roman)"),
3025 build_string ("JISX0201.1976 Japanese Roman"),
3026 build_string ("jisx0201\\.1976"),
3027 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3028 staticpro (&Vcharset_cyrillic_iso8859_5);
3029 Vcharset_cyrillic_iso8859_5 =
3030 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3031 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3032 build_string ("ISO8859-5"),
3033 build_string ("ISO8859-5 (Cyrillic)"),
3034 build_string ("ISO8859-5 (Cyrillic)"),
3035 build_string ("iso8859-5"),
3036 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3037 staticpro (&Vcharset_latin_iso8859_9);
3038 Vcharset_latin_iso8859_9 =
3039 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3040 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3041 build_string ("Latin-5"),
3042 build_string ("ISO8859-9 (Latin-5)"),
3043 build_string ("ISO8859-9 (Latin-5)"),
3044 build_string ("iso8859-9"),
3045 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3047 staticpro (&Vcharset_jis_x0208);
3048 Vcharset_jis_x0208 =
3049 make_charset (LEADING_BYTE_JIS_X0208,
3051 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3052 build_string ("JIS X0208"),
3053 build_string ("JIS X0208 Common"),
3054 build_string ("JIS X0208 Common part"),
3055 build_string ("jisx0208\\.1990"),
3057 MIN_CHAR_JIS_X0208_1990,
3058 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3059 Qnil, CONVERSION_94x94);
3061 staticpro (&Vcharset_japanese_jisx0208_1978);
3062 Vcharset_japanese_jisx0208_1978 =
3063 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3064 Qjapanese_jisx0208_1978, 94, 2,
3065 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3066 build_string ("JIS X0208:1978"),
3067 build_string ("JIS X0208:1978 (Japanese)"),
3069 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3070 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3077 CONVERSION_IDENTICAL);
3078 staticpro (&Vcharset_chinese_gb2312);
3079 Vcharset_chinese_gb2312 =
3080 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3081 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3082 build_string ("GB2312"),
3083 build_string ("GB2312)"),
3084 build_string ("GB2312 Chinese simplified"),
3085 build_string ("gb2312"),
3086 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3087 staticpro (&Vcharset_chinese_gb12345);
3088 Vcharset_chinese_gb12345 =
3089 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3090 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3091 build_string ("G1"),
3092 build_string ("GB 12345)"),
3093 build_string ("GB 12345-1990"),
3094 build_string ("GB12345\\(\\.1990\\)?-0"),
3095 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3096 staticpro (&Vcharset_japanese_jisx0208);
3097 Vcharset_japanese_jisx0208 =
3098 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3099 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("JISX0208"),
3101 build_string ("JIS X0208:1983 (Japanese)"),
3102 build_string ("JIS X0208:1983 Japanese Kanji"),
3103 build_string ("jisx0208\\.1983"),
3110 CONVERSION_IDENTICAL);
3112 staticpro (&Vcharset_japanese_jisx0208_1990);
3113 Vcharset_japanese_jisx0208_1990 =
3114 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3115 Qjapanese_jisx0208_1990, 94, 2,
3116 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3117 build_string ("JISX0208-1990"),
3118 build_string ("JIS X0208:1990 (Japanese)"),
3119 build_string ("JIS X0208:1990 Japanese Kanji"),
3120 build_string ("jisx0208\\.1990"),
3122 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3123 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3124 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3125 Vcharset_jis_x0208 /* Qnil */,
3126 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
3128 staticpro (&Vcharset_korean_ksc5601);
3129 Vcharset_korean_ksc5601 =
3130 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3131 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3132 build_string ("KSC5601"),
3133 build_string ("KSC5601 (Korean"),
3134 build_string ("KSC5601 Korean Hangul and Hanja"),
3135 build_string ("ksc5601"),
3136 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3137 staticpro (&Vcharset_japanese_jisx0212);
3138 Vcharset_japanese_jisx0212 =
3139 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3140 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3141 build_string ("JISX0212"),
3142 build_string ("JISX0212 (Japanese)"),
3143 build_string ("JISX0212 Japanese Supplement"),
3144 build_string ("jisx0212"),
3145 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3147 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3148 staticpro (&Vcharset_chinese_cns11643_1);
3149 Vcharset_chinese_cns11643_1 =
3150 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3151 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3152 build_string ("CNS11643-1"),
3153 build_string ("CNS11643-1 (Chinese traditional)"),
3155 ("CNS 11643 Plane 1 Chinese traditional"),
3156 build_string (CHINESE_CNS_PLANE_RE("1")),
3157 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3158 staticpro (&Vcharset_chinese_cns11643_2);
3159 Vcharset_chinese_cns11643_2 =
3160 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3161 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3162 build_string ("CNS11643-2"),
3163 build_string ("CNS11643-2 (Chinese traditional)"),
3165 ("CNS 11643 Plane 2 Chinese traditional"),
3166 build_string (CHINESE_CNS_PLANE_RE("2")),
3167 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3169 staticpro (&Vcharset_latin_tcvn5712);
3170 Vcharset_latin_tcvn5712 =
3171 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3172 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3173 build_string ("TCVN 5712"),
3174 build_string ("TCVN 5712 (VSCII-2)"),
3175 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3176 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3177 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3178 staticpro (&Vcharset_latin_viscii_lower);
3179 Vcharset_latin_viscii_lower =
3180 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3181 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3182 build_string ("VISCII lower"),
3183 build_string ("VISCII lower (Vietnamese)"),
3184 build_string ("VISCII lower (Vietnamese)"),
3185 build_string ("MULEVISCII-LOWER"),
3186 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3187 staticpro (&Vcharset_latin_viscii_upper);
3188 Vcharset_latin_viscii_upper =
3189 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3190 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3191 build_string ("VISCII upper"),
3192 build_string ("VISCII upper (Vietnamese)"),
3193 build_string ("VISCII upper (Vietnamese)"),
3194 build_string ("MULEVISCII-UPPER"),
3195 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3196 staticpro (&Vcharset_latin_viscii);
3197 Vcharset_latin_viscii =
3198 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3199 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3200 build_string ("VISCII"),
3201 build_string ("VISCII 1.1 (Vietnamese)"),
3202 build_string ("VISCII 1.1 (Vietnamese)"),
3203 build_string ("VISCII1\\.1"),
3204 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3205 staticpro (&Vcharset_chinese_big5);
3206 Vcharset_chinese_big5 =
3207 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3208 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3209 build_string ("Big5"),
3210 build_string ("Big5"),
3211 build_string ("Big5 Chinese traditional"),
3212 build_string ("big5-0"),
3214 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3215 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3217 staticpro (&Vcharset_chinese_big5_cdp);
3218 Vcharset_chinese_big5_cdp =
3219 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3220 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3221 build_string ("Big5-CDP"),
3222 build_string ("Big5 + CDP extension"),
3223 build_string ("Big5 with CDP extension"),
3224 build_string ("big5\\.cdp-0"),
3225 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3226 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3228 #define DEF_HANZIKU(n) \
3229 staticpro (&Vcharset_ideograph_hanziku_##n); \
3230 Vcharset_ideograph_hanziku_##n = \
3231 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3232 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3233 build_string ("HZK-"#n), \
3234 build_string ("HANZIKU-"#n), \
3235 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3237 ("hanziku-"#n"$"), \
3238 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3239 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3252 #define DEF_GT_PJ(n) \
3253 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3254 Vcharset_ideograph_gt_pj_##n = \
3255 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3256 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3257 build_string ("GT-PJ-"#n), \
3258 build_string ("GT (pseudo JIS encoding) part "#n), \
3259 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3261 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3262 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3275 staticpro (&Vcharset_ideograph_daikanwa_2);
3276 Vcharset_ideograph_daikanwa_2 =
3277 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3278 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Daikanwa Rev."),
3280 build_string ("Morohashi's Daikanwa Rev."),
3282 ("Daikanwa dictionary (revised version)"),
3283 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3284 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3285 staticpro (&Vcharset_ideograph_daikanwa);
3286 Vcharset_ideograph_daikanwa =
3287 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3288 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("Daikanwa"),
3290 build_string ("Morohashi's Daikanwa Rev.2"),
3292 ("Daikanwa dictionary (second revised version)"),
3293 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3294 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3295 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3297 staticpro (&Vcharset_ethiopic_ucs);
3298 Vcharset_ethiopic_ucs =
3299 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3300 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3301 build_string ("Ethiopic (UCS)"),
3302 build_string ("Ethiopic (UCS)"),
3303 build_string ("Ethiopic of UCS"),
3304 build_string ("Ethiopic-Unicode"),
3305 Qnil, 0x1200, 0x137F, 0, 0,
3306 Qnil, CONVERSION_IDENTICAL);
3308 staticpro (&Vcharset_chinese_big5_1);
3309 Vcharset_chinese_big5_1 =
3310 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3311 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3312 build_string ("Big5"),
3313 build_string ("Big5 (Level-1)"),
3315 ("Big5 Level-1 Chinese traditional"),
3316 build_string ("big5"),
3317 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3318 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3319 staticpro (&Vcharset_chinese_big5_2);
3320 Vcharset_chinese_big5_2 =
3321 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3322 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3323 build_string ("Big5"),
3324 build_string ("Big5 (Level-2)"),
3326 ("Big5 Level-2 Chinese traditional"),
3327 build_string ("big5"),
3328 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3329 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3331 #ifdef ENABLE_COMPOSITE_CHARS
3332 /* #### For simplicity, we put composite chars into a 96x96 charset.
3333 This is going to lead to problems because you can run out of
3334 room, esp. as we don't yet recycle numbers. */
3335 staticpro (&Vcharset_composite);
3336 Vcharset_composite =
3337 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3338 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Composite"),
3340 build_string ("Composite characters"),
3341 build_string ("Composite characters"),
3344 /* #### not dumped properly */
3345 composite_char_row_next = 32;
3346 composite_char_col_next = 32;
3348 Vcomposite_char_string2char_hash_table =
3349 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3350 Vcomposite_char_char2string_hash_table =
3351 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3352 staticpro (&Vcomposite_char_string2char_hash_table);
3353 staticpro (&Vcomposite_char_char2string_hash_table);
3354 #endif /* ENABLE_COMPOSITE_CHARS */