1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 /* Lisp_Object Vcharset_chinese_big5_cdp; */
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the other way. */
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* NOTE(review): the array is declared with 0xA0 elements, but the rows
   visible below supply 0xB0 values: 0x80 entries for ASCII, two
   16-entry rows after the "0x80 - 0x8f" comment, and one 16-entry row
   for 0x90 - 0x9f.  Presumably the two 0x80 - 0x8f rows are
   alternatives selected by preprocessor conditionals that are not
   visible in this listing -- confirm against the full file. */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* First 0x80 - 0x8f row: identical to the next row except that the
   last entry (first byte 0x8f) is 3 instead of 2. */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
/* Second 0x80 - 0x8f row: every first byte maps to 2 bytes. */
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Check the elements of the decoding-table vector V of a coded charset.
   Only fragments of this function are visible in this listing: it
   compares the length of V against CCS_LEN, visits every element of V,
   tests each element with NILP/CHARP, and calls itself with DIM - 1 on
   an element.  The return values and the condition guarding the
   recursion are on lines not shown here. */
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is singled out (result not visible). */
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Element is neither nil nor a character. */
192 if (!NILP (c) && !CHARP (c))
/* Check element C one dimension further down. */
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record that CHARACTER has code point VALUE in coded charset CCS.
   From the lines visible here:
   - VALUE may be an integer code point, or (obsolete form) a list of
     bytes that is folded into one integer, 8 bits per byte;
   - any other VALUE signals "Invalid value for coded-charset";
   - the code point previously recorded for CHARACTER (fetched with
     Fget_char_attribute) is passed to decoding_table_remove_char, and
     the new one is installed with decoding_table_put_char.
   The return value and several guarding conditions are on lines not
   shown in this listing. */
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
/* Condition involves CCS not being named `ucs', or CHARACTER differing
   from VALUE; one operand of the condition is not visible. */
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J to the accumulated code point. */
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the top bit of each of the four bytes; this appears to be
   applied when the charset's graphic is 1 -- the enclosing braces are
   not visible. */
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
/* Look up the code point currently recorded for CHARACTER in CCS. */
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and coded charset CCS.
   Visible steps: when the decoding table of CCS is a vector, the code
   point recorded for CHARACTER is passed to decoding_table_remove_char;
   when the encoding table is a char table, the entry for CHARACTER is
   set to Qunbound.  The return value is on a line not shown here. */
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
350 Qvietnamese_viscii_lower,
351 Qvietnamese_viscii_upper,
353 /* Qchinese_big5_cdp, */
354 Qideograph_hanziku_1,
355 Qideograph_hanziku_2,
356 Qideograph_hanziku_3,
357 Qideograph_hanziku_4,
358 Qideograph_hanziku_5,
359 Qideograph_hanziku_6,
360 Qideograph_hanziku_7,
361 Qideograph_hanziku_8,
362 Qideograph_hanziku_9,
363 Qideograph_hanziku_10,
364 Qideograph_hanziku_11,
365 Qideograph_hanziku_12,
368 Qideograph_daikanwa_2,
388 Lisp_Object Ql2r, Qr2l;
390 Lisp_Object Vcharset_hash_table;
392 /* Composite characters are characters constructed by overstriking two
393 or more regular characters.
395 1) The old Mule implementation involves storing composite characters
396 in a buffer as a tag followed by all of the actual characters
397 used to make up the composite character. I think this is a bad
398 idea; it greatly complicates code that wants to handle strings
399 one character at a time because it has to deal with the possibility
400 of great big ungainly characters. It's much more reasonable to
401 simply store an index into a table of composite characters.
403 2) The current implementation only allows for 16,384 separate
404 composite characters over the lifetime of the XEmacs process.
405 This could become a potential problem if the user
406 edited lots of different files that use composite characters.
407 Due to FSF bogosity, increasing the number of allowable
408 composite characters under Mule would decrease the number
409 of possible faces that can exist. Mule already has shrunk
410 this to 2048, and further shrinkage would become uncomfortable.
411 No such problems exist in XEmacs.
413 Composite characters could be represented as 0x80 C1 C2 C3,
414 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
415 for slightly under 2^20 (one million) composite characters
416 over the XEmacs process lifetime, and you only need to
417 increase the size of a Mule character from 19 to 21 bits.
418 Or you could use 0x80 C1 C2 C3 C4, allowing for about
419 85 million (slightly over 2^26) composite characters. */
422 /************************************************************************/
423 /* Basic Emchar functions */
424 /************************************************************************/
426 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
427 string in STR. Returns the number of bytes stored.
428 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Store character C at STR as a multi-byte sequence.
   Two encoders are visible below:
   - a UTF-8-style encoder that picks a 2-, 3-, 4-, 5- or 6-byte form
     from the magnitude of C, writing a lead byte (0xc0, 0xe0, 0xf0,
     0xf8 or 0xfc plus the top bits) followed by continuation bytes
     carrying 6 bits each, OR'ed with 0x80;
   - a leading-byte encoder built on BREAKUP_CHAR and CHAR_LEADING_BYTE.
   NOTE(review): presumably these are compile-time alternatives; the
   lines selecting between them, and the return statement, are not
   visible in this listing. */
432 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Up to 11 bits: two bytes. */
447 else if ( c <= 0x7ff )
449 *p++ = (c >> 6) | 0xc0;
450 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: three bytes. */
452 else if ( c <= 0xffff )
454 *p++ = (c >> 12) | 0xe0;
455 *p++ = ((c >> 6) & 0x3f) | 0x80;
456 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: four bytes. */
458 else if ( c <= 0x1fffff )
460 *p++ = (c >> 18) | 0xf0;
461 *p++ = ((c >> 12) & 0x3f) | 0x80;
462 *p++ = ((c >> 6) & 0x3f) | 0x80;
463 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: five bytes. */
465 else if ( c <= 0x3ffffff )
467 *p++ = (c >> 24) | 0xf8;
468 *p++ = ((c >> 18) & 0x3f) | 0x80;
469 *p++ = ((c >> 12) & 0x3f) | 0x80;
470 *p++ = ((c >> 6) & 0x3f) | 0x80;
471 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form (the guarding `else' line is not visible). */
475 *p++ = (c >> 30) | 0xfc;
476 *p++ = ((c >> 24) & 0x3f) | 0x80;
477 *p++ = ((c >> 18) & 0x3f) | 0x80;
478 *p++ = ((c >> 12) & 0x3f) | 0x80;
479 *p++ = ((c >> 6) & 0x3f) | 0x80;
480 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into charset and position codes. */
483 BREAKUP_CHAR (c, charset, c1, c2);
484 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by a prefix byte. */
485 if (LEADING_BYTE_PRIVATE_P (lb))
486 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
488 if (EQ (charset, Vcharset_control_1))
497 /* Return the first character from a Mule-encoded string in STR,
498 assuming it's non-ASCII. Do not call this directly.
499 Use the macro charptr_emchar() instead. */
/* Decode the first character stored at STR.
   Two decoders are visible below:
   - one that classifies the first byte B against the thresholds 0xf8,
     0xf0, 0xe0 and 0xc0 and then folds LEN following bytes into CH,
     6 bits per byte;
   - one that handles LEADING_BYTE_CONTROL_1 directly and otherwise
     looks the charset up by leading byte and builds the result with
     MAKE_CHAR.
   NOTE(review): presumably compile-time alternatives; the selecting
   lines and the per-branch assignments are not visible here. */
502 non_ascii_charptr_emchar (const Bufbyte *str)
515 else if ( b >= 0xf8 )
520 else if ( b >= 0xf0 )
525 else if ( b >= 0xe0 )
530 else if ( b >= 0xc0 )
/* Consume LEN further bytes, appending the low 6 bits of each. */
540 for( ; len > 0; len-- )
543 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte decoder; I2 stays 0 unless assigned on a line not shown. */
547 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the character is the following byte minus 0x20. */
550 if (i0 == LEADING_BYTE_CONTROL_1)
551 return (Emchar) (*++str - 0x20);
553 if (LEADING_BYTE_PREFIX_P (i0))
558 charset = CHARSET_BY_LEADING_BYTE (i0);
559 if (XCHARSET_DIMENSION (charset) == 2)
562 return MAKE_CHAR (charset, i1, i2);
566 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
567 Do not call this directly. Use the macro valid_char_p() instead. */
571 non_ascii_valid_char_p (Emchar ch)
575 /* Must have only lowest 19 bits set */
579 f1 = CHAR_FIELD1 (ch);
580 f2 = CHAR_FIELD2 (ch);
581 f3 = CHAR_FIELD3 (ch);
587 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
588 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
589 f2 > MAX_CHAR_FIELD2_PRIVATE)
594 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
595 f2 <= MAX_CHAR_FIELD2_PRIVATE))
599 /* NOTE: This takes advantage of the fact that
600 FIELD2_TO_OFFICIAL_LEADING_BYTE and
601 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
603 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
604 if (EQ (charset, Qnil))
606 return (XCHARSET_CHARS (charset) == 96);
612 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
613 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
614 f1 > MAX_CHAR_FIELD1_PRIVATE)
616 if (f2 < 0x20 || f3 < 0x20)
619 #ifdef ENABLE_COMPOSITE_CHARS
620 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
622 if (UNBOUNDP (Fgethash (make_int (ch),
623 Vcomposite_char_char2string_hash_table,
628 #endif /* ENABLE_COMPOSITE_CHARS */
630 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
631 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
634 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
636 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
639 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
641 if (EQ (charset, Qnil))
643 return (XCHARSET_CHARS (charset) == 96);
649 /************************************************************************/
650 /* Basic string functions */
651 /************************************************************************/
653 /* Copy the character pointed to by SRC into DST. Do not call this
654 directly. Use the macro charptr_copy_char() instead.
655 Return the number of bytes copied. */
/* Copy one character from SRC to DST.  The byte count comes from
   REP_BYTES_BY_FIRST_BYTE applied to the first byte of SRC; the loop
   advances both pointers once per byte.  The statement performing the
   copy and the return statement are on lines not shown here. */
658 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
660 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
662 for (i = bytes; i; i--, dst++, src++)
668 /************************************************************************/
669 /* streams of Emchars */
670 /************************************************************************/
672 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
673 The functions below are not meant to be called directly; use
674 the macros in insdel.h. */
/* Finish reading one character from STREAM whose first byte CH has
   already been read.  CH is stored as the first byte of a local buffer,
   the remaining REP_BYTES_BY_FIRST_BYTE (CH) - 1 bytes are fetched with
   Lstream_getc, and the assembled bytes are decoded with
   charptr_emchar.  */
677 Lstream_get_emchar_1 (Lstream *stream, int ch)
679 Bufbyte str[MAX_EMCHAR_LEN];
680 Bufbyte *strptr = str;
683 str[0] = (Bufbyte) ch;
685 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
687 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc is only caught by this assertion. */
688 bufpos_checking_assert (c >= 0);
689 *++strptr = (Bufbyte) c;
691 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar and pass the resulting bytes to Lstream_write,
   whose result is returned. */
695 Lstream_fput_emchar (Lstream *stream, Emchar ch)
697 Bufbyte str[MAX_EMCHAR_LEN];
698 Bytecount len = set_charptr_emchar (str, ch);
699 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar and hand the bytes to Lstream_unread. */
703 Lstream_funget_emchar (Lstream *stream, Emchar ch)
705 Bufbyte str[MAX_EMCHAR_LEN];
706 Bytecount len = set_charptr_emchar (str, ch);
707 Lstream_unread (stream, str, len);
711 /************************************************************************/
/* charset object */
713 /************************************************************************/
/* Mark method for charset objects: marks the Lisp_Object fields of the
   charset OBJ listed below.
   NOTE(review): `name' and `reverse_direction_charset' appear in
   charset_description but are not among the mark_object calls visible
   here; they may be handled (or returned) on lines not shown in this
   listing -- confirm against the full file. */
716 mark_charset (Lisp_Object obj)
718 Lisp_Charset *cs = XCHARSET (obj);
720 mark_object (cs->short_name);
721 mark_object (cs->long_name);
722 mark_object (cs->doc_string);
723 mark_object (cs->registry);
724 mark_object (cs->ccl_program);
726 mark_object (cs->decoding_table);
727 mark_object (cs->mother);
/* Print method for charset objects.  Writes to PRINTCHARFUN a
   description of the form
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   The error call below reports the object as unreadable; the condition
   guarding it is not visible in this listing.
   NOTE(review): the text is formatted with sprintf into BUF, whose
   declaration (and size) is not visible here; the first format string
   has six conversions but only four arguments are visible, the others
   presumably being on lines not shown. */
733 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
735 Lisp_Charset *cs = XCHARSET (obj);
739 error ("printing unreadable object #<charset %s 0x%x>",
740 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
743 write_c_string ("#<charset ", printcharfun);
744 print_internal (CHARSET_NAME (cs), printcharfun, 0);
745 write_c_string (" ", printcharfun);
746 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
747 write_c_string (" ", printcharfun);
748 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
749 write_c_string (" ", printcharfun);
750 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
751 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
753 CHARSET_DIMENSION (cs),
754 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
755 CHARSET_COLUMNS (cs),
756 CHARSET_GRAPHIC (cs),
758 write_c_string (buf, printcharfun);
759 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the description with the object's uid. */
760 sprintf (buf, " 0x%x>", cs->header.uid);
761 write_c_string (buf, printcharfun);
/* Table of the Lisp_Object-valued fields of Lisp_Charset, one entry per
   field, each giving the field's offset within the structure.  The
   terminating entry and closing brace are not visible in this listing. */
764 static const struct lrecord_description charset_description[] = {
765 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
773 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
779 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
780 mark_charset, print_charset, 0, 0, 0,
784 /* Make a new charset. */
785 /* #### SJT Should generic properties be allowed? */
787 make_charset (Charset_ID id, Lisp_Object name,
788 unsigned short chars, unsigned char dimension,
789 unsigned char columns, unsigned char graphic,
790 Bufbyte final, unsigned char direction, Lisp_Object short_name,
791 Lisp_Object long_name, Lisp_Object doc,
793 Lisp_Object decoding_table,
794 Emchar min_code, Emchar max_code,
795 Emchar code_offset, unsigned char byte_offset,
796 Lisp_Object mother, unsigned char conversion)
799 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
803 XSETCHARSET (obj, cs);
805 CHARSET_ID (cs) = id;
806 CHARSET_NAME (cs) = name;
807 CHARSET_SHORT_NAME (cs) = short_name;
808 CHARSET_LONG_NAME (cs) = long_name;
809 CHARSET_CHARS (cs) = chars;
810 CHARSET_DIMENSION (cs) = dimension;
811 CHARSET_DIRECTION (cs) = direction;
812 CHARSET_COLUMNS (cs) = columns;
813 CHARSET_GRAPHIC (cs) = graphic;
814 CHARSET_FINAL (cs) = final;
815 CHARSET_DOC_STRING (cs) = doc;
816 CHARSET_REGISTRY (cs) = reg;
817 CHARSET_CCL_PROGRAM (cs) = Qnil;
818 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
820 CHARSET_DECODING_TABLE(cs) = Qunbound;
821 CHARSET_MIN_CODE (cs) = min_code;
822 CHARSET_MAX_CODE (cs) = max_code;
823 CHARSET_CODE_OFFSET (cs) = code_offset;
824 CHARSET_BYTE_OFFSET (cs) = byte_offset;
825 CHARSET_MOTHER (cs) = mother;
826 CHARSET_CONVERSION (cs) = conversion;
830 if (id == LEADING_BYTE_ASCII)
831 CHARSET_REP_BYTES (cs) = 1;
833 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
835 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
840 /* Some charsets do not have final characters. This includes
841 ASCII, Control-1, Composite, and the two faux private charsets. */
843 unsigned char iso2022_type
844 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
846 if (code_offset == 0)
848 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
849 chlook->charset_by_attributes[iso2022_type][final] = obj;
853 (chlook->charset_by_attributes[iso2022_type][final][direction]));
854 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
858 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
859 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
861 /* Some charsets are "faux" and don't have names or really exist at
862 all except in the leading-byte table. */
864 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION.  Three counters in `chlook' are visible below, each
   checked against its own upper limit before being post-incremented
   into LB; which counter applies to which dimension or build
   configuration is decided on lines not shown in this listing.  The
   error "No more character sets free for this dimension" is raised
   with DIMENSION as its datum; the guarding condition and the return
   statement are not visible. */
869 get_unallocated_leading_byte (int dimension)
874 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
877 lb = chlook->next_allocated_leading_byte++;
881 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
884 lb = chlook->next_allocated_1_byte_leading_byte++;
888 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
891 lb = chlook->next_allocated_2_byte_leading_byte++;
897 ("No more character sets free for this dimension",
898 make_int (dimension));
904 /* Number of Big5 characters which have the same code in 1st byte.
   The value is (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157, i.e. the
   number of second bytes in the two ranges 0xA1..0xFE and 0x40..0x7E
   that decode_builtin_char accepts for Big5. */
906 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character defined for CODE_POINT in coded charset CCS.
   From the lines visible here:
   - the decoding table of CCS is descended one octet of CODE_POINT at a
     time with get_ccs_octet_table; if a character is found, it is
     returned;
   - if the table entry is unloaded, unbound or nil, the entry is
     fetched with load_char_decoding_entry_maybe;
   - otherwise, when CCS has a mother charset, the lookup is delegated
     to the mother, after converting the code point for the BIG5_1 and
     BIG5_2 conversions.
   The loop header, the fall-through return value and several braces
   are on lines not shown in this listing. */
909 decode_defined_char (Lisp_Object ccs, int code_point)
911 int dim = XCHARSET_DIMENSION (ccs);
912 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Select the sub-table (or entry) for the octet of CODE_POINT at
   position DIM. */
920 = get_ccs_octet_table (decoding_table, ccs,
921 (code_point >> (dim * 8)) & 255);
923 if (CHARP (decoding_table))
924 return XCHAR (decoding_table);
926 if (EQ (decoding_table, Qunloaded) ||
927 EQ (decoding_table, Qunbound) ||
928 NILP (decoding_table) )
930 char_id = load_char_decoding_entry_maybe (ccs, code_point);
935 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
937 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
/* A `ucs' mother is decoded with DECODE_CHAR rather than by recursion. */
939 if ( EQ (mother, Vcharset_ucs) )
940 return DECODE_CHAR (mother, code_point);
942 return decode_defined_char (mother, code_point);
944 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
/* Linear index of the 94x94 code point (each byte masked to 7 bits,
   origin 33), then split into Big5 bytes: B1 counts rows of 157 from
   0xA1; B2 is shifted by 0x40 when below 0x3F, else by 0x62. */
947 = (((code_point >> 8) & 0x7F) - 33) * 94
948 + (( code_point & 0x7F) - 33);
949 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
950 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
952 b2 += b2 < 0x3F ? 0x40 : 0x62;
953 return decode_defined_char (mother, (b1 << 8) | b2);
955 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
/* Same as BIG5_1, with the index offset by the rows 0xA1..0xC8. */
958 = (((code_point >> 8) & 0x7F) - 33) * 94
959 + (( code_point & 0x7F) - 33)
960 + BIG5_SAME_ROW * (0xC9 - 0xA1);
961 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
962 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
964 b2 += b2 < 0x3F ? 0x40 : 0x62;
965 return decode_defined_char (mother, (b1 << 8) | b2);
/* Compute the character for CODE_POINT in CHARSET from the charset's
   built-in layout parameters.  Cases visible below:
   1. CHARSET has a mother and a positive max code: CODE_POINT is
      converted according to the charset's conversion type (94x60,
      94x94x60, BIG5_1, BIG5_2), the charset's code offset is added, and
      the result is decoded through the mother.
   2. CHARSET is chinese-big5: a valid two-byte code is remapped onto
      chinese-big5-1 or chinese-big5-2 with a 94x94 code point.
   3. The charset's final byte is >= '0': the character is computed from
      a MIN_CHAR_* base, the final byte, and the 7-bit position codes.
   4. The charset has a max code: the character id is computed from the
      byte offset, chars-per-dimension and code offset, then range
      checked against the min and max code.
   Return values for the failing paths, several assignments and most
   braces are on lines not shown in this listing. */
972 decode_builtin_char (Lisp_Object charset, int code_point)
974 Lisp_Object mother = XCHARSET_MOTHER (charset);
977 if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
979 int code = code_point;
981 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
983 int row = code_point >> 8;
984 int cell = code_point & 255;
/* Rows are accepted in two bands of 30, starting at rows 16 + 32 and
   18 + 32; the handling of the other rows is not visible. */
988 else if (row < 16 + 32 + 30)
989 code = (row - (16 + 32)) * 94 + cell - 33;
990 else if (row < 18 + 32 + 30)
992 else if (row < 18 + 32 + 60)
993 code = (row - (18 + 32)) * 94 + cell - 33;
995 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
997 int plane = code_point >> 16;
998 int row = (code_point >> 8) & 255;
999 int cell = code_point & 255;
1003 else if (row < 16 + 32 + 30)
1005 = (plane - 33) * 94 * 60
1006 + (row - (16 + 32)) * 94
1008 else if (row < 18 + 32 + 30)
1010 else if (row < 18 + 32 + 60)
1012 = (plane - 33) * 94 * 60
1013 + (row - (18 + 32)) * 94
1016 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* 94x94 code point -> linear index -> Big5 byte pair (B1, B2). */
1019 = (((code_point >> 8) & 0x7F) - 33) * 94
1020 + (( code_point & 0x7F) - 33);
1021 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1022 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1024 b2 += b2 < 0x3F ? 0x40 : 0x62;
1025 code = (b1 << 8) | b2;
1027 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
/* As BIG5_1, with the index offset by the rows 0xA1..0xC8. */
1030 = (((code_point >> 8) & 0x7F) - 33) * 94
1031 + (( code_point & 0x7F) - 33)
1032 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1033 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1034 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1036 b2 += b2 < 0x3F ? 0x40 : 0x62;
1037 code = (b1 << 8) | b2;
/* Decode the converted code through the mother charset. */
1040 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
1043 else if (EQ (charset, Vcharset_chinese_big5))
1045 int c1 = code_point >> 8;
1046 int c2 = code_point & 0xFF;
/* First byte in 0xA1..0xFE, second byte in 0x40..0x7E or 0xA1..0xFE. */
1049 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1051 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1052 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Linear index of the Big5 code. */
1054 I = (c1 - 0xA1) * BIG5_SAME_ROW
1055 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1059 charset = Vcharset_chinese_big5_1;
1063 charset = Vcharset_chinese_big5_2;
1064 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express the index as a 94x94 code point with origin 33. */
1066 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1070 if ((final = XCHARSET_FINAL (charset)) >= '0')
1072 if (XCHARSET_DIMENSION (charset) == 1)
1074 switch (XCHARSET_CHARS (charset))
1078 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1081 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1089 switch (XCHARSET_CHARS (charset))
1092 return MIN_CHAR_94x94
1093 + (final - '0') * 94 * 94
1094 + (((code_point >> 8) & 0x7F) - 33) * 94
1095 + ((code_point & 0x7F) - 33);
1097 return MIN_CHAR_96x96
1098 + (final - '0') * 96 * 96
1099 + (((code_point >> 8) & 0x7F) - 32) * 96
1100 + ((code_point & 0x7F) - 32);
1107 else if (XCHARSET_MAX_CODE (charset))
1110 = (XCHARSET_DIMENSION (charset) == 1
1112 code_point - XCHARSET_BYTE_OFFSET (charset)
1114 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1115 * XCHARSET_CHARS (charset)
1116 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1117 + XCHARSET_CODE_OFFSET (charset);
/* Range check of the computed id against the charset's min/max code. */
1118 if ((cid < XCHARSET_MIN_CODE (charset))
1119 || (XCHARSET_MAX_CODE (charset) < cid))
/* Compute the code point of character CH in CHARSET.
   From the lines visible here:
   - if the charset's encoding table is a char table holding an integer
     for CH, that entry is used;
   - otherwise a code is obtained (through the mother charset when there
     is one) and, if it is acceptable for this charset, the charset's
     code offset is subtracted and the difference D is laid out
     according to the charset's conversion type;
   - an unrecognised conversion type is reported with printf;
   - when the code offset is 0 or equals the min code, the code point is
     derived from CH relative to a MIN_CHAR_* base that depends on the
     charset's dimension, chars and final byte.
   The failure return value and many intermediate lines are not visible
   in this listing. */
1128 charset_code_point (Lisp_Object charset, Emchar ch)
1130 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1133 if ( CHAR_TABLEP (encoding_table)
1134 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1139 Lisp_Object mother = XCHARSET_MOTHER (charset);
1140 int min = XCHARSET_MIN_CODE (charset);
1141 int max = XCHARSET_MAX_CODE (charset);
1144 if ( CHARSETP (mother) )
1145 code = charset_code_point (mother, ch);
/* Accept the code if the charset has a mother and no max code, or if
   the code lies within [min, max]. */
1148 if ( ((max == 0) && CHARSETP (mother)) ||
1149 ((min <= code) && (code <= max)) )
1151 int d = code - XCHARSET_CODE_OFFSET (charset);
1153 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1155 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1157 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1159 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1162 int cell = d % 94 + 33;
1168 return (row << 8) | cell;
1170 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
/* Big5 byte pair -> linear index -> 94x94 code point (origin 33). */
1172 int B1 = d >> 8, B2 = d & 0xFF;
1174 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1175 - (B2 < 0x7F ? 0x40 : 0x62);
1179 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1182 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1184 int B1 = d >> 8, B2 = d & 0xFF;
1186 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1187 - (B2 < 0x7F ? 0x40 : 0x62);
/* Drop the rows 0xA1..0xC8 so the index is relative to row 0xC9. */
1191 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1192 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1195 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1196 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1197 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1198 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1199 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1201 int plane = d / (94 * 60) + 33;
1202 int row = (d % (94 * 60)) / 94;
1203 int cell = d % 94 + 33;
1209 return (plane << 16) | (row << 8) | cell;
1211 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1213 ( (d / (94 * 94) + 33) << 16)
1214 | ((d / 94 % 94 + 33) << 8)
1216 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1218 ( (d / (96 * 96) + 32) << 16)
1219 | ((d / 96 % 96 + 32) << 8)
1221 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1223 ( (d / (94 * 94 * 94) + 33) << 24)
1224 | ((d / (94 * 94) % 94 + 33) << 16)
1225 | ((d / 94 % 94 + 33) << 8)
1227 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1229 ( (d / (96 * 96 * 96) + 32) << 24)
1230 | ((d / (96 * 96) % 96 + 32) << 16)
1231 | ((d / 96 % 96 + 32) << 8)
/* Diagnostic for a conversion type none of the branches above handle. */
1235 printf ("Unknown CCS-conversion %d is specified!",
1236 XCHARSET_CONVERSION (charset));
1240 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1241 (XCHARSET_CODE_OFFSET (charset)
1242 == XCHARSET_MIN_CODE (charset)) )
1246 if (XCHARSET_DIMENSION (charset) == 1)
1248 if (XCHARSET_CHARS (charset) == 94)
/* D is CH relative to the slot selected by the charset's final byte. */
1250 if (((d = ch - (MIN_CHAR_94
1251 + (XCHARSET_FINAL (charset) - '0') * 94))
1256 else if (XCHARSET_CHARS (charset) == 96)
1258 if (((d = ch - (MIN_CHAR_96
1259 + (XCHARSET_FINAL (charset) - '0') * 96))
1267 else if (XCHARSET_DIMENSION (charset) == 2)
1269 if (XCHARSET_CHARS (charset) == 94)
1271 if (((d = ch - (MIN_CHAR_94x94
1273 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1276 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1278 else if (XCHARSET_CHARS (charset) == 96)
1280 if (((d = ch - (MIN_CHAR_96x96
1282 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1285 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Map character C to a built-in charset and a code point in it.  The
   chosen charset is stored through CHARSET and the code point is the
   return value.  The visible ladder tests C against successive ranges:
   Basic Latin, Control-1 and Latin-1 (conditions partly not visible),
   Hebrew, Thai, the BMP, SMP and SIP, Daikanwa, and the 94, 96, 94x94
   and 96x96 ranges.  In the last four, the charset is looked up with
   CHARSET_BY_ATTRIBUTES from the final byte implied by C; if none is
   found, Vcharset_ucs is used instead.  Several return statements are
   on lines not shown in this listing. */
1296 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1298 if (c <= MAX_CHAR_BASIC_LATIN)
1300 *charset = Vcharset_ascii;
1305 *charset = Vcharset_control_1;
1310 *charset = Vcharset_latin_iso8859_1;
1314 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1316 *charset = Vcharset_hebrew_iso8859_8;
1317 return c - MIN_CHAR_HEBREW + 0x20;
1320 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1322 *charset = Vcharset_thai_tis620;
1323 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike every other branch, this one returns a Lisp list
   and does not store through CHARSET; presumably it is disabled
   (commented out or conditionally compiled) in the full file -- the
   surrounding lines are not visible here. */
1326 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1327 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1329 return list2 (Vcharset_katakana_jisx0201,
1330 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1333 else if (c <= MAX_CHAR_BMP)
1335 *charset = Vcharset_ucs_bmp;
1338 else if (c <= MAX_CHAR_SMP)
1340 *charset = Vcharset_ucs_smp;
1341 return c - MIN_CHAR_SMP;
1343 else if (c <= MAX_CHAR_SIP)
1345 *charset = Vcharset_ucs_sip;
1346 return c - MIN_CHAR_SIP;
1348 else if (c < MIN_CHAR_DAIKANWA)
1350 *charset = Vcharset_ucs;
1353 else if (c <= MAX_CHAR_DAIKANWA)
1355 *charset = Vcharset_ideograph_daikanwa;
1356 return c - MIN_CHAR_DAIKANWA;
1358 else if (c < MIN_CHAR_94)
1360 *charset = Vcharset_ucs;
1363 else if (c <= MAX_CHAR_94)
/* Final byte is '0' plus the index of the 94-character slot holding C. */
1365 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1366 ((c - MIN_CHAR_94) / 94) + '0',
1367 CHARSET_LEFT_TO_RIGHT);
1368 if (!NILP (*charset))
1369 return ((c - MIN_CHAR_94) % 94) + 33;
1372 *charset = Vcharset_ucs;
1376 else if (c <= MAX_CHAR_96)
1378 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1379 ((c - MIN_CHAR_96) / 96) + '0',
1380 CHARSET_LEFT_TO_RIGHT);
1381 if (!NILP (*charset))
1382 return ((c - MIN_CHAR_96) % 96) + 32;
1385 *charset = Vcharset_ucs;
1389 else if (c <= MAX_CHAR_94x94)
1392 = CHARSET_BY_ATTRIBUTES (94, 2,
1393 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1394 CHARSET_LEFT_TO_RIGHT);
1395 if (!NILP (*charset))
/* Row in the high byte, cell in the low byte, both with origin 33. */
1396 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1397 | (((c - MIN_CHAR_94x94) % 94) + 33);
1400 *charset = Vcharset_ucs;
1404 else if (c <= MAX_CHAR_96x96)
1407 = CHARSET_BY_ATTRIBUTES (96, 2,
1408 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1409 CHARSET_LEFT_TO_RIGHT);
1410 if (!NILP (*charset))
/* Row in the high byte, cell in the low byte, both with origin 32. */
1411 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1412 | (((c - MIN_CHAR_96x96) % 96) + 32);
1415 *charset = Vcharset_ucs;
1421 *charset = Vcharset_ucs;
1426 Lisp_Object Vdefault_coded_charset_priority_list;
1430 /************************************************************************/
1431 /* Basic charset Lisp functions */
1432 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when CHARSETP holds for OBJECT
   and Qnil otherwise. */
1434 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1435 Return non-nil if OBJECT is a charset.
1439 return CHARSETP (object) ? Qt : Qnil;
/* Lisp function `find-charset'.  A charset object is returned as is;
   anything else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the result when there is no entry. */
1442 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1443 Retrieve the charset of the given name.
1444 If CHARSET-OR-NAME is a charset object, it is simply returned.
1445 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1446 nil is returned. Otherwise the associated charset object is returned.
1450 if (CHARSETP (charset_or_name))
1451 return charset_or_name;
1453 CHECK_SYMBOL (charset_or_name);
1454 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp entry point for the signalling charset lookup.  The lookup itself
   is delegated to Ffind_charset; a failure is reported with
   signal_simple_error ("No such charset", name).  The condition guarding
   the signal and the final return are not visible in this excerpt.  */
1457 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1458 Retrieve the charset of the given name.
1459 Same as `find-charset' except an error is signalled if there is no such
1460 charset instead of returning nil.
1464 Lisp_Object charset = Ffind_charset (name);
1467 signal_simple_error ("No such charset", name);
1471 /* We store the charsets in hash tables with the names as the key and the
1472 actual charset object as the value. Occasionally we need to use them
1473 in a list format. These routines provide us with that. */
1474 struct charset_list_closure
/* Address of the list under construction; add_charset_to_list_mapper
   conses each hash key onto the list stored there.  */
1476 Lisp_Object *charset_list;
/* Hash-table mapper handed to elisp_maphash by Fcharset_list.

   KEY is the symbol under which a charset is registered, VALUE the charset
   object, and CHARSET_LIST_CLOSURE a struct charset_list_closure.  The
   mapper pushes KEY onto the list the closure points at.  VALUE is not
   used, so every key of the table (aliases installed by
   define-charset-alias included) ends up in the list.  */
1480 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1481 void *charset_list_closure)
1483 /* This function can GC */
1484 struct charset_list_closure *chcl =
1485 (struct charset_list_closure*) charset_list_closure;
1486 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend, so the resulting order is the reverse of the traversal order.  */
1488 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp entry point that collects the keys of Vcharset_hash_table into a
   fresh list.  The list starts out as Qnil, is registered with GCPRO1
   because the mapper conses, and is filled by running
   add_charset_to_list_mapper over the table through elisp_maphash.  */
1492 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1493 Return a list of the names of all defined charsets.
1497 Lisp_Object charset_list = Qnil;
1498 struct gcpro gcpro1;
1499 struct charset_list_closure charset_list_closure;
1501 GCPRO1 (charset_list);
1502 charset_list_closure.charset_list = &charset_list;
1503 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1504 &charset_list_closure);
1507 return charset_list;
/* Lisp entry point: resolves CHARSET with Fget_charset (which signals for
   an unknown charset) and returns its XCHARSET_NAME.  */
1510 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1511 Return the name of charset CHARSET.
1515 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp entry point that defines a new charset from NAME, DOC-STRING and the
   property list PROPS.

   Visible flow: NAME must be a symbol that Ffind_charset does not already
   know; each (keyword, value) pair of PROPS is validated and copied into a
   local; a charset already registered for the same (chars, dimension,
   final) in either direction is rejected; an id is obtained from
   get_unallocated_leading_byte (dimension); missing values are defaulted
   (doc string and registry to "", short name to the symbol's name, long
   name to the doc string; columns is set from the dimension, its guard
   not being visible); make_charset builds the object and a CCL program
   that passed setup_ccl_program is attached last.

   NOTE(review): the `long-name' test is a plain `if', not `else if', so it
   opens a second chain.  A `short-name' pair handled by the first `if'
   then appears to run down that chain to the final
   "Unrecognized property" signal.  The braces and the closing `else' are
   not visible in this excerpt, so confirm before changing it to `else if'.

   NOTE(review): the doc string advertises 'code-min and 'code-max, but the
   loop tests Qmin_code / Qmax_code, which syms_of_mule_charset interns as
   "min-code" / "max-code"; the doc string spelling looks wrong.

   NOTE(review): `conversion' is assigned and passed to make_charset, but
   its declaration is not visible here.  */
1518 /* #### SJT Should generic properties be allowed? */
1519 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1520 Define a new character set.
1521 This function is for use with Mule support.
1522 NAME is a symbol, the name by which the character set is normally referred.
1523 DOC-STRING is a string describing the character set.
1524 PROPS is a property list, describing the specific nature of the
1525 character set. Recognized properties are:
1527 'short-name Short version of the charset name (ex: Latin-1)
1528 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1529 'registry A regular expression matching the font registry field for
1531 'dimension Number of octets used to index a character in this charset.
1532 Either 1 or 2. Defaults to 1.
1533 If UTF-2000 feature is enabled, 3 or 4 are also available.
1534 'columns Number of columns used to display a character in this charset.
1535 Only used in TTY mode. (Under X, the actual width of a
1536 character can be derived from the font used to display the
1537 characters.) If unspecified, defaults to the dimension
1538 (this is almost always the correct value).
1539 'chars Number of characters in each dimension (94 or 96).
1540 Defaults to 94. Note that if the dimension is 2, the
1541 character set thus described is 94x94 or 96x96.
1542 If UTF-2000 feature is enabled, 128 or 256 are also available.
1543 'final Final byte of ISO 2022 escape sequence. Must be
1544 supplied. Each combination of (DIMENSION, CHARS) defines a
1545 separate namespace for final bytes. Note that ISO
1546 2022 restricts the final byte to the range
1547 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1548 dimension == 2. Note also that final bytes in the range
1549 0x30 - 0x3F are reserved for user-defined (not official)
1551 'graphic 0 (use left half of font on output) or 1 (use right half
1552 of font on output). Defaults to 0. For example, for
1553 a font whose registry is ISO8859-1, the left half
1554 (octets 0x20 - 0x7F) is the `ascii' character set, while
1555 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1556 character set. With 'graphic set to 0, the octets
1557 will have their high bit cleared; with it set to 1,
1558 the octets will have their high bit set.
1559 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1561 'ccl-program A compiled CCL program used to convert a character in
1562 this charset into an index into the font. This is in
1563 addition to the 'graphic property. The CCL program
1564 is passed the octets of the character, with the high
1565 bit cleared and set depending upon whether the value
1566 of the 'graphic property is 0 or 1.
1567 'mother [UTF-2000 only] Base coded-charset.
1568 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1569 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1570 'code-offset [UTF-2000 only] Offset for a code-point of a base
1572 'conversion [UTF-2000 only] Conversion for a code-point of a base
1573 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1575 (name, doc_string, props))
1577 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1578 int direction = CHARSET_LEFT_TO_RIGHT;
1579 Lisp_Object registry = Qnil;
1580 Lisp_Object charset;
1581 Lisp_Object ccl_program = Qnil;
1582 Lisp_Object short_name = Qnil, long_name = Qnil;
1583 Lisp_Object mother = Qnil;
1584 int min_code = 0, max_code = 0, code_offset = 0;
1585 int byte_offset = -1;
1588 CHECK_SYMBOL (name);
1589 if (!NILP (doc_string))
1590 CHECK_STRING (doc_string);
1592 charset = Ffind_charset (name);
1593 if (!NILP (charset))
1594 signal_simple_error ("Cannot redefine existing charset", name);
1597 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1599 if (EQ (keyword, Qshort_name))
1601 CHECK_STRING (value);
1605 if (EQ (keyword, Qlong_name))
1607 CHECK_STRING (value);
1611 else if (EQ (keyword, Qdimension))
1614 dimension = XINT (value);
1615 if (dimension < 1 ||
1622 signal_simple_error ("Invalid value for 'dimension", value);
1625 else if (EQ (keyword, Qchars))
1628 chars = XINT (value);
1629 if (chars != 94 && chars != 96
1631 && chars != 128 && chars != 256
1634 signal_simple_error ("Invalid value for 'chars", value);
1637 else if (EQ (keyword, Qcolumns))
1640 columns = XINT (value);
1641 if (columns != 1 && columns != 2)
1642 signal_simple_error ("Invalid value for 'columns", value);
1645 else if (EQ (keyword, Qgraphic))
1648 graphic = XINT (value);
1656 signal_simple_error ("Invalid value for 'graphic", value);
1659 else if (EQ (keyword, Qregistry))
1661 CHECK_STRING (value);
1665 else if (EQ (keyword, Qdirection))
1667 if (EQ (value, Ql2r))
1668 direction = CHARSET_LEFT_TO_RIGHT;
1669 else if (EQ (value, Qr2l))
1670 direction = CHARSET_RIGHT_TO_LEFT;
1672 signal_simple_error ("Invalid value for 'direction", value);
1675 else if (EQ (keyword, Qfinal))
1677 CHECK_CHAR_COERCE_INT (value);
1678 final = XCHAR (value);
1679 if (final < '0' || final > '~')
1680 signal_simple_error ("Invalid value for 'final", value);
1684 else if (EQ (keyword, Qmother))
1686 mother = Fget_charset (value);
1689 else if (EQ (keyword, Qmin_code))
1692 min_code = XUINT (value);
1695 else if (EQ (keyword, Qmax_code))
1698 max_code = XUINT (value);
1701 else if (EQ (keyword, Qcode_offset))
1704 code_offset = XUINT (value);
1707 else if (EQ (keyword, Qconversion))
1709 if (EQ (value, Q94x60))
1710 conversion = CONVERSION_94x60;
1711 else if (EQ (value, Q94x94x60))
1712 conversion = CONVERSION_94x94x60;
1713 else if (EQ (value, Qbig5_1))
1714 conversion = CONVERSION_BIG5_1;
1715 else if (EQ (value, Qbig5_2))
1716 conversion = CONVERSION_BIG5_2;
1718 signal_simple_error ("Unrecognized conversion", value);
1722 else if (EQ (keyword, Qccl_program))
1724 struct ccl_program test_ccl;
1726 if (setup_ccl_program (&test_ccl, value) < 0)
1727 signal_simple_error ("Invalid value for 'ccl-program", value);
1728 ccl_program = value;
1732 signal_simple_error ("Unrecognized property", keyword);
1738 error ("'final must be specified");
1740 if (dimension == 2 && final > 0x5F)
1742 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1745 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1746 CHARSET_LEFT_TO_RIGHT)) ||
1747 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1748 CHARSET_RIGHT_TO_LEFT)))
1750 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1752 id = get_unallocated_leading_byte (dimension);
1754 if (NILP (doc_string))
1755 doc_string = build_string ("");
1757 if (NILP (registry))
1758 registry = build_string ("");
1760 if (NILP (short_name))
1761 XSETSTRING (short_name, XSYMBOL (name)->name);
1763 if (NILP (long_name))
1764 long_name = doc_string;
1767 columns = dimension;
1769 if (byte_offset < 0)
1773 else if (chars == 96)
1779 charset = make_charset (id, name, chars, dimension, columns, graphic,
1780 final, direction, short_name, long_name,
1781 doc_string, registry,
1782 Qnil, min_code, max_code, code_offset, byte_offset,
1783 mother, conversion);
1784 if (!NILP (ccl_program))
1785 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp entry point that clones CHARSET under NEW-NAME with the direction
   flipped.  CHARSET must not already have a reverse-direction partner and
   NEW-NAME must be a symbol that Ffind_charset does not know.  The clone
   gets a fresh id from get_unallocated_leading_byte and otherwise copies
   chars, dimension, columns, graphic, final and the doc/short/long/registry
   strings.  Afterwards the two charsets are linked to each other through
   their REVERSE_DIRECTION_CHARSET slots.

   NOTE(review): two alternative tails for the make_charset argument list
   are visible (one copying the decoding table, code range, offsets and
   conversion from CHARSET, one passing Qnil/0 placeholders).  Presumably a
   preprocessor conditional whose lines are not shown picks one -- confirm.  */
1789 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1791 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1792 NEW-NAME is the name of the new charset. Return the new charset.
1794 (charset, new_name))
1796 Lisp_Object new_charset = Qnil;
1797 int id, chars, dimension, columns, graphic, final;
1799 Lisp_Object registry, doc_string, short_name, long_name;
1802 charset = Fget_charset (charset);
1803 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1804 signal_simple_error ("Charset already has reverse-direction charset",
1807 CHECK_SYMBOL (new_name);
1808 if (!NILP (Ffind_charset (new_name)))
1809 signal_simple_error ("Cannot redefine existing charset", new_name);
1811 cs = XCHARSET (charset);
1813 chars = CHARSET_CHARS (cs);
1814 dimension = CHARSET_DIMENSION (cs);
1815 columns = CHARSET_COLUMNS (cs);
1816 id = get_unallocated_leading_byte (dimension);
1818 graphic = CHARSET_GRAPHIC (cs);
1819 final = CHARSET_FINAL (cs);
1820 direction = CHARSET_RIGHT_TO_LEFT;
1821 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1822 direction = CHARSET_LEFT_TO_RIGHT;
1823 doc_string = CHARSET_DOC_STRING (cs);
1824 short_name = CHARSET_SHORT_NAME (cs);
1825 long_name = CHARSET_LONG_NAME (cs);
1826 registry = CHARSET_REGISTRY (cs);
1828 new_charset = make_charset (id, new_name, chars, dimension, columns,
1829 graphic, final, direction, short_name, long_name,
1830 doc_string, registry,
1832 CHARSET_DECODING_TABLE(cs),
1833 CHARSET_MIN_CODE(cs),
1834 CHARSET_MAX_CODE(cs),
1835 CHARSET_CODE_OFFSET(cs),
1836 CHARSET_BYTE_OFFSET(cs),
1838 CHARSET_CONVERSION (cs)
1840 Qnil, 0, 0, 0, 0, Qnil, 0
1844 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1845 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp entry point that registers ALIAS (which must be a symbol) as an
   additional key for CHARSET in Vcharset_hash_table.  CHARSET is resolved
   with Fget_charset first, so an unknown charset signals.  The value of
   Fputhash is returned.  No check that ALIAS is unused is visible, so an
   existing entry under that key would be replaced.  */
1850 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1851 Define symbol ALIAS as an alias for CHARSET.
1855 CHECK_SYMBOL (alias);
1856 charset = Fget_charset (charset);
1857 return Fputhash (alias, charset, Vcharset_hash_table);
1860 /* #### Reverse direction charsets not yet implemented. */
/* Resolves CHARSET with Fget_charset and returns the content of its
   REVERSE_DIRECTION_CHARSET slot.

   NOTE(review): syms_of_mule_charset contains no DEFSUBR for this
   function (only a commented-out DEFSUBR (Freverse_direction_charset)),
   so it does not appear to be reachable from Lisp; it may also sit inside
   a disabled preprocessor region whose lines are not visible -- confirm.  */
1862 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1864 Return the reverse-direction charset parallel to CHARSET, if any.
1865 This is the charset with the same properties (in particular, the same
1866 dimension, number of characters per dimension, and final byte) as
1867 CHARSET but whose characters are displayed in the opposite direction.
1871 charset = Fget_charset (charset);
1872 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp entry point that finds a charset by DIMENSION, CHARS, FINAL and,
   optionally, DIRECTION.  DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL
   within '0'..'~' (and at most 0x5F when DIMENSION is 2); DIRECTION must be
   `l2r', `r2l' or nil.  With a direction the lookup is a single
   CHARSET_BY_ATTRIBUTES call; the separate left-to-right and right-to-left
   lookups serve the nil case the doc string describes.

   NOTE(review): the value handed back is XCHARSET_NAME (obj), i.e. the
   charset's name rather than the charset object.  Whether a failed lookup
   (Qnil) is guarded before that is not visible in this excerpt.

   NOTE(review): these limits are narrower than the 3/4 dimensions and
   128/256 sizes that the `make-charset' doc string mentions for UTF-2000
   builds, so such charsets cannot be found through this function.  */
1876 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1877 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1878 If DIRECTION is omitted, both directions will be checked (left-to-right
1879 will be returned if character sets exist for both directions).
1881 (dimension, chars, final, direction))
1883 int dm, ch, fi, di = -1;
1884 Lisp_Object obj = Qnil;
1886 CHECK_INT (dimension);
1887 dm = XINT (dimension);
1888 if (dm < 1 || dm > 2)
1889 signal_simple_error ("Invalid value for DIMENSION", dimension);
1893 if (ch != 94 && ch != 96)
1894 signal_simple_error ("Invalid value for CHARS", chars);
1896 CHECK_CHAR_COERCE_INT (final);
1898 if (fi < '0' || fi > '~')
1899 signal_simple_error ("Invalid value for FINAL", final);
1901 if (EQ (direction, Ql2r))
1902 di = CHARSET_LEFT_TO_RIGHT;
1903 else if (EQ (direction, Qr2l))
1904 di = CHARSET_RIGHT_TO_LEFT;
1905 else if (!NILP (direction))
1906 signal_simple_error ("Invalid value for DIRECTION", direction);
1908 if (dm == 2 && fi > 0x5F)
1910 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1914 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1916 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1919 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1922 return XCHARSET_NAME (obj);
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns its XCHARSET_SHORT_NAME.  */
1926 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1927 Return short name of CHARSET.
1931 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns its XCHARSET_LONG_NAME.  */
1934 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1935 Return long name of CHARSET.
1939 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns its XCHARSET_DOC_STRING, i.e. the "description" is the doc
   string slot of the charset.  */
1942 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1943 Return description of CHARSET.
1947 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns its XCHARSET_DIMENSION boxed with make_int.  */
1950 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1951 Return dimension of CHARSET.
1955 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp entry point that reads one property of CHARSET.  CHARSET is
   resolved with Fget_charset and PROP must be a symbol.  Each recognized
   symbol maps onto the matching CHARSET_* accessor, with integers boxed by
   make_int.  Special cases: a final byte of 0 is reported as nil, the
   direction is reported as `l2r' or `r2l', and the reverse-direction
   charset is reported by name when it is a charset object.  Any other
   symbol signals "Unrecognized charset property name".

   NOTE(review): `mother', `min-code' and `max-code' are answered, but no
   branch for `code-offset' or `conversion' is visible, although
   `make-charset' accepts both; asking for them appears to signal.  */
1958 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1959 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1960 Recognized properties are those listed in `make-charset', as well as
1961 'name and 'doc-string.
1967 charset = Fget_charset (charset);
1968 cs = XCHARSET (charset);
1970 CHECK_SYMBOL (prop);
1971 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1972 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1973 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1974 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1975 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1976 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1977 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1978 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1979 Qnil : make_char (CHARSET_FINAL (cs));
1980 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1981 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1982 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1983 if (EQ (prop, Qdirection))
1984 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1985 if (EQ (prop, Qreverse_direction_charset))
1987 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1988 /* #### Is this translation OK? If so, error checking sufficient? */
1989 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1992 if (EQ (prop, Qmother))
1993 return CHARSET_MOTHER (cs);
1994 if (EQ (prop, Qmin_code))
1995 return make_int (CHARSET_MIN_CODE (cs));
1996 if (EQ (prop, Qmax_code))
1997 return make_int (CHARSET_MAX_CODE (cs));
1999 signal_simple_error ("Unrecognized charset property name", prop);
2000 return Qnil; /* not reached */
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns XCHARSET_LEADING_BYTE boxed with make_int, i.e. the
   "identification number" is the charset's leading-byte value.  */
2003 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2004 Return charset identification number of CHARSET.
2008 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2011 /* #### We need to figure out which properties we really want to
/* Lisp entry point that replaces CHARSET's CCL program.  CHARSET is
   resolved with Fget_charset.  CCL-PROGRAM is accepted only if
   setup_ccl_program can load it into a scratch struct ccl_program
   (a negative result signals "Invalid ccl-program"); the scratch struct is
   used for validation only, the Lisp value itself is what gets stored.  */
2014 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2015 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2017 (charset, ccl_program))
2019 struct ccl_program test_ccl;
2021 charset = Fget_charset (charset);
2022 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2023 signal_simple_error ("Invalid ccl-program", ccl_program);
2024 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empties, on every device, the font-cache hash table stored under CHARSET
   in that device's charset_font_cache.  Called by Fset_charset_registry
   after the registry of CHARSET has been replaced.  */
2029 invalidate_charset_font_caches (Lisp_Object charset)
2031 /* Invalidate font cache entries for charset on all devices. */
2032 Lisp_Object devcons, concons, hash_table;
2033 DEVICE_LOOP_NO_BREAK (devcons, concons)
2035 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the not-found marker: a device that has no cache for this
   charset is simply skipped.  */
2036 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2037 if (!UNBOUNDP (hash_table))
2038 Fclrhash (hash_table);
/* Lisp entry point that replaces CHARSET's registry.  CHARSET is resolved
   with Fget_charset and REGISTRY must be a string.  After the store, the
   per-device font caches for the charset are cleared with
   invalidate_charset_font_caches and face_property_was_changed is called
   for the `font' property of Vdefault_face with Qglobal.  */
2042 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2043 Set the 'registry property of CHARSET to REGISTRY.
2045 (charset, registry))
2047 charset = Fget_charset (charset);
2048 CHECK_STRING (registry);
2049 XCHARSET_REGISTRY (charset) = registry;
2050 invalidate_charset_font_caches (charset);
2051 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp accessor: resolves CHARSET with Fget_charset (signals if unknown)
   and returns its XCHARSET_DECODING_TABLE, i.e. the "mapping table" is the
   decoding-table slot.  */
2056 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2057 Return mapping-table of CHARSET.
2061 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp entry point that installs TABLE as CHARSET's mapping (decoding)
   table.  Visible structure: a branch that clears the decoding table, a
   vector branch that validates TABLE with decoding_table_check_elements
   and signals "Too big table", "Invalid element is found" or
   "Something wrong", and a wrong-type-argument signal for anything else.
   After that the vector is walked according to the charset's dimension
   and entries are recorded with Fput_char_attribute under the charset's
   name, using the index plus CHARSET_BYTE_OFFSET as code point (with the
   outer index shifted left by 8 in the nested case).  The conditions that
   decide which entries are recorded are not visible.

   NOTE(review): the vector branch is also seen storing Qnil into the
   decoding table; the surrounding lines are not visible, so it is unclear
   from this excerpt where TABLE itself is stored -- confirm.  */
2064 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2065 Set mapping-table of CHARSET to TABLE.
2069 struct Lisp_Charset *cs;
2073 charset = Fget_charset (charset);
2074 cs = XCHARSET (charset);
2078 CHARSET_DECODING_TABLE(cs) = Qnil;
2081 else if (VECTORP (table))
2083 int ccs_len = CHARSET_BYTE_SIZE (cs);
2084 int ret = decoding_table_check_elements (table,
2085 CHARSET_DIMENSION (cs),
2090 signal_simple_error ("Too big table", table);
2092 signal_simple_error ("Invalid element is found", table);
2094 signal_simple_error ("Something wrong", table);
2096 CHARSET_DECODING_TABLE(cs) = Qnil;
2099 signal_error (Qwrong_type_argument,
2100 list2 (build_translated_string ("vector-or-nil-p"),
2103 byte_offset = CHARSET_BYTE_OFFSET (cs);
2104 switch (CHARSET_DIMENSION (cs))
2107 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2109 Lisp_Object c = XVECTOR_DATA(table)[i];
2112 Fput_char_attribute (c, XCHARSET_NAME (charset),
2113 make_int (i + byte_offset));
2117 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2119 Lisp_Object v = XVECTOR_DATA(table)[i];
2125 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2127 Lisp_Object c = XVECTOR_DATA(v)[j];
2131 (c, XCHARSET_NAME (charset),
2132 make_int ( ( (i + byte_offset) << 8 )
2138 Fput_char_attribute (v, XCHARSET_NAME (charset),
2139 make_int (i + byte_offset));
/* Lisp entry point that dumps CHARSET's decoding table to its database
   file; the visible body sits under #ifdef HAVE_DATABASE.  The file name
   comes from char_attribute_system_db_file (charset name,
   Qsystem_char_id, 1) and the database is opened with mode "w+".
   Depending on the charset's dimension, one to four nested loops run every
   octet from CHARSET_BYTE_OFFSET up to, but not including,
   CHARSET_BYTE_OFFSET + CHARSET_BYTE_SIZE, fetch the entry with
   get_ccs_octet_table, and store printed code point -> printed character
   with Fput_database.  The octets are packed most significant first
   (group << 24, plane << 16, row << 8).  The value of Fclose_database is
   returned.

   NOTE(review): no check of the Fopen_database result is visible before
   the writes, and the tests that skip empty sub-tables or entries are not
   visible either -- confirm both on the lines not shown.  */
2146 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2147 Save mapping-table of CHARSET.
2151 #ifdef HAVE_DATABASE
2152 struct Lisp_Charset *cs;
2153 int byte_min, byte_max;
2155 Lisp_Object db_file;
2157 charset = Fget_charset (charset);
2158 cs = XCHARSET (charset);
2160 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2161 Qsystem_char_id, 1);
2162 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2164 byte_min = CHARSET_BYTE_OFFSET (cs);
2165 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2166 switch (CHARSET_DIMENSION (cs))
2170 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2173 for (cell = byte_min; cell < byte_max; cell++)
2175 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2178 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2179 Fprin1_to_string (c, Qnil),
2186 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2189 for (row = byte_min; row < byte_max; row++)
2191 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2194 for (cell = byte_min; cell < byte_max; cell++)
2196 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2199 Fput_database (Fprin1_to_string (make_int ((row << 8)
2202 Fprin1_to_string (c, Qnil),
2210 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2213 for (plane = byte_min; plane < byte_max; plane++)
2216 = get_ccs_octet_table (table_p, charset, plane);
2219 for (row = byte_min; row < byte_max; row++)
2222 = get_ccs_octet_table (table_r, charset, row);
2225 for (cell = byte_min; cell < byte_max; cell++)
2227 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2231 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2235 Fprin1_to_string (c, Qnil),
2244 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2247 for (group = byte_min; group < byte_max; group++)
2250 = get_ccs_octet_table (table_g, charset, group);
2253 for (plane = byte_min; plane < byte_max; plane++)
2256 = get_ccs_octet_table (table_p, charset, plane);
2259 for (row = byte_min; row < byte_max; row++)
2262 = get_ccs_octet_table (table_r, charset, row);
2265 for (cell = byte_min; cell < byte_max; cell++)
2268 = get_ccs_octet_table (table_c, charset, cell);
2271 Fput_database (Fprin1_to_string
2272 (make_int (( group << 24)
2277 Fprin1_to_string (c, Qnil),
2285 return Fclose_database (db);
2291 #ifdef HAVE_CHISE_CLIENT
/* Tries to load the decoding entry for CODE_POINT of charset CCS from the
   charset's database file.  The file name comes from
   char_attribute_system_db_file (charset name, Qsystem_char_id, ...), the
   database is opened with mode "r", and the entry is fetched with
   Fget_database under the printed form of CODE_POINT.  On the path where
   an entry is used it is installed with decoding_table_put_char; the
   database is closed on both visible paths.  The checks on the opened
   database and on the fetched value, and the return values, are not
   visible in this excerpt.  */
2293 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2297 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2300 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2304 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2311 decoding_table_put_char (ccs, code_point, ret);
2312 Fclose_database (db);
2316 Fclose_database (db);
2320 #endif /* HAVE_CHISE_CLIENT */
2321 #endif /* UTF2000 */
2324 /************************************************************************/
2325 /* Lisp primitives for working with characters */
2326 /************************************************************************/
/* Lisp entry point that turns (CHARSET, CODE) into a character.  CHARSET
   is resolved with Fget_charset.  With DEFINED-ONLY nil the code point is
   decoded with DECODE_CHAR, otherwise with decode_defined_char; a negative
   result is reported as nil, anything else is boxed with make_char.
   The extraction of the integer from CODE and the adjustment made when
   XCHARSET_GRAPHIC is 1 are on lines not visible in this excerpt.  */
2329 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2330 Make a character from CHARSET and code-point CODE.
2331 If DEFINED_ONLY is non-nil, builtin character is not returned.
2332 If corresponding character is not found, nil is returned.
2334 (charset, code, defined_only))
2338 charset = Fget_charset (charset);
2341 if (XCHARSET_GRAPHIC (charset) == 1)
2343 if (NILP (defined_only))
2344 c = DECODE_CHAR (charset, c);
2346 c = decode_defined_char (charset, c);
2347 return c >= 0 ? make_char (c) : Qnil;
/* Lisp entry point that turns (CHARSET, CODE) into a builtin character.
   CHARSET is resolved with Fget_charset.  For latin-viscii the character
   is first obtained through Fdecode_char and its attributes in
   latin-viscii-lower and latin-viscii-upper are consulted; on a hit the
   lookup continues in that charset instead.  The code point is then
   decoded with decode_builtin_char; when that yields a negative value the
   function falls back to Fdecode_char (charset, code, Qnil).
   The conditions around the VISCII lookups and the adjustment made when
   XCHARSET_GRAPHIC is 1 are on lines not visible in this excerpt.  */
2350 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2351 Make a builtin character from CHARSET and code-point CODE.
2357 charset = Fget_charset (charset);
2359 if (EQ (charset, Vcharset_latin_viscii))
2361 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2367 (ret = Fget_char_attribute (chr,
2368 Vcharset_latin_viscii_lower,
2371 charset = Vcharset_latin_viscii_lower;
2375 (ret = Fget_char_attribute (chr,
2376 Vcharset_latin_viscii_upper,
2379 charset = Vcharset_latin_viscii_upper;
2386 if (XCHARSET_GRAPHIC (charset) == 1)
2389 c = decode_builtin_char (charset, c);
2390 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp entry point that builds a character from CHARSET and one or two
   octets.  The permitted octet range depends on the charset: 0..127 for
   ascii, 0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  An octet outside the range
   signals through args_out_of_range_3.  For a one-dimensional charset a
   second octet is rejected ("... second octet must be nil") and the
   character is MAKE_CHAR (charset, a1, 0); otherwise ARG2 is masked with
   0x7f, range-checked the same way and combined with ARG1 by MAKE_CHAR.
   The lines that extract and mask ARG1 are not visible in this excerpt.  */
2394 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2395 Make a character from CHARSET and octets ARG1 and ARG2.
2396 ARG2 is required only for characters from two-dimensional charsets.
2397 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2398 character s with caron.
2400 (charset, arg1, arg2))
2404 int lowlim, highlim;
2406 charset = Fget_charset (charset);
2407 cs = XCHARSET (charset);
2409 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2410 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2412 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2414 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2415 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2418 /* It is useful (and safe, according to Olivier Galibert) to strip
2419 the 8th bit off ARG1 and ARG2 because it allows programmers to
2420 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2421 Latin 2 code of the character. */
2429 if (a1 < lowlim || a1 > highlim)
2430 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2432 if (CHARSET_DIMENSION (cs) == 1)
2436 ("Charset is of dimension one; second octet must be nil", arg2);
2437 return make_char (MAKE_CHAR (charset, a1, 0));
2446 a2 = XINT (arg2) & 0x7f;
2448 if (a2 < lowlim || a2 > highlim)
2449 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2451 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp entry point: CHARACTER is checked (an integer is coerced) with
   CHECK_CHAR_COERCE_INT, its charset is found with CHAR_CHARSET, and the
   charset's name -- not the charset object -- is returned.  */
2454 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2455 Return the character set of CHARACTER.
2459 CHECK_CHAR_COERCE_INT (character);
2461 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp entry point that returns one position octet of CHARACTER.  The
   character is split with BREAKUP_CHAR into a charset and two octets.
   N nil or 0 selects the first octet, N equal to 1 the second; any other
   N signals "Octet number must be 0 or 1".  */
2464 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2465 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2466 N defaults to 0 if omitted.
2470 Lisp_Object charset;
2473 CHECK_CHAR_COERCE_INT (character);
2475 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2477 if (NILP (n) || EQ (n, Qzero))
2478 return make_int (octet0);
2479 else if (EQ (n, make_int (1)))
2480 return make_int (octet1);
2482 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp entry point that maps CHARACTER to its code point in CHARSET.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT and CHARSET resolved
   with Fget_charset; the code point comes from charset_code_point and is
   returned boxed when it is non-negative.  What is returned for a negative
   code point is on lines not visible in this excerpt.  */
2486 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2487 Return code-point of CHARACTER in specified CHARSET.
2489 (character, charset))
2493 CHECK_CHAR_COERCE_INT (character);
2494 charset = Fget_charset (charset);
2495 code_point = charset_code_point (charset, XCHAR (character));
2496 if (code_point >= 0)
2497 return make_int (code_point);
/* Lisp entry point that returns a list of the charset name followed by the
   position codes of CHARACTER.  Two implementations are visible: one
   encodes the character with ENCODE_CHAR, conses the low 8 bits of the
   code point while counting down the charset's dimension and then
   prepends the charset name; the other splits the character with
   BREAKUP_CHAR and builds a three-element list for a two-dimensional
   charset and a two-element list otherwise.  CHARSET and RC are protected
   with GCPRO2 while the list is built.

   NOTE(review): presumably a preprocessor conditional that is not visible
   selects between the two implementations -- confirm.  */
2503 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2504 Return list of charset and one or two position-codes of CHARACTER.
2508 /* This function can GC */
2509 struct gcpro gcpro1, gcpro2;
2510 Lisp_Object charset = Qnil;
2511 Lisp_Object rc = Qnil;
2519 GCPRO2 (charset, rc);
2520 CHECK_CHAR_COERCE_INT (character);
2523 code_point = ENCODE_CHAR (XCHAR (character), charset);
2524 dimension = XCHARSET_DIMENSION (charset);
2525 while (dimension > 0)
2527 rc = Fcons (make_int (code_point & 255), rc);
2531 rc = Fcons (XCHARSET_NAME (charset), rc);
2533 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2535 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2537 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2541 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2550 #ifdef ENABLE_COMPOSITE_CHARS
2551 /************************************************************************/
2552 /* composite character functions */
2553 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are made
   into a Lisp string that serves as key in
   Vcomposite_char_string2char_hash_table.  On the allocation path visible
   here a new character is made in Vcharset_composite from the next free
   (row, column) pair and recorded in both directions (char -> string and
   string -> char); running out of rows signals
   "No more composite chars available".  The test that distinguishes an
   existing entry from a new one, and the return statement, are not visible
   in this excerpt.  */
2556 lookup_composite_char (Bufbyte *str, int len)
2558 Lisp_Object lispstr = make_string (str, len);
2559 Lisp_Object ch = Fgethash (lispstr,
2560 Vcomposite_char_string2char_hash_table,
2566 if (composite_char_row_next >= 128)
2567 signal_simple_error ("No more composite chars available", lispstr);
2568 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2569 composite_char_col_next);
2570 Fputhash (make_char (emch), lispstr,
2571 Vcomposite_char_char2string_hash_table);
2572 Fputhash (lispstr, make_char (emch),
2573 Vcomposite_char_string2char_hash_table);
/* Advance the allocation cursor: columns run up to 127, then restart at 32
   on the next row.  */
2574 composite_char_col_next++;
2575 if (composite_char_col_next >= 128)
2577 composite_char_col_next = 32;
2578 composite_char_row_next++;
/* Looks up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that an
   entry exists, so CH must be a character that was previously handed out
   by lookup_composite_char.  The return statement is not visible in this
   excerpt.  */
2587 composite_char_string (Emchar ch)
2589 Lisp_Object str = Fgethash (make_char (ch),
2590 Vcomposite_char_char2string_hash_table,
2592 assert (!UNBOUNDP (str));
/* Lisp-level wrapper around lookup_composite_char: STRING must be a
   string, and its data and length are passed on; the resulting character
   is boxed with make_char.

   NOTE(review): this is spelled xxDEFUN rather than DEFUN.  What xxDEFUN
   expands to is not visible here, yet syms_of_mule_charset still lists
   DEFSUBR (Fmake_composite_char) under ENABLE_COMPOSITE_CHARS -- confirm
   that the two agree.  */
2596 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2597 Convert a string into a single composite character.
2598 The character is the result of overstriking all the characters in
2603 CHECK_STRING (string);
2604 return make_char (lookup_composite_char (XSTRING_DATA (string),
2605 XSTRING_LENGTH (string)));
/* Lisp-level wrapper around composite_char_string: a character whose
   leading byte is not LEADING_BYTE_COMPOSITE signals
   "Must be composite char"; otherwise the recorded string is returned.
   The lines that check CH and extract EMCH are not visible in this
   excerpt.

   NOTE(review): spelled xxDEFUN rather than DEFUN, but still listed with
   DEFSUBR in syms_of_mule_charset under ENABLE_COMPOSITE_CHARS -- confirm
   that the two agree.  */
2608 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2609 Return a string of the characters comprising a composite character.
2617 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2618 signal_simple_error ("Must be composite char", ch);
2619 return composite_char_string (emch);
2621 #endif /* ENABLE_COMPOSITE_CHARS */
2624 /************************************************************************/
2625 /* initialization */
2626 /************************************************************************/
/* Registration routine for this file: initializes the lrecord
   implementation of the charset type, registers the Lisp primitives
   defined above with DEFSUBR, and interns the symbols used as property
   keywords, direction values, conversion names and charset names.

   NOTE(review): there is no DEFSUBR for
   Fcharset_reverse_direction_charset; only a commented-out
   DEFSUBR (Freverse_direction_charset) is present.  Preprocessor
   conditionals that probably surround parts of both lists (UTF-2000,
   database support) are not visible in this excerpt.  */
2629 syms_of_mule_charset (void)
2631 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp-callable primitives.  */
2633 DEFSUBR (Fcharsetp);
2634 DEFSUBR (Ffind_charset);
2635 DEFSUBR (Fget_charset);
2636 DEFSUBR (Fcharset_list);
2637 DEFSUBR (Fcharset_name);
2638 DEFSUBR (Fmake_charset);
2639 DEFSUBR (Fmake_reverse_direction_charset);
2640 /* DEFSUBR (Freverse_direction_charset); */
2641 DEFSUBR (Fdefine_charset_alias);
2642 DEFSUBR (Fcharset_from_attributes);
2643 DEFSUBR (Fcharset_short_name);
2644 DEFSUBR (Fcharset_long_name);
2645 DEFSUBR (Fcharset_description);
2646 DEFSUBR (Fcharset_dimension);
2647 DEFSUBR (Fcharset_property);
2648 DEFSUBR (Fcharset_id);
2649 DEFSUBR (Fset_charset_ccl_program);
2650 DEFSUBR (Fset_charset_registry);
2652 DEFSUBR (Fcharset_mapping_table);
2653 DEFSUBR (Fset_charset_mapping_table);
2657 DEFSUBR (Fdecode_char);
2658 DEFSUBR (Fdecode_builtin_char);
2659 DEFSUBR (Fencode_char);
2660 DEFSUBR (Fsave_charset_mapping_table);
2662 DEFSUBR (Fmake_char);
2663 DEFSUBR (Fchar_charset);
2664 DEFSUBR (Fchar_octet);
2665 DEFSUBR (Fsplit_char);
2667 #ifdef ENABLE_COMPOSITE_CHARS
2668 DEFSUBR (Fmake_composite_char);
2669 DEFSUBR (Fcomposite_char_string);
/* Predicate name and the property keywords tested by make-charset and
   charset-property.  */
2672 defsymbol (&Qcharsetp, "charsetp");
2673 defsymbol (&Qregistry, "registry");
2674 defsymbol (&Qfinal, "final");
2675 defsymbol (&Qgraphic, "graphic");
2676 defsymbol (&Qdirection, "direction");
2677 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2678 defsymbol (&Qshort_name, "short-name");
2679 defsymbol (&Qlong_name, "long-name");
/* Keywords and `conversion' values tested by make-charset.  Note that the
   Lisp spellings are "min-code" and "max-code".  */
2681 defsymbol (&Qmother, "mother");
2682 defsymbol (&Qmin_code, "min-code");
2683 defsymbol (&Qmax_code, "max-code");
2684 defsymbol (&Qcode_offset, "code-offset");
2685 defsymbol (&Qconversion, "conversion");
2686 defsymbol (&Q94x60, "94x60");
2687 defsymbol (&Q94x94x60, "94x94x60");
2688 defsymbol (&Qbig5_1, "big5-1");
2689 defsymbol (&Qbig5_2, "big5-2");
/* Values of the `direction' property.  */
2692 defsymbol (&Ql2r, "l2r");
2693 defsymbol (&Qr2l, "r2l");
2695 /* Charsets, compatible with FSF 20.3
2696 Naming convention is Script-Charset[-Edition] */
2697 defsymbol (&Qascii, "ascii");
2698 defsymbol (&Qcontrol_1, "control-1");
2699 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2700 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2701 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2702 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2703 defsymbol (&Qthai_tis620, "thai-tis620");
2704 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2705 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2706 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2707 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2708 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2709 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2710 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2711 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2712 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2713 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2714 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2715 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2716 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2717 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2718 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2719 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2721 defsymbol (&Qucs, "ucs");
2722 defsymbol (&Qucs_bmp, "ucs-bmp");
2723 defsymbol (&Qucs_smp, "ucs-smp");
2724 defsymbol (&Qucs_sip, "ucs-sip");
2725 defsymbol (&Qucs_cns, "ucs-cns");
2726 defsymbol (&Qucs_jis, "ucs-jis");
2727 defsymbol (&Qucs_ks, "ucs-ks");
2728 defsymbol (&Qucs_big5, "ucs-big5");
2729 defsymbol (&Qlatin_viscii, "latin-viscii");
2730 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2731 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2732 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2733 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2734 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2735 defsymbol (&Qideograph_gt, "ideograph-gt");
2736 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2737 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2738 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2739 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2740 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2741 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2742 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2743 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2744 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2745 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2746 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2747 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2748 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2749 defsymbol (&Qchinese_big5, "chinese-big5");
2750 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2751 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2752 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2753 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2754 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2755 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2756 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2757 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2758 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2759 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2760 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2761 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2762 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2763 defsymbol (&Qchina3_jef, "china3-jef");
2764 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2765 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2767 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2768 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2770 defsymbol (&Qcomposite, "composite");
/* Set up the charset lookup structure `chlook' and declare this
   module's variables.  The visible code allocates a zeroed
   struct charset_lookup, hands it to dump_add_root_struct_ptr, fills
   the lookup tables with Qnil, seeds the next-allocated leading-byte
   counters, and declares "leading-code-private-11" (DEFVAR_INT) and
   "default-coded-charset-priority-list" (DEFVAR_LISP).

   NOTE(review): this listing has lost lines (return type, braces,
   local declarations and, apparently, preprocessor conditionals);
   confirm details against the complete file.  */
2774 vars_of_mule_charset (void)
2781 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2782 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2784 /* Table of charsets indexed by leading byte. */
2785 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2786 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): charset_by_attributes is cleared below both as a
   two-level and as a three-level table.  Presumably these are the
   alternative arms of a preprocessor conditional whose directive
   lines are not visible here -- confirm.  */
2789 /* Table of charsets indexed by type/final-byte. */
2790 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2791 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2792 chlook->charset_by_attributes[i][j] = Qnil;
2794 /* Table of charsets indexed by type/final-byte/direction. */
2795 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2796 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2797 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2798 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Starting values for the leading-byte allocation counters.
   NOTE(review): the single counter and the 1-byte/2-byte pair look
   like alternatives selected by a conditional that is not visible
   here -- confirm.  */
2802 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2804 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2805 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* leading_code_private_11 is given PRE_LEADING_BYTE_PRIVATE_1 here
   and is assigned the same value once more right after its
   DEFVAR_INT.  */
2809 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2810 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2811 Leading-code of private TYPE9N charset of column-width 1.
2813 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2817 Vdefault_coded_charset_priority_list = Qnil;
2818 DEFVAR_LISP ("default-coded-charset-priority-list",
2819 &Vdefault_coded_charset_priority_list /*
2820 Default order of preferred coded-character-sets.
/* Create the charset hash table and the predefined charsets.
   Each charset object is built by a make_charset call; the visible
   assignments store the result in the matching Vcharset_* variable,
   which is staticpro'd just before.  When ENABLE_COMPOSITE_CHARS is
   defined, the composite charset and its two hash tables are set up
   as well.

   NOTE(review): this listing has lost lines (some assignment
   targets, some arguments, preprocessor directives, braces); consult
   the complete file before relying on argument positions.  */
2826 complex_vars_of_mule_charset (void)
2828 staticpro (&Vcharset_hash_table);
2829 Vcharset_hash_table =
2830 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2832 /* Predefined character sets. We store them into variables for
2836 staticpro (&Vcharset_ucs);
2838 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2839 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2840 build_string ("UCS"),
2841 build_string ("UCS"),
2842 build_string ("ISO/IEC 10646"),
2844 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2845 staticpro (&Vcharset_ucs_bmp);
2847 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2848 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2849 build_string ("BMP"),
2850 build_string ("UCS-BMP"),
2851 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2853 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2854 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2855 staticpro (&Vcharset_ucs_smp);
2857 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2858 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2859 build_string ("SMP"),
2860 build_string ("UCS-SMP"),
2861 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2862 build_string ("UCS00-1"),
2863 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2864 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2865 staticpro (&Vcharset_ucs_sip);
2867 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2868 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2869 build_string ("SIP"),
2870 build_string ("UCS-SIP"),
2871 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2872 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2873 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2874 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2875 staticpro (&Vcharset_ucs_cns);
2877 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2878 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2879 build_string ("UCS for CNS"),
2880 build_string ("UCS for CNS 11643"),
2881 build_string ("ISO/IEC 10646 for CNS 11643"),
2883 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2884 staticpro (&Vcharset_ucs_jis);
2886 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2887 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2888 build_string ("UCS for JIS"),
2889 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2891 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2893 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2894 staticpro (&Vcharset_ucs_ks);
2896 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2897 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2898 build_string ("UCS for KS"),
2899 build_string ("UCS for CCS defined by KS"),
2900 build_string ("ISO/IEC 10646 for Korean Standards"),
2902 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2903 staticpro (&Vcharset_ucs_big5);
2905 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2906 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2907 build_string ("UCS for Big5"),
2908 build_string ("UCS for Big5"),
2909 build_string ("ISO/IEC 10646 for Big5"),
2911 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2913 # define MIN_CHAR_THAI 0
2914 # define MAX_CHAR_THAI 0
2915 /* # define MIN_CHAR_HEBREW 0 */
2916 /* # define MAX_CHAR_HEBREW 0 */
2917 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2918 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2920 staticpro (&Vcharset_ascii);
2922 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2923 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2924 build_string ("ASCII"),
2925 build_string ("ASCII"),
2926 build_string ("ASCII (ISO646 IRV)"),
2927 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2928 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2929 staticpro (&Vcharset_control_1);
2930 Vcharset_control_1 =
2931 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2932 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2933 build_string ("C1"),
2934 build_string ("Control characters"),
/* NOTE(review): the text below says 128-191 while the following
   line passes 0x80, 0x9F (128-159) -- confirm which is intended.  */
2935 build_string ("Control characters 128-191"),
2937 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2938 staticpro (&Vcharset_latin_iso8859_1);
2939 Vcharset_latin_iso8859_1 =
2940 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2941 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Latin-1"),
2943 build_string ("ISO8859-1 (Latin-1)"),
2944 build_string ("ISO8859-1 (Latin-1)"),
2945 build_string ("iso8859-1"),
2946 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2947 staticpro (&Vcharset_latin_iso8859_2);
2948 Vcharset_latin_iso8859_2 =
2949 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2950 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2951 build_string ("Latin-2"),
2952 build_string ("ISO8859-2 (Latin-2)"),
2953 build_string ("ISO8859-2 (Latin-2)"),
2954 build_string ("iso8859-2"),
2955 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2956 staticpro (&Vcharset_latin_iso8859_3);
2957 Vcharset_latin_iso8859_3 =
2958 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2959 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2960 build_string ("Latin-3"),
2961 build_string ("ISO8859-3 (Latin-3)"),
2962 build_string ("ISO8859-3 (Latin-3)"),
2963 build_string ("iso8859-3"),
2964 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2965 staticpro (&Vcharset_latin_iso8859_4);
2966 Vcharset_latin_iso8859_4 =
2967 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2968 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2969 build_string ("Latin-4"),
2970 build_string ("ISO8859-4 (Latin-4)"),
2971 build_string ("ISO8859-4 (Latin-4)"),
2972 build_string ("iso8859-4"),
2973 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2974 staticpro (&Vcharset_thai_tis620);
2975 Vcharset_thai_tis620 =
2976 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2977 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2978 build_string ("TIS620"),
2979 build_string ("TIS620 (Thai)"),
2980 build_string ("TIS620.2529 (Thai)"),
2981 build_string ("tis620"),
2982 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2983 MIN_CHAR_THAI, 32, Qnil, CONVERSION_96);
2984 staticpro (&Vcharset_greek_iso8859_7);
2985 Vcharset_greek_iso8859_7 =
2986 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2987 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2988 build_string ("ISO8859-7"),
2989 build_string ("ISO8859-7 (Greek)"),
2990 build_string ("ISO8859-7 (Greek)"),
2991 build_string ("iso8859-7"),
2992 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2993 staticpro (&Vcharset_arabic_iso8859_6);
2994 Vcharset_arabic_iso8859_6 =
2995 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2996 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2997 build_string ("ISO8859-6"),
2998 build_string ("ISO8859-6 (Arabic)"),
2999 build_string ("ISO8859-6 (Arabic)"),
3000 build_string ("iso8859-6"),
3001 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3002 staticpro (&Vcharset_hebrew_iso8859_8);
3003 Vcharset_hebrew_iso8859_8 =
3004 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3005 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3006 build_string ("ISO8859-8"),
3007 build_string ("ISO8859-8 (Hebrew)"),
3008 build_string ("ISO8859-8 (Hebrew)"),
3009 build_string ("iso8859-8"),
3011 0 /* MIN_CHAR_HEBREW */,
3012 0 /* MAX_CHAR_HEBREW */, 0, 32,
3013 Qnil, CONVERSION_IDENTICAL);
3014 staticpro (&Vcharset_katakana_jisx0201);
3015 Vcharset_katakana_jisx0201 =
3016 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3017 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3018 build_string ("JISX0201 Kana"),
3019 build_string ("JISX0201.1976 (Japanese Kana)"),
3020 build_string ("JISX0201.1976 Japanese Kana"),
3021 build_string ("jisx0201\\.1976"),
3022 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3023 staticpro (&Vcharset_latin_jisx0201);
3024 Vcharset_latin_jisx0201 =
3025 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3026 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3027 build_string ("JISX0201 Roman"),
3028 build_string ("JISX0201.1976 (Japanese Roman)"),
3029 build_string ("JISX0201.1976 Japanese Roman"),
3030 build_string ("jisx0201\\.1976"),
3031 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3032 staticpro (&Vcharset_cyrillic_iso8859_5);
3033 Vcharset_cyrillic_iso8859_5 =
3034 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3035 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3036 build_string ("ISO8859-5"),
3037 build_string ("ISO8859-5 (Cyrillic)"),
3038 build_string ("ISO8859-5 (Cyrillic)"),
3039 build_string ("iso8859-5"),
3040 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3041 staticpro (&Vcharset_latin_iso8859_9);
3042 Vcharset_latin_iso8859_9 =
3043 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3044 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("Latin-5"),
3046 build_string ("ISO8859-9 (Latin-5)"),
3047 build_string ("ISO8859-9 (Latin-5)"),
3048 build_string ("iso8859-9"),
3049 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3050 staticpro (&Vcharset_japanese_jisx0208_1978);
3051 Vcharset_japanese_jisx0208_1978 =
3052 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3053 Qjapanese_jisx0208_1978, 94, 2,
3054 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3055 build_string ("JIS X0208:1978"),
3056 build_string ("JIS X0208:1978 (Japanese)"),
3058 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3059 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3060 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3061 staticpro (&Vcharset_chinese_gb2312);
3062 Vcharset_chinese_gb2312 =
3063 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3064 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3065 build_string ("GB2312"),
3066 build_string ("GB2312"),
3067 build_string ("GB2312 Chinese simplified"),
3068 build_string ("gb2312"),
3069 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3070 staticpro (&Vcharset_chinese_gb12345);
3071 Vcharset_chinese_gb12345 =
3072 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3073 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3074 build_string ("G1"),
3075 build_string ("GB 12345"),
3076 build_string ("GB 12345-1990"),
3077 build_string ("GB12345\\(\\.1990\\)?-0"),
3078 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3079 staticpro (&Vcharset_japanese_jisx0208);
3080 Vcharset_japanese_jisx0208 =
3081 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3082 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3083 build_string ("JISX0208"),
3084 build_string ("JIS X0208:1983 (Japanese)"),
3085 build_string ("JIS X0208:1983 Japanese Kanji"),
3086 build_string ("jisx0208\\.1983"),
3087 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3089 staticpro (&Vcharset_japanese_jisx0208_1990);
3090 Vcharset_japanese_jisx0208_1990 =
3091 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3092 Qjapanese_jisx0208_1990, 94, 2,
3093 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3094 build_string ("JISX0208-1990"),
3095 build_string ("JIS X0208:1990 (Japanese)"),
3096 build_string ("JIS X0208:1990 Japanese Kanji"),
3097 build_string ("jisx0208\\.1990"),
3099 MIN_CHAR_JIS_X0208_1990,
3100 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3101 Qnil, CONVERSION_94x94);
3103 staticpro (&Vcharset_korean_ksc5601);
3104 Vcharset_korean_ksc5601 =
3105 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3106 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3107 build_string ("KSC5601"),
3108 build_string ("KSC5601 (Korean)"),
3109 build_string ("KSC5601 Korean Hangul and Hanja"),
3110 build_string ("ksc5601"),
3111 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3112 staticpro (&Vcharset_japanese_jisx0212);
3113 Vcharset_japanese_jisx0212 =
3114 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3115 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3116 build_string ("JISX0212"),
3117 build_string ("JISX0212 (Japanese)"),
3118 build_string ("JISX0212 Japanese Supplement"),
3119 build_string ("jisx0212"),
3120 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3122 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3123 staticpro (&Vcharset_chinese_cns11643_1);
3124 Vcharset_chinese_cns11643_1 =
3125 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3126 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3127 build_string ("CNS11643-1"),
3128 build_string ("CNS11643-1 (Chinese traditional)"),
3130 ("CNS 11643 Plane 1 Chinese traditional"),
3131 build_string (CHINESE_CNS_PLANE_RE("1")),
3132 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3133 staticpro (&Vcharset_chinese_cns11643_2);
3134 Vcharset_chinese_cns11643_2 =
3135 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3136 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3137 build_string ("CNS11643-2"),
3138 build_string ("CNS11643-2 (Chinese traditional)"),
3140 ("CNS 11643 Plane 2 Chinese traditional"),
3141 build_string (CHINESE_CNS_PLANE_RE("2")),
3142 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3144 staticpro (&Vcharset_latin_tcvn5712);
3145 Vcharset_latin_tcvn5712 =
3146 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3147 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3148 build_string ("TCVN 5712"),
3149 build_string ("TCVN 5712 (VSCII-2)"),
/* NOTE(review): the text below says 1983 but the pattern on the
   following line says 1993 -- confirm the year.  */
3150 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3151 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3152 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3153 staticpro (&Vcharset_latin_viscii_lower);
3154 Vcharset_latin_viscii_lower =
3155 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3156 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3157 build_string ("VISCII lower"),
3158 build_string ("VISCII lower (Vietnamese)"),
3159 build_string ("VISCII lower (Vietnamese)"),
3160 build_string ("MULEVISCII-LOWER"),
3161 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3162 staticpro (&Vcharset_latin_viscii_upper);
3163 Vcharset_latin_viscii_upper =
3164 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3165 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3166 build_string ("VISCII upper"),
3167 build_string ("VISCII upper (Vietnamese)"),
3168 build_string ("VISCII upper (Vietnamese)"),
3169 build_string ("MULEVISCII-UPPER"),
3170 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3171 staticpro (&Vcharset_latin_viscii);
3172 Vcharset_latin_viscii =
3173 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3174 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3175 build_string ("VISCII"),
3176 build_string ("VISCII 1.1 (Vietnamese)"),
3177 build_string ("VISCII 1.1 (Vietnamese)"),
3178 build_string ("VISCII1\\.1"),
3179 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3180 staticpro (&Vcharset_chinese_big5);
3181 Vcharset_chinese_big5 =
3182 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3183 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3184 build_string ("Big5"),
3185 build_string ("Big5"),
3186 build_string ("Big5 Chinese traditional"),
3187 build_string ("big5-0"),
3189 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3190 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3192 staticpro (&Vcharset_chinese_big5_cdp);
3193 Vcharset_chinese_big5_cdp =
3194 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3195 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3196 build_string ("Big5-CDP"),
3197 build_string ("Big5 + CDP extension"),
3198 build_string ("Big5 with CDP extension"),
3199 build_string ("big5\\.cdp-0"),
3200 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3201 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3203 #define DEF_HANZIKU(n) \
3204 staticpro (&Vcharset_ideograph_hanziku_##n); \
3205 Vcharset_ideograph_hanziku_##n = \
3206 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3207 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3208 build_string ("HZK-"#n), \
3209 build_string ("HANZIKU-"#n), \
3210 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3212 ("hanziku-"#n"$"), \
3213 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3214 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3227 staticpro (&Vcharset_china3_jef);
3228 Vcharset_china3_jef =
3229 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
3230 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3231 build_string ("JC3"),
3232 build_string ("JEF + CHINA3"),
3233 build_string ("JEF + CHINA3 private characters"),
3234 build_string ("china3jef-0"),
3235 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
3236 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
3237 staticpro (&Vcharset_ideograph_cbeta);
3238 Vcharset_ideograph_cbeta =
3239 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3240 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3241 build_string ("CB"),
3242 build_string ("CBETA"),
3243 build_string ("CBETA private characters"),
3244 build_string ("cbeta-0"),
3245 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3246 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3247 staticpro (&Vcharset_ideograph_gt);
3248 Vcharset_ideograph_gt =
3249 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3250 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3251 build_string ("GT"),
3252 build_string ("GT"),
3253 build_string ("GT"),
3255 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3256 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
3257 #define DEF_GT_PJ(n) \
3258 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3259 Vcharset_ideograph_gt_pj_##n = \
3260 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3261 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3262 build_string ("GT-PJ-"#n), \
3263 build_string ("GT (pseudo JIS encoding) part "#n), \
3264 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3266 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3267 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3280 staticpro (&Vcharset_ideograph_daikanwa_2);
3281 Vcharset_ideograph_daikanwa_2 =
3282 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3283 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3284 build_string ("Daikanwa Rev."),
3285 build_string ("Morohashi's Daikanwa Rev."),
3287 ("Daikanwa dictionary (revised version)"),
3288 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3289 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3290 staticpro (&Vcharset_ideograph_daikanwa);
3291 Vcharset_ideograph_daikanwa =
3292 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3293 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3294 build_string ("Daikanwa"),
3295 build_string ("Morohashi's Daikanwa Rev.2"),
3297 ("Daikanwa dictionary (second revised version)"),
3298 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3299 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3300 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3302 staticpro (&Vcharset_ethiopic_ucs);
3303 Vcharset_ethiopic_ucs =
3304 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3305 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3306 build_string ("Ethiopic (UCS)"),
3307 build_string ("Ethiopic (UCS)"),
3308 build_string ("Ethiopic of UCS"),
3309 build_string ("Ethiopic-Unicode"),
3310 Qnil, 0x1200, 0x137F, 0, 0,
3311 Qnil, CONVERSION_IDENTICAL);
3313 staticpro (&Vcharset_chinese_big5_1);
3314 Vcharset_chinese_big5_1 =
3315 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3316 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3317 build_string ("Big5"),
3318 build_string ("Big5 (Level-1)"),
3320 ("Big5 Level-1 Chinese traditional"),
3321 build_string ("big5"),
3322 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3323 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3324 staticpro (&Vcharset_chinese_big5_2);
3325 Vcharset_chinese_big5_2 =
3326 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3327 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3328 build_string ("Big5"),
3329 build_string ("Big5 (Level-2)"),
3331 ("Big5 Level-2 Chinese traditional"),
3332 build_string ("big5"),
3333 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3334 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3336 #ifdef ENABLE_COMPOSITE_CHARS
3337 /* #### For simplicity, we put composite chars into a 96x96 charset.
3338 This is going to lead to problems because you can run out of
3339 room, esp. as we don't yet recycle numbers. */
3340 staticpro (&Vcharset_composite);
3341 Vcharset_composite =
3342 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3343 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3344 build_string ("Composite"),
3345 build_string ("Composite characters"),
3346 build_string ("Composite characters"),
3349 /* #### not dumped properly */
3350 composite_char_row_next = 32;
3351 composite_char_col_next = 32;
3353 Vcomposite_char_string2char_hash_table =
3354 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3355 Vcomposite_char_char2string_hash_table =
3356 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3357 staticpro (&Vcomposite_char_string2char_hash_table);
3358 staticpro (&Vcomposite_char_char2string_hash_table);
3359 #endif /* ENABLE_COMPOSITE_CHARS */