1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_ucs_big5;
76 Lisp_Object Vcharset_latin_viscii;
77 Lisp_Object Vcharset_latin_tcvn5712;
78 Lisp_Object Vcharset_latin_viscii_lower;
79 Lisp_Object Vcharset_latin_viscii_upper;
80 Lisp_Object Vcharset_jis_x0208;
81 Lisp_Object Vcharset_chinese_big5;
82 Lisp_Object Vcharset_ideograph_hanziku_1;
83 Lisp_Object Vcharset_ideograph_hanziku_2;
84 Lisp_Object Vcharset_ideograph_hanziku_3;
85 Lisp_Object Vcharset_ideograph_hanziku_4;
86 Lisp_Object Vcharset_ideograph_hanziku_5;
87 Lisp_Object Vcharset_ideograph_hanziku_6;
88 Lisp_Object Vcharset_ideograph_hanziku_7;
89 Lisp_Object Vcharset_ideograph_hanziku_8;
90 Lisp_Object Vcharset_ideograph_hanziku_9;
91 Lisp_Object Vcharset_ideograph_hanziku_10;
92 Lisp_Object Vcharset_ideograph_hanziku_11;
93 Lisp_Object Vcharset_ideograph_hanziku_12;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps strings representing
118 composed chars to their equivalent chars; one goes the other way. */
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
128 struct charset_lookup *chlook;
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Validate the contents of a decoding-table vector.
   V is the vector being checked; its length is compared against CCS_LEN,
   and every element is inspected in turn.  An element that is neither nil
   nor a character is handed back to this function with DIM reduced by one.
   NOTE(review): the dimension test that guards the recursive call and the
   concrete return values are not visible in this view -- confirm against
   the full source. */
179 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
181 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
/* A vector longer than CCS_LEN is treated specially (outcome not shown). */
185 if (XVECTOR_LENGTH (v) > ccs_len)
188 for (i = 0; i < XVECTOR_LENGTH (v); i++)
190 Lisp_Object c = XVECTOR_DATA(v)[i];
/* Only elements that are neither nil nor a character need further checks. */
192 if (!NILP (c) && !CHARP (c))
196 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Record VALUE as the code point of CHARACTER in the coded charset CCS and
   update the charset's decoding table accordingly.
   VALUE may be an integer code point or, in the obsolete representation,
   a list of bytes; the list form is folded into a single integer, eight
   bits per element, and VALUE is replaced by that integer.  Values that
   fit neither form signal "Invalid value for coded-charset". */
208 put_char_ccs_code_point (Lisp_Object character,
209 Lisp_Object ccs, Lisp_Object value)
/* The work below is skipped only for the `ucs' charset when VALUE already
   equals the character's own code.
   NOTE(review): one operand of this condition is elided here. */
211 if (!EQ (XCHARSET_NAME (ccs), Qucs)
213 || (XCHAR (character) != XINT (value)))
215 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
219 { /* obsolete representation: value must be a list of bytes */
220 Lisp_Object ret = Fcar (value);
224 signal_simple_error ("Invalid value for coded-charset", value);
225 code_point = XINT (ret);
226 if (XCHARSET_GRAPHIC (ccs) == 1)
234 signal_simple_error ("Invalid value for coded-charset",
238 signal_simple_error ("Invalid value for coded-charset",
241 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J as the new low-order octet. */
243 code_point = (code_point << 8) | j;
246 value = make_int (code_point);
248 else if (INTP (value))
250 code_point = XINT (value);
251 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the top bit of each of the four octets of the code point. */
253 code_point &= 0x7F7F7F7F;
254 value = make_int (code_point);
258 signal_simple_error ("Invalid value for coded-charset", value);
/* Fetch any code point previously stored for CHARACTER in CCS so that the
   stale decoding-table entry can be removed before the new one is put. */
262 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
265 decoding_table_remove_char (ccs, XINT (cpos));
268 decoding_table_put_char (ccs, code_point, character);
/* Remove the association between CHARACTER and the coded charset CCS from
   both of the charset's tables.
   NOTE(review): the return value and the guard around the XINT (cpos)
   call are not visible in this view. */
274 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
276 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
277 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
/* Decoding side: only touched when the decoding table is a vector. */
279 if (VECTORP (decoding_table))
281 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
285 decoding_table_remove_char (ccs, XINT (cpos));
/* Encoding side: store Qunbound for CHARACTER in the char table. */
288 if (CHAR_TABLEP (encoding_table))
290 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
298 int leading_code_private_11;
301 Lisp_Object Qcharsetp;
303 /* Qdoc_string, Qdimension, Qchars defined in general.c */
304 Lisp_Object Qregistry, Qfinal, Qgraphic;
305 Lisp_Object Qdirection;
306 Lisp_Object Qreverse_direction_charset;
307 Lisp_Object Qleading_byte;
308 Lisp_Object Qshort_name, Qlong_name;
310 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
311 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2;
328 Qjapanese_jisx0208_1978,
332 Qjapanese_jisx0208_1990,
351 Qvietnamese_viscii_lower,
352 Qvietnamese_viscii_upper,
355 /* Qchinese_big5_cdp, */
356 Qideograph_hanziku_1,
357 Qideograph_hanziku_2,
358 Qideograph_hanziku_3,
359 Qideograph_hanziku_4,
360 Qideograph_hanziku_5,
361 Qideograph_hanziku_6,
362 Qideograph_hanziku_7,
363 Qideograph_hanziku_8,
364 Qideograph_hanziku_9,
365 Qideograph_hanziku_10,
366 Qideograph_hanziku_11,
367 Qideograph_hanziku_12,
369 Qideograph_daikanwa_2,
389 Lisp_Object Ql2r, Qr2l;
391 Lisp_Object Vcharset_hash_table;
393 /* Composite characters are characters constructed by overstriking two
394 or more regular characters.
396 1) The old Mule implementation involves storing composite characters
397 in a buffer as a tag followed by all of the actual characters
398 used to make up the composite character. I think this is a bad
399 idea; it greatly complicates code that wants to handle strings
400 one character at a time because it has to deal with the possibility
401 of great big ungainly characters. It's much more reasonable to
402 simply store an index into a table of composite characters.
404 2) The current implementation only allows for 16,384 separate
405 composite characters over the lifetime of the XEmacs process.
406 This could become a potential problem if the user
407 edited lots of different files that use composite characters.
408 Due to FSF bogosity, increasing the number of allowable
409 composite characters under Mule would decrease the number
410 of possible faces that can exist. Mule already has shrunk
411 this to 2048, and further shrinkage would become uncomfortable.
412 No such problems exist in XEmacs.
414 Composite characters could be represented as 0x80 C1 C2 C3,
415 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
416 for slightly under 2^20 (one million) composite characters
417 over the XEmacs process lifetime, and you only need to
418 increase the size of a Mule character from 19 to 21 bits.
419 Or you could use 0x80 C1 C2 C3 C4, allowing for about
420 85 million (slightly over 2^26) composite characters. */
423 /************************************************************************/
424 /* Basic Emchar functions */
425 /************************************************************************/
427 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
428 string in STR. Returns the number of bytes stored.
429 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multi-byte representation of character C through the cursor P.
   Two encoders are visible below.  The first emits a UTF-8-style sequence:
   a lead byte tagged 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc (for two to six bytes)
   followed by continuation bytes of the form 0x80 | <6 bits>.  The second
   splits C with BREAKUP_CHAR and writes a leading-byte based sequence.
   NOTE(review): presumably the two encoders are alternatives selected by
   a preprocessor conditional that is not visible here -- confirm. */
433 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two-byte form: codes up to 0x7ff. */
448 else if ( c <= 0x7ff )
450 *p++ = (c >> 6) | 0xc0;
451 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form: codes up to 0xffff. */
453 else if ( c <= 0xffff )
455 *p++ = (c >> 12) | 0xe0;
456 *p++ = ((c >> 6) & 0x3f) | 0x80;
457 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form: codes up to 0x1fffff. */
459 else if ( c <= 0x1fffff )
461 *p++ = (c >> 18) | 0xf0;
462 *p++ = ((c >> 12) & 0x3f) | 0x80;
463 *p++ = ((c >> 6) & 0x3f) | 0x80;
464 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form: codes up to 0x3ffffff. */
466 else if ( c <= 0x3ffffff )
468 *p++ = (c >> 24) | 0xf8;
469 *p++ = ((c >> 18) & 0x3f) | 0x80;
470 *p++ = ((c >> 12) & 0x3f) | 0x80;
471 *p++ = ((c >> 6) & 0x3f) | 0x80;
472 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form: everything larger. */
476 *p++ = (c >> 30) | 0xfc;
477 *p++ = ((c >> 24) & 0x3f) | 0x80;
478 *p++ = ((c >> 18) & 0x3f) | 0x80;
479 *p++ = ((c >> 12) & 0x3f) | 0x80;
480 *p++ = ((c >> 6) & 0x3f) | 0x80;
481 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into its charset and position codes. */
484 BREAKUP_CHAR (c, charset, c1, c2);
485 lb = CHAR_LEADING_BYTE (c);
/* Private leading bytes are preceded by an extra prefix byte. */
486 if (LEADING_BYTE_PRIVATE_P (lb))
487 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
489 if (EQ (charset, Vcharset_control_1))
498 /* Return the first character from a Mule-encoded string in STR,
499 assuming it's non-ASCII. Do not call this directly.
500 Use the macro charptr_emchar() instead. */
/* Decode the first character stored at STR.
   Two decoders are visible below.  The first classifies the lead byte B
   against the thresholds 0xf8, 0xf0, 0xe0 and 0xc0 (tested in descending
   order) and then folds six bits from each following byte into CH.  The
   second interprets STR as a leading byte followed by position bytes.
   NOTE(review): presumably the two decoders are alternatives selected by
   a preprocessor conditional that is not visible here -- confirm. */
503 non_ascii_charptr_emchar (const Bufbyte *str)
516 else if ( b >= 0xf8 )
521 else if ( b >= 0xf0 )
526 else if ( b >= 0xe0 )
531 else if ( b >= 0xc0 )
/* Consume LEN continuation bytes, six payload bits each. */
541 for( ; len > 0; len-- )
544 ch = ( ch << 6 ) | ( b & 0x3f );
548 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the next byte minus 0x20 is the character code. */
551 if (i0 == LEADING_BYTE_CONTROL_1)
552 return (Emchar) (*++str - 0x20);
554 if (LEADING_BYTE_PREFIX_P (i0))
559 charset = CHARSET_BY_LEADING_BYTE (i0);
/* I2 stays 0 unless set for a dimension-2 charset (assignment elided). */
560 if (XCHARSET_DIMENSION (charset) == 2)
563 return MAKE_CHAR (charset, i1, i2);
567 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
568 Do not call this directly. Use the macro valid_char_p() instead. */
572 non_ascii_valid_char_p (Emchar ch)
576 /* Must have only lowest 19 bits set */
580 f1 = CHAR_FIELD1 (ch);
581 f2 = CHAR_FIELD2 (ch);
582 f3 = CHAR_FIELD3 (ch);
588 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
589 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
590 f2 > MAX_CHAR_FIELD2_PRIVATE)
595 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
596 f2 <= MAX_CHAR_FIELD2_PRIVATE))
600 /* NOTE: This takes advantage of the fact that
601 FIELD2_TO_OFFICIAL_LEADING_BYTE and
602 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
604 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
605 if (EQ (charset, Qnil))
607 return (XCHARSET_CHARS (charset) == 96);
613 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
614 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
615 f1 > MAX_CHAR_FIELD1_PRIVATE)
617 if (f2 < 0x20 || f3 < 0x20)
620 #ifdef ENABLE_COMPOSITE_CHARS
621 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
623 if (UNBOUNDP (Fgethash (make_int (ch),
624 Vcomposite_char_char2string_hash_table,
629 #endif /* ENABLE_COMPOSITE_CHARS */
631 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
632 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
635 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
637 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
640 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
642 if (EQ (charset, Qnil))
644 return (XCHARSET_CHARS (charset) == 96);
650 /************************************************************************/
651 /* Basic string functions */
652 /************************************************************************/
654 /* Copy the character pointed to by SRC into DST. Do not call this
655 directly. Use the macro charptr_copy_char() instead.
656 Return the number of bytes copied. */
/* Copy one multi-byte character from SRC to DST.  The byte count comes
   from REP_BYTES_BY_FIRST_BYTE applied to the first byte of SRC. */
659 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
661 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Step both pointers once per byte; the loop body is not visible here. */
663 for (i = bytes; i; i--, dst++, src++)
669 /************************************************************************/
670 /* streams of Emchars */
671 /************************************************************************/
673 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
674 The functions below are not meant to be called directly; use
675 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.
   CH is stored as the first byte of a local buffer; the remaining
   REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are pulled from the stream and
   the assembled buffer is decoded with charptr_emchar(). */
678 Lstream_get_emchar_1 (Lstream *stream, int ch)
680 Bufbyte str[MAX_EMCHAR_LEN];
681 Bufbyte *strptr = str;
684 str[0] = (Bufbyte) ch;
686 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
688 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc is asserted not to happen here. */
689 bufpos_checking_assert (c >= 0);
690 *++strptr = (Bufbyte) c;
692 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write the
   resulting bytes to STREAM; returns whatever Lstream_write() returns. */
696 Lstream_fput_emchar (Lstream *stream, Emchar ch)
698 Bufbyte str[MAX_EMCHAR_LEN];
699 Bytecount len = set_charptr_emchar (str, ch);
700 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and hand the
   resulting bytes to Lstream_unread() on STREAM. */
704 Lstream_funget_emchar (Lstream *stream, Emchar ch)
706 Bufbyte str[MAX_EMCHAR_LEN];
707 Bytecount len = set_charptr_emchar (str, ch);
708 Lstream_unread (stream, str, len);
712 /************************************************************************/
714 /************************************************************************/
/* Marker for charset objects: calls mark_object() on the Lisp-valued
   fields of the charset OBJ listed below.
   NOTE(review): the return statement is not visible in this view. */
717 mark_charset (Lisp_Object obj)
719 Lisp_Charset *cs = XCHARSET (obj);
721 mark_object (cs->short_name);
722 mark_object (cs->long_name);
723 mark_object (cs->doc_string);
724 mark_object (cs->registry);
725 mark_object (cs->ccl_program);
727 mark_object (cs->decoding_table);
728 mark_object (cs->mother);
/* Print the charset OBJ to PRINTCHARFUN in the form
   #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>.
   An error mentioning "printing unreadable object" is also raised from
   here; NOTE(review): the condition guarding it is not visible in this
   view. */
734 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
736 Lisp_Charset *cs = XCHARSET (obj);
740 error ("printing unreadable object #<charset %s 0x%x>",
741 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
744 write_c_string ("#<charset ", printcharfun);
745 print_internal (CHARSET_NAME (cs), printcharfun, 0);
746 write_c_string (" ", printcharfun);
747 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
748 write_c_string (" ", printcharfun);
749 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
750 write_c_string (" ", printcharfun);
751 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Numeric attributes are formatted into BUF before being written out.
   NOTE(review): BUF's declaration and size are not visible here. */
752 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
754 CHARSET_DIMENSION (cs),
755 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
756 CHARSET_COLUMNS (cs),
757 CHARSET_GRAPHIC (cs),
759 write_c_string (buf, printcharfun);
760 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Close the printed form with the object's uid from the record header. */
761 sprintf (buf, " 0x%x>", cs->header.uid);
762 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry, located by
   offsetof(), for each field listed below.
   NOTE(review): the table's terminator and closing brace are not visible
   in this view. */
765 static const struct lrecord_description charset_description[] = {
766 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
767 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
768 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
769 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
770 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
771 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
772 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
774 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
775 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
780 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
781 mark_charset, print_charset, 0, 0, 0,
785 /* Make a new charset. */
786 /* #### SJT Should generic properties be allowed? */
788 make_charset (Charset_ID id, Lisp_Object name,
789 unsigned short chars, unsigned char dimension,
790 unsigned char columns, unsigned char graphic,
791 Bufbyte final, unsigned char direction, Lisp_Object short_name,
792 Lisp_Object long_name, Lisp_Object doc,
794 Lisp_Object decoding_table,
795 Emchar min_code, Emchar max_code,
796 Emchar code_offset, unsigned char byte_offset,
797 Lisp_Object mother, unsigned char conversion)
800 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
804 XSETCHARSET (obj, cs);
806 CHARSET_ID (cs) = id;
807 CHARSET_NAME (cs) = name;
808 CHARSET_SHORT_NAME (cs) = short_name;
809 CHARSET_LONG_NAME (cs) = long_name;
810 CHARSET_CHARS (cs) = chars;
811 CHARSET_DIMENSION (cs) = dimension;
812 CHARSET_DIRECTION (cs) = direction;
813 CHARSET_COLUMNS (cs) = columns;
814 CHARSET_GRAPHIC (cs) = graphic;
815 CHARSET_FINAL (cs) = final;
816 CHARSET_DOC_STRING (cs) = doc;
817 CHARSET_REGISTRY (cs) = reg;
818 CHARSET_CCL_PROGRAM (cs) = Qnil;
819 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
821 CHARSET_DECODING_TABLE(cs) = Qunbound;
822 CHARSET_MIN_CODE (cs) = min_code;
823 CHARSET_MAX_CODE (cs) = max_code;
824 CHARSET_CODE_OFFSET (cs) = code_offset;
825 CHARSET_BYTE_OFFSET (cs) = byte_offset;
826 CHARSET_MOTHER (cs) = mother;
827 CHARSET_CONVERSION (cs) = conversion;
831 if (id == LEADING_BYTE_ASCII)
832 CHARSET_REP_BYTES (cs) = 1;
834 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
836 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
841 /* Some charsets do not have final characters. This includes
842 ASCII, Control-1, Composite, and the two faux private charsets. */
844 unsigned char iso2022_type
845 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
847 if (code_offset == 0)
849 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
850 chlook->charset_by_attributes[iso2022_type][final] = obj;
854 (chlook->charset_by_attributes[iso2022_type][final][direction]));
855 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
859 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
860 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
862 /* Some charsets are "faux" and don't have names or really exist at
863 all except in the leading-byte table. */
865 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte, using the allocation
   counters kept in chlook.  Each visible branch compares a counter with
   its upper limit and otherwise takes the counter's current value and
   post-increments it.  The message "No more character sets free for this
   dimension" is reported together with DIMENSION.
   NOTE(review): which branch applies to which DIMENSION, and the exact
   condition that triggers the error, are not visible in this view. */
870 get_unallocated_leading_byte (int dimension)
875 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
878 lb = chlook->next_allocated_leading_byte++;
882 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
885 lb = chlook->next_allocated_1_byte_leading_byte++;
889 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
892 lb = chlook->next_allocated_2_byte_leading_byte++;
898 ("No more character sets free for this dimension",
899 make_int (dimension));
905 /* Number of Big5 characters which have the same code in 1st byte.
   The expression evaluates to (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63
   = 157. */
907 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Look up the character that CODE_POINT denotes in the coded charset CCS,
   using only explicitly defined mappings: first the charset's decoding
   table, then -- when the charset has a mother charset -- the mother,
   after converting the code point according to XCHARSET_CONVERSION.
   NOTE(review): the value returned when nothing is found is not visible
   in this view. */
910 decode_defined_char (Lisp_Object ccs, int code_point)
912 int dim = XCHARSET_DIMENSION (ccs);
913 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
/* Descend the table one octet of CODE_POINT at a time. */
921 = get_ccs_octet_table (decoding_table, ccs,
922 (code_point >> (dim * 8)) & 255);
924 if (CHARP (decoding_table))
925 return XCHAR (decoding_table);
/* Qunloaded marks an entry that may still be loadable on demand. */
927 if (EQ (decoding_table, Qunloaded))
929 char_id = load_char_decoding_entry_maybe (ccs, code_point);
934 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
936 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
938 if ( EQ (mother, Vcharset_ucs) )
939 return DECODE_CHAR (mother, code_point);
941 return decode_defined_char (mother, code_point);
/* BIG5_1: turn the 94x94 position into a linear index, then split it
   into a lead byte (from 0xA1) and a trail byte.  Trail offsets below
   0x3F land at 0x40 and up; the rest are shifted by 0x62. */
943 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
946 = (((code_point >> 8) & 0x7F) - 33) * 94
947 + (( code_point & 0x7F) - 33);
948 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
949 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
951 b2 += b2 < 0x3F ? 0x40 : 0x62;
952 return decode_defined_char (mother, (b1 << 8) | b2);
/* BIG5_2: same scheme, with the index advanced by the rows between lead
   bytes 0xA1 and 0xC9. */
954 else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
957 = (((code_point >> 8) & 0x7F) - 33) * 94
958 + (( code_point & 0x7F) - 33)
959 + BIG5_SAME_ROW * (0xC9 - 0xA1);
960 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
961 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
963 b2 += b2 < 0x3F ? 0x40 : 0x62;
964 return decode_defined_char (mother, (b1 << 8) | b2);
/* Compute the character for CODE_POINT in CHARSET arithmetically, from the
   charset's attributes rather than from a decoding table.  Three cases
   are visible: a charset with a mother charset (convert the code point
   and recurse on the mother), a charset with its own MIN/MAX code range,
   and a charset identified by an ISO 2022 final byte of '0' or above.
   NOTE(review): several branch bodies and the fall-through return value
   are not visible in this view. */
971 decode_builtin_char (Lisp_Object charset, int code_point)
973 Lisp_Object mother = XCHARSET_MOTHER (charset);
976 if ( XCHARSET_MAX_CODE (charset) > 0 )
978 if ( CHARSETP (mother) )
980 int code = code_point;
/* 94x60: rows are accepted in two bands of 30 and renumbered onto a
   contiguous range; the cell is rebased from 33. */
982 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
984 int row = code_point >> 8;
985 int cell = code_point & 255;
989 else if (row < 16 + 32 + 30)
990 code = (row - (16 + 32)) * 94 + cell - 33;
991 else if (row < 18 + 32 + 30)
993 else if (row < 18 + 32 + 60)
994 code = (row - (18 + 32)) * 94 + cell - 33;
/* 94x94x60: as above, with a plane octet contributing 94 * 60 codes. */
996 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
998 int plane = code_point >> 16;
999 int row = (code_point >> 8) & 255;
1000 int cell = code_point & 255;
1004 else if (row < 16 + 32 + 30)
1006 = (plane - 33) * 94 * 60
1007 + (row - (16 + 32)) * 94
1009 else if (row < 18 + 32 + 30)
1011 else if (row < 18 + 32 + 60)
1013 = (plane - 33) * 94 * 60
1014 + (row - (18 + 32)) * 94
/* BIG5_1 / BIG5_2: map the 94x94 position to a two-byte Big5 code, with
   lead byte from 0xA1 and trail byte in 0x40.. or 0xA1.. . */
1017 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1020 = (((code_point >> 8) & 0x7F) - 33) * 94
1021 + (( code_point & 0x7F) - 33);
1022 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1023 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1025 b2 += b2 < 0x3F ? 0x40 : 0x62;
1026 code = (b1 << 8) | b2;
1028 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1031 = (((code_point >> 8) & 0x7F) - 33) * 94
1032 + (( code_point & 0x7F) - 33)
1033 + BIG5_SAME_ROW * (0xC9 - 0xA1);
1034 unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
1035 unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
1037 b2 += b2 < 0x3F ? 0x40 : 0x62;
1038 code = (b1 << 8) | b2;
/* Delegate to the mother charset with the converted, offset code. */
1041 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
/* No mother: linearise the code point using the byte offset and the
   number of characters per dimension, then add the code offset. */
1046 = (XCHARSET_DIMENSION (charset) == 1
1048 code_point - XCHARSET_BYTE_OFFSET (charset)
1050 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1051 * XCHARSET_CHARS (charset)
1052 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1053 + XCHARSET_CODE_OFFSET (charset);
/* Results outside the charset's [MIN_CODE, MAX_CODE] range are rejected. */
1054 if ((cid < XCHARSET_MIN_CODE (charset))
1055 || (XCHARSET_MAX_CODE (charset) < cid))
/* Charsets with a final byte: index into the block reserved for that
   final, rebasing position octets from 33 (94-sets) or 32 (96-sets). */
1060 else if ((final = XCHARSET_FINAL (charset)) >= '0')
1062 if (XCHARSET_DIMENSION (charset) == 1)
1064 switch (XCHARSET_CHARS (charset))
1068 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1071 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1079 switch (XCHARSET_CHARS (charset))
1082 return MIN_CHAR_94x94
1083 + (final - '0') * 94 * 94
1084 + (((code_point >> 8) & 0x7F) - 33) * 94
1085 + ((code_point & 0x7F) - 33);
1087 return MIN_CHAR_96x96
1088 + (final - '0') * 96 * 96
1089 + (((code_point >> 8) & 0x7F) - 32) * 96
1090 + ((code_point & 0x7F) - 32);
/* Compute the code point of character CH in CHARSET.
   The charset's encoding table is consulted first.  Otherwise a code is
   obtained from the mother charset or from CH itself and converted into
   position octets according to XCHARSET_CONVERSION; failing that,
   charsets identified by a final byte are handled arithmetically.
   NOTE(review): several branch bodies, the exact role of DEFINED_ONLY
   and the failure return value are not visible in this view. */
1102 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
1104 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
/* Explicit mapping: an integer found in the encoding table. */
1107 if ( CHAR_TABLEP (encoding_table)
1108 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1113 Lisp_Object mother = XCHARSET_MOTHER (charset);
1114 int min = XCHARSET_MIN_CODE (charset);
1115 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, ask it for the code; charsets that have a
   final byte force defined_only = 1 for that query. */
1118 if ( CHARSETP (mother) )
1120 if (XCHARSET_FINAL (charset) >= '0')
1121 code = charset_code_point (mother, ch, 1);
1123 code = charset_code_point (mother, ch, defined_only);
1125 else if (defined_only)
1127 else if ( ((max == 0) && CHARSETP (mother)
1128 && (XCHARSET_FINAL (charset) == 0))
1129 || ((min <= ch) && (ch <= max)) )
1131 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1132 || ((min <= code) && (code <= max)) )
/* D is the zero-based index of the code within this charset. */
1134 int d = code - XCHARSET_CODE_OFFSET (charset);
1136 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1138 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1140 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1142 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1145 int cell = d % 94 + 33;
1151 return (row << 8) | cell;
/* BIG5_1 / BIG5_2: treat D as a two-byte Big5 code, linearise it, and
   re-express the index as a 94x94 position. */
1153 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
1155 int B1 = d >> 8, B2 = d & 0xFF;
1157 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1158 - (B2 < 0x7F ? 0x40 : 0x62);
1162 return ((I / 94 + 33) << 8) | (I % 94 + 33);
1165 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
1167 int B1 = d >> 8, B2 = d & 0xFF;
1169 = (B1 - 0xA1) * BIG5_SAME_ROW + B2
1170 - (B2 < 0x7F ? 0x40 : 0x62);
/* Rebase the index so that lead byte 0xC9 corresponds to zero. */
1174 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1175 return ((I / 94 + 33) << 8) | (I % 94 + 33);
/* Plain N-dimensional sets: one octet per dimension, rebased to 33 for
   94-sets and 32 for 96-sets. */
1178 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1179 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1180 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1181 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1182 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1184 int plane = d / (94 * 60) + 33;
1185 int row = (d % (94 * 60)) / 94;
1186 int cell = d % 94 + 33;
1192 return (plane << 16) | (row << 8) | cell;
1194 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1196 ( (d / (94 * 94) + 33) << 16)
1197 | ((d / 94 % 94 + 33) << 8)
1199 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1201 ( (d / (96 * 96) + 32) << 16)
1202 | ((d / 96 % 96 + 32) << 8)
1204 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1206 ( (d / (94 * 94 * 94) + 33) << 24)
1207 | ((d / (94 * 94) % 94 + 33) << 16)
1208 | ((d / 94 % 94 + 33) << 8)
1210 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1212 ( (d / (96 * 96 * 96) + 32) << 24)
1213 | ((d / (96 * 96) % 96 + 32) << 16)
1214 | ((d / 96 % 96 + 32) << 8)
/* Unrecognised conversion types are reported on standard output. */
1218 printf ("Unknown CCS-conversion %d is specified!",
1219 XCHARSET_CONVERSION (charset));
/* Charsets with a final byte: derive the position from CH's offset
   inside the block reserved for that final. */
1223 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1224 ( XCHARSET_MIN_CODE (charset) == 0 )
1226 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1227 (XCHARSET_CODE_OFFSET (charset)
1228 == XCHARSET_MIN_CODE (charset))
1233 if (XCHARSET_DIMENSION (charset) == 1)
1235 if (XCHARSET_CHARS (charset) == 94)
1237 if (((d = ch - (MIN_CHAR_94
1238 + (XCHARSET_FINAL (charset) - '0') * 94))
1243 else if (XCHARSET_CHARS (charset) == 96)
1245 if (((d = ch - (MIN_CHAR_96
1246 + (XCHARSET_FINAL (charset) - '0') * 96))
1254 else if (XCHARSET_DIMENSION (charset) == 2)
1256 if (XCHARSET_CHARS (charset) == 94)
1258 if (((d = ch - (MIN_CHAR_94x94
1260 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1263 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1265 else if (XCHARSET_CHARS (charset) == 96)
1267 if (((d = ch - (MIN_CHAR_96x96
1269 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1272 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a built-in charset for character C by testing C against a series
   of code ranges.  The chosen charset is stored through CHARSET, and the
   visible return statements give C's code within it.  Ranges with no
   more specific charset store Vcharset_ucs.
   NOTE(review): some conditions and return statements are not visible
   in this view. */
1283 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1285 if (c <= MAX_CHAR_BASIC_LATIN)
1287 *charset = Vcharset_ascii;
1292 *charset = Vcharset_control_1;
1297 *charset = Vcharset_latin_iso8859_1;
1301 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1303 *charset = Vcharset_hebrew_iso8859_8;
1304 return c - MIN_CHAR_HEBREW + 0x20;
1307 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1309 *charset = Vcharset_thai_tis620;
1310 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns the result of list2 (), whereas the
   other branches return integers and store the charset through CHARSET;
   presumably it sits under a disabled preprocessor conditional that is
   not visible here -- confirm. */
1313 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1314 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1316 return list2 (Vcharset_katakana_jisx0201,
1317 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
/* UCS planes: BMP, SMP and SIP are tried in ascending order. */
1320 else if (c <= MAX_CHAR_BMP)
1322 *charset = Vcharset_ucs_bmp;
1325 else if (c <= MAX_CHAR_SMP)
1327 *charset = Vcharset_ucs_smp;
1328 return c - MIN_CHAR_SMP;
1330 else if (c <= MAX_CHAR_SIP)
1332 *charset = Vcharset_ucs_sip;
1333 return c - MIN_CHAR_SIP;
1335 else if (c < MIN_CHAR_DAIKANWA)
1337 *charset = Vcharset_ucs;
1340 else if (c <= MAX_CHAR_DAIKANWA)
1342 *charset = Vcharset_ideograph_daikanwa;
1343 return c - MIN_CHAR_DAIKANWA;
1345 else if (c < MIN_CHAR_94)
1347 *charset = Vcharset_ucs;
/* Blocks reserved for ISO 2022 style sets: the final byte is recovered
   from C's offset within the block and the charset is looked up by its
   attributes.  A nil lookup falls back to Vcharset_ucs. */
1350 else if (c <= MAX_CHAR_94)
1352 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1353 ((c - MIN_CHAR_94) / 94) + '0',
1354 CHARSET_LEFT_TO_RIGHT);
1355 if (!NILP (*charset))
1356 return ((c - MIN_CHAR_94) % 94) + 33;
1359 *charset = Vcharset_ucs;
1363 else if (c <= MAX_CHAR_96)
1365 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1366 ((c - MIN_CHAR_96) / 96) + '0',
1367 CHARSET_LEFT_TO_RIGHT);
1368 if (!NILP (*charset))
1369 return ((c - MIN_CHAR_96) % 96) + 32;
1372 *charset = Vcharset_ucs;
1376 else if (c <= MAX_CHAR_94x94)
1379 = CHARSET_BY_ATTRIBUTES (94, 2,
1380 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1381 CHARSET_LEFT_TO_RIGHT);
1382 if (!NILP (*charset))
1383 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1384 | (((c - MIN_CHAR_94x94) % 94) + 33);
1387 *charset = Vcharset_ucs;
1391 else if (c <= MAX_CHAR_96x96)
1394 = CHARSET_BY_ATTRIBUTES (96, 2,
1395 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1396 CHARSET_LEFT_TO_RIGHT);
1397 if (!NILP (*charset))
1398 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1399 | (((c - MIN_CHAR_96x96) % 96) + 32);
1402 *charset = Vcharset_ucs;
/* Anything beyond the last reserved block is treated as plain UCS. */
1408 *charset = Vcharset_ucs;
1413 Lisp_Object Vdefault_coded_charset_priority_list;
1417 /************************************************************************/
1418 /* Basic charset Lisp functions */
1419 /************************************************************************/
1421 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1422 Return non-nil if OBJECT is a charset.
1426 return CHARSETP (object) ? Qt : Qnil;
1429 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1430 Retrieve the charset of the given name.
1431 If CHARSET-OR-NAME is a charset object, it is simply returned.
1432 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1433 nil is returned. Otherwise the associated charset object is returned.
1437 if (CHARSETP (charset_or_name))
1438 return charset_or_name;
1440 CHECK_SYMBOL (charset_or_name);
1441 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Strict counterpart of Ffind_charset: the lookup is delegated to
   Ffind_charset and "No such charset" is signalled with NAME as the
   offending datum.
   NOTE(review): the guard in front of the signal and the final return are
   not visible in this listing -- confirm against the full file. */
1444 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1445 Retrieve the charset of the given name.
1446 Same as `find-charset' except an error is signalled if there is no such
1447 charset instead of returning nil.
1451 Lisp_Object charset = Ffind_charset (name);
1454 signal_simple_error ("No such charset", name);
1458 /* We store the charsets in hash tables with the names as the key and the
1459 actual charset object as the value. Occasionally we need to use them
1460 in a list format. These routines provide us with that. */
1461 struct charset_list_closure
/* Address of the list head that add_charset_to_list_mapper conses onto. */
1463 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.  It pushes KEY (the
   symbol the entry is stored under, rather than XCHARSET_NAME (value))
   onto the list addressed through the closure; VALUE is not read.
   NOTE(review): the return type and return statement are not visible in
   this listing. */
1467 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1468 void *charset_list_closure)
1470 /* This function can GC */
1471 struct charset_list_closure *chcl =
1472 (struct charset_list_closure*) charset_list_closure;
1473 Lisp_Object *charset_list = chcl->charset_list;
1475 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Builds a list of every key stored in Vcharset_hash_table by mapping
   add_charset_to_list_mapper over the table.  The accumulator is
   GC-protected because the mapper allocates cons cells while the list is
   still only referenced from this C frame. */
1479 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1480 Return a list of the names of all defined charsets.
1484 Lisp_Object charset_list = Qnil;
1485 struct gcpro gcpro1;
1486 struct charset_list_closure charset_list_closure;
1488 GCPRO1 (charset_list);
1489 charset_list_closure.charset_list = &charset_list;
1490 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1491 &charset_list_closure);
1494 return charset_list;
/* Resolves CHARSET through Fget_charset (which signals for an unknown
   charset) and returns the XCHARSET_NAME slot of the result. */
1497 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1498 Return the name of charset CHARSET.
1502 return XCHARSET_NAME (Fget_charset (charset));
1505 /* #### SJT Should generic properties be allowed? */
/* Walks the property list PROPS, validating every recognized key, and
   builds a new charset with make_charset.  Signals when NAME already names
   a charset, when a property value is invalid or a key is unrecognized,
   when 'final is missing, or when a charset with the same
   DIMENSION/CHARS/FINAL already exists in either direction.
   Defaults applied before construction: doc string and registry become "",
   the short name becomes the symbol name of NAME, and the long name
   becomes the doc string.
   NOTE(review): several guards and preprocessor conditionals of this
   function are not visible in this listing. */
1506 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1507 Define a new character set.
1508 This function is for use with Mule support.
1509 NAME is a symbol, the name by which the character set is normally referred.
1510 DOC-STRING is a string describing the character set.
1511 PROPS is a property list, describing the specific nature of the
1512 character set. Recognized properties are:
1514 'short-name Short version of the charset name (ex: Latin-1)
1515 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1516 'registry A regular expression matching the font registry field for
1518 'dimension Number of octets used to index a character in this charset.
1519 Either 1 or 2. Defaults to 1.
1520 If UTF-2000 feature is enabled, 3 or 4 are also available.
1521 'columns Number of columns used to display a character in this charset.
1522 Only used in TTY mode. (Under X, the actual width of a
1523 character can be derived from the font used to display the
1524 characters.) If unspecified, defaults to the dimension
1525 (this is almost always the correct value).
1526 'chars Number of characters in each dimension (94 or 96).
1527 Defaults to 94. Note that if the dimension is 2, the
1528 character set thus described is 94x94 or 96x96.
1529 If UTF-2000 feature is enabled, 128 or 256 are also available.
1530 'final Final byte of ISO 2022 escape sequence. Must be
1531 supplied. Each combination of (DIMENSION, CHARS) defines a
1532 separate namespace for final bytes. Note that ISO
1533 2022 restricts the final byte to the range
1534 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1535 dimension == 2. Note also that final bytes in the range
1536 0x30 - 0x3F are reserved for user-defined (not official)
1538 'graphic 0 (use left half of font on output) or 1 (use right half
1539 of font on output). Defaults to 0. For example, for
1540 a font whose registry is ISO8859-1, the left half
1541 (octets 0x20 - 0x7F) is the `ascii' character set, while
1542 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1543 character set. With 'graphic set to 0, the octets
1544 will have their high bit cleared; with it set to 1,
1545 the octets will have their high bit set.
1546 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1548 'ccl-program A compiled CCL program used to convert a character in
1549 this charset into an index into the font. This is in
1550 addition to the 'graphic property. The CCL program
1551 is passed the octets of the character, with the high
1552 bit cleared and set depending upon whether the value
1553 of the 'graphic property is 0 or 1.
1554 'mother [UTF-2000 only] Base coded-charset.
1555 'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
1556 'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
1557 'code-offset [UTF-2000 only] Offset for a code-point of a base
1559 'conversion [UTF-2000 only] Conversion for a code-point of a base
1560 coded-charset (94x60, 94x94x60, big5-1 or big5-2).
1562 (name, doc_string, props))
1564 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1565 int direction = CHARSET_LEFT_TO_RIGHT;
1566 Lisp_Object registry = Qnil;
1567 Lisp_Object charset;
1568 Lisp_Object ccl_program = Qnil;
1569 Lisp_Object short_name = Qnil, long_name = Qnil;
1570 Lisp_Object mother = Qnil;
1571 int min_code = 0, max_code = 0, code_offset = 0;
1572 int byte_offset = -1;
1575 CHECK_SYMBOL (name);
1576 if (!NILP (doc_string))
1577 CHECK_STRING (doc_string);
1579 charset = Ffind_charset (name);
1580 if (!NILP (charset))
1581 signal_simple_error ("Cannot redefine existing charset", name);
1584 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1586 if (EQ (keyword, Qshort_name))
1588 CHECK_STRING (value);
1592 else if (EQ (keyword, Qlong_name))
1594 CHECK_STRING (value);
1598 else if (EQ (keyword, Qdimension))
1601 dimension = XINT (value);
1602 if (dimension < 1 ||
1609 signal_simple_error ("Invalid value for 'dimension", value);
1612 else if (EQ (keyword, Qchars))
1615 chars = XINT (value);
1616 if (chars != 94 && chars != 96
1618 && chars != 128 && chars != 256
1621 signal_simple_error ("Invalid value for 'chars", value);
1624 else if (EQ (keyword, Qcolumns))
1627 columns = XINT (value);
1628 if (columns != 1 && columns != 2)
1629 signal_simple_error ("Invalid value for 'columns", value);
1632 else if (EQ (keyword, Qgraphic))
1635 graphic = XINT (value);
1643 signal_simple_error ("Invalid value for 'graphic", value);
1646 else if (EQ (keyword, Qregistry))
1648 CHECK_STRING (value);
1652 else if (EQ (keyword, Qdirection))
1654 if (EQ (value, Ql2r))
1655 direction = CHARSET_LEFT_TO_RIGHT;
1656 else if (EQ (value, Qr2l))
1657 direction = CHARSET_RIGHT_TO_LEFT;
1659 signal_simple_error ("Invalid value for 'direction", value);
1662 else if (EQ (keyword, Qfinal))
1664 CHECK_CHAR_COERCE_INT (value);
1665 final = XCHAR (value);
1666 if (final < '0' || final > '~')
1667 signal_simple_error ("Invalid value for 'final", value);
1671 else if (EQ (keyword, Qmother))
1673 mother = Fget_charset (value);
1676 else if (EQ (keyword, Qmin_code))
1679 min_code = XUINT (value);
1682 else if (EQ (keyword, Qmax_code))
1685 max_code = XUINT (value);
1688 else if (EQ (keyword, Qcode_offset))
1691 code_offset = XUINT (value);
1694 else if (EQ (keyword, Qconversion))
1696 if (EQ (value, Q94x60))
1697 conversion = CONVERSION_94x60;
1698 else if (EQ (value, Q94x94x60))
1699 conversion = CONVERSION_94x94x60;
1700 else if (EQ (value, Qbig5_1))
1701 conversion = CONVERSION_BIG5_1;
1702 else if (EQ (value, Qbig5_2))
1703 conversion = CONVERSION_BIG5_2;
1705 signal_simple_error ("Unrecognized conversion", value);
1709 else if (EQ (keyword, Qccl_program))
1711 struct ccl_program test_ccl;
1713 if (setup_ccl_program (&test_ccl, value) < 0)
1714 signal_simple_error ("Invalid value for 'ccl-program", value);
1715 ccl_program = value;
1719 signal_simple_error ("Unrecognized property", keyword);
1725 error ("'final must be specified");
1727 if (dimension == 2 && final > 0x5F)
1729 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1732 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1733 CHARSET_LEFT_TO_RIGHT)) ||
1734 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1735 CHARSET_RIGHT_TO_LEFT)))
1737 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1739 id = get_unallocated_leading_byte (dimension);
1741 if (NILP (doc_string))
1742 doc_string = build_string ("");
1744 if (NILP (registry))
1745 registry = build_string ("");
1747 if (NILP (short_name))
1748 XSETSTRING (short_name, XSYMBOL (name)->name);
1750 if (NILP (long_name))
1751 long_name = doc_string;
1754 columns = dimension;
1756 if (byte_offset < 0)
1760 else if (chars == 96)
1766 charset = make_charset (id, name, chars, dimension, columns, graphic,
1767 final, direction, short_name, long_name,
1768 doc_string, registry,
1769 Qnil, min_code, max_code, code_offset, byte_offset,
1770 mother, conversion);
1771 if (!NILP (ccl_program))
1772 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Clones the attributes of CHARSET into a new charset named NEW-NAME with
   the opposite direction, using an id obtained from
   get_unallocated_leading_byte, and cross-links the two charsets through
   their reverse-direction-charset slots.  Signals when CHARSET already has
   a reverse-direction partner or when NEW-NAME already names a charset.
   NOTE(review): two alternative tails for the make_charset argument list
   are visible; the preprocessor condition choosing between them is not. */
1776 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1778 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1779 NEW-NAME is the name of the new charset. Return the new charset.
1781 (charset, new_name))
1783 Lisp_Object new_charset = Qnil;
1784 int id, chars, dimension, columns, graphic, final;
1786 Lisp_Object registry, doc_string, short_name, long_name;
1789 charset = Fget_charset (charset);
1790 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1791 signal_simple_error ("Charset already has reverse-direction charset",
1794 CHECK_SYMBOL (new_name);
1795 if (!NILP (Ffind_charset (new_name)))
1796 signal_simple_error ("Cannot redefine existing charset", new_name);
1798 cs = XCHARSET (charset);
1800 chars = CHARSET_CHARS (cs);
1801 dimension = CHARSET_DIMENSION (cs);
1802 columns = CHARSET_COLUMNS (cs);
1803 id = get_unallocated_leading_byte (dimension);
1805 graphic = CHARSET_GRAPHIC (cs);
1806 final = CHARSET_FINAL (cs);
/* Flip the direction: start from right-to-left and switch to left-to-right
   when the source charset is itself right-to-left. */
1807 direction = CHARSET_RIGHT_TO_LEFT;
1808 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1809 direction = CHARSET_LEFT_TO_RIGHT;
1810 doc_string = CHARSET_DOC_STRING (cs);
1811 short_name = CHARSET_SHORT_NAME (cs);
1812 long_name = CHARSET_LONG_NAME (cs);
1813 registry = CHARSET_REGISTRY (cs);
1815 new_charset = make_charset (id, new_name, chars, dimension, columns,
1816 graphic, final, direction, short_name, long_name,
1817 doc_string, registry,
1819 CHARSET_DECODING_TABLE(cs),
1820 CHARSET_MIN_CODE(cs),
1821 CHARSET_MAX_CODE(cs),
1822 CHARSET_CODE_OFFSET(cs),
1823 CHARSET_BYTE_OFFSET(cs),
1825 CHARSET_CONVERSION (cs)
1827 Qnil, 0, 0, 0, 0, Qnil, 0
1831 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1832 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Stores ALIAS (which must be a symbol) as an additional key for the
   charset object in Vcharset_hash_table.  CHARSET is resolved with
   Fget_charset first, so an unknown charset signals before the table is
   modified.  The value of Fputhash is returned. */
1837 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1838 Define symbol ALIAS as an alias for CHARSET.
1842 CHECK_SYMBOL (alias);
1843 charset = Fget_charset (charset);
1844 return Fputhash (alias, charset, Vcharset_hash_table);
1847 /* #### Reverse direction charsets not yet implemented. */
/* Accessor for the reverse-direction-charset slot of CHARSET, which is
   resolved through Fget_charset.
   NOTE(review): no DEFSUBR for this function is visible in
   syms_of_mule_charset, so it may be compiled out -- confirm. */
1849 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1851 Return the reverse-direction charset parallel to CHARSET, if any.
1852 This is the charset with the same properties (in particular, the same
1853 dimension, number of characters per dimension, and final byte) as
1854 CHARSET but whose characters are displayed in the opposite direction.
1858 charset = Fget_charset (charset);
1859 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Validates the arguments and looks the charset up with
   CHARSET_BY_ATTRIBUTES.  Accepted values: DIMENSION 1 or 2; CHARS 94 or
   96; FINAL in '0'..'~' and additionally no greater than 0x5F when
   DIMENSION is 2; DIRECTION 'l2r, 'r2l or nil.  The visible return
   statement yields the charset's name (XCHARSET_NAME), not the charset
   object.
   NOTE(review): the guards around the two-direction lookup and around the
   final return are not visible in this listing. */
1863 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1864 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1865 If DIRECTION is omitted, both directions will be checked (left-to-right
1866 will be returned if character sets exist for both directions).
1868 (dimension, chars, final, direction))
1870 int dm, ch, fi, di = -1;
1871 Lisp_Object obj = Qnil;
1873 CHECK_INT (dimension);
1874 dm = XINT (dimension);
1875 if (dm < 1 || dm > 2)
1876 signal_simple_error ("Invalid value for DIMENSION", dimension);
1880 if (ch != 94 && ch != 96)
1881 signal_simple_error ("Invalid value for CHARS", chars);
1883 CHECK_CHAR_COERCE_INT (final);
1885 if (fi < '0' || fi > '~')
1886 signal_simple_error ("Invalid value for FINAL", final);
1888 if (EQ (direction, Ql2r))
1889 di = CHARSET_LEFT_TO_RIGHT;
1890 else if (EQ (direction, Qr2l))
1891 di = CHARSET_RIGHT_TO_LEFT;
1892 else if (!NILP (direction))
1893 signal_simple_error ("Invalid value for DIRECTION", direction);
1895 if (dm == 2 && fi > 0x5F)
1897 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1901 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1903 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1906 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1909 return XCHARSET_NAME (obj);
/* Returns the XCHARSET_SHORT_NAME slot of CHARSET after resolving it with
   Fget_charset. */
1913 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1914 Return short name of CHARSET.
1918 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Returns the XCHARSET_LONG_NAME slot of CHARSET after resolving it with
   Fget_charset. */
1921 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1922 Return long name of CHARSET.
1926 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Returns the XCHARSET_DOC_STRING slot of CHARSET after resolving it with
   Fget_charset. */
1929 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1930 Return description of CHARSET.
1934 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Returns XCHARSET_DIMENSION of CHARSET (resolved with Fget_charset) as a
   Lisp integer. */
1937 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1938 Return dimension of CHARSET.
1942 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Compares PROP against each known property symbol with EQ and returns the
   matching slot of CHARSET.  Integer slots are wrapped with make_int;
   'final yields nil when the final byte is 0 and a character otherwise;
   'direction yields 'l2r or 'r2l; 'reverse-direction-charset yields the
   partner's name when the slot holds a charset and the raw slot value
   otherwise.  Any other PROP signals "Unrecognized charset property name".
   NOTE(review): no visible branch handles 'code-offset or 'conversion even
   though make-charset accepts them -- confirm whether that is intended. */
1945 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1946 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1947 Recognized properties are those listed in `make-charset', as well as
1948 'name and 'doc-string.
1954 charset = Fget_charset (charset);
1955 cs = XCHARSET (charset);
1957 CHECK_SYMBOL (prop);
1958 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1959 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1960 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1961 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1962 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1963 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1964 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1965 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1966 Qnil : make_char (CHARSET_FINAL (cs));
1967 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1968 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1969 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1970 if (EQ (prop, Qdirection))
1971 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1972 if (EQ (prop, Qreverse_direction_charset))
1974 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1975 /* #### Is this translation OK? If so, error checking sufficient? */
1976 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1979 if (EQ (prop, Qmother))
1980 return CHARSET_MOTHER (cs);
1981 if (EQ (prop, Qmin_code))
1982 return make_int (CHARSET_MIN_CODE (cs));
1983 if (EQ (prop, Qmax_code))
1984 return make_int (CHARSET_MAX_CODE (cs));
1986 signal_simple_error ("Unrecognized charset property name", prop);
1987 return Qnil; /* not reached */
/* Returns XCHARSET_LEADING_BYTE of CHARSET (resolved with Fget_charset) as
   a Lisp integer. */
1990 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1991 Return charset identification number of CHARSET.
1995 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1998 /* #### We need to figure out which properties we really want to
/* Validates CCL-PROGRAM by passing it to setup_ccl_program with a scratch
   struct; a negative result signals "Invalid ccl-program".  Only after
   that check is the value stored in the charset's ccl-program slot.
   NOTE(review): the return statement is not visible in this listing. */
2001 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2002 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2004 (charset, ccl_program))
2006 struct ccl_program test_ccl;
2008 charset = Fget_charset (charset);
2009 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2010 signal_simple_error ("Invalid ccl-program", ccl_program);
2011 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empties the per-device font cache entry for CHARSET on every device
   visited by DEVICE_LOOP_NO_BREAK. */
2016 invalidate_charset_font_caches (Lisp_Object charset)
2018 /* Invalidate font cache entries for charset on all devices. */
2019 Lisp_Object devcons, concons, hash_table;
2020 DEVICE_LOOP_NO_BREAK (devcons, concons)
2022 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so an unbound result means this device
   holds no cache entry for CHARSET and there is nothing to clear. */
2023 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2024 if (!UNBOUNDP (hash_table))
2025 Fclrhash (hash_table);
/* Stores REGISTRY (which must be a string) in the registry slot of
   CHARSET, then calls invalidate_charset_font_caches for it and
   face_property_was_changed for the font property of Vdefault_face.
   NOTE(review): the return statement is not visible in this listing. */
2029 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2030 Set the 'registry property of CHARSET to REGISTRY.
2032 (charset, registry))
2034 charset = Fget_charset (charset);
2035 CHECK_STRING (registry);
2036 XCHARSET_REGISTRY (charset) = registry;
2037 invalidate_charset_font_caches (charset);
2038 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Returns the XCHARSET_DECODING_TABLE slot of CHARSET after resolving it
   with Fget_charset. */
2043 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2044 Return mapping-table of CHARSET.
2048 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Installs TABLE as the mapping of CHARSET.  A vector TABLE is checked
   with decoding_table_check_elements against the charset's dimension; any
   TABLE that is neither handled earlier nor a vector signals
   wrong-type-argument ("vector-or-nil-p").  The elements are then walked
   and registered through Fput_char_attribute under the charset's name,
   with a code point derived from the element's index plus
   CHARSET_BYTE_OFFSET (shifted left by 8 for the outer index of a nested
   vector).
   NOTE(review): the conditions selecting each error message, the switch
   case labels and the return statements are not visible in this listing --
   confirm the details against the full file. */
2051 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2052 Set mapping-table of CHARSET to TABLE.
2056 struct Lisp_Charset *cs;
2060 charset = Fget_charset (charset);
2061 cs = XCHARSET (charset);
2065 CHARSET_DECODING_TABLE(cs) = Qnil;
2068 else if (VECTORP (table))
2070 int ccs_len = CHARSET_BYTE_SIZE (cs);
2071 int ret = decoding_table_check_elements (table,
2072 CHARSET_DIMENSION (cs),
2077 signal_simple_error ("Too big table", table);
2079 signal_simple_error ("Invalid element is found", table);
2081 signal_simple_error ("Something wrong", table);
2083 CHARSET_DECODING_TABLE(cs) = Qnil;
2086 signal_error (Qwrong_type_argument,
2087 list2 (build_translated_string ("vector-or-nil-p"),
2090 byte_offset = CHARSET_BYTE_OFFSET (cs);
2091 switch (CHARSET_DIMENSION (cs))
2094 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2096 Lisp_Object c = XVECTOR_DATA(table)[i];
2099 Fput_char_attribute (c, XCHARSET_NAME (charset),
2100 make_int (i + byte_offset));
2104 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2106 Lisp_Object v = XVECTOR_DATA(table)[i];
2112 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2114 Lisp_Object c = XVECTOR_DATA(v)[j];
2118 (c, XCHARSET_NAME (charset),
2119 make_int ( ( (i + byte_offset) << 8 )
2125 Fput_char_attribute (v, XCHARSET_NAME (charset),
2126 make_int (i + byte_offset));
2133 #ifdef HAVE_CHISE_CLIENT
/* Writes the decoding table of CHARSET to a database.  The file name comes
   from char_attribute_system_db_file and the database is opened in "w+"
   mode.  For each dimension the octets from CHARSET_BYTE_OFFSET up to (but
   excluding) CHARSET_BYTE_OFFSET + CHARSET_BYTE_SIZE are visited through
   get_ccs_octet_table, and entries are written with Fput_database using
   the printed code point as key and the printed character as value.  The
   value of Fclose_database is returned.
   NOTE(review): no check of the Fopen_database result is visible, and the
   switch case labels and per-entry guards are missing from this listing --
   confirm against the full file. */
2134 DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
2135 Save mapping-table of CHARSET.
2139 struct Lisp_Charset *cs;
2140 int byte_min, byte_max;
2142 Lisp_Object db_file;
2144 charset = Fget_charset (charset);
2145 cs = XCHARSET (charset);
2147 db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
2148 Qsystem_char_id, 1);
2149 db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
2151 byte_min = CHARSET_BYTE_OFFSET (cs);
2152 byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
2153 switch (CHARSET_DIMENSION (cs))
2157 Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset);
2160 for (cell = byte_min; cell < byte_max; cell++)
2162 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2165 Fput_database (Fprin1_to_string (make_int (cell), Qnil),
2166 Fprin1_to_string (c, Qnil),
2173 Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset);
2176 for (row = byte_min; row < byte_max; row++)
2178 Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row);
2181 for (cell = byte_min; cell < byte_max; cell++)
2183 Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
2186 Fput_database (Fprin1_to_string (make_int ((row << 8)
2189 Fprin1_to_string (c, Qnil),
2197 Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset);
2200 for (plane = byte_min; plane < byte_max; plane++)
2203 = get_ccs_octet_table (table_p, charset, plane);
2206 for (row = byte_min; row < byte_max; row++)
2209 = get_ccs_octet_table (table_r, charset, row);
2212 for (cell = byte_min; cell < byte_max; cell++)
2214 Lisp_Object c = get_ccs_octet_table (table_c, charset,
2218 Fput_database (Fprin1_to_string (make_int ((plane << 16)
2222 Fprin1_to_string (c, Qnil),
2231 Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset);
2234 for (group = byte_min; group < byte_max; group++)
2237 = get_ccs_octet_table (table_g, charset, group);
2240 for (plane = byte_min; plane < byte_max; plane++)
2243 = get_ccs_octet_table (table_p, charset, plane);
2246 for (row = byte_min; row < byte_max; row++)
2249 = get_ccs_octet_table (table_r, charset, row);
2252 for (cell = byte_min; cell < byte_max; cell++)
2255 = get_ccs_octet_table (table_c, charset, cell);
2258 Fput_database (Fprin1_to_string
2259 (make_int (( group << 24)
2264 Fprin1_to_string (c, Qnil),
2272 return Fclose_database (db);
/* When the database file computed by char_attribute_system_db_file for CCS
   exists, the decoding table slot of CCS is set to Qunloaded.
   NOTE(review): the return values of both paths are not visible in this
   listing. */
2275 DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
2276 Reset mapping-table of CCS with database file.
2280 Lisp_Object db_file;
2282 ccs = Fget_charset (ccs);
2283 db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
2284 Qsystem_char_id, 0);
2286 if (!NILP (Ffile_exists_p (db_file)))
2288 XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
/* Looks CODE_POINT up in the database file of CCS, opened in "r" mode,
   using the printed code point as key.  Two exits are visible: one records
   the fetched value with decoding_table_put_char, the other records Qnil;
   both close the database afterwards.
   NOTE(review): the return type, the conditions choosing between the two
   exits and the return values are not visible in this listing. */
2295 load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
2299 = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
2302 db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil);
2306 = Fget_database (Fprin1_to_string (make_int (code_point), Qnil),
2313 decoding_table_put_char (ccs, code_point, ret);
2314 Fclose_database (db);
2318 decoding_table_put_char (ccs, code_point, Qnil);
2319 Fclose_database (db);
2323 #endif /* HAVE_CHISE_CLIENT */
2324 #endif /* UTF2000 */
2327 /************************************************************************/
2328 /* Lisp primitives for working with characters */
2329 /************************************************************************/
/* Resolves CHARSET with Fget_charset, then decodes the code point with
   DECODE_CHAR when DEFINED_ONLY is nil and with decode_defined_char
   otherwise.  A negative result is returned to Lisp as nil, anything else
   as a character.
   NOTE(review): the extraction of the integer from CODE and the adjustment
   made when XCHARSET_GRAPHIC is 1 are not visible in this listing. */
2332 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2333 Make a character from CHARSET and code-point CODE.
2334 If DEFINED_ONLY is non-nil, builtin character is not returned.
2335 If corresponding character is not found, nil is returned.
2337 (charset, code, defined_only))
2341 charset = Fget_charset (charset);
2344 if (XCHARSET_GRAPHIC (charset) == 1)
2346 if (NILP (defined_only))
2347 c = DECODE_CHAR (charset, c);
2349 c = decode_defined_char (charset, c);
2350 return c >= 0 ? make_char (c) : Qnil;
/* Decodes CODE with decode_builtin_char; when that yields a negative value
   the result of Fdecode_char (charset, code, Qnil) is returned instead.
   For latin-viscii the character is first decoded with Fdecode_char and
   probed with Fget_char_attribute against the viscii-lower and
   viscii-upper charsets, and CHARSET is switched to whichever one applies.
   NOTE(review): the conditions around the viscii probes, what happens to
   their result, and the adjustment made when XCHARSET_GRAPHIC is 1 are not
   visible in this listing. */
2353 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2354 Make a builtin character from CHARSET and code-point CODE.
2360 charset = Fget_charset (charset);
2362 if (EQ (charset, Vcharset_latin_viscii))
2364 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2370 (ret = Fget_char_attribute (chr,
2371 Vcharset_latin_viscii_lower,
2374 charset = Vcharset_latin_viscii_lower;
2378 (ret = Fget_char_attribute (chr,
2379 Vcharset_latin_viscii_upper,
2382 charset = Vcharset_latin_viscii_upper;
2389 if (XCHARSET_GRAPHIC (charset) == 1)
2392 c = decode_builtin_char (charset, c);
2393 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Builds a character from CHARSET and one or two octets.  The permitted
   octet range depends on the charset: 0..127 for ascii, 0..31 for
   control-1, 0..255 for 256-character sets, 33..126 for 94-character sets
   and 32..127 otherwise.  An octet outside the range signals through
   args_out_of_range_3.  For a dimension-one charset a non-nil ARG2 is an
   error and the result is MAKE_CHAR (charset, a1, 0); otherwise both
   octets are passed to MAKE_CHAR.
   NOTE(review): the extraction of a1 from ARG1, its type check and the
   preprocessor conditionals around the octet masking are not visible in
   this listing. */
2397 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2398 Make a character from CHARSET and octets ARG1 and ARG2.
2399 ARG2 is required only for characters from two-dimensional charsets.
2400 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2401 character s with caron.
2403 (charset, arg1, arg2))
2407 int lowlim, highlim;
2409 charset = Fget_charset (charset);
2410 cs = XCHARSET (charset);
2412 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2413 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2415 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2417 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2418 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2421 /* It is useful (and safe, according to Olivier Galibert) to strip
2422 the 8th bit off ARG1 and ARG2 because it allows programmers to
2423 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2424 Latin 2 code of the character. */
2432 if (a1 < lowlim || a1 > highlim)
2433 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2435 if (CHARSET_DIMENSION (cs) == 1)
2439 ("Charset is of dimension one; second octet must be nil", arg2);
2440 return make_char (MAKE_CHAR (charset, a1, 0));
2449 a2 = XINT (arg2) & 0x7f;
2451 if (a2 < lowlim || a2 > highlim)
2452 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2454 return make_char (MAKE_CHAR (charset, a1, a2));
/* Coerces CHARACTER with CHECK_CHAR_COERCE_INT and returns the name
   (XCHARSET_NAME) of the charset that CHAR_CHARSET reports for it, i.e. a
   name rather than the charset object. */
2457 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2458 Return the character set of CHARACTER.
2462 CHECK_CHAR_COERCE_INT (character);
2464 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Splits CHARACTER with BREAKUP_CHAR and returns octet0 when N is nil or
   0, and octet1 when N is 1, each as a Lisp integer.
   "Octet number must be 0 or 1" is signalled with N as the datum.
   NOTE(review): the `else' in front of that signal is not visible in this
   listing. */
2467 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2468 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2469 N defaults to 0 if omitted.
2473 Lisp_Object charset;
2476 CHECK_CHAR_COERCE_INT (character);
2478 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2480 if (NILP (n) || EQ (n, Qzero))
2481 return make_int (octet0);
2482 else if (EQ (n, make_int (1)))
2483 return make_int (octet1);
2485 signal_simple_error ("Octet number must be 0 or 1", n);
/* Computes the code point of CHARACTER in CHARSET with charset_code_point,
   passing a flag that is non-zero when DEFINED_ONLY is non-nil.  A
   non-negative code point is returned as a Lisp integer.
   NOTE(review): the value returned for a negative code point is not
   visible in this listing. */
2489 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2490 Return code-point of CHARACTER in specified CHARSET.
2492 (character, charset, defined_only))
2496 CHECK_CHAR_COERCE_INT (character);
2497 charset = Fget_charset (charset);
2498 code_point = charset_code_point (charset, XCHAR (character),
2499 !NILP (defined_only));
2500 if (code_point >= 0)
2501 return make_int (code_point);
/* Returns a list whose head is the charset name of CHARACTER, followed by
   its position codes.  Two implementations are visible: one obtains a code
   point with ENCODE_CHAR and conses its low 8 bits once per dimension; the
   other uses BREAKUP_CHAR and builds a two- or three-element list
   depending on whether the dimension is 2.
   NOTE(review): the preprocessor condition choosing between the two, the
   loop's update statements, UNGCPRO and the final return are not visible
   in this listing. */
2507 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2508 Return list of charset and one or two position-codes of CHARACTER.
2512 /* This function can GC */
2513 struct gcpro gcpro1, gcpro2;
2514 Lisp_Object charset = Qnil;
2515 Lisp_Object rc = Qnil;
2523 GCPRO2 (charset, rc);
2524 CHECK_CHAR_COERCE_INT (character);
2527 code_point = ENCODE_CHAR (XCHAR (character), charset);
2528 dimension = XCHARSET_DIMENSION (charset);
2529 while (dimension > 0)
2531 rc = Fcons (make_int (code_point & 255), rc);
2535 rc = Fcons (XCHARSET_NAME (charset), rc);
2537 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2539 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2541 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2545 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2554 #ifdef ENABLE_COMPOSITE_CHARS
2555 /************************************************************************/
2556 /* composite character functions */
2557 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  On the allocation path a new
   character is built in Vcharset_composite from composite_char_row_next
   and composite_char_col_next, recorded in both composite hash tables, and
   the column counter is advanced; at 128 the column resets to 32 and the
   row counter is incremented.  "No more composite chars available" is
   signalled once the row counter reaches 128.
   NOTE(review): the return type, the test separating a cache hit from the
   allocation path and the return statement are not visible in this
   listing. */
2560 lookup_composite_char (Bufbyte *str, int len)
2562 Lisp_Object lispstr = make_string (str, len);
2563 Lisp_Object ch = Fgethash (lispstr,
2564 Vcomposite_char_string2char_hash_table,
2570 if (composite_char_row_next >= 128)
2571 signal_simple_error ("No more composite chars available", lispstr);
2572 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2573 composite_char_col_next);
2574 Fputhash (make_char (emch), lispstr,
2575 Vcomposite_char_char2string_hash_table);
2576 Fputhash (lispstr, make_char (emch),
2577 Vcomposite_char_string2char_hash_table);
2578 composite_char_col_next++;
2579 if (composite_char_col_next >= 128)
2581 composite_char_col_next = 32;
2582 composite_char_row_next++;
/* Looks CH up in Vcomposite_char_char2string_hash_table and asserts that
   an entry was found.
   NOTE(review): the return type, the lookup default and the return
   statement are not visible in this listing. */
2591 composite_char_string (Emchar ch)
2593 Lisp_Object str = Fgethash (make_char (ch),
2594 Vcomposite_char_char2string_hash_table,
2596 assert (!UNBOUNDP (str));
/* Checks that STRING is a string and returns, as a character, the result
   of lookup_composite_char applied to its data and length. */
2600 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2601 Convert a string into a single composite character.
2602 The character is the result of overstriking all the characters in
2607 CHECK_STRING (string);
2608 return make_char (lookup_composite_char (XSTRING_DATA (string),
2609 XSTRING_LENGTH (string)));
/* Signals "Must be composite char" when the leading byte of the character
   is not LEADING_BYTE_COMPOSITE; otherwise returns the result of
   composite_char_string.
   NOTE(review): the argument check and the assignment of emch are not
   visible in this listing. */
2612 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2613 Return a string of the characters comprising a composite character.
2621 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2622 signal_simple_error ("Must be composite char", ch);
2623 return composite_char_string (emch);
2625 #endif /* ENABLE_COMPOSITE_CHARS */
2628 /************************************************************************/
2629 /* initialization */
2630 /************************************************************************/
/* Symbol-level initialization for this file: sets up the charset lrecord
   implementation, registers the Lisp primitives with DEFSUBR, and interns
   with defsymbol the symbols used as property keys, direction values,
   conversion names and charset names.
   NOTE(review): the return type and most preprocessor conditionals that
   guard groups of these registrations are not visible in this listing. */
2633 syms_of_mule_charset (void)
2635 INIT_LRECORD_IMPLEMENTATION (charset);
2637 DEFSUBR (Fcharsetp);
2638 DEFSUBR (Ffind_charset);
2639 DEFSUBR (Fget_charset);
2640 DEFSUBR (Fcharset_list);
2641 DEFSUBR (Fcharset_name);
2642 DEFSUBR (Fmake_charset);
2643 DEFSUBR (Fmake_reverse_direction_charset);
2644 /* DEFSUBR (Freverse_direction_charset); */
2645 DEFSUBR (Fdefine_charset_alias);
2646 DEFSUBR (Fcharset_from_attributes);
2647 DEFSUBR (Fcharset_short_name);
2648 DEFSUBR (Fcharset_long_name);
2649 DEFSUBR (Fcharset_description);
2650 DEFSUBR (Fcharset_dimension);
2651 DEFSUBR (Fcharset_property);
2652 DEFSUBR (Fcharset_id);
2653 DEFSUBR (Fset_charset_ccl_program);
2654 DEFSUBR (Fset_charset_registry);
2656 DEFSUBR (Fcharset_mapping_table);
2657 DEFSUBR (Fset_charset_mapping_table);
2658 #ifdef HAVE_CHISE_CLIENT
2659 DEFSUBR (Fsave_charset_mapping_table);
2660 DEFSUBR (Freset_charset_mapping_table);
2663 DEFSUBR (Fdecode_char);
2664 DEFSUBR (Fdecode_builtin_char);
2665 DEFSUBR (Fencode_char);
2667 DEFSUBR (Fmake_char);
2668 DEFSUBR (Fchar_charset);
2669 DEFSUBR (Fchar_octet);
2670 DEFSUBR (Fsplit_char);
2672 #ifdef ENABLE_COMPOSITE_CHARS
2673 DEFSUBR (Fmake_composite_char);
2674 DEFSUBR (Fcomposite_char_string);
/* Property-name symbols accepted by make-charset / charset-property. */
2677 defsymbol (&Qcharsetp, "charsetp");
2678 defsymbol (&Qregistry, "registry");
2679 defsymbol (&Qfinal, "final");
2680 defsymbol (&Qgraphic, "graphic");
2681 defsymbol (&Qdirection, "direction");
2682 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2683 defsymbol (&Qshort_name, "short-name");
2684 defsymbol (&Qlong_name, "long-name");
2686 defsymbol (&Qmother, "mother");
2687 defsymbol (&Qmin_code, "min-code");
2688 defsymbol (&Qmax_code, "max-code");
2689 defsymbol (&Qcode_offset, "code-offset");
2690 defsymbol (&Qconversion, "conversion");
2691 defsymbol (&Q94x60, "94x60");
2692 defsymbol (&Q94x94x60, "94x94x60");
2693 defsymbol (&Qbig5_1, "big5-1");
2694 defsymbol (&Qbig5_2, "big5-2");
2697 defsymbol (&Ql2r, "l2r");
2698 defsymbol (&Qr2l, "r2l");
2700 /* Charsets, compatible with FSF 20.3
2701 Naming convention is Script-Charset[-Edition] */
2702 defsymbol (&Qascii, "ascii");
2703 defsymbol (&Qcontrol_1, "control-1");
2704 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2705 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2706 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2707 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2708 defsymbol (&Qthai_tis620, "thai-tis620");
2709 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2710 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2711 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2712 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2713 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2714 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2715 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2716 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2717 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2718 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2719 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2720 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2721 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2722 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2723 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2724 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2726 defsymbol (&Qucs, "ucs");
2727 defsymbol (&Qucs_bmp, "ucs-bmp");
2728 defsymbol (&Qucs_smp, "ucs-smp");
2729 defsymbol (&Qucs_sip, "ucs-sip");
2730 defsymbol (&Qucs_gb, "ucs-gb");
2731 defsymbol (&Qucs_cns, "ucs-cns");
2732 defsymbol (&Qucs_jis, "ucs-jis");
2733 defsymbol (&Qucs_ks, "ucs-ks");
2734 defsymbol (&Qucs_big5, "ucs-big5");
2735 defsymbol (&Qlatin_viscii, "latin-viscii");
2736 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2737 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2738 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2739 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2740 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2741 defsymbol (&Qjis_x0208, "=jis-x0208");
2742 defsymbol (&Qideograph_gt, "ideograph-gt");
2743 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2744 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2745 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2746 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2747 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2748 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2749 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2750 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2751 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2752 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2753 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2754 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2755 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2756 defsymbol (&Qchinese_big5, "chinese-big5");
2757 /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
2758 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2759 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2760 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2761 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2762 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2763 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2764 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2765 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2766 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2767 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2768 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2769 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2770 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2771 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2773 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2774 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2776 defsymbol (&Qcomposite, "composite");
/* Initialise the charset lookup structure `chlook' and the simple
   variables of this file: allocate `chlook' zero-filled, hand it to
   dump_add_root_struct_ptr together with charset_lookup_description,
   set every slot of its tables to Qnil, seed the next-allocated
   leading-byte counters, and declare `leading-code-private-11' and
   `default-coded-charset-priority-list' through DEFVAR_INT and
   DEFVAR_LISP.
   NOTE(review): the embedded numbering skips several lines here (for
   example 2781-2786), so the return type, braces, local declarations
   and any preprocessor conditionals are presumably missing from this
   listing -- confirm against the full file.  */
2780 vars_of_mule_charset (void)
2787 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2788 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2790 /* Table of charsets indexed by leading byte. */
2791 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2792 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): charset_by_attributes is cleared below once as a
   two-level table and once as a three-level table.  These look like
   alternative conditional-compilation branches whose #if/#else lines
   are not visible in this listing -- confirm.  */
2795 /* Table of charsets indexed by type/final-byte. */
2796 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2797 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2798 chlook->charset_by_attributes[i][j] = Qnil;
2800 /* Table of charsets indexed by type/final-byte/direction. */
2801 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2802 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2803 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2804 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Starting values for the private leading-byte allocators.
   NOTE(review): a single counter and a 1-byte/2-byte pair of counters
   are both initialised here; presumably only one form is compiled in
   any given configuration -- confirm.  */
2808 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2810 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2811 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* leading_code_private_11 is assigned PRE_LEADING_BYTE_PRIVATE_1 both
   before and after its DEFVAR_INT declaration.  */
2815 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2816 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2817 Leading-code of private TYPE9N charset of column-width 1.
2819 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2823 Vdefault_coded_charset_priority_list = Qnil;
2824 DEFVAR_LISP ("default-coded-charset-priority-list",
2825 &Vdefault_coded_charset_priority_list /*
2826 Default order of preferred coded-character-sets.
2832 complex_vars_of_mule_charset (void)
2834 staticpro (&Vcharset_hash_table);
2835 Vcharset_hash_table =
2836 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2838 /* Predefined character sets. We store them into variables for
2842 staticpro (&Vcharset_ucs);
2844 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2845 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2846 build_string ("UCS"),
2847 build_string ("UCS"),
2848 build_string ("ISO/IEC 10646"),
2850 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2851 staticpro (&Vcharset_ucs_bmp);
2853 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2854 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2855 build_string ("BMP"),
2856 build_string ("UCS-BMP"),
2857 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2859 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2860 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2861 staticpro (&Vcharset_ucs_smp);
2863 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2864 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2865 build_string ("SMP"),
2866 build_string ("UCS-SMP"),
2867 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2868 build_string ("UCS00-1"),
2869 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2870 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2871 staticpro (&Vcharset_ucs_sip);
2873 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2874 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2875 build_string ("SIP"),
2876 build_string ("UCS-SIP"),
2877 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2878 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2879 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2880 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2881 staticpro (&Vcharset_ucs_gb);
2883 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2884 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2885 build_string ("UCS for GB"),
2886 build_string ("UCS for GB"),
2887 build_string ("ISO/IEC 10646 for GB"),
2889 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2890 staticpro (&Vcharset_ucs_cns);
2892 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2893 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2894 build_string ("UCS for CNS"),
2895 build_string ("UCS for CNS 11643"),
2896 build_string ("ISO/IEC 10646 for CNS 11643"),
2898 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2899 staticpro (&Vcharset_ucs_jis);
2901 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2902 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2903 build_string ("UCS for JIS"),
2904 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2906 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2908 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2909 staticpro (&Vcharset_ucs_ks);
2911 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2912 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2913 build_string ("UCS for KS"),
2914 build_string ("UCS for CCS defined by KS"),
2915 build_string ("ISO/IEC 10646 for Korean Standards"),
2917 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2918 staticpro (&Vcharset_ucs_big5);
2920 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2921 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2922 build_string ("UCS for Big5"),
2923 build_string ("UCS for Big5"),
2924 build_string ("ISO/IEC 10646 for Big5"),
2926 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2928 # define MIN_CHAR_THAI 0
2929 # define MAX_CHAR_THAI 0
2930 /* # define MIN_CHAR_HEBREW 0 */
2931 /* # define MAX_CHAR_HEBREW 0 */
2932 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2933 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
/* Register the predefined ASCII charset: pass the address of
   Vcharset_ascii to staticpro and build the charset object with
   make_charset under the symbol Qascii and final byte 'B'.
   The three descriptive strings run from shortest to longest; the
   middle one is plain "ASCII" (no trailing parenthesis).
   NOTE(review): the embedded numbering jumps from 2935 to 2937, so the
   line that stores make_charset's result is presumably missing from
   this listing -- confirm against the full file.  */
2935 staticpro (&Vcharset_ascii);
2937 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2938 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2939 build_string ("ASCII"),
2940 build_string ("ASCII"),
2941 build_string ("ASCII (ISO646 IRV)"),
2942 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2943 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Register the predefined control-1 charset and store it in
   Vcharset_control_1 (whose address is first passed to staticpro).
   The numeric range handed to make_charset below is 0x80..0x9F, i.e.
   decimal 128-159, and the description string states that same range.
   NOTE(review): embedded line 2951 is absent from this listing, so one
   argument between the description and `Qnil' is presumably missing
   here -- confirm against the full file.  */
2944 staticpro (&Vcharset_control_1);
2945 Vcharset_control_1 =
2946 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2947 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2948 build_string ("C1"),
2949 build_string ("Control characters"),
2950 build_string ("Control characters 128-159"),
2952 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2953 staticpro (&Vcharset_latin_iso8859_1);
2954 Vcharset_latin_iso8859_1 =
2955 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2956 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2957 build_string ("Latin-1"),
2958 build_string ("ISO8859-1 (Latin-1)"),
2959 build_string ("ISO8859-1 (Latin-1)"),
2960 build_string ("iso8859-1"),
2961 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2962 staticpro (&Vcharset_latin_iso8859_2);
2963 Vcharset_latin_iso8859_2 =
2964 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2965 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2966 build_string ("Latin-2"),
2967 build_string ("ISO8859-2 (Latin-2)"),
2968 build_string ("ISO8859-2 (Latin-2)"),
2969 build_string ("iso8859-2"),
2970 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2971 staticpro (&Vcharset_latin_iso8859_3);
2972 Vcharset_latin_iso8859_3 =
2973 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2974 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2975 build_string ("Latin-3"),
2976 build_string ("ISO8859-3 (Latin-3)"),
2977 build_string ("ISO8859-3 (Latin-3)"),
2978 build_string ("iso8859-3"),
2979 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2980 staticpro (&Vcharset_latin_iso8859_4);
2981 Vcharset_latin_iso8859_4 =
2982 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2983 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2984 build_string ("Latin-4"),
2985 build_string ("ISO8859-4 (Latin-4)"),
2986 build_string ("ISO8859-4 (Latin-4)"),
2987 build_string ("iso8859-4"),
2988 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2989 staticpro (&Vcharset_thai_tis620);
2990 Vcharset_thai_tis620 =
2991 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2992 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2993 build_string ("TIS620"),
2994 build_string ("TIS620 (Thai)"),
2995 build_string ("TIS620.2529 (Thai)"),
2996 build_string ("tis620"),
2997 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2998 staticpro (&Vcharset_greek_iso8859_7);
2999 Vcharset_greek_iso8859_7 =
3000 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3001 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3002 build_string ("ISO8859-7"),
3003 build_string ("ISO8859-7 (Greek)"),
3004 build_string ("ISO8859-7 (Greek)"),
3005 build_string ("iso8859-7"),
3006 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3007 staticpro (&Vcharset_arabic_iso8859_6);
3008 Vcharset_arabic_iso8859_6 =
3009 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3010 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3011 build_string ("ISO8859-6"),
3012 build_string ("ISO8859-6 (Arabic)"),
3013 build_string ("ISO8859-6 (Arabic)"),
3014 build_string ("iso8859-6"),
3015 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3016 staticpro (&Vcharset_hebrew_iso8859_8);
3017 Vcharset_hebrew_iso8859_8 =
3018 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3019 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3020 build_string ("ISO8859-8"),
3021 build_string ("ISO8859-8 (Hebrew)"),
3022 build_string ("ISO8859-8 (Hebrew)"),
3023 build_string ("iso8859-8"),
3025 0 /* MIN_CHAR_HEBREW */,
3026 0 /* MAX_CHAR_HEBREW */, 0, 32,
3027 Qnil, CONVERSION_IDENTICAL);
3028 staticpro (&Vcharset_katakana_jisx0201);
3029 Vcharset_katakana_jisx0201 =
3030 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3031 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3032 build_string ("JISX0201 Kana"),
3033 build_string ("JISX0201.1976 (Japanese Kana)"),
3034 build_string ("JISX0201.1976 Japanese Kana"),
3035 build_string ("jisx0201\\.1976"),
3036 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3037 staticpro (&Vcharset_latin_jisx0201);
3038 Vcharset_latin_jisx0201 =
3039 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3040 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3041 build_string ("JISX0201 Roman"),
3042 build_string ("JISX0201.1976 (Japanese Roman)"),
3043 build_string ("JISX0201.1976 Japanese Roman"),
3044 build_string ("jisx0201\\.1976"),
3045 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3046 staticpro (&Vcharset_cyrillic_iso8859_5);
3047 Vcharset_cyrillic_iso8859_5 =
3048 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3049 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3050 build_string ("ISO8859-5"),
3051 build_string ("ISO8859-5 (Cyrillic)"),
3052 build_string ("ISO8859-5 (Cyrillic)"),
3053 build_string ("iso8859-5"),
3054 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3055 staticpro (&Vcharset_latin_iso8859_9);
3056 Vcharset_latin_iso8859_9 =
3057 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3058 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3059 build_string ("Latin-5"),
3060 build_string ("ISO8859-9 (Latin-5)"),
3061 build_string ("ISO8859-9 (Latin-5)"),
3062 build_string ("iso8859-9"),
3063 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3065 staticpro (&Vcharset_jis_x0208);
3066 Vcharset_jis_x0208 =
3067 make_charset (LEADING_BYTE_JIS_X0208,
3069 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3070 build_string ("JIS X0208"),
3071 build_string ("JIS X0208 Common"),
3072 build_string ("JIS X0208 Common part"),
3073 build_string ("jisx0208\\.1990"),
3075 MIN_CHAR_JIS_X0208_1990,
3076 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
3077 Qnil, CONVERSION_94x94);
3079 staticpro (&Vcharset_japanese_jisx0208_1978);
3080 Vcharset_japanese_jisx0208_1978 =
3081 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3082 Qjapanese_jisx0208_1978, 94, 2,
3083 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3084 build_string ("JIS X0208:1978"),
3085 build_string ("JIS X0208:1978 (Japanese)"),
3087 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3088 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3095 CONVERSION_IDENTICAL);
/* Register the predefined chinese-gb2312 charset ("GB2312 Chinese
   simplified", final byte 'A') and store it in Vcharset_chinese_gb2312,
   whose address is first passed to staticpro.
   NOTE(review): the second build_string appears to be the charset's
   long name, judging by sibling entries such as "ISO8859-1 (Latin-1)";
   it is plain "GB2312" with no unbalanced parenthesis -- confirm the
   argument role against make_charset's prototype.  */
3096 staticpro (&Vcharset_chinese_gb2312);
3097 Vcharset_chinese_gb2312 =
3098 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3099 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3100 build_string ("GB2312"),
3101 build_string ("GB2312"),
3102 build_string ("GB2312 Chinese simplified"),
3103 build_string ("gb2312"),
3104 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Register the predefined chinese-gb12345 charset ("GB 12345-1990")
   and store it in Vcharset_chinese_gb12345, whose address is first
   passed to staticpro.  No final byte is given (0).
   NOTE(review): the second build_string appears to be the charset's
   long name, judging by sibling entries; it is plain "GB 12345" with
   no unbalanced parenthesis -- confirm the argument role against
   make_charset's prototype.  */
3105 staticpro (&Vcharset_chinese_gb12345);
3106 Vcharset_chinese_gb12345 =
3107 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3108 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3109 build_string ("G1"),
3110 build_string ("GB 12345"),
3111 build_string ("GB 12345-1990"),
3112 build_string ("GB12345\\(\\.1990\\)?-0"),
3113 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3114 staticpro (&Vcharset_japanese_jisx0208);
3115 Vcharset_japanese_jisx0208 =
3116 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3117 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3118 build_string ("JISX0208"),
3119 build_string ("JIS X0208:1983 (Japanese)"),
3120 build_string ("JIS X0208:1983 Japanese Kanji"),
3121 build_string ("jisx0208\\.1983"),
3128 CONVERSION_IDENTICAL);
3130 staticpro (&Vcharset_japanese_jisx0208_1990);
3131 Vcharset_japanese_jisx0208_1990 =
3132 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3133 Qjapanese_jisx0208_1990, 94, 2,
3134 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3135 build_string ("JISX0208-1990"),
3136 build_string ("JIS X0208:1990 (Japanese)"),
3137 build_string ("JIS X0208:1990 Japanese Kanji"),
3138 build_string ("jisx0208\\.1990"),
3140 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
3141 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
3142 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
3143 Vcharset_jis_x0208 /* Qnil */,
3144 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
/* Register the predefined korean-ksc5601 charset ("KSC5601 Korean
   Hangul and Hanja", final byte 'C') and store it in
   Vcharset_korean_ksc5601, whose address is first passed to staticpro.
   NOTE(review): the second build_string appears to be the charset's
   long name; it follows the "NAME (Language)" pattern used by sibling
   entries such as "JISX0212 (Japanese)", with balanced parentheses --
   confirm the argument role against make_charset's prototype.  */
3146 staticpro (&Vcharset_korean_ksc5601);
3147 Vcharset_korean_ksc5601 =
3148 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3149 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3150 build_string ("KSC5601"),
3151 build_string ("KSC5601 (Korean)"),
3152 build_string ("KSC5601 Korean Hangul and Hanja"),
3153 build_string ("ksc5601"),
3154 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3155 staticpro (&Vcharset_japanese_jisx0212);
3156 Vcharset_japanese_jisx0212 =
3157 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3158 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3159 build_string ("JISX0212"),
3160 build_string ("JISX0212 (Japanese)"),
3161 build_string ("JISX0212 Japanese Supplement"),
3162 build_string ("jisx0212"),
3163 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Build, by string-literal concatenation, the pattern used for a
   CNS 11643 plane: "cns11643", then '.' or '-', then an optional group
   of arbitrary text ending in '.' or '-', then N anchored at the end
   of the string.  N must itself be a string literal (it is pasted
   between two literals); the visible callers pass "1" and "2" and hand
   the result to build_string.  */
3165 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3166 staticpro (&Vcharset_chinese_cns11643_1);
3167 Vcharset_chinese_cns11643_1 =
3168 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3169 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3170 build_string ("CNS11643-1"),
3171 build_string ("CNS11643-1 (Chinese traditional)"),
3173 ("CNS 11643 Plane 1 Chinese traditional"),
3174 build_string (CHINESE_CNS_PLANE_RE("1")),
3175 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3176 staticpro (&Vcharset_chinese_cns11643_2);
3177 Vcharset_chinese_cns11643_2 =
3178 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3179 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3180 build_string ("CNS11643-2"),
3181 build_string ("CNS11643-2 (Chinese traditional)"),
3183 ("CNS 11643 Plane 2 Chinese traditional"),
3184 build_string (CHINESE_CNS_PLANE_RE("2")),
3185 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Register the predefined latin-tcvn5712 charset (final byte 'Z') and
   store it in Vcharset_latin_tcvn5712, whose address is first passed
   to staticpro.  The description string cites the 1993 edition of
   TCVN 5712, the same year that the pattern on the following line
   accepts as an optional ".1993" suffix.  */
3187 staticpro (&Vcharset_latin_tcvn5712);
3188 Vcharset_latin_tcvn5712 =
3189 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3190 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3191 build_string ("TCVN 5712"),
3192 build_string ("TCVN 5712 (VSCII-2)"),
3193 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
3194 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3195 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3196 staticpro (&Vcharset_latin_viscii_lower);
3197 Vcharset_latin_viscii_lower =
3198 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3199 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3200 build_string ("VISCII lower"),
3201 build_string ("VISCII lower (Vietnamese)"),
3202 build_string ("VISCII lower (Vietnamese)"),
3203 build_string ("MULEVISCII-LOWER"),
3204 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3205 staticpro (&Vcharset_latin_viscii_upper);
3206 Vcharset_latin_viscii_upper =
3207 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3208 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3209 build_string ("VISCII upper"),
3210 build_string ("VISCII upper (Vietnamese)"),
3211 build_string ("VISCII upper (Vietnamese)"),
3212 build_string ("MULEVISCII-UPPER"),
3213 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
3214 staticpro (&Vcharset_latin_viscii);
3215 Vcharset_latin_viscii =
3216 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3217 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3218 build_string ("VISCII"),
3219 build_string ("VISCII 1.1 (Vietnamese)"),
3220 build_string ("VISCII 1.1 (Vietnamese)"),
3221 build_string ("VISCII1\\.1"),
3222 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3223 staticpro (&Vcharset_chinese_big5);
3224 Vcharset_chinese_big5 =
3225 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3226 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3227 build_string ("Big5"),
3228 build_string ("Big5"),
3229 build_string ("Big5 Chinese traditional"),
3230 build_string ("big5-0"),
3232 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3233 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
3235 staticpro (&Vcharset_chinese_big5_cdp);
3236 Vcharset_chinese_big5_cdp =
3237 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3238 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3239 build_string ("Big5-CDP"),
3240 build_string ("Big5 + CDP extension"),
3241 build_string ("Big5 with CDP extension"),
3242 build_string ("big5\\.cdp-0"),
3243 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
3244 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): expand to the staticpro + make_charset sequence for
   HANZIKU part N.  N is token-pasted (##n) onto
   Vcharset_ideograph_hanziku_, LEADING_BYTE_HANZIKU_,
   Qideograph_hanziku_ and MIN_/MAX_CHAR_HANZIKU_, and stringified (#n)
   into the name, description and pattern strings.  The expansion ends
   with its own semicolon.
   NOTE(review): embedded line 3254 is absent from this listing, so the
   call that wraps the "hanziku-"#n"$" pattern is presumably missing
   here -- confirm against the full file.  */
3246 #define DEF_HANZIKU(n) \
3247 staticpro (&Vcharset_ideograph_hanziku_##n); \
3248 Vcharset_ideograph_hanziku_##n = \
3249 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
3250 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
3251 build_string ("HZK-"#n), \
3252 build_string ("HANZIKU-"#n), \
3253 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
3255 ("hanziku-"#n"$"), \
3256 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
3257 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
3270 staticpro (&Vcharset_ideograph_cbeta);
3271 Vcharset_ideograph_cbeta =
3272 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
3273 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3274 build_string ("CB"),
3275 build_string ("CBETA"),
3276 build_string ("CBETA private characters"),
3277 build_string ("cbeta-0"),
3278 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
3279 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
3280 staticpro (&Vcharset_ideograph_gt);
3281 Vcharset_ideograph_gt =
3282 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3283 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3284 build_string ("GT"),
3285 build_string ("GT"),
3286 build_string ("GT"),
3288 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
3289 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): expand to the staticpro + make_charset sequence for
   part N of the GT "pseudo JIS encoding" charsets.  N is token-pasted
   (##n) onto Vcharset_ideograph_gt_pj_, LEADING_BYTE_GT_PJ_ and
   Qideograph_gt_pj_, and stringified (#n) into the name, description
   and pattern strings.  The pattern accepts either "GTpj-N" or
   "jisx0208.GT-N" at the end of the string.  The expansion ends with
   its own semicolon.
   NOTE(review): embedded line 3298 is absent from this listing, so the
   call that wraps the pattern string is presumably missing here --
   confirm against the full file.  */
3290 #define DEF_GT_PJ(n) \
3291 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3292 Vcharset_ideograph_gt_pj_##n = \
3293 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3294 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3295 build_string ("GT-PJ-"#n), \
3296 build_string ("GT (pseudo JIS encoding) part "#n), \
3297 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3299 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
3300 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3313 staticpro (&Vcharset_ideograph_daikanwa_2);
3314 Vcharset_ideograph_daikanwa_2 =
3315 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3316 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3317 build_string ("Daikanwa Rev."),
3318 build_string ("Morohashi's Daikanwa Rev."),
3320 ("Daikanwa dictionary (revised version)"),
3321 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3322 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3323 staticpro (&Vcharset_ideograph_daikanwa);
3324 Vcharset_ideograph_daikanwa =
3325 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3326 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3327 build_string ("Daikanwa"),
3328 build_string ("Morohashi's Daikanwa Rev.2"),
3330 ("Daikanwa dictionary (second revised version)"),
3331 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3332 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3333 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3335 staticpro (&Vcharset_ethiopic_ucs);
3336 Vcharset_ethiopic_ucs =
3337 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3338 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Ethiopic (UCS)"),
3340 build_string ("Ethiopic (UCS)"),
3341 build_string ("Ethiopic of UCS"),
3342 build_string ("Ethiopic-Unicode"),
3343 Qnil, 0x1200, 0x137F, 0, 0,
3344 Qnil, CONVERSION_IDENTICAL);
3346 staticpro (&Vcharset_chinese_big5_1);
3347 Vcharset_chinese_big5_1 =
3348 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3349 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3350 build_string ("Big5"),
3351 build_string ("Big5 (Level-1)"),
3353 ("Big5 Level-1 Chinese traditional"),
3354 build_string ("big5"),
3355 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3356 Vcharset_chinese_big5, CONVERSION_BIG5_1);
3357 staticpro (&Vcharset_chinese_big5_2);
3358 Vcharset_chinese_big5_2 =
3359 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3360 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3361 build_string ("Big5"),
3362 build_string ("Big5 (Level-2)"),
3364 ("Big5 Level-2 Chinese traditional"),
3365 build_string ("big5"),
3366 Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */
3367 Vcharset_chinese_big5, CONVERSION_BIG5_2);
3369 #ifdef ENABLE_COMPOSITE_CHARS
3370 /* #### For simplicity, we put composite chars into a 96x96 charset.
3371 This is going to lead to problems because you can run out of
3372 room, esp. as we don't yet recycle numbers. */
3373 staticpro (&Vcharset_composite);
3374 Vcharset_composite =
3375 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3376 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3377 build_string ("Composite"),
3378 build_string ("Composite characters"),
3379 build_string ("Composite characters"),
3382 /* #### not dumped properly */
3383 composite_char_row_next = 32;
3384 composite_char_col_next = 32;
3386 Vcomposite_char_string2char_hash_table =
3387 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3388 Vcomposite_char_char2string_hash_table =
3389 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3390 staticpro (&Vcomposite_char_string2char_hash_table);
3391 staticpro (&Vcomposite_char_char2string_hash_table);
3392 #endif /* ENABLE_COMPOSITE_CHARS */