1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_japanese_jisx0208_1990;
57 Lisp_Object Vcharset_korean_ksc5601;
58 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2;
62 Lisp_Object Vcharset_ucs_bmp;
63 Lisp_Object Vcharset_latin_viscii;
64 Lisp_Object Vcharset_latin_viscii_lower;
65 Lisp_Object Vcharset_latin_viscii_upper;
66 Lisp_Object Vcharset_ideograph_daikanwa;
67 Lisp_Object Vcharset_ethiopic_ucs;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
389 Lisp_Object QnoBreak;
390 Lisp_Object Qfraction;
400 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qfraction))
412 else if (EQ (v, Qsuper))
414 else if (EQ (v, Qsub))
416 else if (EQ (v, Qcircle))
418 else if (EQ (v, Qsquare))
420 else if (EQ (v, Qwide))
422 else if (EQ (v, Qnarrow))
424 else if (EQ (v, Qfont))
427 signal_simple_error (err_msg, err_arg);
430 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
431 Return character corresponding with list.
435 Lisp_Object table = Vcharacter_composition_table;
436 Lisp_Object rest = list;
440 Lisp_Object v = Fcar (rest);
442 Emchar c = to_char_code (v, "Invalid value for composition", list);
444 ret = get_char_code_table (c, table);
449 if (!CHAR_CODE_TABLE_P (ret))
454 else if (!CONSP (rest))
456 else if (CHAR_CODE_TABLE_P (ret))
459 signal_simple_error ("Invalid table is found with", list);
461 signal_simple_error ("Invalid value for composition", list);
464 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
465 Return variants of CHARACTER.
469 CHECK_CHAR (character);
470 return Fcopy_list (get_char_code_table (XCHAR (character),
471 Vcharacter_variant_table));
474 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
475 Return the alist of attributes of CHARACTER.
479 CHECK_CHAR (character);
480 return Fcopy_alist (get_char_code_table (XCHAR (character),
481 Vcharacter_attribute_table));
484 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
485 Return the value of CHARACTER's ATTRIBUTE.
487 (character, attribute))
492 CHECK_CHAR (character);
493 ret = get_char_code_table (XCHAR (character),
494 Vcharacter_attribute_table);
498 if (!NILP (ccs = Ffind_charset (attribute)))
501 return Fcdr (Fassq (attribute, ret));
505 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
508 Emchar char_code = XCHAR (character);
510 = get_char_code_table (char_code, Vcharacter_attribute_table);
513 cell = Fassq (attribute, ret);
517 ret = Fcons (Fcons (attribute, value), ret);
519 else if (!EQ (Fcdr (cell), value))
521 Fsetcdr (cell, value);
523 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
527 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
528 Store CHARACTER's ATTRIBUTE with VALUE.
530 (character, attribute, value))
534 CHECK_CHAR (character);
535 ccs = Ffind_charset (attribute);
539 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
544 /* ad-hoc method for `ascii' */
545 if ((XCHARSET_CHARS (ccs) == 94) &&
546 (XCHARSET_BYTE_OFFSET (ccs) != 33))
547 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
549 ccs_len = XCHARSET_CHARS (ccs);
552 signal_simple_error ("Invalid value for coded-charset",
556 rest = Fget_char_attribute (character, attribute);
563 Lisp_Object ei = Fcar (rest);
565 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
566 nv = XVECTOR_DATA(v)[i];
573 XVECTOR_DATA(v)[i] = Qnil;
574 v = XCHARSET_DECODING_TABLE (ccs);
579 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
582 if (XCHARSET_GRAPHIC (ccs) == 1)
583 value = Fcopy_list (value);
588 Lisp_Object ei = Fcar (rest);
591 signal_simple_error ("Invalid value for coded-charset", value);
593 if ((i < 0) || (255 < i))
594 signal_simple_error ("Invalid value for coded-charset", value);
595 if (XCHARSET_GRAPHIC (ccs) == 1)
598 Fsetcar (rest, make_int (i));
600 i -= XCHARSET_BYTE_OFFSET (ccs);
601 nv = XVECTOR_DATA(v)[i];
607 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
614 XVECTOR_DATA(v)[i] = character;
616 else if (EQ (attribute, Q_decomposition))
618 Lisp_Object rest = value;
619 Lisp_Object table = Vcharacter_composition_table;
622 signal_simple_error ("Invalid value for ->decomposition",
627 Lisp_Object v = Fcar (rest);
630 = to_char_code (v, "Invalid value for ->decomposition", value);
635 put_char_code_table (c, character, table);
640 ntable = get_char_code_table (c, table);
641 if (!CHAR_CODE_TABLE_P (ntable))
643 ntable = make_char_code_table (Qnil);
644 put_char_code_table (c, ntable, table);
650 else if (EQ (attribute, Q_ucs))
656 signal_simple_error ("Invalid value for ->ucs", value);
660 ret = get_char_code_table (c, Vcharacter_variant_table);
661 if (NILP (Fmemq (character, ret)))
663 put_char_code_table (c, Fcons (character, ret),
664 Vcharacter_variant_table);
667 return put_char_attribute (character, attribute, value);
672 EXFUN (Fmake_char, 3);
674 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
675 Store character's ATTRIBUTES.
679 Lisp_Object rest = attributes;
680 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
681 Lisp_Object character;
687 Lisp_Object cell = Fcar (rest);
691 signal_simple_error ("Invalid argument", attributes);
692 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
695 character = Fmake_char (ccs, Fcar (cell),
697 goto setup_attributes;
701 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
704 signal_simple_error ("Invalid argument", attributes);
706 character = make_char (XINT (code) + 0x100000);
707 goto setup_attributes;
711 else if (!INTP (code))
712 signal_simple_error ("Invalid argument", attributes);
714 character = make_char (XINT (code));
720 Lisp_Object cell = Fcar (rest);
723 signal_simple_error ("Invalid argument", attributes);
724 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
728 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
731 Lisp_Object Vutf_2000_version;
735 int leading_code_private_11;
738 Lisp_Object Qcharsetp;
740 /* Qdoc_string, Qdimension, Qchars defined in general.c */
741 Lisp_Object Qregistry, Qfinal, Qgraphic;
742 Lisp_Object Qdirection;
743 Lisp_Object Qreverse_direction_charset;
744 Lisp_Object Qleading_byte;
745 Lisp_Object Qshort_name, Qlong_name;
761 Qjapanese_jisx0208_1978,
764 Qjapanese_jisx0208_1990,
774 Qvietnamese_viscii_lower,
775 Qvietnamese_viscii_upper,
783 Lisp_Object Ql2r, Qr2l;
785 Lisp_Object Vcharset_hash_table;
788 static Charset_ID next_allocated_leading_byte;
790 static Charset_ID next_allocated_1_byte_leading_byte;
791 static Charset_ID next_allocated_2_byte_leading_byte;
794 /* Composite characters are characters constructed by overstriking two
795 or more regular characters.
797 1) The old Mule implementation involves storing composite characters
798 in a buffer as a tag followed by all of the actual characters
799 used to make up the composite character. I think this is a bad
800 idea; it greatly complicates code that wants to handle strings
801 one character at a time because it has to deal with the possibility
802 of great big ungainly characters. It's much more reasonable to
803 simply store an index into a table of composite characters.
805 2) The current implementation only allows for 16,384 separate
806 composite characters over the lifetime of the XEmacs process.
807 This could become a potential problem if the user
808 edited lots of different files that use composite characters.
809 Due to FSF bogosity, increasing the number of allowable
810 composite characters under Mule would decrease the number
811 of possible faces that can exist. Mule already has shrunk
812 this to 2048, and further shrinkage would become uncomfortable.
813 No such problems exist in XEmacs.
815 Composite characters could be represented as 0x80 C1 C2 C3,
816 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
817 for slightly under 2^20 (one million) composite characters
818 over the XEmacs process lifetime, and you only need to
819 increase the size of a Mule character from 19 to 21 bits.
820 Or you could use 0x80 C1 C2 C3 C4, allowing for about
821 85 million (slightly over 2^26) composite characters. */
824 /************************************************************************/
825 /* Basic Emchar functions */
826 /************************************************************************/
828 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
829 string in STR. Returns the number of bytes stored.
830 Do not call this directly. Use the macro set_charptr_emchar() instead.
834 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
849 else if ( c <= 0x7ff )
851 *p++ = (c >> 6) | 0xc0;
852 *p++ = (c & 0x3f) | 0x80;
854 else if ( c <= 0xffff )
856 *p++ = (c >> 12) | 0xe0;
857 *p++ = ((c >> 6) & 0x3f) | 0x80;
858 *p++ = (c & 0x3f) | 0x80;
860 else if ( c <= 0x1fffff )
862 *p++ = (c >> 18) | 0xf0;
863 *p++ = ((c >> 12) & 0x3f) | 0x80;
864 *p++ = ((c >> 6) & 0x3f) | 0x80;
865 *p++ = (c & 0x3f) | 0x80;
867 else if ( c <= 0x3ffffff )
869 *p++ = (c >> 24) | 0xf8;
870 *p++ = ((c >> 18) & 0x3f) | 0x80;
871 *p++ = ((c >> 12) & 0x3f) | 0x80;
872 *p++ = ((c >> 6) & 0x3f) | 0x80;
873 *p++ = (c & 0x3f) | 0x80;
877 *p++ = (c >> 30) | 0xfc;
878 *p++ = ((c >> 24) & 0x3f) | 0x80;
879 *p++ = ((c >> 18) & 0x3f) | 0x80;
880 *p++ = ((c >> 12) & 0x3f) | 0x80;
881 *p++ = ((c >> 6) & 0x3f) | 0x80;
882 *p++ = (c & 0x3f) | 0x80;
885 BREAKUP_CHAR (c, charset, c1, c2);
886 lb = CHAR_LEADING_BYTE (c);
887 if (LEADING_BYTE_PRIVATE_P (lb))
888 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
890 if (EQ (charset, Vcharset_control_1))
899 /* Return the first character from a Mule-encoded string in STR,
900 assuming it's non-ASCII. Do not call this directly.
901 Use the macro charptr_emchar() instead. */
904 non_ascii_charptr_emchar (CONST Bufbyte *str)
917 else if ( b >= 0xf8 )
922 else if ( b >= 0xf0 )
927 else if ( b >= 0xe0 )
932 else if ( b >= 0xc0 )
942 for( ; len > 0; len-- )
945 ch = ( ch << 6 ) | ( b & 0x3f );
949 Bufbyte i0 = *str, i1, i2 = 0;
952 if (i0 == LEADING_BYTE_CONTROL_1)
953 return (Emchar) (*++str - 0x20);
955 if (LEADING_BYTE_PREFIX_P (i0))
960 charset = CHARSET_BY_LEADING_BYTE (i0);
961 if (XCHARSET_DIMENSION (charset) == 2)
964 return MAKE_CHAR (charset, i1, i2);
968 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
969 Do not call this directly. Use the macro valid_char_p() instead. */
973 non_ascii_valid_char_p (Emchar ch)
977 /* Must have only lowest 19 bits set */
981 f1 = CHAR_FIELD1 (ch);
982 f2 = CHAR_FIELD2 (ch);
983 f3 = CHAR_FIELD3 (ch);
989 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
990 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
991 f2 > MAX_CHAR_FIELD2_PRIVATE)
996 if (f3 != 0x20 && f3 != 0x7F)
1000 NOTE: This takes advantage of the fact that
1001 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1002 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1004 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1005 return (XCHARSET_CHARS (charset) == 96);
1009 Lisp_Object charset;
1011 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1012 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1013 f1 > MAX_CHAR_FIELD1_PRIVATE)
1015 if (f2 < 0x20 || f3 < 0x20)
1018 #ifdef ENABLE_COMPOSITE_CHARS
1019 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1021 if (UNBOUNDP (Fgethash (make_int (ch),
1022 Vcomposite_char_char2string_hash_table,
1027 #endif /* ENABLE_COMPOSITE_CHARS */
1029 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1032 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1034 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1037 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1039 return (XCHARSET_CHARS (charset) == 96);
1045 /************************************************************************/
1046 /* Basic string functions */
1047 /************************************************************************/
1049 /* Copy the character pointed to by PTR into STR, assuming it's
1050 non-ASCII. Do not call this directly. Use the macro
1051 charptr_copy_char() instead. */
1054 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1056 Bufbyte *strptr = str;
1058 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1060 /* Notice fallthrough. */
1062 case 6: *++strptr = *ptr++;
1063 case 5: *++strptr = *ptr++;
1065 case 4: *++strptr = *ptr++;
1066 case 3: *++strptr = *ptr++;
1067 case 2: *++strptr = *ptr;
1072 return strptr + 1 - str;
1076 /************************************************************************/
1077 /* streams of Emchars */
1078 /************************************************************************/
1080 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1081 The functions below are not meant to be called directly; use
1082 the macros in insdel.h. */
1085 Lstream_get_emchar_1 (Lstream *stream, int ch)
1087 Bufbyte str[MAX_EMCHAR_LEN];
1088 Bufbyte *strptr = str;
1090 str[0] = (Bufbyte) ch;
1091 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1093 /* Notice fallthrough. */
1096 ch = Lstream_getc (stream);
1098 *++strptr = (Bufbyte) ch;
1100 ch = Lstream_getc (stream);
1102 *++strptr = (Bufbyte) ch;
1105 ch = Lstream_getc (stream);
1107 *++strptr = (Bufbyte) ch;
1109 ch = Lstream_getc (stream);
1111 *++strptr = (Bufbyte) ch;
1113 ch = Lstream_getc (stream);
1115 *++strptr = (Bufbyte) ch;
1120 return charptr_emchar (str);
1124 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1126 Bufbyte str[MAX_EMCHAR_LEN];
1127 Bytecount len = set_charptr_emchar (str, ch);
1128 return Lstream_write (stream, str, len);
1132 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1134 Bufbyte str[MAX_EMCHAR_LEN];
1135 Bytecount len = set_charptr_emchar (str, ch);
1136 Lstream_unread (stream, str, len);
1140 /************************************************************************/
1141 /* charset object */
1142 /************************************************************************/
1145 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1147 struct Lisp_Charset *cs = XCHARSET (obj);
1149 markobj (cs->short_name);
1150 markobj (cs->long_name);
1151 markobj (cs->doc_string);
1152 markobj (cs->registry);
1153 markobj (cs->ccl_program);
1155 markobj (cs->decoding_table);
1161 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1163 struct Lisp_Charset *cs = XCHARSET (obj);
1167 error ("printing unreadable object #<charset %s 0x%x>",
1168 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1171 write_c_string ("#<charset ", printcharfun);
1172 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1173 write_c_string (" ", printcharfun);
1174 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1175 write_c_string (" ", printcharfun);
1176 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1177 write_c_string (" ", printcharfun);
1178 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1179 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1180 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1181 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1182 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1184 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1185 CHARSET_COLUMNS (cs),
1186 CHARSET_GRAPHIC (cs),
1187 CHARSET_FINAL (cs));
1188 write_c_string (buf, printcharfun);
1189 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1190 sprintf (buf, " 0x%x>", cs->header.uid);
1191 write_c_string (buf, printcharfun);
1194 static const struct lrecord_description charset_description[] = {
1195 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1197 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1202 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1203 mark_charset, print_charset, 0, 0, 0,
1204 charset_description,
1205 struct Lisp_Charset);
1207 /* Make a new charset. */
1210 make_charset (Charset_ID id, Lisp_Object name,
1211 unsigned char type, unsigned char columns, unsigned char graphic,
1212 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1213 Lisp_Object long_name, Lisp_Object doc,
1215 Lisp_Object decoding_table,
1216 Emchar ucs_min, Emchar ucs_max,
1217 Emchar code_offset, unsigned char byte_offset)
1220 struct Lisp_Charset *cs =
1221 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1222 XSETCHARSET (obj, cs);
1224 CHARSET_ID (cs) = id;
1225 CHARSET_NAME (cs) = name;
1226 CHARSET_SHORT_NAME (cs) = short_name;
1227 CHARSET_LONG_NAME (cs) = long_name;
1228 CHARSET_DIRECTION (cs) = direction;
1229 CHARSET_TYPE (cs) = type;
1230 CHARSET_COLUMNS (cs) = columns;
1231 CHARSET_GRAPHIC (cs) = graphic;
1232 CHARSET_FINAL (cs) = final;
1233 CHARSET_DOC_STRING (cs) = doc;
1234 CHARSET_REGISTRY (cs) = reg;
1235 CHARSET_CCL_PROGRAM (cs) = Qnil;
1236 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1238 CHARSET_DECODING_TABLE(cs) = Qnil;
1239 CHARSET_UCS_MIN(cs) = ucs_min;
1240 CHARSET_UCS_MAX(cs) = ucs_max;
1241 CHARSET_CODE_OFFSET(cs) = code_offset;
1242 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1245 switch (CHARSET_TYPE (cs))
1247 case CHARSET_TYPE_94:
1248 CHARSET_DIMENSION (cs) = 1;
1249 CHARSET_CHARS (cs) = 94;
1251 case CHARSET_TYPE_96:
1252 CHARSET_DIMENSION (cs) = 1;
1253 CHARSET_CHARS (cs) = 96;
1255 case CHARSET_TYPE_94X94:
1256 CHARSET_DIMENSION (cs) = 2;
1257 CHARSET_CHARS (cs) = 94;
1259 case CHARSET_TYPE_96X96:
1260 CHARSET_DIMENSION (cs) = 2;
1261 CHARSET_CHARS (cs) = 96;
1264 case CHARSET_TYPE_128:
1265 CHARSET_DIMENSION (cs) = 1;
1266 CHARSET_CHARS (cs) = 128;
1268 case CHARSET_TYPE_128X128:
1269 CHARSET_DIMENSION (cs) = 2;
1270 CHARSET_CHARS (cs) = 128;
1272 case CHARSET_TYPE_256:
1273 CHARSET_DIMENSION (cs) = 1;
1274 CHARSET_CHARS (cs) = 256;
1276 case CHARSET_TYPE_256X256:
1277 CHARSET_DIMENSION (cs) = 2;
1278 CHARSET_CHARS (cs) = 256;
1284 if (id == LEADING_BYTE_ASCII)
1285 CHARSET_REP_BYTES (cs) = 1;
1287 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1289 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1294 /* some charsets do not have final characters. This includes
1295 ASCII, Control-1, Composite, and the two faux private
1298 if (code_offset == 0)
1300 assert (NILP (charset_by_attributes[type][final]));
1301 charset_by_attributes[type][final] = obj;
1304 assert (NILP (charset_by_attributes[type][final][direction]));
1305 charset_by_attributes[type][final][direction] = obj;
1309 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1310 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1313 /* official leading byte */
1314 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1317 /* Some charsets are "faux" and don't have names or really exist at
1318 all except in the leading-byte table. */
1320 Fputhash (name, obj, Vcharset_hash_table);
1325 get_unallocated_leading_byte (int dimension)
1330 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1333 lb = next_allocated_leading_byte++;
1337 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1340 lb = next_allocated_1_byte_leading_byte++;
1344 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1347 lb = next_allocated_2_byte_leading_byte++;
1353 ("No more character sets free for this dimension",
1354 make_int (dimension));
1361 range_charset_code_point (Lisp_Object charset, Emchar ch)
1365 if ((XCHARSET_UCS_MIN (charset) <= ch)
1366 && (ch <= XCHARSET_UCS_MAX (charset)))
1368 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1370 if (XCHARSET_DIMENSION (charset) == 1)
1371 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1372 else if (XCHARSET_DIMENSION (charset) == 2)
1373 return list2 (make_int (d / XCHARSET_CHARS (charset)
1374 + XCHARSET_BYTE_OFFSET (charset)),
1375 make_int (d % XCHARSET_CHARS (charset)
1376 + XCHARSET_BYTE_OFFSET (charset)));
1377 else if (XCHARSET_DIMENSION (charset) == 3)
1378 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1379 * XCHARSET_CHARS (charset))
1380 + XCHARSET_BYTE_OFFSET (charset)),
1381 make_int (d / XCHARSET_CHARS (charset)
1382 % XCHARSET_CHARS (charset)
1383 + XCHARSET_BYTE_OFFSET (charset)),
1384 make_int (d % XCHARSET_CHARS (charset)
1385 + XCHARSET_BYTE_OFFSET (charset)));
1386 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1387 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1388 * XCHARSET_CHARS (charset)
1389 * XCHARSET_CHARS (charset))
1390 + XCHARSET_BYTE_OFFSET (charset)),
1391 make_int (d / (XCHARSET_CHARS (charset)
1392 * XCHARSET_CHARS (charset))
1393 % XCHARSET_CHARS (charset)
1394 + XCHARSET_BYTE_OFFSET (charset)),
1395 make_int (d / XCHARSET_CHARS (charset)
1396 % XCHARSET_CHARS (charset)
1397 + XCHARSET_BYTE_OFFSET (charset)),
1398 make_int (d % XCHARSET_CHARS (charset)
1399 + XCHARSET_BYTE_OFFSET (charset)));
1401 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1403 if (XCHARSET_DIMENSION (charset) == 1)
1405 if (XCHARSET_CHARS (charset) == 94)
1407 if (((d = ch - (MIN_CHAR_94
1408 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1410 return list1 (make_int (d + 33));
1412 else if (XCHARSET_CHARS (charset) == 96)
1414 if (((d = ch - (MIN_CHAR_96
1415 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1417 return list1 (make_int (d + 32));
1422 else if (XCHARSET_DIMENSION (charset) == 2)
1424 if (XCHARSET_CHARS (charset) == 94)
1426 if (((d = ch - (MIN_CHAR_94x94
1427 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1430 return list2 (make_int ((d / 94) + 33),
1431 make_int (d % 94 + 33));
1433 else if (XCHARSET_CHARS (charset) == 96)
1435 if (((d = ch - (MIN_CHAR_96x96
1436 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1439 return list2 (make_int ((d / 96) + 32),
1440 make_int (d % 96 + 32));
1448 split_builtin_char (Emchar c)
1450 if (c < MIN_CHAR_OBS_94x94)
1452 if (c <= MAX_CHAR_BASIC_LATIN)
1454 return list2 (Vcharset_ascii, make_int (c));
1458 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1462 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1464 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1466 return list2 (Vcharset_greek_iso8859_7,
1467 make_int (c - MIN_CHAR_GREEK + 0x20));
1469 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1471 return list2 (Vcharset_cyrillic_iso8859_5,
1472 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1474 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1476 return list2 (Vcharset_hebrew_iso8859_8,
1477 make_int (c - MIN_CHAR_HEBREW + 0x20));
1479 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1481 return list2 (Vcharset_thai_tis620,
1482 make_int (c - MIN_CHAR_THAI + 0x20));
1484 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1485 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1487 return list2 (Vcharset_katakana_jisx0201,
1488 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1492 return list3 (Vcharset_ucs_bmp,
1493 make_int (c >> 8), make_int (c & 0xff));
1496 else if (c <= MAX_CHAR_OBS_94x94)
1498 return list3 (CHARSET_BY_ATTRIBUTES
1499 (CHARSET_TYPE_94X94,
1500 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1501 CHARSET_LEFT_TO_RIGHT),
1502 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1503 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1505 else if (c <= MAX_CHAR_DAIKANWA)
1507 return list3 (Vcharset_ideograph_daikanwa,
1508 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1509 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
1511 else if (c <= MAX_CHAR_94)
1513 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1514 ((c - MIN_CHAR_94) / 94) + '0',
1515 CHARSET_LEFT_TO_RIGHT),
1516 make_int (((c - MIN_CHAR_94) % 94) + 33));
1518 else if (c <= MAX_CHAR_96)
1520 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1521 ((c - MIN_CHAR_96) / 96) + '0',
1522 CHARSET_LEFT_TO_RIGHT),
1523 make_int (((c - MIN_CHAR_96) % 96) + 32));
1525 else if (c <= MAX_CHAR_94x94)
1527 return list3 (CHARSET_BY_ATTRIBUTES
1528 (CHARSET_TYPE_94X94,
1529 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1530 CHARSET_LEFT_TO_RIGHT),
1531 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1532 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1534 else if (c <= MAX_CHAR_96x96)
1536 return list3 (CHARSET_BY_ATTRIBUTES
1537 (CHARSET_TYPE_96X96,
1538 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1539 CHARSET_LEFT_TO_RIGHT),
1540 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1541 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1550 charset_code_point (Lisp_Object charset, Emchar ch)
1552 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1554 if (!EQ (cdef, Qnil))
1556 Lisp_Object field = Fassq (charset, cdef);
1558 if (!EQ (field, Qnil))
1559 return Fcdr (field);
1561 return range_charset_code_point (charset, ch);
1564 Lisp_Object Vdefault_coded_charset_priority_list;
1568 /************************************************************************/
1569 /* Basic charset Lisp functions */
1570 /************************************************************************/
1572 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1573 Return non-nil if OBJECT is a charset.
1577 return CHARSETP (object) ? Qt : Qnil;
1580 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1581 Retrieve the charset of the given name.
1582 If CHARSET-OR-NAME is a charset object, it is simply returned.
1583 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1584 nil is returned. Otherwise the associated charset object is returned.
1588 if (CHARSETP (charset_or_name))
1589 return charset_or_name;
1591 CHECK_SYMBOL (charset_or_name);
1592 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1595 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1596 Retrieve the charset of the given name.
1597 Same as `find-charset' except an error is signalled if there is no such
1598 charset instead of returning nil.
1602 Lisp_Object charset = Ffind_charset (name);
1605 signal_simple_error ("No such charset", name);
1609 /* We store the charsets in hash tables with the names as the key and the
1610 actual charset object as the value. Occasionally we need to use them
1611 in a list format. These routines provide us with that. */
1612 struct charset_list_closure
1614 Lisp_Object *charset_list;
1618 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1619 void *charset_list_closure)
1621 /* This function can GC */
1622 struct charset_list_closure *chcl =
1623 (struct charset_list_closure*) charset_list_closure;
1624 Lisp_Object *charset_list = chcl->charset_list;
1626 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1630 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1631 Return a list of the names of all defined charsets.
1635 Lisp_Object charset_list = Qnil;
1636 struct gcpro gcpro1;
1637 struct charset_list_closure charset_list_closure;
1639 GCPRO1 (charset_list);
1640 charset_list_closure.charset_list = &charset_list;
1641 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1642 &charset_list_closure);
1645 return charset_list;
1648 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1649 Return the name of the given charset.
1653 return XCHARSET_NAME (Fget_charset (charset));
1656 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1657 Define a new character set.
1658 This function is for use with Mule support.
1659 NAME is a symbol, the name by which the character set is normally referred.
1660 DOC-STRING is a string describing the character set.
1661 PROPS is a property list, describing the specific nature of the
1662 character set. Recognized properties are:
1664 'short-name Short version of the charset name (ex: Latin-1)
1665 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1666 'registry A regular expression matching the font registry field for
1668 'dimension Number of octets used to index a character in this charset.
1669 Either 1 or 2. Defaults to 1.
1670 'columns Number of columns used to display a character in this charset.
1671 Only used in TTY mode. (Under X, the actual width of a
1672 character can be derived from the font used to display the
1673 characters.) If unspecified, defaults to the dimension
1674 (this is almost always the correct value).
1675 'chars Number of characters in each dimension (94 or 96).
1676 Defaults to 94. Note that if the dimension is 2, the
1677 character set thus described is 94x94 or 96x96.
1678 'final Final byte of ISO 2022 escape sequence. Must be
1679 supplied. Each combination of (DIMENSION, CHARS) defines a
1680 separate namespace for final bytes. Note that ISO
1681 2022 restricts the final byte to the range
1682 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1683 dimension == 2. Note also that final bytes in the range
1684 0x30 - 0x3F are reserved for user-defined (not official)
1686 'graphic 0 (use left half of font on output) or 1 (use right half
1687 of font on output). Defaults to 0. For example, for
1688 a font whose registry is ISO8859-1, the left half
1689 (octets 0x20 - 0x7F) is the `ascii' character set, while
1690 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1691 character set. With 'graphic set to 0, the octets
1692 will have their high bit cleared; with it set to 1,
1693 the octets will have their high bit set.
1694 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1696 'ccl-program A compiled CCL program used to convert a character in
1697 this charset into an index into the font. This is in
1698 addition to the 'graphic property. The CCL program
1699 is passed the octets of the character, with the high
1700 bit cleared and set depending upon whether the value
1701 of the 'graphic property is 0 or 1.
1703 (name, doc_string, props))
1705 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1706 int direction = CHARSET_LEFT_TO_RIGHT;
1708 Lisp_Object registry = Qnil;
1709 Lisp_Object charset;
1710 Lisp_Object rest, keyword, value;
1711 Lisp_Object ccl_program = Qnil;
1712 Lisp_Object short_name = Qnil, long_name = Qnil;
1713 int byte_offset = -1;
1715 CHECK_SYMBOL (name);
1716 if (!NILP (doc_string))
1717 CHECK_STRING (doc_string);
1719 charset = Ffind_charset (name);
1720 if (!NILP (charset))
1721 signal_simple_error ("Cannot redefine existing charset", name);
1723 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1725 if (EQ (keyword, Qshort_name))
1727 CHECK_STRING (value);
1731 if (EQ (keyword, Qlong_name))
1733 CHECK_STRING (value);
1737 else if (EQ (keyword, Qdimension))
1740 dimension = XINT (value);
1741 if (dimension < 1 || dimension > 2)
1742 signal_simple_error ("Invalid value for 'dimension", value);
1745 else if (EQ (keyword, Qchars))
1748 chars = XINT (value);
1749 if (chars != 94 && chars != 96)
1750 signal_simple_error ("Invalid value for 'chars", value);
1753 else if (EQ (keyword, Qcolumns))
1756 columns = XINT (value);
1757 if (columns != 1 && columns != 2)
1758 signal_simple_error ("Invalid value for 'columns", value);
1761 else if (EQ (keyword, Qgraphic))
1764 graphic = XINT (value);
1766 if (graphic < 0 || graphic > 2)
1768 if (graphic < 0 || graphic > 1)
1770 signal_simple_error ("Invalid value for 'graphic", value);
1773 else if (EQ (keyword, Qregistry))
1775 CHECK_STRING (value);
1779 else if (EQ (keyword, Qdirection))
1781 if (EQ (value, Ql2r))
1782 direction = CHARSET_LEFT_TO_RIGHT;
1783 else if (EQ (value, Qr2l))
1784 direction = CHARSET_RIGHT_TO_LEFT;
1786 signal_simple_error ("Invalid value for 'direction", value);
1789 else if (EQ (keyword, Qfinal))
1791 CHECK_CHAR_COERCE_INT (value);
1792 final = XCHAR (value);
1793 if (final < '0' || final > '~')
1794 signal_simple_error ("Invalid value for 'final", value);
1797 else if (EQ (keyword, Qccl_program))
1799 CHECK_VECTOR (value);
1800 ccl_program = value;
1804 signal_simple_error ("Unrecognized property", keyword);
1808 error ("'final must be specified");
1809 if (dimension == 2 && final > 0x5F)
1811 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1815 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1817 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1819 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1820 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1822 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1824 id = get_unallocated_leading_byte (dimension);
1826 if (NILP (doc_string))
1827 doc_string = build_string ("");
1829 if (NILP (registry))
1830 registry = build_string ("");
1832 if (NILP (short_name))
1833 XSETSTRING (short_name, XSYMBOL (name)->name);
1835 if (NILP (long_name))
1836 long_name = doc_string;
1839 columns = dimension;
1841 if (byte_offset < 0)
1845 else if (chars == 96)
1851 charset = make_charset (id, name, type, columns, graphic,
1852 final, direction, short_name, long_name,
1853 doc_string, registry,
1854 Qnil, 0, 0, 0, byte_offset);
1855 if (!NILP (ccl_program))
1856 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1860 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1862 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1863 NEW-NAME is the name of the new charset. Return the new charset.
1865 (charset, new_name))
1867 Lisp_Object new_charset = Qnil;
1868 int id, dimension, columns, graphic, final;
1869 int direction, type;
1870 Lisp_Object registry, doc_string, short_name, long_name;
1871 struct Lisp_Charset *cs;
1873 charset = Fget_charset (charset);
1874 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1875 signal_simple_error ("Charset already has reverse-direction charset",
1878 CHECK_SYMBOL (new_name);
1879 if (!NILP (Ffind_charset (new_name)))
1880 signal_simple_error ("Cannot redefine existing charset", new_name);
1882 cs = XCHARSET (charset);
1884 type = CHARSET_TYPE (cs);
1885 columns = CHARSET_COLUMNS (cs);
1886 dimension = CHARSET_DIMENSION (cs);
1887 id = get_unallocated_leading_byte (dimension);
1889 graphic = CHARSET_GRAPHIC (cs);
1890 final = CHARSET_FINAL (cs);
1891 direction = CHARSET_RIGHT_TO_LEFT;
1892 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1893 direction = CHARSET_LEFT_TO_RIGHT;
1894 doc_string = CHARSET_DOC_STRING (cs);
1895 short_name = CHARSET_SHORT_NAME (cs);
1896 long_name = CHARSET_LONG_NAME (cs);
1897 registry = CHARSET_REGISTRY (cs);
1899 new_charset = make_charset (id, new_name, type, columns,
1900 graphic, final, direction, short_name, long_name,
1901 doc_string, registry,
1903 CHARSET_DECODING_TABLE(cs),
1904 CHARSET_UCS_MIN(cs),
1905 CHARSET_UCS_MAX(cs),
1906 CHARSET_CODE_OFFSET(cs),
1907 CHARSET_BYTE_OFFSET(cs)
1913 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1914 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1919 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1920 Define symbol ALIAS as an alias for CHARSET.
1924 CHECK_SYMBOL (alias);
1925 charset = Fget_charset (charset);
1926 return Fputhash (alias, charset, Vcharset_hash_table);
1929 /* #### Reverse direction charsets not yet implemented. */
1931 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1933 Return the reverse-direction charset parallel to CHARSET, if any.
1934 This is the charset with the same properties (in particular, the same
1935 dimension, number of characters per dimension, and final byte) as
1936 CHARSET but whose characters are displayed in the opposite direction.
1940 charset = Fget_charset (charset);
1941 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1945 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1946 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1947 If DIRECTION is omitted, both directions will be checked (left-to-right
1948 will be returned if character sets exist for both directions).
1950 (dimension, chars, final, direction))
1952 int dm, ch, fi, di = -1;
1954 Lisp_Object obj = Qnil;
1956 CHECK_INT (dimension);
1957 dm = XINT (dimension);
1958 if (dm < 1 || dm > 2)
1959 signal_simple_error ("Invalid value for DIMENSION", dimension);
1963 if (ch != 94 && ch != 96)
1964 signal_simple_error ("Invalid value for CHARS", chars);
1966 CHECK_CHAR_COERCE_INT (final);
1968 if (fi < '0' || fi > '~')
1969 signal_simple_error ("Invalid value for FINAL", final);
1971 if (EQ (direction, Ql2r))
1972 di = CHARSET_LEFT_TO_RIGHT;
1973 else if (EQ (direction, Qr2l))
1974 di = CHARSET_RIGHT_TO_LEFT;
1975 else if (!NILP (direction))
1976 signal_simple_error ("Invalid value for DIRECTION", direction);
1978 if (dm == 2 && fi > 0x5F)
1980 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1983 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1985 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1989 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1991 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1994 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1997 return XCHARSET_NAME (obj);
2001 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2002 Return short name of CHARSET.
2006 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2009 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2010 Return long name of CHARSET.
2014 return XCHARSET_LONG_NAME (Fget_charset (charset));
2017 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2018 Return description of CHARSET.
2022 return XCHARSET_DOC_STRING (Fget_charset (charset));
2025 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2026 Return dimension of CHARSET.
2030 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2033 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2034 Return property PROP of CHARSET.
2035 Recognized properties are those listed in `make-charset', as well as
2036 'name and 'doc-string.
2040 struct Lisp_Charset *cs;
2042 charset = Fget_charset (charset);
2043 cs = XCHARSET (charset);
2045 CHECK_SYMBOL (prop);
2046 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2047 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2048 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2049 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2050 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2051 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2052 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2053 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2054 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2055 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2056 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2057 if (EQ (prop, Qdirection))
2058 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2059 if (EQ (prop, Qreverse_direction_charset))
2061 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2065 return XCHARSET_NAME (obj);
2067 signal_simple_error ("Unrecognized charset property name", prop);
2068 return Qnil; /* not reached */
2071 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2072 Return charset identification number of CHARSET.
2076 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2079 /* #### We need to figure out which properties we really want to
2082 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2083 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2085 (charset, ccl_program))
2087 charset = Fget_charset (charset);
2088 CHECK_VECTOR (ccl_program);
2089 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2094 invalidate_charset_font_caches (Lisp_Object charset)
2096 /* Invalidate font cache entries for charset on all devices. */
2097 Lisp_Object devcons, concons, hash_table;
2098 DEVICE_LOOP_NO_BREAK (devcons, concons)
2100 struct device *d = XDEVICE (XCAR (devcons));
2101 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2102 if (!UNBOUNDP (hash_table))
2103 Fclrhash (hash_table);
2107 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2108 Set the 'registry property of CHARSET to REGISTRY.
2110 (charset, registry))
2112 charset = Fget_charset (charset);
2113 CHECK_STRING (registry);
2114 XCHARSET_REGISTRY (charset) = registry;
2115 invalidate_charset_font_caches (charset);
2116 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2121 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2122 Return mapping-table of CHARSET.
2126 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2129 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2130 Set mapping-table of CHARSET to TABLE.
2134 struct Lisp_Charset *cs;
2135 Lisp_Object old_table;
2138 charset = Fget_charset (charset);
2139 cs = XCHARSET (charset);
2141 if (EQ (table, Qnil))
2143 CHARSET_DECODING_TABLE(cs) = table;
2146 else if (VECTORP (table))
2150 /* ad-hoc method for `ascii' */
2151 if ((CHARSET_CHARS (cs) == 94) &&
2152 (CHARSET_BYTE_OFFSET (cs) != 33))
2153 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2155 ccs_len = CHARSET_CHARS (cs);
2157 if (XVECTOR_LENGTH (table) > ccs_len)
2158 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2159 old_table = CHARSET_DECODING_TABLE(cs);
2160 CHARSET_DECODING_TABLE(cs) = table;
2163 signal_error (Qwrong_type_argument,
2164 list2 (build_translated_string ("vector-or-nil-p"),
2166 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2168 switch (CHARSET_DIMENSION (cs))
2171 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2173 Lisp_Object c = XVECTOR_DATA(table)[i];
2178 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2182 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2184 Lisp_Object v = XVECTOR_DATA(table)[i];
2190 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2192 CHARSET_DECODING_TABLE(cs) = old_table;
2193 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2195 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2197 Lisp_Object c = XVECTOR_DATA(v)[j];
2200 put_char_attribute (c, charset,
2203 (i + CHARSET_BYTE_OFFSET (cs)),
2205 (j + CHARSET_BYTE_OFFSET (cs))));
2209 put_char_attribute (v, charset,
2211 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2220 /************************************************************************/
2221 /* Lisp primitives for working with characters */
2222 /************************************************************************/
2224 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2225 Make a character from CHARSET and octets ARG1 and ARG2.
2226 ARG2 is required only for characters from two-dimensional charsets.
2227 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2228 character s with caron.
2230 (charset, arg1, arg2))
2232 struct Lisp_Charset *cs;
2234 int lowlim, highlim;
2236 charset = Fget_charset (charset);
2237 cs = XCHARSET (charset);
2239 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2240 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2242 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2244 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2245 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2248 /* It is useful (and safe, according to Olivier Galibert) to strip
2249 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2250 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2251 Latin 2 code of the character. */
2259 if (a1 < lowlim || a1 > highlim)
2260 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2262 if (CHARSET_DIMENSION (cs) == 1)
2266 ("Charset is of dimension one; second octet must be nil", arg2);
2267 return make_char (MAKE_CHAR (charset, a1, 0));
2276 a2 = XINT (arg2) & 0x7f;
2278 if (a2 < lowlim || a2 > highlim)
2279 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2281 return make_char (MAKE_CHAR (charset, a1, a2));
2284 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2285 Return the character set of char CH.
2289 CHECK_CHAR_COERCE_INT (ch);
2291 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2294 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2295 Return list of charset and one or two position-codes of CHAR.
2301 Lisp_Object charset;
2303 CHECK_CHAR_COERCE_INT (character);
2304 ret = SPLIT_CHAR (XCHAR (character));
2305 charset = Fcar (ret);
2306 if (CHARSETP (charset))
2307 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2311 /* This function can GC */
2312 struct gcpro gcpro1, gcpro2;
2313 Lisp_Object charset = Qnil;
2314 Lisp_Object rc = Qnil;
2317 GCPRO2 (charset, rc);
2318 CHECK_CHAR_COERCE_INT (character);
2320 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2322 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2324 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2328 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2336 #ifdef ENABLE_COMPOSITE_CHARS
2337 /************************************************************************/
2338 /* composite character functions */
2339 /************************************************************************/
2342 lookup_composite_char (Bufbyte *str, int len)
2344 Lisp_Object lispstr = make_string (str, len);
2345 Lisp_Object ch = Fgethash (lispstr,
2346 Vcomposite_char_string2char_hash_table,
2352 if (composite_char_row_next >= 128)
2353 signal_simple_error ("No more composite chars available", lispstr);
2354 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2355 composite_char_col_next);
2356 Fputhash (make_char (emch), lispstr,
2357 Vcomposite_char_char2string_hash_table);
2358 Fputhash (lispstr, make_char (emch),
2359 Vcomposite_char_string2char_hash_table);
2360 composite_char_col_next++;
2361 if (composite_char_col_next >= 128)
2363 composite_char_col_next = 32;
2364 composite_char_row_next++;
2373 composite_char_string (Emchar ch)
2375 Lisp_Object str = Fgethash (make_char (ch),
2376 Vcomposite_char_char2string_hash_table,
2378 assert (!UNBOUNDP (str));
2382 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2383 Convert a string into a single composite character.
2384 The character is the result of overstriking all the characters in
2389 CHECK_STRING (string);
2390 return make_char (lookup_composite_char (XSTRING_DATA (string),
2391 XSTRING_LENGTH (string)));
2394 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2395 Return a string of the characters comprising a composite character.
2403 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2404 signal_simple_error ("Must be composite char", ch);
2405 return composite_char_string (emch);
2407 #endif /* ENABLE_COMPOSITE_CHARS */
2410 /************************************************************************/
2411 /* initialization */
2412 /************************************************************************/
2415 syms_of_mule_charset (void)
2417 DEFSUBR (Fcharsetp);
2418 DEFSUBR (Ffind_charset);
2419 DEFSUBR (Fget_charset);
2420 DEFSUBR (Fcharset_list);
2421 DEFSUBR (Fcharset_name);
2422 DEFSUBR (Fmake_charset);
2423 DEFSUBR (Fmake_reverse_direction_charset);
2424 /* DEFSUBR (Freverse_direction_charset); */
2425 DEFSUBR (Fdefine_charset_alias);
2426 DEFSUBR (Fcharset_from_attributes);
2427 DEFSUBR (Fcharset_short_name);
2428 DEFSUBR (Fcharset_long_name);
2429 DEFSUBR (Fcharset_description);
2430 DEFSUBR (Fcharset_dimension);
2431 DEFSUBR (Fcharset_property);
2432 DEFSUBR (Fcharset_id);
2433 DEFSUBR (Fset_charset_ccl_program);
2434 DEFSUBR (Fset_charset_registry);
2436 DEFSUBR (Fchar_attribute_alist);
2437 DEFSUBR (Fget_char_attribute);
2438 DEFSUBR (Fput_char_attribute);
2439 DEFSUBR (Fdefine_char);
2440 DEFSUBR (Fchar_variants);
2441 DEFSUBR (Fget_composite_char);
2442 DEFSUBR (Fcharset_mapping_table);
2443 DEFSUBR (Fset_charset_mapping_table);
2446 DEFSUBR (Fmake_char);
2447 DEFSUBR (Fchar_charset);
2448 DEFSUBR (Fsplit_char);
2450 #ifdef ENABLE_COMPOSITE_CHARS
2451 DEFSUBR (Fmake_composite_char);
2452 DEFSUBR (Fcomposite_char_string);
2455 defsymbol (&Qcharsetp, "charsetp");
2456 defsymbol (&Qregistry, "registry");
2457 defsymbol (&Qfinal, "final");
2458 defsymbol (&Qgraphic, "graphic");
2459 defsymbol (&Qdirection, "direction");
2460 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2461 defsymbol (&Qshort_name, "short-name");
2462 defsymbol (&Qlong_name, "long-name");
2464 defsymbol (&Ql2r, "l2r");
2465 defsymbol (&Qr2l, "r2l");
2467 /* Charsets, compatible with FSF 20.3
2468 Naming convention is Script-Charset[-Edition] */
2469 defsymbol (&Qascii, "ascii");
2470 defsymbol (&Qcontrol_1, "control-1");
2471 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2472 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2473 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2474 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2475 defsymbol (&Qthai_tis620, "thai-tis620");
2476 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2477 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2478 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2479 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2480 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2481 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2482 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2483 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2484 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2485 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2486 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2487 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2488 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2489 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2490 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2492 defsymbol (&Q_ucs, "->ucs");
2493 defsymbol (&Q_decomposition, "->decomposition");
2494 defsymbol (&Qcompat, "compat");
2495 defsymbol (&QnoBreak, "noBreak");
2496 defsymbol (&Qfraction, "fraction");
2497 defsymbol (&Qsuper, "super");
2498 defsymbol (&Qsub, "sub");
2499 defsymbol (&Qcircle, "circle");
2500 defsymbol (&Qsquare, "square");
2501 defsymbol (&Qwide, "wide");
2502 defsymbol (&Qnarrow, "narrow");
2503 defsymbol (&Qfont, "font");
2504 defsymbol (&Qucs, "ucs");
2505 defsymbol (&Qucs_bmp, "ucs-bmp");
2506 defsymbol (&Qlatin_viscii, "latin-viscii");
2507 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2508 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2509 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2510 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2511 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2512 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2514 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2515 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2517 defsymbol (&Qcomposite, "composite");
2521 vars_of_mule_charset (void)
2528 /* Table of charsets indexed by leading byte. */
2529 for (i = 0; i < countof (charset_by_leading_byte); i++)
2530 charset_by_leading_byte[i] = Qnil;
2533 /* Table of charsets indexed by type/final-byte. */
2534 for (i = 0; i < countof (charset_by_attributes); i++)
2535 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2536 charset_by_attributes[i][j] = Qnil;
2538 /* Table of charsets indexed by type/final-byte/direction. */
2539 for (i = 0; i < countof (charset_by_attributes); i++)
2540 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2541 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2542 charset_by_attributes[i][j][k] = Qnil;
2546 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2548 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2549 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2553 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2554 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2555 Leading-code of private TYPE9N charset of column-width 1.
2557 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2561 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2562 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2563 Version number of UTF-2000.
2566 staticpro (&Vcharacter_attribute_table);
2567 Vcharacter_attribute_table = make_char_code_table (Qnil);
2569 staticpro (&Vcharacter_composition_table);
2570 Vcharacter_composition_table = make_char_code_table (Qnil);
2572 staticpro (&Vcharacter_variant_table);
2573 Vcharacter_variant_table = make_char_code_table (Qnil);
2575 Vdefault_coded_charset_priority_list = Qnil;
2576 DEFVAR_LISP ("default-coded-charset-priority-list",
2577 &Vdefault_coded_charset_priority_list /*
2578 Default order of preferred coded-character-sets.
2584 complex_vars_of_mule_charset (void)
2586 staticpro (&Vcharset_hash_table);
2587 Vcharset_hash_table =
2588 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2590 /* Predefined character sets. We store them into variables for
2595 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2596 CHARSET_TYPE_256X256, 1, 2, 0,
2597 CHARSET_LEFT_TO_RIGHT,
2598 build_string ("BMP"),
2599 build_string ("BMP"),
2600 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2601 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2602 Qnil, 0, 0xFFFF, 0, 0);
2604 # define MIN_CHAR_THAI 0
2605 # define MAX_CHAR_THAI 0
2606 # define MIN_CHAR_GREEK 0
2607 # define MAX_CHAR_GREEK 0
2608 # define MIN_CHAR_HEBREW 0
2609 # define MAX_CHAR_HEBREW 0
2610 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2611 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2612 # define MIN_CHAR_CYRILLIC 0
2613 # define MAX_CHAR_CYRILLIC 0
2616 make_charset (LEADING_BYTE_ASCII, Qascii,
2617 CHARSET_TYPE_94, 1, 0, 'B',
2618 CHARSET_LEFT_TO_RIGHT,
2619 build_string ("ASCII"),
2620 build_string ("ASCII)"),
2621 build_string ("ASCII (ISO646 IRV)"),
2622 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2623 Qnil, 0, 0x7F, 0, 0);
2624 Vcharset_control_1 =
2625 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2626 CHARSET_TYPE_94, 1, 1, 0,
2627 CHARSET_LEFT_TO_RIGHT,
2628 build_string ("C1"),
2629 build_string ("Control characters"),
2630 build_string ("Control characters 128-191"),
2632 Qnil, 0x80, 0x9F, 0, 0);
2633 Vcharset_latin_iso8859_1 =
2634 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2635 CHARSET_TYPE_96, 1, 1, 'A',
2636 CHARSET_LEFT_TO_RIGHT,
2637 build_string ("Latin-1"),
2638 build_string ("ISO8859-1 (Latin-1)"),
2639 build_string ("ISO8859-1 (Latin-1)"),
2640 build_string ("iso8859-1"),
2641 Qnil, 0xA0, 0xFF, 0, 32);
2642 Vcharset_latin_iso8859_2 =
2643 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2644 CHARSET_TYPE_96, 1, 1, 'B',
2645 CHARSET_LEFT_TO_RIGHT,
2646 build_string ("Latin-2"),
2647 build_string ("ISO8859-2 (Latin-2)"),
2648 build_string ("ISO8859-2 (Latin-2)"),
2649 build_string ("iso8859-2"),
2651 Vcharset_latin_iso8859_3 =
2652 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2653 CHARSET_TYPE_96, 1, 1, 'C',
2654 CHARSET_LEFT_TO_RIGHT,
2655 build_string ("Latin-3"),
2656 build_string ("ISO8859-3 (Latin-3)"),
2657 build_string ("ISO8859-3 (Latin-3)"),
2658 build_string ("iso8859-3"),
2660 Vcharset_latin_iso8859_4 =
2661 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2662 CHARSET_TYPE_96, 1, 1, 'D',
2663 CHARSET_LEFT_TO_RIGHT,
2664 build_string ("Latin-4"),
2665 build_string ("ISO8859-4 (Latin-4)"),
2666 build_string ("ISO8859-4 (Latin-4)"),
2667 build_string ("iso8859-4"),
2669 Vcharset_thai_tis620 =
2670 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2671 CHARSET_TYPE_96, 1, 1, 'T',
2672 CHARSET_LEFT_TO_RIGHT,
2673 build_string ("TIS620"),
2674 build_string ("TIS620 (Thai)"),
2675 build_string ("TIS620.2529 (Thai)"),
2676 build_string ("tis620"),
2677 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2678 Vcharset_greek_iso8859_7 =
2679 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2680 CHARSET_TYPE_96, 1, 1, 'F',
2681 CHARSET_LEFT_TO_RIGHT,
2682 build_string ("ISO8859-7"),
2683 build_string ("ISO8859-7 (Greek)"),
2684 build_string ("ISO8859-7 (Greek)"),
2685 build_string ("iso8859-7"),
2686 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2687 Vcharset_arabic_iso8859_6 =
2688 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2689 CHARSET_TYPE_96, 1, 1, 'G',
2690 CHARSET_RIGHT_TO_LEFT,
2691 build_string ("ISO8859-6"),
2692 build_string ("ISO8859-6 (Arabic)"),
2693 build_string ("ISO8859-6 (Arabic)"),
2694 build_string ("iso8859-6"),
2696 Vcharset_hebrew_iso8859_8 =
2697 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2698 CHARSET_TYPE_96, 1, 1, 'H',
2699 CHARSET_RIGHT_TO_LEFT,
2700 build_string ("ISO8859-8"),
2701 build_string ("ISO8859-8 (Hebrew)"),
2702 build_string ("ISO8859-8 (Hebrew)"),
2703 build_string ("iso8859-8"),
2704 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2705 Vcharset_katakana_jisx0201 =
2706 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2707 CHARSET_TYPE_94, 1, 1, 'I',
2708 CHARSET_LEFT_TO_RIGHT,
2709 build_string ("JISX0201 Kana"),
2710 build_string ("JISX0201.1976 (Japanese Kana)"),
2711 build_string ("JISX0201.1976 Japanese Kana"),
2712 build_string ("jisx0201\\.1976"),
2714 MIN_CHAR_HALFWIDTH_KATAKANA,
2715 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2716 Vcharset_latin_jisx0201 =
2717 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2718 CHARSET_TYPE_94, 1, 0, 'J',
2719 CHARSET_LEFT_TO_RIGHT,
2720 build_string ("JISX0201 Roman"),
2721 build_string ("JISX0201.1976 (Japanese Roman)"),
2722 build_string ("JISX0201.1976 Japanese Roman"),
2723 build_string ("jisx0201\\.1976"),
2725 Vcharset_cyrillic_iso8859_5 =
2726 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2727 CHARSET_TYPE_96, 1, 1, 'L',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("ISO8859-5"),
2730 build_string ("ISO8859-5 (Cyrillic)"),
2731 build_string ("ISO8859-5 (Cyrillic)"),
2732 build_string ("iso8859-5"),
2733 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2734 Vcharset_latin_iso8859_9 =
2735 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2736 CHARSET_TYPE_96, 1, 1, 'M',
2737 CHARSET_LEFT_TO_RIGHT,
2738 build_string ("Latin-5"),
2739 build_string ("ISO8859-9 (Latin-5)"),
2740 build_string ("ISO8859-9 (Latin-5)"),
2741 build_string ("iso8859-9"),
2743 Vcharset_japanese_jisx0208_1978 =
2744 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2745 CHARSET_TYPE_94X94, 2, 0, '@',
2746 CHARSET_LEFT_TO_RIGHT,
2747 build_string ("JIS X0208:1978"),
2748 build_string ("JIS X0208:1978 (Japanese)"),
2750 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2751 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2753 Vcharset_chinese_gb2312 =
2754 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2755 CHARSET_TYPE_94X94, 2, 0, 'A',
2756 CHARSET_LEFT_TO_RIGHT,
2757 build_string ("GB2312"),
2758 build_string ("GB2312)"),
2759 build_string ("GB2312 Chinese simplified"),
2760 build_string ("gb2312"),
2762 Vcharset_japanese_jisx0208 =
2763 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2764 CHARSET_TYPE_94X94, 2, 0, 'B',
2765 CHARSET_LEFT_TO_RIGHT,
2766 build_string ("JISX0208"),
2767 build_string ("JIS X0208:1983 (Japanese)"),
2768 build_string ("JIS X0208:1983 Japanese Kanji"),
2769 build_string ("jisx0208\\.1983"),
2771 Vcharset_japanese_jisx0208_1990 =
2772 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2773 Qjapanese_jisx0208_1990,
2774 CHARSET_TYPE_94X94, 2, 0, 0,
2775 CHARSET_LEFT_TO_RIGHT,
2776 build_string ("JISX0208-1990"),
2777 build_string ("JIS X0208:1990 (Japanese)"),
2778 build_string ("JIS X0208:1990 Japanese Kanji"),
2779 build_string ("jisx0208\\.1990"),
2781 MIN_CHAR_JIS_X0208_1990,
2782 MAX_CHAR_JIS_X0208_1990, 0, 33);
2783 Vcharset_korean_ksc5601 =
2784 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2785 CHARSET_TYPE_94X94, 2, 0, 'C',
2786 CHARSET_LEFT_TO_RIGHT,
2787 build_string ("KSC5601"),
2788 build_string ("KSC5601 (Korean"),
2789 build_string ("KSC5601 Korean Hangul and Hanja"),
2790 build_string ("ksc5601"),
2792 Vcharset_japanese_jisx0212 =
2793 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2794 CHARSET_TYPE_94X94, 2, 0, 'D',
2795 CHARSET_LEFT_TO_RIGHT,
2796 build_string ("JISX0212"),
2797 build_string ("JISX0212 (Japanese)"),
2798 build_string ("JISX0212 Japanese Supplement"),
2799 build_string ("jisx0212"),
2802 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2803 Vcharset_chinese_cns11643_1 =
2804 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2805 CHARSET_TYPE_94X94, 2, 0, 'G',
2806 CHARSET_LEFT_TO_RIGHT,
2807 build_string ("CNS11643-1"),
2808 build_string ("CNS11643-1 (Chinese traditional)"),
2810 ("CNS 11643 Plane 1 Chinese traditional"),
2811 build_string (CHINESE_CNS_PLANE_RE("1")),
2813 Vcharset_chinese_cns11643_2 =
2814 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2815 CHARSET_TYPE_94X94, 2, 0, 'H',
2816 CHARSET_LEFT_TO_RIGHT,
2817 build_string ("CNS11643-2"),
2818 build_string ("CNS11643-2 (Chinese traditional)"),
2820 ("CNS 11643 Plane 2 Chinese traditional"),
2821 build_string (CHINESE_CNS_PLANE_RE("2")),
2824 Vcharset_latin_viscii_lower =
2825 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2826 CHARSET_TYPE_96, 1, 1, '1',
2827 CHARSET_LEFT_TO_RIGHT,
2828 build_string ("VISCII lower"),
2829 build_string ("VISCII lower (Vietnamese)"),
2830 build_string ("VISCII lower (Vietnamese)"),
2831 build_string ("MULEVISCII-LOWER"),
2833 Vcharset_latin_viscii_upper =
2834 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2835 CHARSET_TYPE_96, 1, 1, '2',
2836 CHARSET_LEFT_TO_RIGHT,
2837 build_string ("VISCII upper"),
2838 build_string ("VISCII upper (Vietnamese)"),
2839 build_string ("VISCII upper (Vietnamese)"),
2840 build_string ("MULEVISCII-UPPER"),
2842 Vcharset_latin_viscii =
2843 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2844 CHARSET_TYPE_256, 1, 2, 0,
2845 CHARSET_LEFT_TO_RIGHT,
2846 build_string ("VISCII"),
2847 build_string ("VISCII 1.1 (Vietnamese)"),
2848 build_string ("VISCII 1.1 (Vietnamese)"),
2849 build_string ("VISCII1\\.1"),
2851 Vcharset_ideograph_daikanwa =
2852 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2853 CHARSET_TYPE_256X256, 2, 2, 0,
2854 CHARSET_LEFT_TO_RIGHT,
2855 build_string ("Daikanwa"),
2856 build_string ("Morohashi's Daikanwa"),
2857 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2858 build_string ("Daikanwa"),
2859 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2860 Vcharset_ethiopic_ucs =
2861 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2862 CHARSET_TYPE_256X256, 2, 2, 0,
2863 CHARSET_LEFT_TO_RIGHT,
2864 build_string ("Ethiopic (UCS)"),
2865 build_string ("Ethiopic (UCS)"),
2866 build_string ("Ethiopic of UCS"),
2867 build_string ("Ethiopic-Unicode"),
2868 Qnil, 0x1200, 0x137F, 0x1200, 0);
2870 Vcharset_chinese_big5_1 =
2871 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2872 CHARSET_TYPE_94X94, 2, 0, '0',
2873 CHARSET_LEFT_TO_RIGHT,
2874 build_string ("Big5"),
2875 build_string ("Big5 (Level-1)"),
2877 ("Big5 Level-1 Chinese traditional"),
2878 build_string ("big5"),
2880 Vcharset_chinese_big5_2 =
2881 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2882 CHARSET_TYPE_94X94, 2, 0, '1',
2883 CHARSET_LEFT_TO_RIGHT,
2884 build_string ("Big5"),
2885 build_string ("Big5 (Level-2)"),
2887 ("Big5 Level-2 Chinese traditional"),
2888 build_string ("big5"),
2891 #ifdef ENABLE_COMPOSITE_CHARS
2892 /* #### For simplicity, we put composite chars into a 96x96 charset.
2893 This is going to lead to problems because you can run out of
2894 room, esp. as we don't yet recycle numbers. */
2895 Vcharset_composite =
2896 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2897 CHARSET_TYPE_96X96, 2, 0, 0,
2898 CHARSET_LEFT_TO_RIGHT,
2899 build_string ("Composite"),
2900 build_string ("Composite characters"),
2901 build_string ("Composite characters"),
2904 composite_char_row_next = 32;
2905 composite_char_col_next = 32;
2907 Vcomposite_char_string2char_hash_table =
2908 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2909 Vcomposite_char_char2string_hash_table =
2910 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2911 staticpro (&Vcomposite_char_string2char_hash_table);
2912 staticpro (&Vcomposite_char_char2string_hash_table);
2913 #endif /* ENABLE_COMPOSITE_CHARS */