1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ideograph_daikanwa;
66 Lisp_Object Vcharset_ethiopic_ucs;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 unsigned int code = ch;
299 struct Lisp_Char_Byte_Table* cpt
300 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
301 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
303 if (CHAR_BYTE_TABLE_P (ret))
304 cpt = XCHAR_BYTE_TABLE (ret);
308 ret = cpt->property [(unsigned char) (code >> 16)];
309 if (CHAR_BYTE_TABLE_P (ret))
310 cpt = XCHAR_BYTE_TABLE (ret);
314 ret = cpt->property [(unsigned char) (code >> 8)];
315 if (CHAR_BYTE_TABLE_P (ret))
316 cpt = XCHAR_BYTE_TABLE (ret);
320 return cpt->property [(unsigned char) code];
324 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
326 unsigned int code = ch;
327 struct Lisp_Char_Byte_Table* cpt1
328 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
329 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
331 if (CHAR_BYTE_TABLE_P (ret))
333 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
335 ret = cpt2->property[(unsigned char)(code >> 16)];
336 if (CHAR_BYTE_TABLE_P (ret))
338 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
340 ret = cpt3->property[(unsigned char)(code >> 8)];
341 if (CHAR_BYTE_TABLE_P (ret))
343 struct Lisp_Char_Byte_Table* cpt4
344 = XCHAR_BYTE_TABLE (ret);
346 cpt4->property[(unsigned char)code] = value;
348 else if (!EQ (ret, value))
350 Lisp_Object cpt4 = make_char_byte_table (ret);
352 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
353 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
356 else if (!EQ (ret, value))
358 Lisp_Object cpt3 = make_char_byte_table (ret);
359 Lisp_Object cpt4 = make_char_byte_table (ret);
361 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
362 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
364 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
367 else if (!EQ (ret, value))
369 Lisp_Object cpt2 = make_char_byte_table (ret);
370 Lisp_Object cpt3 = make_char_byte_table (ret);
371 Lisp_Object cpt4 = make_char_byte_table (ret);
373 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
374 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
375 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
376 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
381 Lisp_Object Vcharacter_attribute_table;
382 Lisp_Object Vcharacter_composition_table;
383 Lisp_Object Vcharacter_variant_table;
385 Lisp_Object Q_decomposition;
388 Lisp_Object QnoBreak;
389 Lisp_Object Qfraction;
399 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
405 else if (EQ (v, Qcompat))
407 else if (EQ (v, QnoBreak))
409 else if (EQ (v, Qfraction))
411 else if (EQ (v, Qsuper))
413 else if (EQ (v, Qsub))
415 else if (EQ (v, Qcircle))
417 else if (EQ (v, Qsquare))
419 else if (EQ (v, Qwide))
421 else if (EQ (v, Qnarrow))
423 else if (EQ (v, Qfont))
426 signal_simple_error (err_msg, err_arg);
429 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
430 Return character corresponding with list.
434 Lisp_Object table = Vcharacter_composition_table;
435 Lisp_Object rest = list;
439 Lisp_Object v = Fcar (rest);
441 Emchar c = to_char_code (v, "Invalid value for composition", list);
443 ret = get_char_code_table (c, table);
448 if (!CHAR_CODE_TABLE_P (ret))
453 else if (!CONSP (rest))
455 else if (CHAR_CODE_TABLE_P (ret))
458 signal_simple_error ("Invalid table is found with", list);
460 signal_simple_error ("Invalid value for composition", list);
463 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
464 Return variants of CHARACTER.
468 CHECK_CHAR (character);
469 return Fcopy_list (get_char_code_table (XCHAR (character),
470 Vcharacter_variant_table));
473 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
474 Return the alist of attributes of CHARACTER.
478 CHECK_CHAR (character);
479 return Fcopy_alist (get_char_code_table (XCHAR (character),
480 Vcharacter_attribute_table));
483 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
484 Return the value of CHARACTER's ATTRIBUTE.
486 (character, attribute))
489 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
495 if (!NILP (ccs = Ffind_charset (attribute)))
498 return Fcdr (Fassq (attribute, ret));
502 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
505 Emchar char_code = XCHAR (character);
507 = get_char_code_table (char_code, Vcharacter_attribute_table);
510 cell = Fassq (attribute, ret);
514 ret = Fcons (Fcons (attribute, value), ret);
516 else if (!EQ (Fcdr (cell), value))
518 Fsetcdr (cell, value);
520 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
524 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
525 Store CHARACTER's ATTRIBUTE with VALUE.
527 (character, attribute, value))
531 CHECK_CHAR (character);
532 ccs = Ffind_charset (attribute);
536 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
541 /* ad-hoc method for `ascii' */
542 if ((XCHARSET_CHARS (ccs) == 94) &&
543 (XCHARSET_BYTE_OFFSET (ccs) != 33))
544 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
546 ccs_len = XCHARSET_CHARS (ccs);
549 signal_simple_error ("Invalid value for coded-charset",
553 rest = Fget_char_attribute (character, attribute);
560 Lisp_Object ei = Fcar (rest);
562 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
563 nv = XVECTOR_DATA(v)[i];
570 XVECTOR_DATA(v)[i] = Qnil;
571 v = XCHARSET_DECODING_TABLE (ccs);
576 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
579 if (XCHARSET_GRAPHIC (ccs) == 1)
580 value = Fcopy_list (value);
585 Lisp_Object ei = Fcar (rest);
588 signal_simple_error ("Invalid value for coded-charset", value);
590 if ((i < 0) || (255 < i))
591 signal_simple_error ("Invalid value for coded-charset", value);
592 if (XCHARSET_GRAPHIC (ccs) == 1)
595 Fsetcar (rest, make_int (i));
597 i -= XCHARSET_BYTE_OFFSET (ccs);
598 nv = XVECTOR_DATA(v)[i];
604 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
611 XVECTOR_DATA(v)[i] = character;
613 else if (EQ (attribute, Q_decomposition))
615 Lisp_Object rest = value;
616 Lisp_Object table = Vcharacter_composition_table;
619 signal_simple_error ("Invalid value for ->decomposition",
624 Lisp_Object v = Fcar (rest);
627 = to_char_code (v, "Invalid value for ->decomposition", value);
632 put_char_code_table (c, character, table);
637 ntable = get_char_code_table (c, table);
638 if (!CHAR_CODE_TABLE_P (ntable))
640 ntable = make_char_code_table (Qnil);
641 put_char_code_table (c, ntable, table);
647 else if (EQ (attribute, Q_ucs))
653 signal_simple_error ("Invalid value for ->ucs", value);
657 ret = get_char_code_table (c, Vcharacter_variant_table);
658 if (NILP (Fmemq (character, ret)))
660 put_char_code_table (c, Fcons (character, ret),
661 Vcharacter_variant_table);
664 return put_char_attribute (character, attribute, value);
669 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
670 Store character's ATTRIBUTES.
674 Lisp_Object rest = attributes;
675 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
676 Lisp_Object character;
682 Lisp_Object cell = Fcar (rest);
686 signal_simple_error ("Invalid argument", attributes);
687 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
688 && XCHARSET_FINAL (ccs))
692 if (XCHARSET_DIMENSION (ccs) == 1)
694 Lisp_Object eb1 = Fcar (Fcdr (cell));
698 signal_simple_error ("Invalid argument", attributes);
700 switch (XCHARSET_CHARS (ccs))
704 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
708 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
714 else if (XCHARSET_DIMENSION (ccs) == 2)
716 Lisp_Object eb1 = Fcar (Fcdr (cell));
717 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
721 signal_simple_error ("Invalid argument", attributes);
724 signal_simple_error ("Invalid argument", attributes);
726 switch (XCHARSET_CHARS (ccs))
729 code = MIN_CHAR_94x94
730 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
731 + (b1 - 33) * 94 + (b2 - 33);
734 code = MIN_CHAR_96x96
735 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
736 + (b1 - 32) * 96 + (b2 - 32);
747 character = make_char (code);
748 goto setup_attributes;
754 else if (!INTP (code))
755 signal_simple_error ("Invalid argument", attributes);
757 character = make_char (XINT (code));
763 Lisp_Object cell = Fcar (rest);
766 signal_simple_error ("Invalid argument", attributes);
767 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
771 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
774 Lisp_Object Vutf_2000_version;
778 int leading_code_private_11;
781 Lisp_Object Qcharsetp;
783 /* Qdoc_string, Qdimension, Qchars defined in general.c */
784 Lisp_Object Qregistry, Qfinal, Qgraphic;
785 Lisp_Object Qdirection;
786 Lisp_Object Qreverse_direction_charset;
787 Lisp_Object Qleading_byte;
788 Lisp_Object Qshort_name, Qlong_name;
804 Qjapanese_jisx0208_1978,
816 Qvietnamese_viscii_lower,
817 Qvietnamese_viscii_upper,
825 Lisp_Object Ql2r, Qr2l;
827 Lisp_Object Vcharset_hash_table;
830 static Charset_ID next_allocated_leading_byte;
832 static Charset_ID next_allocated_1_byte_leading_byte;
833 static Charset_ID next_allocated_2_byte_leading_byte;
836 /* Composite characters are characters constructed by overstriking two
837 or more regular characters.
839 1) The old Mule implementation involves storing composite characters
840 in a buffer as a tag followed by all of the actual characters
841 used to make up the composite character. I think this is a bad
842 idea; it greatly complicates code that wants to handle strings
843 one character at a time because it has to deal with the possibility
844 of great big ungainly characters. It's much more reasonable to
845 simply store an index into a table of composite characters.
847 2) The current implementation only allows for 16,384 separate
848 composite characters over the lifetime of the XEmacs process.
849 This could become a potential problem if the user
850 edited lots of different files that use composite characters.
851 Due to FSF bogosity, increasing the number of allowable
852 composite characters under Mule would decrease the number
853 of possible faces that can exist. Mule already has shrunk
854 this to 2048, and further shrinkage would become uncomfortable.
855 No such problems exist in XEmacs.
857 Composite characters could be represented as 0x80 C1 C2 C3,
858 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
859 for slightly under 2^20 (one million) composite characters
860 over the XEmacs process lifetime, and you only need to
861 increase the size of a Mule character from 19 to 21 bits.
862 Or you could use 0x80 C1 C2 C3 C4, allowing for about
863 85 million (slightly over 2^26) composite characters. */
866 /************************************************************************/
867 /* Basic Emchar functions */
868 /************************************************************************/
870 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
871 string in STR. Returns the number of bytes stored.
872 Do not call this directly. Use the macro set_charptr_emchar() instead.
876 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
891 else if ( c <= 0x7ff )
893 *p++ = (c >> 6) | 0xc0;
894 *p++ = (c & 0x3f) | 0x80;
896 else if ( c <= 0xffff )
898 *p++ = (c >> 12) | 0xe0;
899 *p++ = ((c >> 6) & 0x3f) | 0x80;
900 *p++ = (c & 0x3f) | 0x80;
902 else if ( c <= 0x1fffff )
904 *p++ = (c >> 18) | 0xf0;
905 *p++ = ((c >> 12) & 0x3f) | 0x80;
906 *p++ = ((c >> 6) & 0x3f) | 0x80;
907 *p++ = (c & 0x3f) | 0x80;
909 else if ( c <= 0x3ffffff )
911 *p++ = (c >> 24) | 0xf8;
912 *p++ = ((c >> 18) & 0x3f) | 0x80;
913 *p++ = ((c >> 12) & 0x3f) | 0x80;
914 *p++ = ((c >> 6) & 0x3f) | 0x80;
915 *p++ = (c & 0x3f) | 0x80;
919 *p++ = (c >> 30) | 0xfc;
920 *p++ = ((c >> 24) & 0x3f) | 0x80;
921 *p++ = ((c >> 18) & 0x3f) | 0x80;
922 *p++ = ((c >> 12) & 0x3f) | 0x80;
923 *p++ = ((c >> 6) & 0x3f) | 0x80;
924 *p++ = (c & 0x3f) | 0x80;
927 BREAKUP_CHAR (c, charset, c1, c2);
928 lb = CHAR_LEADING_BYTE (c);
929 if (LEADING_BYTE_PRIVATE_P (lb))
930 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
932 if (EQ (charset, Vcharset_control_1))
941 /* Return the first character from a Mule-encoded string in STR,
942 assuming it's non-ASCII. Do not call this directly.
943 Use the macro charptr_emchar() instead. */
946 non_ascii_charptr_emchar (CONST Bufbyte *str)
959 else if ( b >= 0xf8 )
964 else if ( b >= 0xf0 )
969 else if ( b >= 0xe0 )
974 else if ( b >= 0xc0 )
984 for( ; len > 0; len-- )
987 ch = ( ch << 6 ) | ( b & 0x3f );
991 Bufbyte i0 = *str, i1, i2 = 0;
994 if (i0 == LEADING_BYTE_CONTROL_1)
995 return (Emchar) (*++str - 0x20);
997 if (LEADING_BYTE_PREFIX_P (i0))
1002 charset = CHARSET_BY_LEADING_BYTE (i0);
1003 if (XCHARSET_DIMENSION (charset) == 2)
1006 return MAKE_CHAR (charset, i1, i2);
1010 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1011 Do not call this directly. Use the macro valid_char_p() instead. */
1015 non_ascii_valid_char_p (Emchar ch)
1019 /* Must have only lowest 19 bits set */
1023 f1 = CHAR_FIELD1 (ch);
1024 f2 = CHAR_FIELD2 (ch);
1025 f3 = CHAR_FIELD3 (ch);
1029 Lisp_Object charset;
1031 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1032 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1033 f2 > MAX_CHAR_FIELD2_PRIVATE)
1038 if (f3 != 0x20 && f3 != 0x7F)
1042 NOTE: This takes advantage of the fact that
1043 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1044 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1046 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1047 return (XCHARSET_CHARS (charset) == 96);
1051 Lisp_Object charset;
1053 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1054 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1055 f1 > MAX_CHAR_FIELD1_PRIVATE)
1057 if (f2 < 0x20 || f3 < 0x20)
1060 #ifdef ENABLE_COMPOSITE_CHARS
1061 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1063 if (UNBOUNDP (Fgethash (make_int (ch),
1064 Vcomposite_char_char2string_hash_table,
1069 #endif /* ENABLE_COMPOSITE_CHARS */
1071 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1074 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1076 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1079 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1081 return (XCHARSET_CHARS (charset) == 96);
1087 /************************************************************************/
1088 /* Basic string functions */
1089 /************************************************************************/
1091 /* Copy the character pointed to by PTR into STR, assuming it's
1092 non-ASCII. Do not call this directly. Use the macro
1093 charptr_copy_char() instead. */
1096 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1098 Bufbyte *strptr = str;
1100 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1102 /* Notice fallthrough. */
1104 case 6: *++strptr = *ptr++;
1105 case 5: *++strptr = *ptr++;
1107 case 4: *++strptr = *ptr++;
1108 case 3: *++strptr = *ptr++;
1109 case 2: *++strptr = *ptr;
1114 return strptr + 1 - str;
1118 /************************************************************************/
1119 /* streams of Emchars */
1120 /************************************************************************/
1122 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1123 The functions below are not meant to be called directly; use
1124 the macros in insdel.h. */
1127 Lstream_get_emchar_1 (Lstream *stream, int ch)
1129 Bufbyte str[MAX_EMCHAR_LEN];
1130 Bufbyte *strptr = str;
1132 str[0] = (Bufbyte) ch;
1133 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1135 /* Notice fallthrough. */
1138 ch = Lstream_getc (stream);
1140 *++strptr = (Bufbyte) ch;
1142 ch = Lstream_getc (stream);
1144 *++strptr = (Bufbyte) ch;
1147 ch = Lstream_getc (stream);
1149 *++strptr = (Bufbyte) ch;
1151 ch = Lstream_getc (stream);
1153 *++strptr = (Bufbyte) ch;
1155 ch = Lstream_getc (stream);
1157 *++strptr = (Bufbyte) ch;
1162 return charptr_emchar (str);
1166 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1168 Bufbyte str[MAX_EMCHAR_LEN];
1169 Bytecount len = set_charptr_emchar (str, ch);
1170 return Lstream_write (stream, str, len);
1174 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1176 Bufbyte str[MAX_EMCHAR_LEN];
1177 Bytecount len = set_charptr_emchar (str, ch);
1178 Lstream_unread (stream, str, len);
1182 /************************************************************************/
1183 /* charset object */
1184 /************************************************************************/
1187 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1189 struct Lisp_Charset *cs = XCHARSET (obj);
1191 markobj (cs->short_name);
1192 markobj (cs->long_name);
1193 markobj (cs->doc_string);
1194 markobj (cs->registry);
1195 markobj (cs->ccl_program);
1197 markobj (cs->decoding_table);
1203 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1205 struct Lisp_Charset *cs = XCHARSET (obj);
1209 error ("printing unreadable object #<charset %s 0x%x>",
1210 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1213 write_c_string ("#<charset ", printcharfun);
1214 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1215 write_c_string (" ", printcharfun);
1216 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1217 write_c_string (" ", printcharfun);
1218 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1219 write_c_string (" ", printcharfun);
1220 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1221 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1222 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1223 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1224 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1226 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1227 CHARSET_COLUMNS (cs),
1228 CHARSET_GRAPHIC (cs),
1229 CHARSET_FINAL (cs));
1230 write_c_string (buf, printcharfun);
1231 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1232 sprintf (buf, " 0x%x>", cs->header.uid);
1233 write_c_string (buf, printcharfun);
1236 static const struct lrecord_description charset_description[] = {
1237 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1239 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1244 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1245 mark_charset, print_charset, 0, 0, 0,
1246 charset_description,
1247 struct Lisp_Charset);
1249 /* Make a new charset. */
1252 make_charset (Charset_ID id, Lisp_Object name,
1253 unsigned char type, unsigned char columns, unsigned char graphic,
1254 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1255 Lisp_Object long_name, Lisp_Object doc,
1257 Lisp_Object decoding_table,
1258 Emchar ucs_min, Emchar ucs_max,
1259 Emchar code_offset, unsigned char byte_offset)
1262 struct Lisp_Charset *cs =
1263 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1264 XSETCHARSET (obj, cs);
1266 CHARSET_ID (cs) = id;
1267 CHARSET_NAME (cs) = name;
1268 CHARSET_SHORT_NAME (cs) = short_name;
1269 CHARSET_LONG_NAME (cs) = long_name;
1270 CHARSET_DIRECTION (cs) = direction;
1271 CHARSET_TYPE (cs) = type;
1272 CHARSET_COLUMNS (cs) = columns;
1273 CHARSET_GRAPHIC (cs) = graphic;
1274 CHARSET_FINAL (cs) = final;
1275 CHARSET_DOC_STRING (cs) = doc;
1276 CHARSET_REGISTRY (cs) = reg;
1277 CHARSET_CCL_PROGRAM (cs) = Qnil;
1278 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1280 CHARSET_DECODING_TABLE(cs) = Qnil;
1281 CHARSET_UCS_MIN(cs) = ucs_min;
1282 CHARSET_UCS_MAX(cs) = ucs_max;
1283 CHARSET_CODE_OFFSET(cs) = code_offset;
1284 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1287 switch (CHARSET_TYPE (cs))
1289 case CHARSET_TYPE_94:
1290 CHARSET_DIMENSION (cs) = 1;
1291 CHARSET_CHARS (cs) = 94;
1293 case CHARSET_TYPE_96:
1294 CHARSET_DIMENSION (cs) = 1;
1295 CHARSET_CHARS (cs) = 96;
1297 case CHARSET_TYPE_94X94:
1298 CHARSET_DIMENSION (cs) = 2;
1299 CHARSET_CHARS (cs) = 94;
1301 case CHARSET_TYPE_96X96:
1302 CHARSET_DIMENSION (cs) = 2;
1303 CHARSET_CHARS (cs) = 96;
1306 case CHARSET_TYPE_128:
1307 CHARSET_DIMENSION (cs) = 1;
1308 CHARSET_CHARS (cs) = 128;
1310 case CHARSET_TYPE_128X128:
1311 CHARSET_DIMENSION (cs) = 2;
1312 CHARSET_CHARS (cs) = 128;
1314 case CHARSET_TYPE_256:
1315 CHARSET_DIMENSION (cs) = 1;
1316 CHARSET_CHARS (cs) = 256;
1318 case CHARSET_TYPE_256X256:
1319 CHARSET_DIMENSION (cs) = 2;
1320 CHARSET_CHARS (cs) = 256;
1326 if (id == LEADING_BYTE_ASCII)
1327 CHARSET_REP_BYTES (cs) = 1;
1329 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1331 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1336 /* some charsets do not have final characters. This includes
1337 ASCII, Control-1, Composite, and the two faux private
1340 if (code_offset == 0)
1342 assert (NILP (charset_by_attributes[type][final]));
1343 charset_by_attributes[type][final] = obj;
1346 assert (NILP (charset_by_attributes[type][final][direction]));
1347 charset_by_attributes[type][final][direction] = obj;
1351 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1352 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1355 /* official leading byte */
1356 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1359 /* Some charsets are "faux" and don't have names or really exist at
1360 all except in the leading-byte table. */
1362 Fputhash (name, obj, Vcharset_hash_table);
1367 get_unallocated_leading_byte (int dimension)
1372 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1375 lb = next_allocated_leading_byte++;
1379 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1382 lb = next_allocated_1_byte_leading_byte++;
1386 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1389 lb = next_allocated_2_byte_leading_byte++;
1395 ("No more character sets free for this dimension",
1396 make_int (dimension));
1403 range_charset_code_point (Lisp_Object charset, Emchar ch)
1407 if ((XCHARSET_UCS_MIN (charset) <= ch)
1408 && (ch <= XCHARSET_UCS_MAX (charset)))
1410 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1412 if (XCHARSET_DIMENSION (charset) == 1)
1413 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1414 else if (XCHARSET_DIMENSION (charset) == 2)
1415 return list2 (make_int (d / XCHARSET_CHARS (charset)
1416 + XCHARSET_BYTE_OFFSET (charset)),
1417 make_int (d % XCHARSET_CHARS (charset)
1418 + XCHARSET_BYTE_OFFSET (charset)));
1419 else if (XCHARSET_DIMENSION (charset) == 3)
1420 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1421 * XCHARSET_CHARS (charset))
1422 + XCHARSET_BYTE_OFFSET (charset)),
1423 make_int (d / XCHARSET_CHARS (charset)
1424 % XCHARSET_CHARS (charset)
1425 + XCHARSET_BYTE_OFFSET (charset)),
1426 make_int (d % XCHARSET_CHARS (charset)
1427 + XCHARSET_BYTE_OFFSET (charset)));
1428 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1429 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1430 * XCHARSET_CHARS (charset)
1431 * XCHARSET_CHARS (charset))
1432 + XCHARSET_BYTE_OFFSET (charset)),
1433 make_int (d / (XCHARSET_CHARS (charset)
1434 * XCHARSET_CHARS (charset))
1435 % XCHARSET_CHARS (charset)
1436 + XCHARSET_BYTE_OFFSET (charset)),
1437 make_int (d / XCHARSET_CHARS (charset)
1438 % XCHARSET_CHARS (charset)
1439 + XCHARSET_BYTE_OFFSET (charset)),
1440 make_int (d % XCHARSET_CHARS (charset)
1441 + XCHARSET_BYTE_OFFSET (charset)));
1443 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1445 if (XCHARSET_DIMENSION (charset) == 1)
1447 if (XCHARSET_CHARS (charset) == 94)
1449 if (((d = ch - (MIN_CHAR_94
1450 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1452 return list1 (make_int (d + 33));
1454 else if (XCHARSET_CHARS (charset) == 96)
1456 if (((d = ch - (MIN_CHAR_96
1457 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1459 return list1 (make_int (d + 32));
1464 else if (XCHARSET_DIMENSION (charset) == 2)
1466 if (XCHARSET_CHARS (charset) == 94)
1468 if (((d = ch - (MIN_CHAR_94x94
1469 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1472 return list2 (make_int ((d / 94) + 33),
1473 make_int (d % 94 + 33));
1475 else if (XCHARSET_CHARS (charset) == 96)
1477 if (((d = ch - (MIN_CHAR_96x96
1478 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1481 return list2 (make_int ((d / 96) + 32),
1482 make_int (d % 96 + 32));
1490 split_builtin_char (Emchar c)
1492 if (c < MIN_CHAR_OBS_94x94)
1494 if (c <= MAX_CHAR_BASIC_LATIN)
1496 return list2 (Vcharset_ascii, make_int (c));
1500 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1504 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1506 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1508 return list2 (Vcharset_greek_iso8859_7,
1509 make_int (c - MIN_CHAR_GREEK + 0x20));
1511 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1513 return list2 (Vcharset_cyrillic_iso8859_5,
1514 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1516 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1518 return list2 (Vcharset_hebrew_iso8859_8,
1519 make_int (c - MIN_CHAR_HEBREW + 0x20));
1521 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1523 return list2 (Vcharset_thai_tis620,
1524 make_int (c - MIN_CHAR_THAI + 0x20));
1526 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1527 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1529 return list2 (Vcharset_katakana_jisx0201,
1530 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1534 return list3 (Vcharset_ucs_bmp,
1535 make_int (c >> 8), make_int (c & 0xff));
1538 else if (c <= MAX_CHAR_OBS_94x94)
1540 return list3 (CHARSET_BY_ATTRIBUTES
1541 (CHARSET_TYPE_94X94,
1542 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1543 CHARSET_LEFT_TO_RIGHT),
1544 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1545 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1547 else if (c <= MAX_CHAR_DAIKANWA)
1549 return list3 (Vcharset_ideograph_daikanwa,
1550 make_int ((c - MIN_CHAR_DAIKANWA) >> 8),
1551 make_int ((c - MIN_CHAR_DAIKANWA) & 255));
1553 else if (c <= MAX_CHAR_94)
1555 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1556 ((c - MIN_CHAR_94) / 94) + '0',
1557 CHARSET_LEFT_TO_RIGHT),
1558 make_int (((c - MIN_CHAR_94) % 94) + 33));
1560 else if (c <= MAX_CHAR_96)
1562 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1563 ((c - MIN_CHAR_96) / 96) + '0',
1564 CHARSET_LEFT_TO_RIGHT),
1565 make_int (((c - MIN_CHAR_96) % 96) + 32));
1567 else if (c <= MAX_CHAR_94x94)
1569 return list3 (CHARSET_BY_ATTRIBUTES
1570 (CHARSET_TYPE_94X94,
1571 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1572 CHARSET_LEFT_TO_RIGHT),
1573 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1574 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1576 else if (c <= MAX_CHAR_96x96)
1578 return list3 (CHARSET_BY_ATTRIBUTES
1579 (CHARSET_TYPE_96X96,
1580 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1581 CHARSET_LEFT_TO_RIGHT),
1582 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1583 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1592 charset_code_point (Lisp_Object charset, Emchar ch)
1594 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1596 if (!EQ (cdef, Qnil))
1598 Lisp_Object field = Fassq (charset, cdef);
1600 if (!EQ (field, Qnil))
1601 return Fcdr (field);
1603 return range_charset_code_point (charset, ch);
1606 Lisp_Object Vdefault_coded_charset_priority_list;
1610 /************************************************************************/
1611 /* Basic charset Lisp functions */
1612 /************************************************************************/
1614 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1615 Return non-nil if OBJECT is a charset.
1619 return CHARSETP (object) ? Qt : Qnil;
1622 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1623 Retrieve the charset of the given name.
1624 If CHARSET-OR-NAME is a charset object, it is simply returned.
1625 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1626 nil is returned. Otherwise the associated charset object is returned.
1630 if (CHARSETP (charset_or_name))
1631 return charset_or_name;
1633 CHECK_SYMBOL (charset_or_name);
1634 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1637 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1638 Retrieve the charset of the given name.
1639 Same as `find-charset' except an error is signalled if there is no such
1640 charset instead of returning nil.
1644 Lisp_Object charset = Ffind_charset (name);
1647 signal_simple_error ("No such charset", name);
1651 /* We store the charsets in hash tables with the names as the key and the
1652 actual charset object as the value. Occasionally we need to use them
1653 in a list format. These routines provide us with that. */
1654 struct charset_list_closure
1656 Lisp_Object *charset_list;
1660 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1661 void *charset_list_closure)
1663 /* This function can GC */
1664 struct charset_list_closure *chcl =
1665 (struct charset_list_closure*) charset_list_closure;
1666 Lisp_Object *charset_list = chcl->charset_list;
1668 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1672 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1673 Return a list of the names of all defined charsets.
1677 Lisp_Object charset_list = Qnil;
1678 struct gcpro gcpro1;
1679 struct charset_list_closure charset_list_closure;
1681 GCPRO1 (charset_list);
1682 charset_list_closure.charset_list = &charset_list;
1683 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1684 &charset_list_closure);
1687 return charset_list;
1690 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1691 Return the name of the given charset.
1695 return XCHARSET_NAME (Fget_charset (charset));
1698 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1699 Define a new character set.
1700 This function is for use with Mule support.
1701 NAME is a symbol, the name by which the character set is normally referred.
1702 DOC-STRING is a string describing the character set.
1703 PROPS is a property list, describing the specific nature of the
1704 character set. Recognized properties are:
1706 'short-name Short version of the charset name (ex: Latin-1)
1707 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1708 'registry A regular expression matching the font registry field for
1710 'dimension Number of octets used to index a character in this charset.
1711 Either 1 or 2. Defaults to 1.
1712 'columns Number of columns used to display a character in this charset.
1713 Only used in TTY mode. (Under X, the actual width of a
1714 character can be derived from the font used to display the
1715 characters.) If unspecified, defaults to the dimension
1716 (this is almost always the correct value).
1717 'chars Number of characters in each dimension (94 or 96).
1718 Defaults to 94. Note that if the dimension is 2, the
1719 character set thus described is 94x94 or 96x96.
1720 'final Final byte of ISO 2022 escape sequence. Must be
1721 supplied. Each combination of (DIMENSION, CHARS) defines a
1722 separate namespace for final bytes. Note that ISO
1723 2022 restricts the final byte to the range
1724 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1725 dimension == 2. Note also that final bytes in the range
1726 0x30 - 0x3F are reserved for user-defined (not official)
1728 'graphic 0 (use left half of font on output) or 1 (use right half
1729 of font on output). Defaults to 0. For example, for
1730 a font whose registry is ISO8859-1, the left half
1731 (octets 0x20 - 0x7F) is the `ascii' character set, while
1732 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1733 character set. With 'graphic set to 0, the octets
1734 will have their high bit cleared; with it set to 1,
1735 the octets will have their high bit set.
1736 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1738 'ccl-program A compiled CCL program used to convert a character in
1739 this charset into an index into the font. This is in
1740 addition to the 'graphic property. The CCL program
1741 is passed the octets of the character, with the high
1742 bit cleared and set depending upon whether the value
1743 of the 'graphic property is 0 or 1.
1745 (name, doc_string, props))
1747 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1748 int direction = CHARSET_LEFT_TO_RIGHT;
1750 Lisp_Object registry = Qnil;
1751 Lisp_Object charset;
1752 Lisp_Object rest, keyword, value;
1753 Lisp_Object ccl_program = Qnil;
1754 Lisp_Object short_name = Qnil, long_name = Qnil;
1755 int byte_offset = -1;
1757 CHECK_SYMBOL (name);
1758 if (!NILP (doc_string))
1759 CHECK_STRING (doc_string);
1761 charset = Ffind_charset (name);
1762 if (!NILP (charset))
1763 signal_simple_error ("Cannot redefine existing charset", name);
1765 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1767 if (EQ (keyword, Qshort_name))
1769 CHECK_STRING (value);
1773 if (EQ (keyword, Qlong_name))
1775 CHECK_STRING (value);
1779 else if (EQ (keyword, Qdimension))
1782 dimension = XINT (value);
1783 if (dimension < 1 || dimension > 2)
1784 signal_simple_error ("Invalid value for 'dimension", value);
1787 else if (EQ (keyword, Qchars))
1790 chars = XINT (value);
1791 if (chars != 94 && chars != 96)
1792 signal_simple_error ("Invalid value for 'chars", value);
1795 else if (EQ (keyword, Qcolumns))
1798 columns = XINT (value);
1799 if (columns != 1 && columns != 2)
1800 signal_simple_error ("Invalid value for 'columns", value);
1803 else if (EQ (keyword, Qgraphic))
1806 graphic = XINT (value);
1808 if (graphic < 0 || graphic > 2)
1810 if (graphic < 0 || graphic > 1)
1812 signal_simple_error ("Invalid value for 'graphic", value);
1815 else if (EQ (keyword, Qregistry))
1817 CHECK_STRING (value);
1821 else if (EQ (keyword, Qdirection))
1823 if (EQ (value, Ql2r))
1824 direction = CHARSET_LEFT_TO_RIGHT;
1825 else if (EQ (value, Qr2l))
1826 direction = CHARSET_RIGHT_TO_LEFT;
1828 signal_simple_error ("Invalid value for 'direction", value);
1831 else if (EQ (keyword, Qfinal))
1833 CHECK_CHAR_COERCE_INT (value);
1834 final = XCHAR (value);
1835 if (final < '0' || final > '~')
1836 signal_simple_error ("Invalid value for 'final", value);
1839 else if (EQ (keyword, Qccl_program))
1841 CHECK_VECTOR (value);
1842 ccl_program = value;
1846 signal_simple_error ("Unrecognized property", keyword);
1850 error ("'final must be specified");
1851 if (dimension == 2 && final > 0x5F)
1853 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1857 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1859 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1861 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1862 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1864 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1866 id = get_unallocated_leading_byte (dimension);
1868 if (NILP (doc_string))
1869 doc_string = build_string ("");
1871 if (NILP (registry))
1872 registry = build_string ("");
1874 if (NILP (short_name))
1875 XSETSTRING (short_name, XSYMBOL (name)->name);
1877 if (NILP (long_name))
1878 long_name = doc_string;
1881 columns = dimension;
1883 if (byte_offset < 0)
1887 else if (chars == 96)
1893 charset = make_charset (id, name, type, columns, graphic,
1894 final, direction, short_name, long_name,
1895 doc_string, registry,
1896 Qnil, 0, 0, 0, byte_offset);
1897 if (!NILP (ccl_program))
1898 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1902 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1904 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1905 NEW-NAME is the name of the new charset. Return the new charset.
1907 (charset, new_name))
1909 Lisp_Object new_charset = Qnil;
1910 int id, dimension, columns, graphic, final;
1911 int direction, type;
1912 Lisp_Object registry, doc_string, short_name, long_name;
1913 struct Lisp_Charset *cs;
1915 charset = Fget_charset (charset);
1916 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1917 signal_simple_error ("Charset already has reverse-direction charset",
1920 CHECK_SYMBOL (new_name);
1921 if (!NILP (Ffind_charset (new_name)))
1922 signal_simple_error ("Cannot redefine existing charset", new_name);
1924 cs = XCHARSET (charset);
1926 type = CHARSET_TYPE (cs);
1927 columns = CHARSET_COLUMNS (cs);
1928 dimension = CHARSET_DIMENSION (cs);
1929 id = get_unallocated_leading_byte (dimension);
1931 graphic = CHARSET_GRAPHIC (cs);
1932 final = CHARSET_FINAL (cs);
1933 direction = CHARSET_RIGHT_TO_LEFT;
1934 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1935 direction = CHARSET_LEFT_TO_RIGHT;
1936 doc_string = CHARSET_DOC_STRING (cs);
1937 short_name = CHARSET_SHORT_NAME (cs);
1938 long_name = CHARSET_LONG_NAME (cs);
1939 registry = CHARSET_REGISTRY (cs);
1941 new_charset = make_charset (id, new_name, type, columns,
1942 graphic, final, direction, short_name, long_name,
1943 doc_string, registry,
1945 CHARSET_DECODING_TABLE(cs),
1946 CHARSET_UCS_MIN(cs),
1947 CHARSET_UCS_MAX(cs),
1948 CHARSET_CODE_OFFSET(cs),
1949 CHARSET_BYTE_OFFSET(cs)
1955 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1956 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1961 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1962 Define symbol ALIAS as an alias for CHARSET.
1966 CHECK_SYMBOL (alias);
1967 charset = Fget_charset (charset);
1968 return Fputhash (alias, charset, Vcharset_hash_table);
1971 /* #### Reverse direction charsets not yet implemented. */
1973 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1975 Return the reverse-direction charset parallel to CHARSET, if any.
1976 This is the charset with the same properties (in particular, the same
1977 dimension, number of characters per dimension, and final byte) as
1978 CHARSET but whose characters are displayed in the opposite direction.
1982 charset = Fget_charset (charset);
1983 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1987 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1988 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1989 If DIRECTION is omitted, both directions will be checked (left-to-right
1990 will be returned if character sets exist for both directions).
1992 (dimension, chars, final, direction))
1994 int dm, ch, fi, di = -1;
1996 Lisp_Object obj = Qnil;
1998 CHECK_INT (dimension);
1999 dm = XINT (dimension);
2000 if (dm < 1 || dm > 2)
2001 signal_simple_error ("Invalid value for DIMENSION", dimension);
2005 if (ch != 94 && ch != 96)
2006 signal_simple_error ("Invalid value for CHARS", chars);
2008 CHECK_CHAR_COERCE_INT (final);
2010 if (fi < '0' || fi > '~')
2011 signal_simple_error ("Invalid value for FINAL", final);
2013 if (EQ (direction, Ql2r))
2014 di = CHARSET_LEFT_TO_RIGHT;
2015 else if (EQ (direction, Qr2l))
2016 di = CHARSET_RIGHT_TO_LEFT;
2017 else if (!NILP (direction))
2018 signal_simple_error ("Invalid value for DIRECTION", direction);
2020 if (dm == 2 && fi > 0x5F)
2022 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2025 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2027 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2031 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2033 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2036 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2039 return XCHARSET_NAME (obj);
2043 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2044 Return short name of CHARSET.
2048 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2051 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2052 Return long name of CHARSET.
2056 return XCHARSET_LONG_NAME (Fget_charset (charset));
2059 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2060 Return description of CHARSET.
2064 return XCHARSET_DOC_STRING (Fget_charset (charset));
2067 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2068 Return dimension of CHARSET.
2072 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2075 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2076 Return property PROP of CHARSET.
2077 Recognized properties are those listed in `make-charset', as well as
2078 'name and 'doc-string.
2082 struct Lisp_Charset *cs;
2084 charset = Fget_charset (charset);
2085 cs = XCHARSET (charset);
2087 CHECK_SYMBOL (prop);
2088 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2089 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2090 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2091 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2092 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2093 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2094 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2095 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2096 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2097 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2098 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2099 if (EQ (prop, Qdirection))
2100 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2101 if (EQ (prop, Qreverse_direction_charset))
2103 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2107 return XCHARSET_NAME (obj);
2109 signal_simple_error ("Unrecognized charset property name", prop);
2110 return Qnil; /* not reached */
2113 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2114 Return charset identification number of CHARSET.
2118 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2121 /* #### We need to figure out which properties we really want to
2124 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2125 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2127 (charset, ccl_program))
2129 charset = Fget_charset (charset);
2130 CHECK_VECTOR (ccl_program);
2131 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2136 invalidate_charset_font_caches (Lisp_Object charset)
2138 /* Invalidate font cache entries for charset on all devices. */
2139 Lisp_Object devcons, concons, hash_table;
2140 DEVICE_LOOP_NO_BREAK (devcons, concons)
2142 struct device *d = XDEVICE (XCAR (devcons));
2143 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2144 if (!UNBOUNDP (hash_table))
2145 Fclrhash (hash_table);
2149 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2150 Set the 'registry property of CHARSET to REGISTRY.
2152 (charset, registry))
2154 charset = Fget_charset (charset);
2155 CHECK_STRING (registry);
2156 XCHARSET_REGISTRY (charset) = registry;
2157 invalidate_charset_font_caches (charset);
2158 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2163 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2164 Return mapping-table of CHARSET.
2168 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2171 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2172 Set mapping-table of CHARSET to TABLE.
2176 struct Lisp_Charset *cs;
2177 Lisp_Object old_table;
2180 charset = Fget_charset (charset);
2181 cs = XCHARSET (charset);
2183 if (EQ (table, Qnil))
2185 CHARSET_DECODING_TABLE(cs) = table;
2188 else if (VECTORP (table))
2192 /* ad-hoc method for `ascii' */
2193 if ((CHARSET_CHARS (cs) == 94) &&
2194 (CHARSET_BYTE_OFFSET (cs) != 33))
2195 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2197 ccs_len = CHARSET_CHARS (cs);
2199 if (XVECTOR_LENGTH (table) > ccs_len)
2200 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2201 old_table = CHARSET_DECODING_TABLE(cs);
2202 CHARSET_DECODING_TABLE(cs) = table;
2205 signal_error (Qwrong_type_argument,
2206 list2 (build_translated_string ("vector-or-nil-p"),
2208 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2210 switch (CHARSET_DIMENSION (cs))
2213 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2215 Lisp_Object c = XVECTOR_DATA(table)[i];
2220 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2224 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2226 Lisp_Object v = XVECTOR_DATA(table)[i];
2232 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2234 CHARSET_DECODING_TABLE(cs) = old_table;
2235 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2237 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2239 Lisp_Object c = XVECTOR_DATA(v)[j];
2242 put_char_attribute (c, charset,
2245 (i + CHARSET_BYTE_OFFSET (cs)),
2247 (j + CHARSET_BYTE_OFFSET (cs))));
2251 put_char_attribute (v, charset,
2253 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2262 /************************************************************************/
2263 /* Lisp primitives for working with characters */
2264 /************************************************************************/
2266 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2267 Make a character from CHARSET and octets ARG1 and ARG2.
2268 ARG2 is required only for characters from two-dimensional charsets.
2269 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2270 character s with caron.
2272 (charset, arg1, arg2))
2274 struct Lisp_Charset *cs;
2276 int lowlim, highlim;
2278 charset = Fget_charset (charset);
2279 cs = XCHARSET (charset);
2281 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2282 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2284 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2286 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2287 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2290 /* It is useful (and safe, according to Olivier Galibert) to strip
2291 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2292 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2293 Latin 2 code of the character. */
2301 if (a1 < lowlim || a1 > highlim)
2302 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2304 if (CHARSET_DIMENSION (cs) == 1)
2308 ("Charset is of dimension one; second octet must be nil", arg2);
2309 return make_char (MAKE_CHAR (charset, a1, 0));
2318 a2 = XINT (arg2) & 0x7f;
2320 if (a2 < lowlim || a2 > highlim)
2321 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2323 return make_char (MAKE_CHAR (charset, a1, a2));
2326 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2327 Return the character set of char CH.
2331 CHECK_CHAR_COERCE_INT (ch);
2333 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2336 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2337 Return list of charset and one or two position-codes of CHAR.
2343 Lisp_Object charset;
2345 CHECK_CHAR_COERCE_INT (character);
2346 ret = SPLIT_CHAR (XCHAR (character));
2347 charset = Fcar (ret);
2348 if (CHARSETP (charset))
2349 return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret)));
2353 /* This function can GC */
2354 struct gcpro gcpro1, gcpro2;
2355 Lisp_Object charset = Qnil;
2356 Lisp_Object rc = Qnil;
2359 GCPRO2 (charset, rc);
2360 CHECK_CHAR_COERCE_INT (character);
2362 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2364 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2366 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2370 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2378 #ifdef ENABLE_COMPOSITE_CHARS
2379 /************************************************************************/
2380 /* composite character functions */
2381 /************************************************************************/
2384 lookup_composite_char (Bufbyte *str, int len)
2386 Lisp_Object lispstr = make_string (str, len);
2387 Lisp_Object ch = Fgethash (lispstr,
2388 Vcomposite_char_string2char_hash_table,
2394 if (composite_char_row_next >= 128)
2395 signal_simple_error ("No more composite chars available", lispstr);
2396 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2397 composite_char_col_next);
2398 Fputhash (make_char (emch), lispstr,
2399 Vcomposite_char_char2string_hash_table);
2400 Fputhash (lispstr, make_char (emch),
2401 Vcomposite_char_string2char_hash_table);
2402 composite_char_col_next++;
2403 if (composite_char_col_next >= 128)
2405 composite_char_col_next = 32;
2406 composite_char_row_next++;
2415 composite_char_string (Emchar ch)
2417 Lisp_Object str = Fgethash (make_char (ch),
2418 Vcomposite_char_char2string_hash_table,
2420 assert (!UNBOUNDP (str));
2424 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2425 Convert a string into a single composite character.
2426 The character is the result of overstriking all the characters in
2431 CHECK_STRING (string);
2432 return make_char (lookup_composite_char (XSTRING_DATA (string),
2433 XSTRING_LENGTH (string)));
2436 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2437 Return a string of the characters comprising a composite character.
2445 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2446 signal_simple_error ("Must be composite char", ch);
2447 return composite_char_string (emch);
2449 #endif /* ENABLE_COMPOSITE_CHARS */
2452 /************************************************************************/
2453 /* initialization */
2454 /************************************************************************/
2457 syms_of_mule_charset (void)
2459 DEFSUBR (Fcharsetp);
2460 DEFSUBR (Ffind_charset);
2461 DEFSUBR (Fget_charset);
2462 DEFSUBR (Fcharset_list);
2463 DEFSUBR (Fcharset_name);
2464 DEFSUBR (Fmake_charset);
2465 DEFSUBR (Fmake_reverse_direction_charset);
2466 /* DEFSUBR (Freverse_direction_charset); */
2467 DEFSUBR (Fdefine_charset_alias);
2468 DEFSUBR (Fcharset_from_attributes);
2469 DEFSUBR (Fcharset_short_name);
2470 DEFSUBR (Fcharset_long_name);
2471 DEFSUBR (Fcharset_description);
2472 DEFSUBR (Fcharset_dimension);
2473 DEFSUBR (Fcharset_property);
2474 DEFSUBR (Fcharset_id);
2475 DEFSUBR (Fset_charset_ccl_program);
2476 DEFSUBR (Fset_charset_registry);
2478 DEFSUBR (Fchar_attribute_alist);
2479 DEFSUBR (Fget_char_attribute);
2480 DEFSUBR (Fput_char_attribute);
2481 DEFSUBR (Fdefine_char);
2482 DEFSUBR (Fchar_variants);
2483 DEFSUBR (Fget_composite_char);
2484 DEFSUBR (Fcharset_mapping_table);
2485 DEFSUBR (Fset_charset_mapping_table);
2488 DEFSUBR (Fmake_char);
2489 DEFSUBR (Fchar_charset);
2490 DEFSUBR (Fsplit_char);
2492 #ifdef ENABLE_COMPOSITE_CHARS
2493 DEFSUBR (Fmake_composite_char);
2494 DEFSUBR (Fcomposite_char_string);
2497 defsymbol (&Qcharsetp, "charsetp");
2498 defsymbol (&Qregistry, "registry");
2499 defsymbol (&Qfinal, "final");
2500 defsymbol (&Qgraphic, "graphic");
2501 defsymbol (&Qdirection, "direction");
2502 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2503 defsymbol (&Qshort_name, "short-name");
2504 defsymbol (&Qlong_name, "long-name");
2506 defsymbol (&Ql2r, "l2r");
2507 defsymbol (&Qr2l, "r2l");
2509 /* Charsets, compatible with FSF 20.3
2510 Naming convention is Script-Charset[-Edition] */
2511 defsymbol (&Qascii, "ascii");
2512 defsymbol (&Qcontrol_1, "control-1");
2513 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2514 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2515 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2516 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2517 defsymbol (&Qthai_tis620, "thai-tis620");
2518 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2519 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2520 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2521 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2522 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2523 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2524 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2525 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2526 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2527 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2528 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2529 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2530 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2531 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2533 defsymbol (&Q_ucs, "->ucs");
2534 defsymbol (&Q_decomposition, "->decomposition");
2535 defsymbol (&Qcompat, "compat");
2536 defsymbol (&QnoBreak, "noBreak");
2537 defsymbol (&Qfraction, "fraction");
2538 defsymbol (&Qsuper, "super");
2539 defsymbol (&Qsub, "sub");
2540 defsymbol (&Qcircle, "circle");
2541 defsymbol (&Qsquare, "square");
2542 defsymbol (&Qwide, "wide");
2543 defsymbol (&Qnarrow, "narrow");
2544 defsymbol (&Qfont, "font");
2545 defsymbol (&Qucs, "ucs");
2546 defsymbol (&Qucs_bmp, "ucs-bmp");
2547 defsymbol (&Qlatin_viscii, "latin-viscii");
2548 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2549 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2550 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2551 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2552 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2553 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2555 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2556 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2558 defsymbol (&Qcomposite, "composite");
2562 vars_of_mule_charset (void)
2569 /* Table of charsets indexed by leading byte. */
2570 for (i = 0; i < countof (charset_by_leading_byte); i++)
2571 charset_by_leading_byte[i] = Qnil;
2574 /* Table of charsets indexed by type/final-byte. */
2575 for (i = 0; i < countof (charset_by_attributes); i++)
2576 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2577 charset_by_attributes[i][j] = Qnil;
2579 /* Table of charsets indexed by type/final-byte/direction. */
2580 for (i = 0; i < countof (charset_by_attributes); i++)
2581 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2582 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2583 charset_by_attributes[i][j][k] = Qnil;
2587 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2589 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2590 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2594 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2595 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2596 Leading-code of private TYPE9N charset of column-width 1.
2598 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2602 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2603 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2604 Version number of UTF-2000.
2607 staticpro (&Vcharacter_attribute_table);
2608 Vcharacter_attribute_table = make_char_code_table (Qnil);
2610 staticpro (&Vcharacter_composition_table);
2611 Vcharacter_composition_table = make_char_code_table (Qnil);
2613 staticpro (&Vcharacter_variant_table);
2614 Vcharacter_variant_table = make_char_code_table (Qnil);
2616 Vdefault_coded_charset_priority_list = Qnil;
2617 DEFVAR_LISP ("default-coded-charset-priority-list",
2618 &Vdefault_coded_charset_priority_list /*
2619 Default order of preferred coded-character-sets.
2625 complex_vars_of_mule_charset (void)
2627 staticpro (&Vcharset_hash_table);
2628 Vcharset_hash_table =
2629 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2631 /* Predefined character sets. We store them into variables for
2636 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2637 CHARSET_TYPE_256X256, 1, 2, 0,
2638 CHARSET_LEFT_TO_RIGHT,
2639 build_string ("BMP"),
2640 build_string ("BMP"),
2641 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2642 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2643 Qnil, 0, 0xFFFF, 0, 0);
2645 # define MIN_CHAR_THAI 0
2646 # define MAX_CHAR_THAI 0
2647 # define MIN_CHAR_GREEK 0
2648 # define MAX_CHAR_GREEK 0
2649 # define MIN_CHAR_HEBREW 0
2650 # define MAX_CHAR_HEBREW 0
2651 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2652 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2653 # define MIN_CHAR_CYRILLIC 0
2654 # define MAX_CHAR_CYRILLIC 0
2657 make_charset (LEADING_BYTE_ASCII, Qascii,
2658 CHARSET_TYPE_94, 1, 0, 'B',
2659 CHARSET_LEFT_TO_RIGHT,
2660 build_string ("ASCII"),
2661 build_string ("ASCII)"),
2662 build_string ("ASCII (ISO646 IRV)"),
2663 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2664 Qnil, 0, 0x7F, 0, 0);
2665 Vcharset_control_1 =
2666 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2667 CHARSET_TYPE_94, 1, 1, 0,
2668 CHARSET_LEFT_TO_RIGHT,
2669 build_string ("C1"),
2670 build_string ("Control characters"),
2671 build_string ("Control characters 128-191"),
2673 Qnil, 0x80, 0x9F, 0, 0);
2674 Vcharset_latin_iso8859_1 =
2675 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2676 CHARSET_TYPE_96, 1, 1, 'A',
2677 CHARSET_LEFT_TO_RIGHT,
2678 build_string ("Latin-1"),
2679 build_string ("ISO8859-1 (Latin-1)"),
2680 build_string ("ISO8859-1 (Latin-1)"),
2681 build_string ("iso8859-1"),
2682 Qnil, 0xA0, 0xFF, 0, 32);
2683 Vcharset_latin_iso8859_2 =
2684 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2685 CHARSET_TYPE_96, 1, 1, 'B',
2686 CHARSET_LEFT_TO_RIGHT,
2687 build_string ("Latin-2"),
2688 build_string ("ISO8859-2 (Latin-2)"),
2689 build_string ("ISO8859-2 (Latin-2)"),
2690 build_string ("iso8859-2"),
2692 Vcharset_latin_iso8859_3 =
2693 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2694 CHARSET_TYPE_96, 1, 1, 'C',
2695 CHARSET_LEFT_TO_RIGHT,
2696 build_string ("Latin-3"),
2697 build_string ("ISO8859-3 (Latin-3)"),
2698 build_string ("ISO8859-3 (Latin-3)"),
2699 build_string ("iso8859-3"),
2701 Vcharset_latin_iso8859_4 =
2702 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2703 CHARSET_TYPE_96, 1, 1, 'D',
2704 CHARSET_LEFT_TO_RIGHT,
2705 build_string ("Latin-4"),
2706 build_string ("ISO8859-4 (Latin-4)"),
2707 build_string ("ISO8859-4 (Latin-4)"),
2708 build_string ("iso8859-4"),
2710 Vcharset_thai_tis620 =
2711 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2712 CHARSET_TYPE_96, 1, 1, 'T',
2713 CHARSET_LEFT_TO_RIGHT,
2714 build_string ("TIS620"),
2715 build_string ("TIS620 (Thai)"),
2716 build_string ("TIS620.2529 (Thai)"),
2717 build_string ("tis620"),
2718 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2719 Vcharset_greek_iso8859_7 =
2720 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2721 CHARSET_TYPE_96, 1, 1, 'F',
2722 CHARSET_LEFT_TO_RIGHT,
2723 build_string ("ISO8859-7"),
2724 build_string ("ISO8859-7 (Greek)"),
2725 build_string ("ISO8859-7 (Greek)"),
2726 build_string ("iso8859-7"),
2727 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2728 Vcharset_arabic_iso8859_6 =
2729 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2730 CHARSET_TYPE_96, 1, 1, 'G',
2731 CHARSET_RIGHT_TO_LEFT,
2732 build_string ("ISO8859-6"),
2733 build_string ("ISO8859-6 (Arabic)"),
2734 build_string ("ISO8859-6 (Arabic)"),
2735 build_string ("iso8859-6"),
2737 Vcharset_hebrew_iso8859_8 =
2738 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2739 CHARSET_TYPE_96, 1, 1, 'H',
2740 CHARSET_RIGHT_TO_LEFT,
2741 build_string ("ISO8859-8"),
2742 build_string ("ISO8859-8 (Hebrew)"),
2743 build_string ("ISO8859-8 (Hebrew)"),
2744 build_string ("iso8859-8"),
2745 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2746 Vcharset_katakana_jisx0201 =
2747 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2748 CHARSET_TYPE_94, 1, 1, 'I',
2749 CHARSET_LEFT_TO_RIGHT,
2750 build_string ("JISX0201 Kana"),
2751 build_string ("JISX0201.1976 (Japanese Kana)"),
2752 build_string ("JISX0201.1976 Japanese Kana"),
2753 build_string ("jisx0201\\.1976"),
2755 MIN_CHAR_HALFWIDTH_KATAKANA,
2756 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2757 Vcharset_latin_jisx0201 =
2758 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2759 CHARSET_TYPE_94, 1, 0, 'J',
2760 CHARSET_LEFT_TO_RIGHT,
2761 build_string ("JISX0201 Roman"),
2762 build_string ("JISX0201.1976 (Japanese Roman)"),
2763 build_string ("JISX0201.1976 Japanese Roman"),
2764 build_string ("jisx0201\\.1976"),
2766 Vcharset_cyrillic_iso8859_5 =
2767 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2768 CHARSET_TYPE_96, 1, 1, 'L',
2769 CHARSET_LEFT_TO_RIGHT,
2770 build_string ("ISO8859-5"),
2771 build_string ("ISO8859-5 (Cyrillic)"),
2772 build_string ("ISO8859-5 (Cyrillic)"),
2773 build_string ("iso8859-5"),
2774 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2775 Vcharset_latin_iso8859_9 =
2776 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2777 CHARSET_TYPE_96, 1, 1, 'M',
2778 CHARSET_LEFT_TO_RIGHT,
2779 build_string ("Latin-5"),
2780 build_string ("ISO8859-9 (Latin-5)"),
2781 build_string ("ISO8859-9 (Latin-5)"),
2782 build_string ("iso8859-9"),
2784 Vcharset_japanese_jisx0208_1978 =
2785 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2786 CHARSET_TYPE_94X94, 2, 0, '@',
2787 CHARSET_LEFT_TO_RIGHT,
2788 build_string ("JIS X0208:1978"),
2789 build_string ("JIS X0208:1978 (Japanese)"),
2791 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2792 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2794 Vcharset_chinese_gb2312 =
2795 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2796 CHARSET_TYPE_94X94, 2, 0, 'A',
2797 CHARSET_LEFT_TO_RIGHT,
2798 build_string ("GB2312"),
2799 build_string ("GB2312)"),
2800 build_string ("GB2312 Chinese simplified"),
2801 build_string ("gb2312"),
2803 Vcharset_japanese_jisx0208 =
2804 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2805 CHARSET_TYPE_94X94, 2, 0, 'B',
2806 CHARSET_LEFT_TO_RIGHT,
2807 build_string ("JISX0208"),
2808 build_string ("JIS X0208:1983 (Japanese)"),
2809 build_string ("JIS X0208:1983 Japanese Kanji"),
2810 build_string ("jisx0208\\.1983"),
2812 Vcharset_korean_ksc5601 =
2813 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2814 CHARSET_TYPE_94X94, 2, 0, 'C',
2815 CHARSET_LEFT_TO_RIGHT,
2816 build_string ("KSC5601"),
2817 build_string ("KSC5601 (Korean"),
2818 build_string ("KSC5601 Korean Hangul and Hanja"),
2819 build_string ("ksc5601"),
2821 Vcharset_japanese_jisx0212 =
2822 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2823 CHARSET_TYPE_94X94, 2, 0, 'D',
2824 CHARSET_LEFT_TO_RIGHT,
2825 build_string ("JISX0212"),
2826 build_string ("JISX0212 (Japanese)"),
2827 build_string ("JISX0212 Japanese Supplement"),
2828 build_string ("jisx0212"),
2831 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2832 Vcharset_chinese_cns11643_1 =
2833 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2834 CHARSET_TYPE_94X94, 2, 0, 'G',
2835 CHARSET_LEFT_TO_RIGHT,
2836 build_string ("CNS11643-1"),
2837 build_string ("CNS11643-1 (Chinese traditional)"),
2839 ("CNS 11643 Plane 1 Chinese traditional"),
2840 build_string (CHINESE_CNS_PLANE_RE("1")),
2842 Vcharset_chinese_cns11643_2 =
2843 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2844 CHARSET_TYPE_94X94, 2, 0, 'H',
2845 CHARSET_LEFT_TO_RIGHT,
2846 build_string ("CNS11643-2"),
2847 build_string ("CNS11643-2 (Chinese traditional)"),
2849 ("CNS 11643 Plane 2 Chinese traditional"),
2850 build_string (CHINESE_CNS_PLANE_RE("2")),
2853 Vcharset_latin_viscii_lower =
2854 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2855 CHARSET_TYPE_96, 1, 1, '1',
2856 CHARSET_LEFT_TO_RIGHT,
2857 build_string ("VISCII lower"),
2858 build_string ("VISCII lower (Vietnamese)"),
2859 build_string ("VISCII lower (Vietnamese)"),
2860 build_string ("MULEVISCII-LOWER"),
2862 Vcharset_latin_viscii_upper =
2863 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2864 CHARSET_TYPE_96, 1, 1, '2',
2865 CHARSET_LEFT_TO_RIGHT,
2866 build_string ("VISCII upper"),
2867 build_string ("VISCII upper (Vietnamese)"),
2868 build_string ("VISCII upper (Vietnamese)"),
2869 build_string ("MULEVISCII-UPPER"),
2871 Vcharset_latin_viscii =
2872 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2873 CHARSET_TYPE_256, 1, 2, 0,
2874 CHARSET_LEFT_TO_RIGHT,
2875 build_string ("VISCII"),
2876 build_string ("VISCII 1.1 (Vietnamese)"),
2877 build_string ("VISCII 1.1 (Vietnamese)"),
2878 build_string ("VISCII1\\.1"),
2880 Vcharset_ideograph_daikanwa =
2881 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa,
2882 CHARSET_TYPE_256X256, 2, 2, 0,
2883 CHARSET_LEFT_TO_RIGHT,
2884 build_string ("Daikanwa"),
2885 build_string ("Morohashi's Daikanwa"),
2886 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2887 build_string ("Daikanwa"),
2888 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2889 Vcharset_ethiopic_ucs =
2890 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2891 CHARSET_TYPE_256X256, 2, 2, 0,
2892 CHARSET_LEFT_TO_RIGHT,
2893 build_string ("Ethiopic (UCS)"),
2894 build_string ("Ethiopic (UCS)"),
2895 build_string ("Ethiopic of UCS"),
2896 build_string ("Ethiopic-Unicode"),
2897 Qnil, 0x1200, 0x137F, 0x1200, 0);
2899 Vcharset_chinese_big5_1 =
2900 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2901 CHARSET_TYPE_94X94, 2, 0, '0',
2902 CHARSET_LEFT_TO_RIGHT,
2903 build_string ("Big5"),
2904 build_string ("Big5 (Level-1)"),
2906 ("Big5 Level-1 Chinese traditional"),
2907 build_string ("big5"),
2909 Vcharset_chinese_big5_2 =
2910 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2911 CHARSET_TYPE_94X94, 2, 0, '1',
2912 CHARSET_LEFT_TO_RIGHT,
2913 build_string ("Big5"),
2914 build_string ("Big5 (Level-2)"),
2916 ("Big5 Level-2 Chinese traditional"),
2917 build_string ("big5"),
2920 #ifdef ENABLE_COMPOSITE_CHARS
2921 /* #### For simplicity, we put composite chars into a 96x96 charset.
2922 This is going to lead to problems because you can run out of
2923 room, esp. as we don't yet recycle numbers. */
2924 Vcharset_composite =
2925 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2926 CHARSET_TYPE_96X96, 2, 0, 0,
2927 CHARSET_LEFT_TO_RIGHT,
2928 build_string ("Composite"),
2929 build_string ("Composite characters"),
2930 build_string ("Composite characters"),
2933 composite_char_row_next = 32;
2934 composite_char_col_next = 32;
2936 Vcomposite_char_string2char_hash_table =
2937 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2938 Vcomposite_char_char2string_hash_table =
2939 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2940 staticpro (&Vcomposite_char_string2char_hash_table);
2941 staticpro (&Vcomposite_char_char2string_hash_table);
2942 #endif /* ENABLE_COMPOSITE_CHARS */