1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 unsigned int code = ch;
299 struct Lisp_Char_Byte_Table* cpt
300 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
301 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
303 if (CHAR_BYTE_TABLE_P (ret))
304 cpt = XCHAR_BYTE_TABLE (ret);
308 ret = cpt->property [(unsigned char) (code >> 16)];
309 if (CHAR_BYTE_TABLE_P (ret))
310 cpt = XCHAR_BYTE_TABLE (ret);
314 ret = cpt->property [(unsigned char) (code >> 8)];
315 if (CHAR_BYTE_TABLE_P (ret))
316 cpt = XCHAR_BYTE_TABLE (ret);
320 return cpt->property [(unsigned char) code];
324 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
326 unsigned int code = ch;
327 struct Lisp_Char_Byte_Table* cpt1
328 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
329 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
331 if (CHAR_BYTE_TABLE_P (ret))
333 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
335 ret = cpt2->property[(unsigned char)(code >> 16)];
336 if (CHAR_BYTE_TABLE_P (ret))
338 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
340 ret = cpt3->property[(unsigned char)(code >> 8)];
341 if (CHAR_BYTE_TABLE_P (ret))
343 struct Lisp_Char_Byte_Table* cpt4
344 = XCHAR_BYTE_TABLE (ret);
346 cpt4->property[(unsigned char)code] = value;
348 else if (!EQ (ret, value))
350 Lisp_Object cpt4 = make_char_byte_table (ret);
352 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
353 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
356 else if (!EQ (ret, value))
358 Lisp_Object cpt3 = make_char_byte_table (ret);
359 Lisp_Object cpt4 = make_char_byte_table (ret);
361 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
362 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
364 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
367 else if (!EQ (ret, value))
369 Lisp_Object cpt2 = make_char_byte_table (ret);
370 Lisp_Object cpt3 = make_char_byte_table (ret);
371 Lisp_Object cpt4 = make_char_byte_table (ret);
373 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
374 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
375 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
376 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
381 Lisp_Object Vcharacter_attribute_table;
382 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Q_decomposition;
388 Lisp_Object QnoBreak;
390 Lisp_Object Qfraction;
393 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
399 else if (EQ (v, Qwide))
401 else if (EQ (v, Qnarrow))
403 else if (EQ (v, Qcompat))
405 else if (EQ (v, QnoBreak))
407 else if (EQ (v, Qsuper))
409 else if (EQ (v, Qfraction))
412 signal_simple_error (err_msg, err_arg);
415 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
416 Return character corresponding with list.
420 Lisp_Object table = Vcharacter_composition_table;
421 Lisp_Object rest = list;
425 Lisp_Object v = Fcar (rest);
427 Emchar c = to_char_code (v, "Invalid value for composition", list);
429 ret = get_char_code_table (c, table);
434 if (!CHAR_CODE_TABLE_P (ret))
439 else if (!CONSP (rest))
441 else if (CHAR_CODE_TABLE_P (ret))
444 signal_simple_error ("Invalid table is found with", list);
446 signal_simple_error ("Invalid value for composition", list);
449 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
450 Return the alist of attributes of CHARACTER.
454 CHECK_CHAR (character);
455 return Fcopy_alist (get_char_code_table (XCHAR (character),
456 Vcharacter_attribute_table));
459 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
460 Return the value of CHARACTER's ATTRIBUTE.
462 (character, attribute))
465 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
471 if (!NILP (ccs = Ffind_charset (attribute)))
474 return Fcdr (Fassq (attribute, ret));
478 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
481 Emchar char_code = XCHAR (character);
483 = get_char_code_table (char_code, Vcharacter_attribute_table);
486 cell = Fassq (attribute, ret);
490 ret = Fcons (Fcons (attribute, value), ret);
492 else if (!EQ (Fcdr (cell), value))
494 Fsetcdr (cell, value);
496 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
500 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
501 Store CHARACTER's ATTRIBUTE with VALUE.
503 (character, attribute, value))
507 ccs = Ffind_charset (attribute);
511 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
516 /* ad-hoc method for `ascii' */
517 if ((XCHARSET_CHARS (ccs) == 94) &&
518 (XCHARSET_BYTE_OFFSET (ccs) != 33))
519 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
521 ccs_len = XCHARSET_CHARS (ccs);
524 signal_simple_error ("Invalid value for coded-charset",
528 rest = Fget_char_attribute (character, attribute);
535 Lisp_Object ei = Fcar (rest);
537 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
538 nv = XVECTOR_DATA(v)[i];
545 XVECTOR_DATA(v)[i] = Qnil;
546 v = XCHARSET_DECODING_TABLE (ccs);
551 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
558 Lisp_Object ei = Fcar (rest);
561 signal_simple_error ("Invalid value for coded-charset",
563 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
564 nv = XVECTOR_DATA(v)[i];
570 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
577 XVECTOR_DATA(v)[i] = character;
579 else if (EQ (attribute, Q_decomposition))
581 Lisp_Object rest = value;
582 Lisp_Object table = Vcharacter_composition_table;
585 signal_simple_error ("Invalid value for ->decomposition",
590 Lisp_Object v = Fcar (rest);
593 = to_char_code (v, "Invalid value for ->decomposition", value);
598 put_char_code_table (c, character, table);
603 ntable = get_char_code_table (c, table);
604 if (!CHAR_CODE_TABLE_P (ntable))
606 ntable = make_char_code_table (Qnil);
607 put_char_code_table (c, ntable, table);
613 return put_char_attribute (character, attribute, value);
618 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
619 Store character's ATTRIBUTES.
623 Lisp_Object rest = attributes;
624 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
625 Lisp_Object character;
631 Lisp_Object cell = Fcar (rest);
635 signal_simple_error ("Invalid argument", attributes);
636 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
637 && XCHARSET_FINAL (ccs))
641 if (XCHARSET_DIMENSION (ccs) == 1)
643 Lisp_Object eb1 = Fcar (Fcdr (cell));
647 signal_simple_error ("Invalid argument", attributes);
649 switch (XCHARSET_CHARS (ccs))
653 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
657 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
663 else if (XCHARSET_DIMENSION (ccs) == 2)
665 Lisp_Object eb1 = Fcar (Fcdr (cell));
666 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
670 signal_simple_error ("Invalid argument", attributes);
673 signal_simple_error ("Invalid argument", attributes);
675 switch (XCHARSET_CHARS (ccs))
678 code = MIN_CHAR_94x94
679 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
680 + (b1 - 33) * 94 + (b2 - 33);
683 code = MIN_CHAR_96x96
684 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
685 + (b1 - 32) * 96 + (b2 - 32);
696 character = make_char (code);
697 goto setup_attributes;
703 else if (!INTP (code))
704 signal_simple_error ("Invalid argument", attributes);
706 character = make_char (XINT (code));
712 Lisp_Object cell = Fcar (rest);
715 signal_simple_error ("Invalid argument", attributes);
716 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
720 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
723 Lisp_Object Vutf_2000_version;
727 int leading_code_private_11;
730 Lisp_Object Qcharsetp;
732 /* Qdoc_string, Qdimension, Qchars defined in general.c */
733 Lisp_Object Qregistry, Qfinal, Qgraphic;
734 Lisp_Object Qdirection;
735 Lisp_Object Qreverse_direction_charset;
736 Lisp_Object Qleading_byte;
737 Lisp_Object Qshort_name, Qlong_name;
753 Qjapanese_jisx0208_1978,
765 Qvietnamese_viscii_lower,
766 Qvietnamese_viscii_upper,
774 Lisp_Object Ql2r, Qr2l;
776 Lisp_Object Vcharset_hash_table;
779 static Charset_ID next_allocated_leading_byte;
781 static Charset_ID next_allocated_1_byte_leading_byte;
782 static Charset_ID next_allocated_2_byte_leading_byte;
785 /* Composite characters are characters constructed by overstriking two
786 or more regular characters.
788 1) The old Mule implementation involves storing composite characters
789 in a buffer as a tag followed by all of the actual characters
790 used to make up the composite character. I think this is a bad
791 idea; it greatly complicates code that wants to handle strings
792 one character at a time because it has to deal with the possibility
793 of great big ungainly characters. It's much more reasonable to
794 simply store an index into a table of composite characters.
796 2) The current implementation only allows for 16,384 separate
797 composite characters over the lifetime of the XEmacs process.
798 This could become a potential problem if the user
799 edited lots of different files that use composite characters.
800 Due to FSF bogosity, increasing the number of allowable
801 composite characters under Mule would decrease the number
802 of possible faces that can exist. Mule already has shrunk
803 this to 2048, and further shrinkage would become uncomfortable.
804 No such problems exist in XEmacs.
806 Composite characters could be represented as 0x80 C1 C2 C3,
807 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
808 for slightly under 2^20 (one million) composite characters
809 over the XEmacs process lifetime, and you only need to
810 increase the size of a Mule character from 19 to 21 bits.
811 Or you could use 0x80 C1 C2 C3 C4, allowing for about
812 85 million (slightly over 2^26) composite characters. */
815 /************************************************************************/
816 /* Basic Emchar functions */
817 /************************************************************************/
819 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
820 string in STR. Returns the number of bytes stored.
821 Do not call this directly. Use the macro set_charptr_emchar() instead.
825 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
840 else if ( c <= 0x7ff )
842 *p++ = (c >> 6) | 0xc0;
843 *p++ = (c & 0x3f) | 0x80;
845 else if ( c <= 0xffff )
847 *p++ = (c >> 12) | 0xe0;
848 *p++ = ((c >> 6) & 0x3f) | 0x80;
849 *p++ = (c & 0x3f) | 0x80;
851 else if ( c <= 0x1fffff )
853 *p++ = (c >> 18) | 0xf0;
854 *p++ = ((c >> 12) & 0x3f) | 0x80;
855 *p++ = ((c >> 6) & 0x3f) | 0x80;
856 *p++ = (c & 0x3f) | 0x80;
858 else if ( c <= 0x3ffffff )
860 *p++ = (c >> 24) | 0xf8;
861 *p++ = ((c >> 18) & 0x3f) | 0x80;
862 *p++ = ((c >> 12) & 0x3f) | 0x80;
863 *p++ = ((c >> 6) & 0x3f) | 0x80;
864 *p++ = (c & 0x3f) | 0x80;
868 *p++ = (c >> 30) | 0xfc;
869 *p++ = ((c >> 24) & 0x3f) | 0x80;
870 *p++ = ((c >> 18) & 0x3f) | 0x80;
871 *p++ = ((c >> 12) & 0x3f) | 0x80;
872 *p++ = ((c >> 6) & 0x3f) | 0x80;
873 *p++ = (c & 0x3f) | 0x80;
876 BREAKUP_CHAR (c, charset, c1, c2);
877 lb = CHAR_LEADING_BYTE (c);
878 if (LEADING_BYTE_PRIVATE_P (lb))
879 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
881 if (EQ (charset, Vcharset_control_1))
890 /* Return the first character from a Mule-encoded string in STR,
891 assuming it's non-ASCII. Do not call this directly.
892 Use the macro charptr_emchar() instead. */
895 non_ascii_charptr_emchar (CONST Bufbyte *str)
908 else if ( b >= 0xf8 )
913 else if ( b >= 0xf0 )
918 else if ( b >= 0xe0 )
923 else if ( b >= 0xc0 )
933 for( ; len > 0; len-- )
936 ch = ( ch << 6 ) | ( b & 0x3f );
940 Bufbyte i0 = *str, i1, i2 = 0;
943 if (i0 == LEADING_BYTE_CONTROL_1)
944 return (Emchar) (*++str - 0x20);
946 if (LEADING_BYTE_PREFIX_P (i0))
951 charset = CHARSET_BY_LEADING_BYTE (i0);
952 if (XCHARSET_DIMENSION (charset) == 2)
955 return MAKE_CHAR (charset, i1, i2);
959 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
960 Do not call this directly. Use the macro valid_char_p() instead. */
964 non_ascii_valid_char_p (Emchar ch)
968 /* Must have only lowest 19 bits set */
972 f1 = CHAR_FIELD1 (ch);
973 f2 = CHAR_FIELD2 (ch);
974 f3 = CHAR_FIELD3 (ch);
980 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
981 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
982 f2 > MAX_CHAR_FIELD2_PRIVATE)
987 if (f3 != 0x20 && f3 != 0x7F)
991 NOTE: This takes advantage of the fact that
992 FIELD2_TO_OFFICIAL_LEADING_BYTE and
993 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
995 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
996 return (XCHARSET_CHARS (charset) == 96);
1000 Lisp_Object charset;
1002 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1003 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1004 f1 > MAX_CHAR_FIELD1_PRIVATE)
1006 if (f2 < 0x20 || f3 < 0x20)
1009 #ifdef ENABLE_COMPOSITE_CHARS
1010 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1012 if (UNBOUNDP (Fgethash (make_int (ch),
1013 Vcomposite_char_char2string_hash_table,
1018 #endif /* ENABLE_COMPOSITE_CHARS */
1020 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1023 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1025 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1028 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1030 return (XCHARSET_CHARS (charset) == 96);
1036 /************************************************************************/
1037 /* Basic string functions */
1038 /************************************************************************/
1040 /* Copy the character pointed to by PTR into STR, assuming it's
1041 non-ASCII. Do not call this directly. Use the macro
1042 charptr_copy_char() instead. */
1045 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1047 Bufbyte *strptr = str;
1049 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1051 /* Notice fallthrough. */
1053 case 6: *++strptr = *ptr++;
1054 case 5: *++strptr = *ptr++;
1056 case 4: *++strptr = *ptr++;
1057 case 3: *++strptr = *ptr++;
1058 case 2: *++strptr = *ptr;
1063 return strptr + 1 - str;
1067 /************************************************************************/
1068 /* streams of Emchars */
1069 /************************************************************************/
1071 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1072 The functions below are not meant to be called directly; use
1073 the macros in insdel.h. */
1076 Lstream_get_emchar_1 (Lstream *stream, int ch)
1078 Bufbyte str[MAX_EMCHAR_LEN];
1079 Bufbyte *strptr = str;
1081 str[0] = (Bufbyte) ch;
1082 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1084 /* Notice fallthrough. */
1087 ch = Lstream_getc (stream);
1089 *++strptr = (Bufbyte) ch;
1091 ch = Lstream_getc (stream);
1093 *++strptr = (Bufbyte) ch;
1096 ch = Lstream_getc (stream);
1098 *++strptr = (Bufbyte) ch;
1100 ch = Lstream_getc (stream);
1102 *++strptr = (Bufbyte) ch;
1104 ch = Lstream_getc (stream);
1106 *++strptr = (Bufbyte) ch;
1111 return charptr_emchar (str);
1115 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1117 Bufbyte str[MAX_EMCHAR_LEN];
1118 Bytecount len = set_charptr_emchar (str, ch);
1119 return Lstream_write (stream, str, len);
1123 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1125 Bufbyte str[MAX_EMCHAR_LEN];
1126 Bytecount len = set_charptr_emchar (str, ch);
1127 Lstream_unread (stream, str, len);
1131 /************************************************************************/
1132 /* charset object */
1133 /************************************************************************/
1136 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1138 struct Lisp_Charset *cs = XCHARSET (obj);
1140 markobj (cs->short_name);
1141 markobj (cs->long_name);
1142 markobj (cs->doc_string);
1143 markobj (cs->registry);
1144 markobj (cs->ccl_program);
1146 markobj (cs->decoding_table);
1152 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1154 struct Lisp_Charset *cs = XCHARSET (obj);
1158 error ("printing unreadable object #<charset %s 0x%x>",
1159 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1162 write_c_string ("#<charset ", printcharfun);
1163 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1164 write_c_string (" ", printcharfun);
1165 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1166 write_c_string (" ", printcharfun);
1167 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1168 write_c_string (" ", printcharfun);
1169 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1170 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1171 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1172 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1173 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1175 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1176 CHARSET_COLUMNS (cs),
1177 CHARSET_GRAPHIC (cs),
1178 CHARSET_FINAL (cs));
1179 write_c_string (buf, printcharfun);
1180 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1181 sprintf (buf, " 0x%x>", cs->header.uid);
1182 write_c_string (buf, printcharfun);
1185 static const struct lrecord_description charset_description[] = {
1186 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1188 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1193 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1194 mark_charset, print_charset, 0, 0, 0,
1195 charset_description,
1196 struct Lisp_Charset);
1198 /* Make a new charset. */
1201 make_charset (Charset_ID id, Lisp_Object name,
1202 unsigned char type, unsigned char columns, unsigned char graphic,
1203 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1204 Lisp_Object long_name, Lisp_Object doc,
1206 Lisp_Object decoding_table,
1207 Emchar ucs_min, Emchar ucs_max,
1208 Emchar code_offset, unsigned char byte_offset)
1211 struct Lisp_Charset *cs =
1212 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1213 XSETCHARSET (obj, cs);
1215 CHARSET_ID (cs) = id;
1216 CHARSET_NAME (cs) = name;
1217 CHARSET_SHORT_NAME (cs) = short_name;
1218 CHARSET_LONG_NAME (cs) = long_name;
1219 CHARSET_DIRECTION (cs) = direction;
1220 CHARSET_TYPE (cs) = type;
1221 CHARSET_COLUMNS (cs) = columns;
1222 CHARSET_GRAPHIC (cs) = graphic;
1223 CHARSET_FINAL (cs) = final;
1224 CHARSET_DOC_STRING (cs) = doc;
1225 CHARSET_REGISTRY (cs) = reg;
1226 CHARSET_CCL_PROGRAM (cs) = Qnil;
1227 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1229 CHARSET_DECODING_TABLE(cs) = Qnil;
1230 CHARSET_UCS_MIN(cs) = ucs_min;
1231 CHARSET_UCS_MAX(cs) = ucs_max;
1232 CHARSET_CODE_OFFSET(cs) = code_offset;
1233 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1236 switch (CHARSET_TYPE (cs))
1238 case CHARSET_TYPE_94:
1239 CHARSET_DIMENSION (cs) = 1;
1240 CHARSET_CHARS (cs) = 94;
1242 case CHARSET_TYPE_96:
1243 CHARSET_DIMENSION (cs) = 1;
1244 CHARSET_CHARS (cs) = 96;
1246 case CHARSET_TYPE_94X94:
1247 CHARSET_DIMENSION (cs) = 2;
1248 CHARSET_CHARS (cs) = 94;
1250 case CHARSET_TYPE_96X96:
1251 CHARSET_DIMENSION (cs) = 2;
1252 CHARSET_CHARS (cs) = 96;
1255 case CHARSET_TYPE_128:
1256 CHARSET_DIMENSION (cs) = 1;
1257 CHARSET_CHARS (cs) = 128;
1259 case CHARSET_TYPE_128X128:
1260 CHARSET_DIMENSION (cs) = 2;
1261 CHARSET_CHARS (cs) = 128;
1263 case CHARSET_TYPE_256:
1264 CHARSET_DIMENSION (cs) = 1;
1265 CHARSET_CHARS (cs) = 256;
1267 case CHARSET_TYPE_256X256:
1268 CHARSET_DIMENSION (cs) = 2;
1269 CHARSET_CHARS (cs) = 256;
1275 if (id == LEADING_BYTE_ASCII)
1276 CHARSET_REP_BYTES (cs) = 1;
1278 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1280 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1285 /* some charsets do not have final characters. This includes
1286 ASCII, Control-1, Composite, and the two faux private
1289 if (code_offset == 0)
1291 assert (NILP (charset_by_attributes[type][final]));
1292 charset_by_attributes[type][final] = obj;
1295 assert (NILP (charset_by_attributes[type][final][direction]));
1296 charset_by_attributes[type][final][direction] = obj;
1300 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1301 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1304 /* official leading byte */
1305 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1308 /* Some charsets are "faux" and don't have names or really exist at
1309 all except in the leading-byte table. */
1311 Fputhash (name, obj, Vcharset_hash_table);
1316 get_unallocated_leading_byte (int dimension)
1321 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1324 lb = next_allocated_leading_byte++;
1328 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1331 lb = next_allocated_1_byte_leading_byte++;
1335 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1338 lb = next_allocated_2_byte_leading_byte++;
1344 ("No more character sets free for this dimension",
1345 make_int (dimension));
1352 range_charset_code_point (Lisp_Object charset, Emchar ch)
1356 if ((XCHARSET_UCS_MIN (charset) <= ch)
1357 && (ch <= XCHARSET_UCS_MAX (charset)))
1359 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1361 if (XCHARSET_DIMENSION (charset) == 1)
1362 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1363 else if (XCHARSET_DIMENSION (charset) == 2)
1364 return list2 (make_int (d / XCHARSET_CHARS (charset)
1365 + XCHARSET_BYTE_OFFSET (charset)),
1366 make_int (d % XCHARSET_CHARS (charset)
1367 + XCHARSET_BYTE_OFFSET (charset)));
1368 else if (XCHARSET_DIMENSION (charset) == 3)
1369 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1370 * XCHARSET_CHARS (charset))
1371 + XCHARSET_BYTE_OFFSET (charset)),
1372 make_int (d / XCHARSET_CHARS (charset)
1373 % XCHARSET_CHARS (charset)
1374 + XCHARSET_BYTE_OFFSET (charset)),
1375 make_int (d % XCHARSET_CHARS (charset)
1376 + XCHARSET_BYTE_OFFSET (charset)));
1377 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1378 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1379 * XCHARSET_CHARS (charset)
1380 * XCHARSET_CHARS (charset))
1381 + XCHARSET_BYTE_OFFSET (charset)),
1382 make_int (d / (XCHARSET_CHARS (charset)
1383 * XCHARSET_CHARS (charset))
1384 % XCHARSET_CHARS (charset)
1385 + XCHARSET_BYTE_OFFSET (charset)),
1386 make_int (d / XCHARSET_CHARS (charset)
1387 % XCHARSET_CHARS (charset)
1388 + XCHARSET_BYTE_OFFSET (charset)),
1389 make_int (d % XCHARSET_CHARS (charset)
1390 + XCHARSET_BYTE_OFFSET (charset)));
1392 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1394 if (XCHARSET_DIMENSION (charset) == 1)
1396 if (XCHARSET_CHARS (charset) == 94)
1398 if (((d = ch - (MIN_CHAR_94
1399 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1401 return list1 (make_int (d + 33));
1403 else if (XCHARSET_CHARS (charset) == 96)
1405 if (((d = ch - (MIN_CHAR_96
1406 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1408 return list1 (make_int (d + 32));
1413 else if (XCHARSET_DIMENSION (charset) == 2)
1415 if (XCHARSET_CHARS (charset) == 94)
1417 if (((d = ch - (MIN_CHAR_94x94
1418 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1421 return list2 (make_int ((d / 94) + 33),
1422 make_int (d % 94 + 33));
1424 else if (XCHARSET_CHARS (charset) == 96)
1426 if (((d = ch - (MIN_CHAR_96x96
1427 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1430 return list2 (make_int ((d / 96) + 32),
1431 make_int (d % 96 + 32));
1439 split_builtin_char (Emchar c)
1441 if (c < MIN_CHAR_OBS_94x94)
1443 if (c <= MAX_CHAR_BASIC_LATIN)
1445 return list2 (Vcharset_ascii, make_int (c));
1449 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1453 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1455 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1457 return list2 (Vcharset_greek_iso8859_7,
1458 make_int (c - MIN_CHAR_GREEK + 0x20));
1460 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1462 return list2 (Vcharset_cyrillic_iso8859_5,
1463 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1465 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1467 return list2 (Vcharset_hebrew_iso8859_8,
1468 make_int (c - MIN_CHAR_HEBREW + 0x20));
1470 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1472 return list2 (Vcharset_thai_tis620,
1473 make_int (c - MIN_CHAR_THAI + 0x20));
1475 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1476 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1478 return list2 (Vcharset_katakana_jisx0201,
1479 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1483 return list3 (Vcharset_ucs_bmp,
1484 make_int (c >> 8), make_int (c & 0xff));
1487 else if (c <= MAX_CHAR_OBS_94x94)
1489 return list3 (CHARSET_BY_ATTRIBUTES
1490 (CHARSET_TYPE_94X94,
1491 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1492 CHARSET_LEFT_TO_RIGHT),
1493 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1494 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1496 else if (c <= MAX_CHAR_94)
1498 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1499 ((c - MIN_CHAR_94) / 94) + '0',
1500 CHARSET_LEFT_TO_RIGHT),
1501 make_int (((c - MIN_CHAR_94) % 94) + 33));
1503 else if (c <= MAX_CHAR_96)
1505 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1506 ((c - MIN_CHAR_96) / 96) + '0',
1507 CHARSET_LEFT_TO_RIGHT),
1508 make_int (((c - MIN_CHAR_96) % 96) + 32));
1510 else if (c <= MAX_CHAR_94x94)
1512 return list3 (CHARSET_BY_ATTRIBUTES
1513 (CHARSET_TYPE_94X94,
1514 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1515 CHARSET_LEFT_TO_RIGHT),
1516 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1517 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1519 else if (c <= MAX_CHAR_96x96)
1521 return list3 (CHARSET_BY_ATTRIBUTES
1522 (CHARSET_TYPE_96X96,
1523 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1524 CHARSET_LEFT_TO_RIGHT),
1525 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1526 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1535 charset_code_point (Lisp_Object charset, Emchar ch)
1537 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1539 if (!EQ (cdef, Qnil))
1541 Lisp_Object field = Fassq (charset, cdef);
1543 if (!EQ (field, Qnil))
1544 return Fcdr (field);
1546 return range_charset_code_point (charset, ch);
1549 Lisp_Object Vdefault_coded_charset_priority_list;
1553 /************************************************************************/
1554 /* Basic charset Lisp functions */
1555 /************************************************************************/
1557 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1558 Return non-nil if OBJECT is a charset.
1562 return CHARSETP (object) ? Qt : Qnil;
1565 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1566 Retrieve the charset of the given name.
1567 If CHARSET-OR-NAME is a charset object, it is simply returned.
1568 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1569 nil is returned. Otherwise the associated charset object is returned.
1573 if (CHARSETP (charset_or_name))
1574 return charset_or_name;
1576 CHECK_SYMBOL (charset_or_name);
1577 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1580 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1581 Retrieve the charset of the given name.
1582 Same as `find-charset' except an error is signalled if there is no such
1583 charset instead of returning nil.
1587 Lisp_Object charset = Ffind_charset (name);
1590 signal_simple_error ("No such charset", name);
1594 /* We store the charsets in hash tables with the names as the key and the
1595 actual charset object as the value. Occasionally we need to use them
1596 in a list format. These routines provide us with that. */
1597 struct charset_list_closure
1599 Lisp_Object *charset_list;
1603 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1604 void *charset_list_closure)
1606 /* This function can GC */
1607 struct charset_list_closure *chcl =
1608 (struct charset_list_closure*) charset_list_closure;
1609 Lisp_Object *charset_list = chcl->charset_list;
1611 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1615 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1616 Return a list of the names of all defined charsets.
1620 Lisp_Object charset_list = Qnil;
1621 struct gcpro gcpro1;
1622 struct charset_list_closure charset_list_closure;
1624 GCPRO1 (charset_list);
1625 charset_list_closure.charset_list = &charset_list;
1626 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1627 &charset_list_closure);
1630 return charset_list;
1633 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1634 Return the name of the given charset.
1638 return XCHARSET_NAME (Fget_charset (charset));
1641 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1642 Define a new character set.
1643 This function is for use with Mule support.
1644 NAME is a symbol, the name by which the character set is normally referred.
1645 DOC-STRING is a string describing the character set.
1646 PROPS is a property list, describing the specific nature of the
1647 character set. Recognized properties are:
1649 'short-name Short version of the charset name (ex: Latin-1)
1650 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1651 'registry A regular expression matching the font registry field for
1653 'dimension Number of octets used to index a character in this charset.
1654 Either 1 or 2. Defaults to 1.
1655 'columns Number of columns used to display a character in this charset.
1656 Only used in TTY mode. (Under X, the actual width of a
1657 character can be derived from the font used to display the
1658 characters.) If unspecified, defaults to the dimension
1659 (this is almost always the correct value).
1660 'chars Number of characters in each dimension (94 or 96).
1661 Defaults to 94. Note that if the dimension is 2, the
1662 character set thus described is 94x94 or 96x96.
1663 'final Final byte of ISO 2022 escape sequence. Must be
1664 supplied. Each combination of (DIMENSION, CHARS) defines a
1665 separate namespace for final bytes. Note that ISO
1666 2022 restricts the final byte to the range
1667 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1668 dimension == 2. Note also that final bytes in the range
1669 0x30 - 0x3F are reserved for user-defined (not official)
1671 'graphic 0 (use left half of font on output) or 1 (use right half
1672 of font on output). Defaults to 0. For example, for
1673 a font whose registry is ISO8859-1, the left half
1674 (octets 0x20 - 0x7F) is the `ascii' character set, while
1675 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1676 character set. With 'graphic set to 0, the octets
1677 will have their high bit cleared; with it set to 1,
1678 the octets will have their high bit set.
1679 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1681 'ccl-program A compiled CCL program used to convert a character in
1682 this charset into an index into the font. This is in
1683 addition to the 'graphic property. The CCL program
1684 is passed the octets of the character, with the high
1685 bit cleared and set depending upon whether the value
1686 of the 'graphic property is 0 or 1.
1688 (name, doc_string, props))
1690 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1691 int direction = CHARSET_LEFT_TO_RIGHT;
1693 Lisp_Object registry = Qnil;
1694 Lisp_Object charset;
1695 Lisp_Object rest, keyword, value;
1696 Lisp_Object ccl_program = Qnil;
1697 Lisp_Object short_name = Qnil, long_name = Qnil;
1698 int byte_offset = -1;
1700 CHECK_SYMBOL (name);
1701 if (!NILP (doc_string))
1702 CHECK_STRING (doc_string);
1704 charset = Ffind_charset (name);
1705 if (!NILP (charset))
1706 signal_simple_error ("Cannot redefine existing charset", name);
1708 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1710 if (EQ (keyword, Qshort_name))
1712 CHECK_STRING (value);
1716 if (EQ (keyword, Qlong_name))
1718 CHECK_STRING (value);
1722 else if (EQ (keyword, Qdimension))
1725 dimension = XINT (value);
1726 if (dimension < 1 || dimension > 2)
1727 signal_simple_error ("Invalid value for 'dimension", value);
1730 else if (EQ (keyword, Qchars))
1733 chars = XINT (value);
1734 if (chars != 94 && chars != 96)
1735 signal_simple_error ("Invalid value for 'chars", value);
1738 else if (EQ (keyword, Qcolumns))
1741 columns = XINT (value);
1742 if (columns != 1 && columns != 2)
1743 signal_simple_error ("Invalid value for 'columns", value);
1746 else if (EQ (keyword, Qgraphic))
1749 graphic = XINT (value);
1751 if (graphic < 0 || graphic > 2)
1753 if (graphic < 0 || graphic > 1)
1755 signal_simple_error ("Invalid value for 'graphic", value);
1758 else if (EQ (keyword, Qregistry))
1760 CHECK_STRING (value);
1764 else if (EQ (keyword, Qdirection))
1766 if (EQ (value, Ql2r))
1767 direction = CHARSET_LEFT_TO_RIGHT;
1768 else if (EQ (value, Qr2l))
1769 direction = CHARSET_RIGHT_TO_LEFT;
1771 signal_simple_error ("Invalid value for 'direction", value);
1774 else if (EQ (keyword, Qfinal))
1776 CHECK_CHAR_COERCE_INT (value);
1777 final = XCHAR (value);
1778 if (final < '0' || final > '~')
1779 signal_simple_error ("Invalid value for 'final", value);
1782 else if (EQ (keyword, Qccl_program))
1784 CHECK_VECTOR (value);
1785 ccl_program = value;
1789 signal_simple_error ("Unrecognized property", keyword);
1793 error ("'final must be specified");
1794 if (dimension == 2 && final > 0x5F)
1796 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1800 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1802 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1804 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1805 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1807 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1809 id = get_unallocated_leading_byte (dimension);
1811 if (NILP (doc_string))
1812 doc_string = build_string ("");
1814 if (NILP (registry))
1815 registry = build_string ("");
1817 if (NILP (short_name))
1818 XSETSTRING (short_name, XSYMBOL (name)->name);
1820 if (NILP (long_name))
1821 long_name = doc_string;
1824 columns = dimension;
1826 if (byte_offset < 0)
1830 else if (chars == 96)
1836 charset = make_charset (id, name, type, columns, graphic,
1837 final, direction, short_name, long_name,
1838 doc_string, registry,
1839 Qnil, 0, 0, 0, byte_offset);
1840 if (!NILP (ccl_program))
1841 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1845 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1847 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1848 NEW-NAME is the name of the new charset. Return the new charset.
1850 (charset, new_name))
1852 Lisp_Object new_charset = Qnil;
1853 int id, dimension, columns, graphic, final;
1854 int direction, type;
1855 Lisp_Object registry, doc_string, short_name, long_name;
1856 struct Lisp_Charset *cs;
1858 charset = Fget_charset (charset);
1859 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1860 signal_simple_error ("Charset already has reverse-direction charset",
1863 CHECK_SYMBOL (new_name);
1864 if (!NILP (Ffind_charset (new_name)))
1865 signal_simple_error ("Cannot redefine existing charset", new_name);
1867 cs = XCHARSET (charset);
1869 type = CHARSET_TYPE (cs);
1870 columns = CHARSET_COLUMNS (cs);
1871 dimension = CHARSET_DIMENSION (cs);
1872 id = get_unallocated_leading_byte (dimension);
1874 graphic = CHARSET_GRAPHIC (cs);
1875 final = CHARSET_FINAL (cs);
1876 direction = CHARSET_RIGHT_TO_LEFT;
1877 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1878 direction = CHARSET_LEFT_TO_RIGHT;
1879 doc_string = CHARSET_DOC_STRING (cs);
1880 short_name = CHARSET_SHORT_NAME (cs);
1881 long_name = CHARSET_LONG_NAME (cs);
1882 registry = CHARSET_REGISTRY (cs);
1884 new_charset = make_charset (id, new_name, type, columns,
1885 graphic, final, direction, short_name, long_name,
1886 doc_string, registry,
1888 CHARSET_DECODING_TABLE(cs),
1889 CHARSET_UCS_MIN(cs),
1890 CHARSET_UCS_MAX(cs),
1891 CHARSET_CODE_OFFSET(cs),
1892 CHARSET_BYTE_OFFSET(cs)
1898 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1899 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1904 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1905 Define symbol ALIAS as an alias for CHARSET.
1909 CHECK_SYMBOL (alias);
1910 charset = Fget_charset (charset);
1911 return Fputhash (alias, charset, Vcharset_hash_table);
1914 /* #### Reverse direction charsets not yet implemented. */
1916 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1918 Return the reverse-direction charset parallel to CHARSET, if any.
1919 This is the charset with the same properties (in particular, the same
1920 dimension, number of characters per dimension, and final byte) as
1921 CHARSET but whose characters are displayed in the opposite direction.
1925 charset = Fget_charset (charset);
1926 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1930 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1931 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1932 If DIRECTION is omitted, both directions will be checked (left-to-right
1933 will be returned if character sets exist for both directions).
1935 (dimension, chars, final, direction))
1937 int dm, ch, fi, di = -1;
1939 Lisp_Object obj = Qnil;
1941 CHECK_INT (dimension);
1942 dm = XINT (dimension);
1943 if (dm < 1 || dm > 2)
1944 signal_simple_error ("Invalid value for DIMENSION", dimension);
1948 if (ch != 94 && ch != 96)
1949 signal_simple_error ("Invalid value for CHARS", chars);
1951 CHECK_CHAR_COERCE_INT (final);
1953 if (fi < '0' || fi > '~')
1954 signal_simple_error ("Invalid value for FINAL", final);
1956 if (EQ (direction, Ql2r))
1957 di = CHARSET_LEFT_TO_RIGHT;
1958 else if (EQ (direction, Qr2l))
1959 di = CHARSET_RIGHT_TO_LEFT;
1960 else if (!NILP (direction))
1961 signal_simple_error ("Invalid value for DIRECTION", direction);
1963 if (dm == 2 && fi > 0x5F)
1965 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1968 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1970 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1974 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1976 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1979 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1982 return XCHARSET_NAME (obj);
1986 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1987 Return short name of CHARSET.
1991 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1994 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1995 Return long name of CHARSET.
1999 return XCHARSET_LONG_NAME (Fget_charset (charset));
2002 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2003 Return description of CHARSET.
2007 return XCHARSET_DOC_STRING (Fget_charset (charset));
2010 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2011 Return dimension of CHARSET.
2015 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2018 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2019 Return property PROP of CHARSET.
2020 Recognized properties are those listed in `make-charset', as well as
2021 'name and 'doc-string.
2025 struct Lisp_Charset *cs;
2027 charset = Fget_charset (charset);
2028 cs = XCHARSET (charset);
2030 CHECK_SYMBOL (prop);
2031 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2032 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2033 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2034 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2035 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2036 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2037 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2038 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2039 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2040 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2041 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2042 if (EQ (prop, Qdirection))
2043 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2044 if (EQ (prop, Qreverse_direction_charset))
2046 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2050 return XCHARSET_NAME (obj);
2052 signal_simple_error ("Unrecognized charset property name", prop);
2053 return Qnil; /* not reached */
2056 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2057 Return charset identification number of CHARSET.
2061 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2064 /* #### We need to figure out which properties we really want to
2067 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2068 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2070 (charset, ccl_program))
2072 charset = Fget_charset (charset);
2073 CHECK_VECTOR (ccl_program);
2074 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2079 invalidate_charset_font_caches (Lisp_Object charset)
2081 /* Invalidate font cache entries for charset on all devices. */
2082 Lisp_Object devcons, concons, hash_table;
2083 DEVICE_LOOP_NO_BREAK (devcons, concons)
2085 struct device *d = XDEVICE (XCAR (devcons));
2086 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2087 if (!UNBOUNDP (hash_table))
2088 Fclrhash (hash_table);
2092 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2093 Set the 'registry property of CHARSET to REGISTRY.
2095 (charset, registry))
2097 charset = Fget_charset (charset);
2098 CHECK_STRING (registry);
2099 XCHARSET_REGISTRY (charset) = registry;
2100 invalidate_charset_font_caches (charset);
2101 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2106 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2107 Return mapping-table of CHARSET.
2111 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2114 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2115 Set mapping-table of CHARSET to TABLE.
2119 struct Lisp_Charset *cs;
2120 Lisp_Object old_table;
2123 charset = Fget_charset (charset);
2124 cs = XCHARSET (charset);
2126 if (EQ (table, Qnil))
2128 CHARSET_DECODING_TABLE(cs) = table;
2131 else if (VECTORP (table))
2133 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
2134 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2135 old_table = CHARSET_DECODING_TABLE(cs);
2136 CHARSET_DECODING_TABLE(cs) = table;
2139 signal_error (Qwrong_type_argument,
2140 list2 (build_translated_string ("vector-or-nil-p"),
2142 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2144 switch (CHARSET_DIMENSION (cs))
2147 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2149 Lisp_Object c = XVECTOR_DATA(table)[i];
2154 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2158 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2160 Lisp_Object v = XVECTOR_DATA(table)[i];
2166 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2168 CHARSET_DECODING_TABLE(cs) = old_table;
2169 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2171 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2173 Lisp_Object c = XVECTOR_DATA(v)[j];
2176 put_char_attribute (c, charset,
2179 (i + CHARSET_BYTE_OFFSET (cs)),
2181 (j + CHARSET_BYTE_OFFSET (cs))));
2185 put_char_attribute (v, charset,
2187 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2196 /************************************************************************/
2197 /* Lisp primitives for working with characters */
2198 /************************************************************************/
2200 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2201 Make a character from CHARSET and octets ARG1 and ARG2.
2202 ARG2 is required only for characters from two-dimensional charsets.
2203 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2204 character s with caron.
2206 (charset, arg1, arg2))
2208 struct Lisp_Charset *cs;
2210 int lowlim, highlim;
2212 charset = Fget_charset (charset);
2213 cs = XCHARSET (charset);
2215 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2216 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2218 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2220 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2221 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2224 /* It is useful (and safe, according to Olivier Galibert) to strip
2225 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2226 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2227 Latin 2 code of the character. */
2235 if (a1 < lowlim || a1 > highlim)
2236 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2238 if (CHARSET_DIMENSION (cs) == 1)
2242 ("Charset is of dimension one; second octet must be nil", arg2);
2243 return make_char (MAKE_CHAR (charset, a1, 0));
2252 a2 = XINT (arg2) & 0x7f;
2254 if (a2 < lowlim || a2 > highlim)
2255 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2257 return make_char (MAKE_CHAR (charset, a1, a2));
2260 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2261 Return the character set of char CH.
2265 CHECK_CHAR_COERCE_INT (ch);
2267 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2270 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2271 Return list of charset and one or two position-codes of CHAR.
2275 /* This function can GC */
2276 struct gcpro gcpro1, gcpro2;
2277 Lisp_Object charset = Qnil;
2278 Lisp_Object rc = Qnil;
2281 GCPRO2 (charset, rc);
2282 CHECK_CHAR_COERCE_INT (character);
2284 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2286 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2288 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2292 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2300 #ifdef ENABLE_COMPOSITE_CHARS
2301 /************************************************************************/
2302 /* composite character functions */
2303 /************************************************************************/
2306 lookup_composite_char (Bufbyte *str, int len)
2308 Lisp_Object lispstr = make_string (str, len);
2309 Lisp_Object ch = Fgethash (lispstr,
2310 Vcomposite_char_string2char_hash_table,
2316 if (composite_char_row_next >= 128)
2317 signal_simple_error ("No more composite chars available", lispstr);
2318 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2319 composite_char_col_next);
2320 Fputhash (make_char (emch), lispstr,
2321 Vcomposite_char_char2string_hash_table);
2322 Fputhash (lispstr, make_char (emch),
2323 Vcomposite_char_string2char_hash_table);
2324 composite_char_col_next++;
2325 if (composite_char_col_next >= 128)
2327 composite_char_col_next = 32;
2328 composite_char_row_next++;
2337 composite_char_string (Emchar ch)
2339 Lisp_Object str = Fgethash (make_char (ch),
2340 Vcomposite_char_char2string_hash_table,
2342 assert (!UNBOUNDP (str));
2346 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2347 Convert a string into a single composite character.
2348 The character is the result of overstriking all the characters in
2353 CHECK_STRING (string);
2354 return make_char (lookup_composite_char (XSTRING_DATA (string),
2355 XSTRING_LENGTH (string)));
2358 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2359 Return a string of the characters comprising a composite character.
2367 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2368 signal_simple_error ("Must be composite char", ch);
2369 return composite_char_string (emch);
2371 #endif /* ENABLE_COMPOSITE_CHARS */
2374 /************************************************************************/
2375 /* initialization */
2376 /************************************************************************/
2379 syms_of_mule_charset (void)
2381 DEFSUBR (Fcharsetp);
2382 DEFSUBR (Ffind_charset);
2383 DEFSUBR (Fget_charset);
2384 DEFSUBR (Fcharset_list);
2385 DEFSUBR (Fcharset_name);
2386 DEFSUBR (Fmake_charset);
2387 DEFSUBR (Fmake_reverse_direction_charset);
2388 /* DEFSUBR (Freverse_direction_charset); */
2389 DEFSUBR (Fdefine_charset_alias);
2390 DEFSUBR (Fcharset_from_attributes);
2391 DEFSUBR (Fcharset_short_name);
2392 DEFSUBR (Fcharset_long_name);
2393 DEFSUBR (Fcharset_description);
2394 DEFSUBR (Fcharset_dimension);
2395 DEFSUBR (Fcharset_property);
2396 DEFSUBR (Fcharset_id);
2397 DEFSUBR (Fset_charset_ccl_program);
2398 DEFSUBR (Fset_charset_registry);
2400 DEFSUBR (Fchar_attribute_alist);
2401 DEFSUBR (Fget_char_attribute);
2402 DEFSUBR (Fput_char_attribute);
2403 DEFSUBR (Fdefine_char);
2404 DEFSUBR (Fget_composite_char);
2405 DEFSUBR (Fcharset_mapping_table);
2406 DEFSUBR (Fset_charset_mapping_table);
2409 DEFSUBR (Fmake_char);
2410 DEFSUBR (Fchar_charset);
2411 DEFSUBR (Fsplit_char);
2413 #ifdef ENABLE_COMPOSITE_CHARS
2414 DEFSUBR (Fmake_composite_char);
2415 DEFSUBR (Fcomposite_char_string);
2418 defsymbol (&Qcharsetp, "charsetp");
2419 defsymbol (&Qregistry, "registry");
2420 defsymbol (&Qfinal, "final");
2421 defsymbol (&Qgraphic, "graphic");
2422 defsymbol (&Qdirection, "direction");
2423 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2424 defsymbol (&Qshort_name, "short-name");
2425 defsymbol (&Qlong_name, "long-name");
2427 defsymbol (&Ql2r, "l2r");
2428 defsymbol (&Qr2l, "r2l");
2430 /* Charsets, compatible with FSF 20.3
2431 Naming convention is Script-Charset[-Edition] */
2432 defsymbol (&Qascii, "ascii");
2433 defsymbol (&Qcontrol_1, "control-1");
2434 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2435 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2436 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2437 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2438 defsymbol (&Qthai_tis620, "thai-tis620");
2439 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2440 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2441 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2442 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2443 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2444 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2445 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2446 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2447 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2448 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2449 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2450 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2451 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2452 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2454 defsymbol (&Q_decomposition, "->decomposition");
2455 defsymbol (&Qwide, "wide");
2456 defsymbol (&Qnarrow, "narrow");
2457 defsymbol (&Qcompat, "compat");
2458 defsymbol (&QnoBreak, "noBreak");
2459 defsymbol (&Qsuper, "super");
2460 defsymbol (&Qfraction, "fraction");
2461 defsymbol (&Qucs, "ucs");
2462 defsymbol (&Qucs_bmp, "ucs-bmp");
2463 defsymbol (&Qlatin_viscii, "latin-viscii");
2464 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2465 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2466 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2467 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2468 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2469 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2471 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2472 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2474 defsymbol (&Qcomposite, "composite");
2478 vars_of_mule_charset (void)
2485 /* Table of charsets indexed by leading byte. */
2486 for (i = 0; i < countof (charset_by_leading_byte); i++)
2487 charset_by_leading_byte[i] = Qnil;
2490 /* Table of charsets indexed by type/final-byte. */
2491 for (i = 0; i < countof (charset_by_attributes); i++)
2492 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2493 charset_by_attributes[i][j] = Qnil;
2495 /* Table of charsets indexed by type/final-byte/direction. */
2496 for (i = 0; i < countof (charset_by_attributes); i++)
2497 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2498 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2499 charset_by_attributes[i][j][k] = Qnil;
2503 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2505 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2506 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2510 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2511 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2512 Leading-code of private TYPE9N charset of column-width 1.
2514 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2518 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2519 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2520 Version number of UTF-2000.
2523 staticpro (&Vcharacter_attribute_table);
2524 Vcharacter_attribute_table = make_char_code_table (Qnil);
2526 staticpro (&Vcharacter_composition_table);
2527 Vcharacter_composition_table = make_char_code_table (Qnil);
2529 Vdefault_coded_charset_priority_list = Qnil;
2530 DEFVAR_LISP ("default-coded-charset-priority-list",
2531 &Vdefault_coded_charset_priority_list /*
2532 Default order of preferred coded-character-sets.
2538 complex_vars_of_mule_charset (void)
2540 staticpro (&Vcharset_hash_table);
2541 Vcharset_hash_table =
2542 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2544 /* Predefined character sets. We store them into variables for
2549 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2550 CHARSET_TYPE_256X256, 1, 2, 0,
2551 CHARSET_LEFT_TO_RIGHT,
2552 build_string ("BMP"),
2553 build_string ("BMP"),
2554 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2555 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2556 Qnil, 0, 0xFFFF, 0, 0);
2558 # define MIN_CHAR_THAI 0
2559 # define MAX_CHAR_THAI 0
2560 # define MIN_CHAR_GREEK 0
2561 # define MAX_CHAR_GREEK 0
2562 # define MIN_CHAR_HEBREW 0
2563 # define MAX_CHAR_HEBREW 0
2564 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2565 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2566 # define MIN_CHAR_CYRILLIC 0
2567 # define MAX_CHAR_CYRILLIC 0
2570 make_charset (LEADING_BYTE_ASCII, Qascii,
2571 CHARSET_TYPE_94, 1, 0, 'B',
2572 CHARSET_LEFT_TO_RIGHT,
2573 build_string ("ASCII"),
2574 build_string ("ASCII)"),
2575 build_string ("ASCII (ISO646 IRV)"),
2576 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2577 Qnil, 0, 0x7F, 0, 0);
2578 Vcharset_control_1 =
2579 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2580 CHARSET_TYPE_94, 1, 1, 0,
2581 CHARSET_LEFT_TO_RIGHT,
2582 build_string ("C1"),
2583 build_string ("Control characters"),
2584 build_string ("Control characters 128-191"),
2586 Qnil, 0x80, 0x9F, 0, 0);
2587 Vcharset_latin_iso8859_1 =
2588 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2589 CHARSET_TYPE_96, 1, 1, 'A',
2590 CHARSET_LEFT_TO_RIGHT,
2591 build_string ("Latin-1"),
2592 build_string ("ISO8859-1 (Latin-1)"),
2593 build_string ("ISO8859-1 (Latin-1)"),
2594 build_string ("iso8859-1"),
2595 Qnil, 0xA0, 0xFF, 0, 32);
2596 Vcharset_latin_iso8859_2 =
2597 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2598 CHARSET_TYPE_96, 1, 1, 'B',
2599 CHARSET_LEFT_TO_RIGHT,
2600 build_string ("Latin-2"),
2601 build_string ("ISO8859-2 (Latin-2)"),
2602 build_string ("ISO8859-2 (Latin-2)"),
2603 build_string ("iso8859-2"),
2605 Vcharset_latin_iso8859_3 =
2606 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2607 CHARSET_TYPE_96, 1, 1, 'C',
2608 CHARSET_LEFT_TO_RIGHT,
2609 build_string ("Latin-3"),
2610 build_string ("ISO8859-3 (Latin-3)"),
2611 build_string ("ISO8859-3 (Latin-3)"),
2612 build_string ("iso8859-3"),
2614 Vcharset_latin_iso8859_4 =
2615 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2616 CHARSET_TYPE_96, 1, 1, 'D',
2617 CHARSET_LEFT_TO_RIGHT,
2618 build_string ("Latin-4"),
2619 build_string ("ISO8859-4 (Latin-4)"),
2620 build_string ("ISO8859-4 (Latin-4)"),
2621 build_string ("iso8859-4"),
2623 Vcharset_thai_tis620 =
2624 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2625 CHARSET_TYPE_96, 1, 1, 'T',
2626 CHARSET_LEFT_TO_RIGHT,
2627 build_string ("TIS620"),
2628 build_string ("TIS620 (Thai)"),
2629 build_string ("TIS620.2529 (Thai)"),
2630 build_string ("tis620"),
2631 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2632 Vcharset_greek_iso8859_7 =
2633 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2634 CHARSET_TYPE_96, 1, 1, 'F',
2635 CHARSET_LEFT_TO_RIGHT,
2636 build_string ("ISO8859-7"),
2637 build_string ("ISO8859-7 (Greek)"),
2638 build_string ("ISO8859-7 (Greek)"),
2639 build_string ("iso8859-7"),
2640 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2641 Vcharset_arabic_iso8859_6 =
2642 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2643 CHARSET_TYPE_96, 1, 1, 'G',
2644 CHARSET_RIGHT_TO_LEFT,
2645 build_string ("ISO8859-6"),
2646 build_string ("ISO8859-6 (Arabic)"),
2647 build_string ("ISO8859-6 (Arabic)"),
2648 build_string ("iso8859-6"),
2650 Vcharset_hebrew_iso8859_8 =
2651 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2652 CHARSET_TYPE_96, 1, 1, 'H',
2653 CHARSET_RIGHT_TO_LEFT,
2654 build_string ("ISO8859-8"),
2655 build_string ("ISO8859-8 (Hebrew)"),
2656 build_string ("ISO8859-8 (Hebrew)"),
2657 build_string ("iso8859-8"),
2658 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2659 Vcharset_katakana_jisx0201 =
2660 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2661 CHARSET_TYPE_94, 1, 1, 'I',
2662 CHARSET_LEFT_TO_RIGHT,
2663 build_string ("JISX0201 Kana"),
2664 build_string ("JISX0201.1976 (Japanese Kana)"),
2665 build_string ("JISX0201.1976 Japanese Kana"),
2666 build_string ("jisx0201\\.1976"),
2668 MIN_CHAR_HALFWIDTH_KATAKANA,
2669 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2670 Vcharset_latin_jisx0201 =
2671 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2672 CHARSET_TYPE_94, 1, 0, 'J',
2673 CHARSET_LEFT_TO_RIGHT,
2674 build_string ("JISX0201 Roman"),
2675 build_string ("JISX0201.1976 (Japanese Roman)"),
2676 build_string ("JISX0201.1976 Japanese Roman"),
2677 build_string ("jisx0201\\.1976"),
2679 Vcharset_cyrillic_iso8859_5 =
2680 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2681 CHARSET_TYPE_96, 1, 1, 'L',
2682 CHARSET_LEFT_TO_RIGHT,
2683 build_string ("ISO8859-5"),
2684 build_string ("ISO8859-5 (Cyrillic)"),
2685 build_string ("ISO8859-5 (Cyrillic)"),
2686 build_string ("iso8859-5"),
2687 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2688 Vcharset_latin_iso8859_9 =
2689 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2690 CHARSET_TYPE_96, 1, 1, 'M',
2691 CHARSET_LEFT_TO_RIGHT,
2692 build_string ("Latin-5"),
2693 build_string ("ISO8859-9 (Latin-5)"),
2694 build_string ("ISO8859-9 (Latin-5)"),
2695 build_string ("iso8859-9"),
2697 Vcharset_japanese_jisx0208_1978 =
2698 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2699 CHARSET_TYPE_94X94, 2, 0, '@',
2700 CHARSET_LEFT_TO_RIGHT,
2701 build_string ("JIS X0208:1978"),
2702 build_string ("JIS X0208:1978 (Japanese)"),
2704 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2705 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2707 Vcharset_chinese_gb2312 =
2708 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2709 CHARSET_TYPE_94X94, 2, 0, 'A',
2710 CHARSET_LEFT_TO_RIGHT,
2711 build_string ("GB2312"),
2712 build_string ("GB2312)"),
2713 build_string ("GB2312 Chinese simplified"),
2714 build_string ("gb2312"),
2716 Vcharset_japanese_jisx0208 =
2717 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2718 CHARSET_TYPE_94X94, 2, 0, 'B',
2719 CHARSET_LEFT_TO_RIGHT,
2720 build_string ("JISX0208"),
2721 build_string ("JIS X0208:1983 (Japanese)"),
2722 build_string ("JIS X0208:1983 Japanese Kanji"),
2723 build_string ("jisx0208\\.1983"),
2725 Vcharset_korean_ksc5601 =
2726 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2727 CHARSET_TYPE_94X94, 2, 0, 'C',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("KSC5601"),
2730 build_string ("KSC5601 (Korean"),
2731 build_string ("KSC5601 Korean Hangul and Hanja"),
2732 build_string ("ksc5601"),
2734 Vcharset_japanese_jisx0212 =
2735 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2736 CHARSET_TYPE_94X94, 2, 0, 'D',
2737 CHARSET_LEFT_TO_RIGHT,
2738 build_string ("JISX0212"),
2739 build_string ("JISX0212 (Japanese)"),
2740 build_string ("JISX0212 Japanese Supplement"),
2741 build_string ("jisx0212"),
2744 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2745 Vcharset_chinese_cns11643_1 =
2746 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2747 CHARSET_TYPE_94X94, 2, 0, 'G',
2748 CHARSET_LEFT_TO_RIGHT,
2749 build_string ("CNS11643-1"),
2750 build_string ("CNS11643-1 (Chinese traditional)"),
2752 ("CNS 11643 Plane 1 Chinese traditional"),
2753 build_string (CHINESE_CNS_PLANE_RE("1")),
2755 Vcharset_chinese_cns11643_2 =
2756 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2757 CHARSET_TYPE_94X94, 2, 0, 'H',
2758 CHARSET_LEFT_TO_RIGHT,
2759 build_string ("CNS11643-2"),
2760 build_string ("CNS11643-2 (Chinese traditional)"),
2762 ("CNS 11643 Plane 2 Chinese traditional"),
2763 build_string (CHINESE_CNS_PLANE_RE("2")),
2766 Vcharset_latin_viscii_lower =
2767 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2768 CHARSET_TYPE_96, 1, 1, '1',
2769 CHARSET_LEFT_TO_RIGHT,
2770 build_string ("VISCII lower"),
2771 build_string ("VISCII lower (Vietnamese)"),
2772 build_string ("VISCII lower (Vietnamese)"),
2773 build_string ("MULEVISCII-LOWER"),
2775 Vcharset_latin_viscii_upper =
2776 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2777 CHARSET_TYPE_96, 1, 1, '2',
2778 CHARSET_LEFT_TO_RIGHT,
2779 build_string ("VISCII upper"),
2780 build_string ("VISCII upper (Vietnamese)"),
2781 build_string ("VISCII upper (Vietnamese)"),
2782 build_string ("MULEVISCII-UPPER"),
2784 Vcharset_latin_viscii =
2785 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2786 CHARSET_TYPE_256, 1, 2, 0,
2787 CHARSET_LEFT_TO_RIGHT,
2788 build_string ("VISCII"),
2789 build_string ("VISCII 1.1 (Vietnamese)"),
2790 build_string ("VISCII 1.1 (Vietnamese)"),
2791 build_string ("VISCII1\\.1"),
2793 Vcharset_hiragana_jisx0208 =
2794 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2795 CHARSET_TYPE_94X94, 2, 0, 'B',
2796 CHARSET_LEFT_TO_RIGHT,
2797 build_string ("Hiragana"),
2798 build_string ("Hiragana of JIS X0208"),
2799 build_string ("Japanese Hiragana of JIS X0208"),
2800 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2801 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2802 (0x24 - 33) * 94 + (0x21 - 33), 33);
2803 Vcharset_katakana_jisx0208 =
2804 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2805 CHARSET_TYPE_94X94, 2, 0, 'B',
2806 CHARSET_LEFT_TO_RIGHT,
2807 build_string ("Katakana"),
2808 build_string ("Katakana of JIS X0208"),
2809 build_string ("Japanese Katakana of JIS X0208"),
2810 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2811 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2812 (0x25 - 33) * 94 + (0x21 - 33), 33);
2814 Vcharset_chinese_big5_1 =
2815 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2816 CHARSET_TYPE_94X94, 2, 0, '0',
2817 CHARSET_LEFT_TO_RIGHT,
2818 build_string ("Big5"),
2819 build_string ("Big5 (Level-1)"),
2821 ("Big5 Level-1 Chinese traditional"),
2822 build_string ("big5"),
2824 Vcharset_chinese_big5_2 =
2825 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2826 CHARSET_TYPE_94X94, 2, 0, '1',
2827 CHARSET_LEFT_TO_RIGHT,
2828 build_string ("Big5"),
2829 build_string ("Big5 (Level-2)"),
2831 ("Big5 Level-2 Chinese traditional"),
2832 build_string ("big5"),
2835 #ifdef ENABLE_COMPOSITE_CHARS
2836 /* #### For simplicity, we put composite chars into a 96x96 charset.
2837 This is going to lead to problems because you can run out of
2838 room, esp. as we don't yet recycle numbers. */
2839 Vcharset_composite =
2840 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2841 CHARSET_TYPE_96X96, 2, 0, 0,
2842 CHARSET_LEFT_TO_RIGHT,
2843 build_string ("Composite"),
2844 build_string ("Composite characters"),
2845 build_string ("Composite characters"),
2848 composite_char_row_next = 32;
2849 composite_char_col_next = 32;
2851 Vcomposite_char_string2char_hash_table =
2852 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2853 Vcomposite_char_char2string_hash_table =
2854 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2855 staticpro (&Vcomposite_char_string2char_hash_table);
2856 staticpro (&Vcomposite_char_char2string_hash_table);
2857 #endif /* ENABLE_COMPOSITE_CHARS */