1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_viscii_lower;
67 Lisp_Object Vcharset_latin_viscii_upper;
68 Lisp_Object Vcharset_ideograph_daikanwa;
69 Lisp_Object Vcharset_mojikyo_pj_1;
70 Lisp_Object Vcharset_mojikyo_pj_2;
71 Lisp_Object Vcharset_mojikyo_pj_3;
72 Lisp_Object Vcharset_mojikyo_pj_4;
73 Lisp_Object Vcharset_mojikyo_pj_5;
74 Lisp_Object Vcharset_mojikyo_pj_6;
75 Lisp_Object Vcharset_mojikyo_pj_7;
76 Lisp_Object Vcharset_mojikyo_pj_8;
77 Lisp_Object Vcharset_mojikyo_pj_9;
78 Lisp_Object Vcharset_mojikyo_pj_10;
79 Lisp_Object Vcharset_mojikyo_pj_11;
80 Lisp_Object Vcharset_mojikyo_pj_12;
81 Lisp_Object Vcharset_mojikyo_pj_13;
82 Lisp_Object Vcharset_mojikyo_pj_14;
83 Lisp_Object Vcharset_mojikyo_pj_15;
84 Lisp_Object Vcharset_mojikyo_pj_16;
85 Lisp_Object Vcharset_mojikyo_pj_17;
86 Lisp_Object Vcharset_mojikyo_pj_18;
87 Lisp_Object Vcharset_mojikyo_pj_19;
88 Lisp_Object Vcharset_mojikyo_pj_20;
89 Lisp_Object Vcharset_mojikyo_pj_21;
90 Lisp_Object Vcharset_ethiopic_ucs;
92 Lisp_Object Vcharset_chinese_big5_1;
93 Lisp_Object Vcharset_chinese_big5_2;
95 #ifdef ENABLE_COMPOSITE_CHARS
96 Lisp_Object Vcharset_composite;
98 /* Hash tables for composite chars. One maps string representing
99 composed chars to their equivalent chars; one goes the
101 Lisp_Object Vcomposite_char_char2string_hash_table;
102 Lisp_Object Vcomposite_char_string2char_hash_table;
104 static int composite_char_row_next;
105 static int composite_char_col_next;
107 #endif /* ENABLE_COMPOSITE_CHARS */
109 struct charset_lookup *chlook;
111 static const struct lrecord_description charset_lookup_description_1[] = {
112 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte),
121 static const struct struct_description charset_lookup_description = {
122 sizeof(struct charset_lookup),
123 charset_lookup_description_1
127 /* Table of number of bytes in the string representation of a character
128 indexed by the first byte of that representation.
130 rep_bytes_by_first_byte(c) is more efficient than the equivalent
131 canonical computation:
133 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
135 const Bytecount rep_bytes_by_first_byte[0xA0] =
136 { /* 0x00 - 0x7f are for straight ASCII */
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 /* 0x80 - 0x8f are for Dimension-1 official charsets */
147 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 /* 0x90 - 0x9d are for Dimension-2 official charsets */
152 /* 0x9e is for Dimension-1 private charsets */
153 /* 0x9f is for Dimension-2 private charsets */
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
161 mark_char_byte_table (Lisp_Object obj)
163 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
166 for (i = 0; i < 256; i++)
168 mark_object (cte->property[i]);
174 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
176 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
177 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
180 for (i = 0; i < 256; i++)
181 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
183 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
185 if (!char_byte_table_equal (cte1->property[i],
186 cte2->property[i], depth + 1))
193 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
199 char_byte_table_hash (Lisp_Object obj, int depth)
201 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
203 return internal_array_hash (cte->property, 256, depth);
206 static const struct lrecord_description char_byte_table_description[] = {
207 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
211 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
212 mark_char_byte_table,
213 internal_object_printer,
214 0, char_byte_table_equal,
215 char_byte_table_hash,
216 char_byte_table_description,
217 struct Lisp_Char_Byte_Table);
220 make_char_byte_table (Lisp_Object initval)
224 struct Lisp_Char_Byte_Table *cte =
225 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
226 &lrecord_char_byte_table);
228 for (i = 0; i < 256; i++)
229 cte->property[i] = initval;
231 XSETCHAR_BYTE_TABLE (obj, cte);
236 copy_char_byte_table (Lisp_Object entry)
238 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
241 struct Lisp_Char_Byte_Table *ctenew =
242 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
243 &lrecord_char_byte_table);
245 for (i = 0; i < 256; i++)
247 Lisp_Object new = cte->property[i];
248 if (CHAR_BYTE_TABLE_P (new))
249 ctenew->property[i] = copy_char_byte_table (new);
251 ctenew->property[i] = new;
254 XSETCHAR_BYTE_TABLE (obj, ctenew);
260 mark_char_code_table (Lisp_Object obj)
262 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
268 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
270 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
271 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
273 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
277 char_code_table_hash (Lisp_Object obj, int depth)
279 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
281 return char_code_table_hash (cte->table, depth + 1);
284 static const struct lrecord_description char_code_table_description[] = {
285 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
289 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
290 mark_char_code_table,
291 internal_object_printer,
292 0, char_code_table_equal,
293 char_code_table_hash,
294 char_code_table_description,
295 struct Lisp_Char_Code_Table);
298 make_char_code_table (Lisp_Object initval)
301 struct Lisp_Char_Code_Table *cte =
302 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
303 &lrecord_char_code_table);
305 cte->table = make_char_byte_table (initval);
307 XSETCHAR_CODE_TABLE (obj, cte);
312 copy_char_code_table (Lisp_Object entry)
314 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
316 struct Lisp_Char_Code_Table *ctenew =
317 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
318 &lrecord_char_code_table);
320 ctenew->table = copy_char_byte_table (cte->table);
321 XSETCHAR_CODE_TABLE (obj, ctenew);
327 get_char_code_table (Emchar ch, Lisp_Object table)
329 unsigned int code = ch;
330 struct Lisp_Char_Byte_Table* cpt
331 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
332 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
334 if (CHAR_BYTE_TABLE_P (ret))
335 cpt = XCHAR_BYTE_TABLE (ret);
339 ret = cpt->property [(unsigned char) (code >> 16)];
340 if (CHAR_BYTE_TABLE_P (ret))
341 cpt = XCHAR_BYTE_TABLE (ret);
345 ret = cpt->property [(unsigned char) (code >> 8)];
346 if (CHAR_BYTE_TABLE_P (ret))
347 cpt = XCHAR_BYTE_TABLE (ret);
351 return cpt->property [(unsigned char) code];
355 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
357 unsigned int code = ch;
358 struct Lisp_Char_Byte_Table* cpt1
359 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
360 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
362 if (CHAR_BYTE_TABLE_P (ret))
364 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
366 ret = cpt2->property[(unsigned char)(code >> 16)];
367 if (CHAR_BYTE_TABLE_P (ret))
369 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
371 ret = cpt3->property[(unsigned char)(code >> 8)];
372 if (CHAR_BYTE_TABLE_P (ret))
374 struct Lisp_Char_Byte_Table* cpt4
375 = XCHAR_BYTE_TABLE (ret);
377 cpt4->property[(unsigned char)code] = value;
379 else if (!EQ (ret, value))
381 Lisp_Object cpt4 = make_char_byte_table (ret);
383 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
384 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
387 else if (!EQ (ret, value))
389 Lisp_Object cpt3 = make_char_byte_table (ret);
390 Lisp_Object cpt4 = make_char_byte_table (ret);
392 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
393 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
395 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
398 else if (!EQ (ret, value))
400 Lisp_Object cpt2 = make_char_byte_table (ret);
401 Lisp_Object cpt3 = make_char_byte_table (ret);
402 Lisp_Object cpt4 = make_char_byte_table (ret);
404 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
405 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
406 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
407 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
412 Lisp_Object Vcharacter_attribute_table;
413 Lisp_Object Vcharacter_composition_table;
414 Lisp_Object Vcharacter_variant_table;
416 Lisp_Object Q_decomposition;
419 Lisp_Object Qisolated;
420 Lisp_Object Qinitial;
423 Lisp_Object Qvertical;
424 Lisp_Object QnoBreak;
425 Lisp_Object Qfraction;
436 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
442 else if (EQ (v, Qcompat))
444 else if (EQ (v, Qisolated))
446 else if (EQ (v, Qinitial))
448 else if (EQ (v, Qmedial))
450 else if (EQ (v, Qfinal))
452 else if (EQ (v, Qvertical))
454 else if (EQ (v, QnoBreak))
456 else if (EQ (v, Qfraction))
458 else if (EQ (v, Qsuper))
460 else if (EQ (v, Qsub))
462 else if (EQ (v, Qcircle))
464 else if (EQ (v, Qsquare))
466 else if (EQ (v, Qwide))
468 else if (EQ (v, Qnarrow))
470 else if (EQ (v, Qsmall))
472 else if (EQ (v, Qfont))
475 signal_simple_error (err_msg, err_arg);
478 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
479 Return character corresponding with list.
483 Lisp_Object table = Vcharacter_composition_table;
484 Lisp_Object rest = list;
488 Lisp_Object v = Fcar (rest);
490 Emchar c = to_char_code (v, "Invalid value for composition", list);
492 ret = get_char_code_table (c, table);
497 if (!CHAR_CODE_TABLE_P (ret))
502 else if (!CONSP (rest))
504 else if (CHAR_CODE_TABLE_P (ret))
507 signal_simple_error ("Invalid table is found with", list);
509 signal_simple_error ("Invalid value for composition", list);
512 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
513 Return variants of CHARACTER.
517 CHECK_CHAR (character);
518 return Fcopy_list (get_char_code_table (XCHAR (character),
519 Vcharacter_variant_table));
522 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
523 Return the alist of attributes of CHARACTER.
527 CHECK_CHAR (character);
528 return Fcopy_alist (get_char_code_table (XCHAR (character),
529 Vcharacter_attribute_table));
532 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
533 Return the value of CHARACTER's ATTRIBUTE.
535 (character, attribute))
540 CHECK_CHAR (character);
541 ret = get_char_code_table (XCHAR (character),
542 Vcharacter_attribute_table);
546 if (!NILP (ccs = Ffind_charset (attribute)))
549 return Fcdr (Fassq (attribute, ret));
553 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
556 Emchar char_code = XCHAR (character);
558 = get_char_code_table (char_code, Vcharacter_attribute_table);
561 cell = Fassq (attribute, ret);
565 ret = Fcons (Fcons (attribute, value), ret);
567 else if (!EQ (Fcdr (cell), value))
569 Fsetcdr (cell, value);
571 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
575 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
576 Store CHARACTER's ATTRIBUTE with VALUE.
578 (character, attribute, value))
582 CHECK_CHAR (character);
583 ccs = Ffind_charset (attribute);
586 Lisp_Object cpos, rest;
587 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
594 /* ad-hoc method for `ascii' */
595 if ((XCHARSET_CHARS (ccs) == 94) &&
596 (XCHARSET_BYTE_OFFSET (ccs) != 33))
597 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
599 ccs_len = XCHARSET_CHARS (ccs);
603 Lisp_Object ret = Fcar (value);
606 signal_simple_error ("Invalid value for coded-charset", value);
607 code_point = XINT (ret);
608 if (XCHARSET_GRAPHIC (ccs) == 1)
616 signal_simple_error ("Invalid value for coded-charset", value);
619 signal_simple_error ("Invalid value for coded-charset", value);
621 if (XCHARSET_GRAPHIC (ccs) == 1)
623 code_point = (code_point << 8) | i;
626 value = make_int (code_point);
628 else if (INTP (value))
630 if (XCHARSET_GRAPHIC (ccs) == 1)
631 value = make_int (XINT (value) & 0x7F7F7F7F);
634 signal_simple_error ("Invalid value for coded-charset", value);
637 cpos = Fget_char_attribute (character, attribute);
642 dim = XCHARSET_DIMENSION (ccs);
643 code_point = XINT (cpos);
647 i = ((code_point >> (8 * dim)) & 255)
648 - XCHARSET_BYTE_OFFSET (ccs);
649 nv = XVECTOR_DATA(v)[i];
655 XVECTOR_DATA(v)[i] = Qnil;
656 v = XCHARSET_DECODING_TABLE (ccs);
661 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
664 dim = XCHARSET_DIMENSION (ccs);
665 code_point = XINT (value);
670 i = ((code_point >> (8 * dim)) & 255) - XCHARSET_BYTE_OFFSET (ccs);
671 nv = XVECTOR_DATA(v)[i];
675 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
681 XVECTOR_DATA(v)[i] = character;
683 else if (EQ (attribute, Q_decomposition))
685 Lisp_Object rest = value;
686 Lisp_Object table = Vcharacter_composition_table;
689 signal_simple_error ("Invalid value for ->decomposition",
694 Lisp_Object v = Fcar (rest);
697 = to_char_code (v, "Invalid value for ->decomposition", value);
702 put_char_code_table (c, character, table);
707 ntable = get_char_code_table (c, table);
708 if (!CHAR_CODE_TABLE_P (ntable))
710 ntable = make_char_code_table (Qnil);
711 put_char_code_table (c, ntable, table);
717 else if (EQ (attribute, Q_ucs))
723 signal_simple_error ("Invalid value for ->ucs", value);
727 ret = get_char_code_table (c, Vcharacter_variant_table);
728 if (NILP (Fmemq (character, ret)))
730 put_char_code_table (c, Fcons (character, ret),
731 Vcharacter_variant_table);
734 return put_char_attribute (character, attribute, value);
739 EXFUN (Fmake_char, 3);
741 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
742 Store character's ATTRIBUTES.
746 Lisp_Object rest = attributes;
747 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
748 Lisp_Object character;
754 Lisp_Object cell = Fcar (rest);
758 signal_simple_error ("Invalid argument", attributes);
759 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
762 character = Fmake_char (ccs, Fcar (cell),
764 goto setup_attributes;
768 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
771 signal_simple_error ("Invalid argument", attributes);
773 character = make_char (XINT (code) + 0x100000);
774 goto setup_attributes;
778 else if (!INTP (code))
779 signal_simple_error ("Invalid argument", attributes);
781 character = make_char (XINT (code));
787 Lisp_Object cell = Fcar (rest);
790 signal_simple_error ("Invalid argument", attributes);
791 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
795 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
798 Lisp_Object Vutf_2000_version;
802 int leading_code_private_11;
805 Lisp_Object Qcharsetp;
807 /* Qdoc_string, Qdimension, Qchars defined in general.c */
808 Lisp_Object Qregistry, Qfinal, Qgraphic;
809 Lisp_Object Qdirection;
810 Lisp_Object Qreverse_direction_charset;
811 Lisp_Object Qleading_byte;
812 Lisp_Object Qshort_name, Qlong_name;
828 Qjapanese_jisx0208_1978,
831 Qjapanese_jisx0208_1990,
841 Qvietnamese_viscii_lower,
842 Qvietnamese_viscii_upper,
871 Lisp_Object Ql2r, Qr2l;
873 Lisp_Object Vcharset_hash_table;
876 static Charset_ID next_allocated_leading_byte;
878 static Charset_ID next_allocated_1_byte_leading_byte;
879 static Charset_ID next_allocated_2_byte_leading_byte;
882 /* Composite characters are characters constructed by overstriking two
883 or more regular characters.
885 1) The old Mule implementation involves storing composite characters
886 in a buffer as a tag followed by all of the actual characters
887 used to make up the composite character. I think this is a bad
888 idea; it greatly complicates code that wants to handle strings
889 one character at a time because it has to deal with the possibility
890 of great big ungainly characters. It's much more reasonable to
891 simply store an index into a table of composite characters.
893 2) The current implementation only allows for 16,384 separate
894 composite characters over the lifetime of the XEmacs process.
895 This could become a potential problem if the user
896 edited lots of different files that use composite characters.
897 Due to FSF bogosity, increasing the number of allowable
898 composite characters under Mule would decrease the number
899 of possible faces that can exist. Mule already has shrunk
900 this to 2048, and further shrinkage would become uncomfortable.
901 No such problems exist in XEmacs.
903 Composite characters could be represented as 0x80 C1 C2 C3,
904 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
905 for slightly under 2^20 (one million) composite characters
906 over the XEmacs process lifetime, and you only need to
907 increase the size of a Mule character from 19 to 21 bits.
908 Or you could use 0x80 C1 C2 C3 C4, allowing for about
909 85 million (slightly over 2^26) composite characters. */
912 /************************************************************************/
913 /* Basic Emchar functions */
914 /************************************************************************/
916 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
917 string in STR. Returns the number of bytes stored.
918 Do not call this directly. Use the macro set_charptr_emchar() instead.
922 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
937 else if ( c <= 0x7ff )
939 *p++ = (c >> 6) | 0xc0;
940 *p++ = (c & 0x3f) | 0x80;
942 else if ( c <= 0xffff )
944 *p++ = (c >> 12) | 0xe0;
945 *p++ = ((c >> 6) & 0x3f) | 0x80;
946 *p++ = (c & 0x3f) | 0x80;
948 else if ( c <= 0x1fffff )
950 *p++ = (c >> 18) | 0xf0;
951 *p++ = ((c >> 12) & 0x3f) | 0x80;
952 *p++ = ((c >> 6) & 0x3f) | 0x80;
953 *p++ = (c & 0x3f) | 0x80;
955 else if ( c <= 0x3ffffff )
957 *p++ = (c >> 24) | 0xf8;
958 *p++ = ((c >> 18) & 0x3f) | 0x80;
959 *p++ = ((c >> 12) & 0x3f) | 0x80;
960 *p++ = ((c >> 6) & 0x3f) | 0x80;
961 *p++ = (c & 0x3f) | 0x80;
965 *p++ = (c >> 30) | 0xfc;
966 *p++ = ((c >> 24) & 0x3f) | 0x80;
967 *p++ = ((c >> 18) & 0x3f) | 0x80;
968 *p++ = ((c >> 12) & 0x3f) | 0x80;
969 *p++ = ((c >> 6) & 0x3f) | 0x80;
970 *p++ = (c & 0x3f) | 0x80;
973 BREAKUP_CHAR (c, charset, c1, c2);
974 lb = CHAR_LEADING_BYTE (c);
975 if (LEADING_BYTE_PRIVATE_P (lb))
976 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
978 if (EQ (charset, Vcharset_control_1))
987 /* Return the first character from a Mule-encoded string in STR,
988 assuming it's non-ASCII. Do not call this directly.
989 Use the macro charptr_emchar() instead. */
992 non_ascii_charptr_emchar (CONST Bufbyte *str)
1005 else if ( b >= 0xf8 )
1010 else if ( b >= 0xf0 )
1015 else if ( b >= 0xe0 )
1020 else if ( b >= 0xc0 )
1030 for( ; len > 0; len-- )
1033 ch = ( ch << 6 ) | ( b & 0x3f );
1037 Bufbyte i0 = *str, i1, i2 = 0;
1038 Lisp_Object charset;
1040 if (i0 == LEADING_BYTE_CONTROL_1)
1041 return (Emchar) (*++str - 0x20);
1043 if (LEADING_BYTE_PREFIX_P (i0))
1048 charset = CHARSET_BY_LEADING_BYTE (i0);
1049 if (XCHARSET_DIMENSION (charset) == 2)
1052 return MAKE_CHAR (charset, i1, i2);
1056 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1057 Do not call this directly. Use the macro valid_char_p() instead. */
1061 non_ascii_valid_char_p (Emchar ch)
1065 /* Must have only lowest 19 bits set */
1069 f1 = CHAR_FIELD1 (ch);
1070 f2 = CHAR_FIELD2 (ch);
1071 f3 = CHAR_FIELD3 (ch);
1075 Lisp_Object charset;
1077 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1078 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1079 f2 > MAX_CHAR_FIELD2_PRIVATE)
1084 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1085 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1089 NOTE: This takes advantage of the fact that
1090 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1091 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1093 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1094 if (EQ (charset, Qnil))
1096 return (XCHARSET_CHARS (charset) == 96);
1100 Lisp_Object charset;
1102 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1103 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1104 f1 > MAX_CHAR_FIELD1_PRIVATE)
1106 if (f2 < 0x20 || f3 < 0x20)
1109 #ifdef ENABLE_COMPOSITE_CHARS
1110 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1112 if (UNBOUNDP (Fgethash (make_int (ch),
1113 Vcomposite_char_char2string_hash_table,
1118 #endif /* ENABLE_COMPOSITE_CHARS */
1120 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1121 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1124 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1126 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1129 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1131 if (EQ (charset, Qnil))
1133 return (XCHARSET_CHARS (charset) == 96);
1139 /************************************************************************/
1140 /* Basic string functions */
1141 /************************************************************************/
1143 /* Copy the character pointed to by PTR into STR, assuming it's
1144 non-ASCII. Do not call this directly. Use the macro
1145 charptr_copy_char() instead. */
1148 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1150 Bufbyte *strptr = str;
1152 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1154 /* Notice fallthrough. */
1156 case 6: *++strptr = *ptr++;
1157 case 5: *++strptr = *ptr++;
1159 case 4: *++strptr = *ptr++;
1160 case 3: *++strptr = *ptr++;
1161 case 2: *++strptr = *ptr;
1166 return strptr + 1 - str;
1170 /************************************************************************/
1171 /* streams of Emchars */
1172 /************************************************************************/
1174 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1175 The functions below are not meant to be called directly; use
1176 the macros in insdel.h. */
1179 Lstream_get_emchar_1 (Lstream *stream, int ch)
1181 Bufbyte str[MAX_EMCHAR_LEN];
1182 Bufbyte *strptr = str;
1184 str[0] = (Bufbyte) ch;
1185 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1187 /* Notice fallthrough. */
1190 ch = Lstream_getc (stream);
1192 *++strptr = (Bufbyte) ch;
1194 ch = Lstream_getc (stream);
1196 *++strptr = (Bufbyte) ch;
1199 ch = Lstream_getc (stream);
1201 *++strptr = (Bufbyte) ch;
1203 ch = Lstream_getc (stream);
1205 *++strptr = (Bufbyte) ch;
1207 ch = Lstream_getc (stream);
1209 *++strptr = (Bufbyte) ch;
1214 return charptr_emchar (str);
1218 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1220 Bufbyte str[MAX_EMCHAR_LEN];
1221 Bytecount len = set_charptr_emchar (str, ch);
1222 return Lstream_write (stream, str, len);
1226 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1228 Bufbyte str[MAX_EMCHAR_LEN];
1229 Bytecount len = set_charptr_emchar (str, ch);
1230 Lstream_unread (stream, str, len);
1234 /************************************************************************/
1235 /* charset object */
1236 /************************************************************************/
1239 mark_charset (Lisp_Object obj)
1241 struct Lisp_Charset *cs = XCHARSET (obj);
1243 mark_object (cs->short_name);
1244 mark_object (cs->long_name);
1245 mark_object (cs->doc_string);
1246 mark_object (cs->registry);
1247 mark_object (cs->ccl_program);
1249 mark_object (cs->decoding_table);
1255 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1257 struct Lisp_Charset *cs = XCHARSET (obj);
1261 error ("printing unreadable object #<charset %s 0x%x>",
1262 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1265 write_c_string ("#<charset ", printcharfun);
1266 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1267 write_c_string (" ", printcharfun);
1268 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1269 write_c_string (" ", printcharfun);
1270 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1271 write_c_string (" ", printcharfun);
1272 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1273 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1275 CHARSET_DIMENSION (cs),
1276 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1277 CHARSET_COLUMNS (cs),
1278 CHARSET_GRAPHIC (cs),
1279 CHARSET_FINAL (cs));
1280 write_c_string (buf, printcharfun);
1281 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1282 sprintf (buf, " 0x%x>", cs->header.uid);
1283 write_c_string (buf, printcharfun);
1286 static const struct lrecord_description charset_description[] = {
1287 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1289 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1294 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1295 mark_charset, print_charset, 0, 0, 0,
1296 charset_description,
1297 struct Lisp_Charset);
1298 /* Make a new charset. */
1301 make_charset (Charset_ID id, Lisp_Object name,
1302 unsigned short chars, unsigned char dimension,
1303 unsigned char columns, unsigned char graphic,
1304 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1305 Lisp_Object long_name, Lisp_Object doc,
1307 Lisp_Object decoding_table,
1308 Emchar ucs_min, Emchar ucs_max,
1309 Emchar code_offset, unsigned char byte_offset)
1311 unsigned char type = 0;
1313 struct Lisp_Charset *cs =
1314 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1315 XSETCHARSET (obj, cs);
1317 CHARSET_ID (cs) = id;
1318 CHARSET_NAME (cs) = name;
1319 CHARSET_SHORT_NAME (cs) = short_name;
1320 CHARSET_LONG_NAME (cs) = long_name;
1321 CHARSET_CHARS (cs) = chars;
1322 CHARSET_DIMENSION (cs) = dimension;
1323 CHARSET_DIRECTION (cs) = direction;
1324 CHARSET_COLUMNS (cs) = columns;
1325 CHARSET_GRAPHIC (cs) = graphic;
1326 CHARSET_FINAL (cs) = final;
1327 CHARSET_DOC_STRING (cs) = doc;
1328 CHARSET_REGISTRY (cs) = reg;
1329 CHARSET_CCL_PROGRAM (cs) = Qnil;
1330 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1332 CHARSET_DECODING_TABLE(cs) = Qnil;
1333 CHARSET_UCS_MIN(cs) = ucs_min;
1334 CHARSET_UCS_MAX(cs) = ucs_max;
1335 CHARSET_CODE_OFFSET(cs) = code_offset;
1336 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1339 switch (CHARSET_CHARS (cs))
1342 switch (CHARSET_DIMENSION (cs))
1345 type = CHARSET_TYPE_94;
1348 type = CHARSET_TYPE_94X94;
1353 switch (CHARSET_DIMENSION (cs))
1356 type = CHARSET_TYPE_96;
1359 type = CHARSET_TYPE_96X96;
1365 switch (CHARSET_DIMENSION (cs))
1368 type = CHARSET_TYPE_128;
1371 type = CHARSET_TYPE_128X128;
1376 switch (CHARSET_DIMENSION (cs))
1379 type = CHARSET_TYPE_256;
1382 type = CHARSET_TYPE_256X256;
1389 CHARSET_TYPE (cs) = type;
1393 if (id == LEADING_BYTE_ASCII)
1394 CHARSET_REP_BYTES (cs) = 1;
1396 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1398 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1403 /* some charsets do not have final characters. This includes
1404 ASCII, Control-1, Composite, and the two faux private
1407 if (code_offset == 0)
1409 assert (NILP (chlook->charset_by_attributes[type][final]));
1410 chlook->charset_by_attributes[type][final] = obj;
1413 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1414 chlook->charset_by_attributes[type][final][direction] = obj;
1418 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1419 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1421 /* Some charsets are "faux" and don't have names or really exist at
1422 all except in the leading-byte table. */
1424 Fputhash (name, obj, Vcharset_hash_table);
1429 get_unallocated_leading_byte (int dimension)
1434 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1437 lb = next_allocated_leading_byte++;
1441 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1444 lb = next_allocated_1_byte_leading_byte++;
1448 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1451 lb = next_allocated_2_byte_leading_byte++;
1457 ("No more character sets free for this dimension",
1458 make_int (dimension));
1465 make_builtin_char (Lisp_Object charset, int c1, int c2)
1467 if (XCHARSET_UCS_MAX (charset))
1470 = (XCHARSET_DIMENSION (charset) == 1
1472 c1 - XCHARSET_BYTE_OFFSET (charset)
1474 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1475 + c2 - XCHARSET_BYTE_OFFSET (charset))
1476 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1477 if ((code < XCHARSET_UCS_MIN (charset))
1478 || (XCHARSET_UCS_MAX (charset) < code))
1479 signal_simple_error ("Arguments makes invalid character",
1483 else if (XCHARSET_DIMENSION (charset) == 1)
1485 switch (XCHARSET_CHARS (charset))
1489 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1492 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1499 switch (XCHARSET_CHARS (charset))
1502 return MIN_CHAR_94x94
1503 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1504 + (c1 - 33) * 94 + (c2 - 33);
1506 return MIN_CHAR_96x96
1507 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1508 + (c1 - 32) * 96 + (c2 - 32);
1516 range_charset_code_point (Lisp_Object charset, Emchar ch)
1520 if ((XCHARSET_UCS_MIN (charset) <= ch)
1521 && (ch <= XCHARSET_UCS_MAX (charset)))
1523 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1525 if (XCHARSET_CHARS (charset) == 256)
1527 else if (XCHARSET_DIMENSION (charset) == 1)
1528 return d + XCHARSET_BYTE_OFFSET (charset);
1529 else if (XCHARSET_DIMENSION (charset) == 2)
1531 ((d / XCHARSET_CHARS (charset)
1532 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1533 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1534 else if (XCHARSET_DIMENSION (charset) == 3)
1536 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1537 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1538 | ((d / XCHARSET_CHARS (charset)
1539 % XCHARSET_CHARS (charset)
1540 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1541 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1542 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1544 ((d / (XCHARSET_CHARS (charset)
1545 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1546 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1547 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1548 % XCHARSET_CHARS (charset)
1549 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1550 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1551 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1552 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1554 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1556 if (XCHARSET_DIMENSION (charset) == 1)
1558 if (XCHARSET_CHARS (charset) == 94)
1560 if (((d = ch - (MIN_CHAR_94
1561 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1565 else if (XCHARSET_CHARS (charset) == 96)
1567 if (((d = ch - (MIN_CHAR_96
1568 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1575 else if (XCHARSET_DIMENSION (charset) == 2)
1577 if (XCHARSET_CHARS (charset) == 94)
1579 if (((d = ch - (MIN_CHAR_94x94
1580 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1583 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1585 else if (XCHARSET_CHARS (charset) == 96)
1587 if (((d = ch - (MIN_CHAR_96x96
1588 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1591 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1601 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1603 if (c <= MAX_CHAR_BASIC_LATIN)
1605 *charset = Vcharset_ascii;
1610 *charset = Vcharset_control_1;
1615 *charset = Vcharset_latin_iso8859_1;
1618 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1620 *charset = Vcharset_greek_iso8859_7;
1621 return c - MIN_CHAR_GREEK + 0x20;
1624 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1626 *charset = Vcharset_cyrillic_iso8859_5;
1627 return c - MIN_CHAR_CYRILLIC + 0x20;
1630 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1632 *charset = Vcharset_hebrew_iso8859_8;
1633 return c - MIN_CHAR_HEBREW + 0x20;
1635 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1637 *charset = Vcharset_thai_tis620;
1638 return c - MIN_CHAR_THAI + 0x20;
1641 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1642 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1644 return list2 (Vcharset_katakana_jisx0201,
1645 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1648 else if (c <= MAX_CHAR_BMP)
1650 *charset = Vcharset_ucs_bmp;
1653 else if (c < MIN_CHAR_DAIKANWA)
1655 *charset = Vcharset_ucs;
1658 else if (c <= MAX_CHAR_DAIKANWA)
1660 *charset = Vcharset_ideograph_daikanwa;
1661 return c - MIN_CHAR_DAIKANWA;
1663 else if (c < MIN_CHAR_94)
1665 *charset = Vcharset_ucs;
1668 else if (c <= MAX_CHAR_94)
1670 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1671 ((c - MIN_CHAR_94) / 94) + '0',
1672 CHARSET_LEFT_TO_RIGHT);
1673 if (!NILP (*charset))
1674 return ((c - MIN_CHAR_94) % 94) + 33;
1677 *charset = Vcharset_ucs;
1681 else if (c <= MAX_CHAR_96)
1683 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1684 ((c - MIN_CHAR_96) / 96) + '0',
1685 CHARSET_LEFT_TO_RIGHT);
1686 if (!NILP (*charset))
1687 return ((c - MIN_CHAR_96) % 96) + 32;
1690 *charset = Vcharset_ucs;
1694 else if (c <= MAX_CHAR_94x94)
1697 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
1698 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1699 CHARSET_LEFT_TO_RIGHT);
1700 if (!NILP (*charset))
1701 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1702 | (((c - MIN_CHAR_94x94) % 94) + 33);
1705 *charset = Vcharset_ucs;
1709 else if (c <= MAX_CHAR_96x96)
1712 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
1713 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1714 CHARSET_LEFT_TO_RIGHT);
1715 if (!NILP (*charset))
1716 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1717 | (((c - MIN_CHAR_96x96) % 96) + 32);
1720 *charset = Vcharset_ucs;
1726 *charset = Vcharset_ucs;
1731 Lisp_Object Vdefault_coded_charset_priority_list;
1735 /************************************************************************/
1736 /* Basic charset Lisp functions */
1737 /************************************************************************/
1739 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1740 Return non-nil if OBJECT is a charset.
1744 return CHARSETP (object) ? Qt : Qnil;
1747 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1748 Retrieve the charset of the given name.
1749 If CHARSET-OR-NAME is a charset object, it is simply returned.
1750 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1751 nil is returned. Otherwise the associated charset object is returned.
1755 if (CHARSETP (charset_or_name))
1756 return charset_or_name;
1758 CHECK_SYMBOL (charset_or_name);
1759 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1762 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1763 Retrieve the charset of the given name.
1764 Same as `find-charset' except an error is signalled if there is no such
1765 charset instead of returning nil.
1769 Lisp_Object charset = Ffind_charset (name);
1772 signal_simple_error ("No such charset", name);
1776 /* We store the charsets in hash tables with the names as the key and the
1777 actual charset object as the value. Occasionally we need to use them
1778 in a list format. These routines provide us with that. */
1779 struct charset_list_closure
1781 Lisp_Object *charset_list;
1785 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1786 void *charset_list_closure)
1788 /* This function can GC */
1789 struct charset_list_closure *chcl =
1790 (struct charset_list_closure*) charset_list_closure;
1791 Lisp_Object *charset_list = chcl->charset_list;
1793 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1797 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1798 Return a list of the names of all defined charsets.
1802 Lisp_Object charset_list = Qnil;
1803 struct gcpro gcpro1;
1804 struct charset_list_closure charset_list_closure;
1806 GCPRO1 (charset_list);
1807 charset_list_closure.charset_list = &charset_list;
1808 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1809 &charset_list_closure);
1812 return charset_list;
1815 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1816 Return the name of the given charset.
1820 return XCHARSET_NAME (Fget_charset (charset));
1823 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1824 Define a new character set.
1825 This function is for use with Mule support.
1826 NAME is a symbol, the name by which the character set is normally referred.
1827 DOC-STRING is a string describing the character set.
1828 PROPS is a property list, describing the specific nature of the
1829 character set. Recognized properties are:
1831 'short-name Short version of the charset name (ex: Latin-1)
1832 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1833 'registry A regular expression matching the font registry field for
1835 'dimension Number of octets used to index a character in this charset.
1836 Either 1 or 2. Defaults to 1.
1837 'columns Number of columns used to display a character in this charset.
1838 Only used in TTY mode. (Under X, the actual width of a
1839 character can be derived from the font used to display the
1840 characters.) If unspecified, defaults to the dimension
1841 (this is almost always the correct value).
1842 'chars Number of characters in each dimension (94 or 96).
1843 Defaults to 94. Note that if the dimension is 2, the
1844 character set thus described is 94x94 or 96x96.
1845 'final Final byte of ISO 2022 escape sequence. Must be
1846 supplied. Each combination of (DIMENSION, CHARS) defines a
1847 separate namespace for final bytes. Note that ISO
1848 2022 restricts the final byte to the range
1849 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1850 dimension == 2. Note also that final bytes in the range
1851 0x30 - 0x3F are reserved for user-defined (not official)
1853 'graphic 0 (use left half of font on output) or 1 (use right half
1854 of font on output). Defaults to 0. For example, for
1855 a font whose registry is ISO8859-1, the left half
1856 (octets 0x20 - 0x7F) is the `ascii' character set, while
1857 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1858 character set. With 'graphic set to 0, the octets
1859 will have their high bit cleared; with it set to 1,
1860 the octets will have their high bit set.
1861 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1863 'ccl-program A compiled CCL program used to convert a character in
1864 this charset into an index into the font. This is in
1865 addition to the 'graphic property. The CCL program
1866 is passed the octets of the character, with the high
1867 bit cleared and set depending upon whether the value
1868 of the 'graphic property is 0 or 1.
1870 (name, doc_string, props))
1872 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1873 int direction = CHARSET_LEFT_TO_RIGHT;
1875 Lisp_Object registry = Qnil;
1876 Lisp_Object charset;
1877 Lisp_Object rest, keyword, value;
1878 Lisp_Object ccl_program = Qnil;
1879 Lisp_Object short_name = Qnil, long_name = Qnil;
1880 int byte_offset = -1;
1882 CHECK_SYMBOL (name);
1883 if (!NILP (doc_string))
1884 CHECK_STRING (doc_string);
1886 charset = Ffind_charset (name);
1887 if (!NILP (charset))
1888 signal_simple_error ("Cannot redefine existing charset", name);
1890 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1892 if (EQ (keyword, Qshort_name))
1894 CHECK_STRING (value);
1898 if (EQ (keyword, Qlong_name))
1900 CHECK_STRING (value);
1904 else if (EQ (keyword, Qdimension))
1907 dimension = XINT (value);
1908 if (dimension < 1 || dimension > 2)
1909 signal_simple_error ("Invalid value for 'dimension", value);
1912 else if (EQ (keyword, Qchars))
1915 chars = XINT (value);
1916 if (chars != 94 && chars != 96)
1917 signal_simple_error ("Invalid value for 'chars", value);
1920 else if (EQ (keyword, Qcolumns))
1923 columns = XINT (value);
1924 if (columns != 1 && columns != 2)
1925 signal_simple_error ("Invalid value for 'columns", value);
1928 else if (EQ (keyword, Qgraphic))
1931 graphic = XINT (value);
1933 if (graphic < 0 || graphic > 2)
1935 if (graphic < 0 || graphic > 1)
1937 signal_simple_error ("Invalid value for 'graphic", value);
1940 else if (EQ (keyword, Qregistry))
1942 CHECK_STRING (value);
1946 else if (EQ (keyword, Qdirection))
1948 if (EQ (value, Ql2r))
1949 direction = CHARSET_LEFT_TO_RIGHT;
1950 else if (EQ (value, Qr2l))
1951 direction = CHARSET_RIGHT_TO_LEFT;
1953 signal_simple_error ("Invalid value for 'direction", value);
1956 else if (EQ (keyword, Qfinal))
1958 CHECK_CHAR_COERCE_INT (value);
1959 final = XCHAR (value);
1960 if (final < '0' || final > '~')
1961 signal_simple_error ("Invalid value for 'final", value);
1964 else if (EQ (keyword, Qccl_program))
1966 CHECK_VECTOR (value);
1967 ccl_program = value;
1971 signal_simple_error ("Unrecognized property", keyword);
1975 error ("'final must be specified");
1976 if (dimension == 2 && final > 0x5F)
1978 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1982 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1984 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1986 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1987 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1989 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1991 id = get_unallocated_leading_byte (dimension);
1993 if (NILP (doc_string))
1994 doc_string = build_string ("");
1996 if (NILP (registry))
1997 registry = build_string ("");
1999 if (NILP (short_name))
2000 XSETSTRING (short_name, XSYMBOL (name)->name);
2002 if (NILP (long_name))
2003 long_name = doc_string;
2006 columns = dimension;
2008 if (byte_offset < 0)
2012 else if (chars == 96)
2018 charset = make_charset (id, name, chars, dimension, columns, graphic,
2019 final, direction, short_name, long_name,
2020 doc_string, registry,
2021 Qnil, 0, 0, 0, byte_offset);
2022 if (!NILP (ccl_program))
2023 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2027 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2029 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2030 NEW-NAME is the name of the new charset. Return the new charset.
2032 (charset, new_name))
2034 Lisp_Object new_charset = Qnil;
2035 int id, chars, dimension, columns, graphic, final;
2037 Lisp_Object registry, doc_string, short_name, long_name;
2038 struct Lisp_Charset *cs;
2040 charset = Fget_charset (charset);
2041 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2042 signal_simple_error ("Charset already has reverse-direction charset",
2045 CHECK_SYMBOL (new_name);
2046 if (!NILP (Ffind_charset (new_name)))
2047 signal_simple_error ("Cannot redefine existing charset", new_name);
2049 cs = XCHARSET (charset);
2051 chars = CHARSET_CHARS (cs);
2052 dimension = CHARSET_DIMENSION (cs);
2053 columns = CHARSET_COLUMNS (cs);
2054 id = get_unallocated_leading_byte (dimension);
2056 graphic = CHARSET_GRAPHIC (cs);
2057 final = CHARSET_FINAL (cs);
2058 direction = CHARSET_RIGHT_TO_LEFT;
2059 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2060 direction = CHARSET_LEFT_TO_RIGHT;
2061 doc_string = CHARSET_DOC_STRING (cs);
2062 short_name = CHARSET_SHORT_NAME (cs);
2063 long_name = CHARSET_LONG_NAME (cs);
2064 registry = CHARSET_REGISTRY (cs);
2066 new_charset = make_charset (id, new_name, chars, dimension, columns,
2067 graphic, final, direction, short_name, long_name,
2068 doc_string, registry,
2070 CHARSET_DECODING_TABLE(cs),
2071 CHARSET_UCS_MIN(cs),
2072 CHARSET_UCS_MAX(cs),
2073 CHARSET_CODE_OFFSET(cs),
2074 CHARSET_BYTE_OFFSET(cs)
2080 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2081 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2086 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2087 Define symbol ALIAS as an alias for CHARSET.
2091 CHECK_SYMBOL (alias);
2092 charset = Fget_charset (charset);
2093 return Fputhash (alias, charset, Vcharset_hash_table);
2096 /* #### Reverse direction charsets not yet implemented. */
2098 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2100 Return the reverse-direction charset parallel to CHARSET, if any.
2101 This is the charset with the same properties (in particular, the same
2102 dimension, number of characters per dimension, and final byte) as
2103 CHARSET but whose characters are displayed in the opposite direction.
2107 charset = Fget_charset (charset);
2108 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2112 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2113 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2114 If DIRECTION is omitted, both directions will be checked (left-to-right
2115 will be returned if character sets exist for both directions).
2117 (dimension, chars, final, direction))
2119 int dm, ch, fi, di = -1;
2121 Lisp_Object obj = Qnil;
2123 CHECK_INT (dimension);
2124 dm = XINT (dimension);
2125 if (dm < 1 || dm > 2)
2126 signal_simple_error ("Invalid value for DIMENSION", dimension);
2130 if (ch != 94 && ch != 96)
2131 signal_simple_error ("Invalid value for CHARS", chars);
2133 CHECK_CHAR_COERCE_INT (final);
2135 if (fi < '0' || fi > '~')
2136 signal_simple_error ("Invalid value for FINAL", final);
2138 if (EQ (direction, Ql2r))
2139 di = CHARSET_LEFT_TO_RIGHT;
2140 else if (EQ (direction, Qr2l))
2141 di = CHARSET_RIGHT_TO_LEFT;
2142 else if (!NILP (direction))
2143 signal_simple_error ("Invalid value for DIRECTION", direction);
2145 if (dm == 2 && fi > 0x5F)
2147 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2150 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2152 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2156 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2158 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2161 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2164 return XCHARSET_NAME (obj);
2168 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2169 Return short name of CHARSET.
2173 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2176 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2177 Return long name of CHARSET.
2181 return XCHARSET_LONG_NAME (Fget_charset (charset));
2184 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2185 Return description of CHARSET.
2189 return XCHARSET_DOC_STRING (Fget_charset (charset));
2192 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2193 Return dimension of CHARSET.
2197 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2200 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2201 Return property PROP of CHARSET.
2202 Recognized properties are those listed in `make-charset', as well as
2203 'name and 'doc-string.
2207 struct Lisp_Charset *cs;
2209 charset = Fget_charset (charset);
2210 cs = XCHARSET (charset);
2212 CHECK_SYMBOL (prop);
2213 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2214 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2215 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2216 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2217 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2218 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2219 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2220 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2221 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2222 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2223 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2224 if (EQ (prop, Qdirection))
2225 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2226 if (EQ (prop, Qreverse_direction_charset))
2228 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2232 return XCHARSET_NAME (obj);
2234 signal_simple_error ("Unrecognized charset property name", prop);
2235 return Qnil; /* not reached */
2238 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2239 Return charset identification number of CHARSET.
2243 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2246 /* #### We need to figure out which properties we really want to
2249 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2250 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2252 (charset, ccl_program))
2254 charset = Fget_charset (charset);
2255 CHECK_VECTOR (ccl_program);
2256 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2261 invalidate_charset_font_caches (Lisp_Object charset)
2263 /* Invalidate font cache entries for charset on all devices. */
2264 Lisp_Object devcons, concons, hash_table;
2265 DEVICE_LOOP_NO_BREAK (devcons, concons)
2267 struct device *d = XDEVICE (XCAR (devcons));
2268 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2269 if (!UNBOUNDP (hash_table))
2270 Fclrhash (hash_table);
2274 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2275 Set the 'registry property of CHARSET to REGISTRY.
2277 (charset, registry))
2279 charset = Fget_charset (charset);
2280 CHECK_STRING (registry);
2281 XCHARSET_REGISTRY (charset) = registry;
2282 invalidate_charset_font_caches (charset);
2283 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2288 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2289 Return mapping-table of CHARSET.
2293 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2296 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2297 Set mapping-table of CHARSET to TABLE.
2301 struct Lisp_Charset *cs;
2302 Lisp_Object old_table;
2305 charset = Fget_charset (charset);
2306 cs = XCHARSET (charset);
2308 if (EQ (table, Qnil))
2310 CHARSET_DECODING_TABLE(cs) = table;
2313 else if (VECTORP (table))
2317 /* ad-hoc method for `ascii' */
2318 if ((CHARSET_CHARS (cs) == 94) &&
2319 (CHARSET_BYTE_OFFSET (cs) != 33))
2320 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2322 ccs_len = CHARSET_CHARS (cs);
2324 if (XVECTOR_LENGTH (table) > ccs_len)
2325 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2326 old_table = CHARSET_DECODING_TABLE(cs);
2327 CHARSET_DECODING_TABLE(cs) = table;
2330 signal_error (Qwrong_type_argument,
2331 list2 (build_translated_string ("vector-or-nil-p"),
2333 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2335 switch (CHARSET_DIMENSION (cs))
2338 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2340 Lisp_Object c = XVECTOR_DATA(table)[i];
2345 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2349 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2351 Lisp_Object v = XVECTOR_DATA(table)[i];
2357 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2359 CHARSET_DECODING_TABLE(cs) = old_table;
2360 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2362 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2364 Lisp_Object c = XVECTOR_DATA(v)[j];
2369 make_int ( ((i + CHARSET_BYTE_OFFSET (cs)) << 8)
2370 | (j + CHARSET_BYTE_OFFSET (cs)) ));
2374 put_char_attribute (v, charset,
2375 make_int (i + CHARSET_BYTE_OFFSET (cs)));
2384 /************************************************************************/
2385 /* Lisp primitives for working with characters */
2386 /************************************************************************/
2389 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2390 Make a character from CHARSET and code-point CODE.
2394 charset = Fget_charset (charset);
2395 return make_char (DECODE_CHAR (charset, XINT (code)));
2399 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2400 Make a character from CHARSET and octets ARG1 and ARG2.
2401 ARG2 is required only for characters from two-dimensional charsets.
2402 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2403 character s with caron.
2405 (charset, arg1, arg2))
2407 struct Lisp_Charset *cs;
2409 int lowlim, highlim;
2411 charset = Fget_charset (charset);
2412 cs = XCHARSET (charset);
2414 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2415 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2417 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2419 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2420 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2423 /* It is useful (and safe, according to Olivier Galibert) to strip
2424 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2425 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2426 Latin 2 code of the character. */
2434 if (a1 < lowlim || a1 > highlim)
2435 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2437 if (CHARSET_DIMENSION (cs) == 1)
2441 ("Charset is of dimension one; second octet must be nil", arg2);
2442 return make_char (MAKE_CHAR (charset, a1, 0));
2451 a2 = XINT (arg2) & 0x7f;
2453 if (a2 < lowlim || a2 > highlim)
2454 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2456 return make_char (MAKE_CHAR (charset, a1, a2));
2459 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2460 Return the character set of char CH.
2464 CHECK_CHAR_COERCE_INT (ch);
2466 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2469 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2470 Return the octet numbered N (should be 0 or 1) of char CH.
2471 N defaults to 0 if omitted.
2475 Lisp_Object charset;
2478 CHECK_CHAR_COERCE_INT (ch);
2480 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2482 if (NILP (n) || EQ (n, Qzero))
2483 return make_int (octet0);
2484 else if (EQ (n, make_int (1)))
2485 return make_int (octet1);
2487 signal_simple_error ("Octet number must be 0 or 1", n);
2490 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2491 Return list of charset and one or two position-codes of CHAR.
2495 /* This function can GC */
2496 struct gcpro gcpro1, gcpro2;
2497 Lisp_Object charset = Qnil;
2498 Lisp_Object rc = Qnil;
2506 GCPRO2 (charset, rc);
2507 CHECK_CHAR_COERCE_INT (character);
2510 code_point = ENCODE_CHAR (XCHAR (character), charset);
2511 dimension = XCHARSET_DIMENSION (charset);
2512 while (dimension > 0)
2514 rc = Fcons (make_int (code_point & 255), rc);
2518 rc = Fcons (XCHARSET_NAME (charset), rc);
2520 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2522 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2524 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2528 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2537 #ifdef ENABLE_COMPOSITE_CHARS
2538 /************************************************************************/
2539 /* composite character functions */
2540 /************************************************************************/
2543 lookup_composite_char (Bufbyte *str, int len)
2545 Lisp_Object lispstr = make_string (str, len);
2546 Lisp_Object ch = Fgethash (lispstr,
2547 Vcomposite_char_string2char_hash_table,
2553 if (composite_char_row_next >= 128)
2554 signal_simple_error ("No more composite chars available", lispstr);
2555 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2556 composite_char_col_next);
2557 Fputhash (make_char (emch), lispstr,
2558 Vcomposite_char_char2string_hash_table);
2559 Fputhash (lispstr, make_char (emch),
2560 Vcomposite_char_string2char_hash_table);
2561 composite_char_col_next++;
2562 if (composite_char_col_next >= 128)
2564 composite_char_col_next = 32;
2565 composite_char_row_next++;
2574 composite_char_string (Emchar ch)
2576 Lisp_Object str = Fgethash (make_char (ch),
2577 Vcomposite_char_char2string_hash_table,
2579 assert (!UNBOUNDP (str));
2583 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2584 Convert a string into a single composite character.
2585 The character is the result of overstriking all the characters in
2590 CHECK_STRING (string);
2591 return make_char (lookup_composite_char (XSTRING_DATA (string),
2592 XSTRING_LENGTH (string)));
2595 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2596 Return a string of the characters comprising a composite character.
2604 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2605 signal_simple_error ("Must be composite char", ch);
2606 return composite_char_string (emch);
2608 #endif /* ENABLE_COMPOSITE_CHARS */
2611 /************************************************************************/
2612 /* initialization */
2613 /************************************************************************/
2616 syms_of_mule_charset (void)
2618 DEFSUBR (Fcharsetp);
2619 DEFSUBR (Ffind_charset);
2620 DEFSUBR (Fget_charset);
2621 DEFSUBR (Fcharset_list);
2622 DEFSUBR (Fcharset_name);
2623 DEFSUBR (Fmake_charset);
2624 DEFSUBR (Fmake_reverse_direction_charset);
2625 /* DEFSUBR (Freverse_direction_charset); */
2626 DEFSUBR (Fdefine_charset_alias);
2627 DEFSUBR (Fcharset_from_attributes);
2628 DEFSUBR (Fcharset_short_name);
2629 DEFSUBR (Fcharset_long_name);
2630 DEFSUBR (Fcharset_description);
2631 DEFSUBR (Fcharset_dimension);
2632 DEFSUBR (Fcharset_property);
2633 DEFSUBR (Fcharset_id);
2634 DEFSUBR (Fset_charset_ccl_program);
2635 DEFSUBR (Fset_charset_registry);
2637 DEFSUBR (Fchar_attribute_alist);
2638 DEFSUBR (Fget_char_attribute);
2639 DEFSUBR (Fput_char_attribute);
2640 DEFSUBR (Fdefine_char);
2641 DEFSUBR (Fchar_variants);
2642 DEFSUBR (Fget_composite_char);
2643 DEFSUBR (Fcharset_mapping_table);
2644 DEFSUBR (Fset_charset_mapping_table);
2648 DEFSUBR (Fdecode_char);
2650 DEFSUBR (Fmake_char);
2651 DEFSUBR (Fchar_charset);
2652 DEFSUBR (Fchar_octet);
2653 DEFSUBR (Fsplit_char);
2655 #ifdef ENABLE_COMPOSITE_CHARS
2656 DEFSUBR (Fmake_composite_char);
2657 DEFSUBR (Fcomposite_char_string);
2660 defsymbol (&Qcharsetp, "charsetp");
2661 defsymbol (&Qregistry, "registry");
2662 defsymbol (&Qfinal, "final");
2663 defsymbol (&Qgraphic, "graphic");
2664 defsymbol (&Qdirection, "direction");
2665 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2666 defsymbol (&Qshort_name, "short-name");
2667 defsymbol (&Qlong_name, "long-name");
2669 defsymbol (&Ql2r, "l2r");
2670 defsymbol (&Qr2l, "r2l");
2672 /* Charsets, compatible with FSF 20.3
2673 Naming convention is Script-Charset[-Edition] */
2674 defsymbol (&Qascii, "ascii");
2675 defsymbol (&Qcontrol_1, "control-1");
2676 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2677 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2678 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2679 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2680 defsymbol (&Qthai_tis620, "thai-tis620");
2681 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2682 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2683 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2684 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2685 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2686 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2687 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2688 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2689 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2690 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2691 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2692 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2693 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2694 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2695 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2697 defsymbol (&Q_ucs, "->ucs");
2698 defsymbol (&Q_decomposition, "->decomposition");
2699 defsymbol (&Qcompat, "compat");
2700 defsymbol (&Qisolated, "isolated");
2701 defsymbol (&Qinitial, "initial");
2702 defsymbol (&Qmedial, "medial");
2703 defsymbol (&Qfinal, "final");
2704 defsymbol (&Qvertical, "vertical");
2705 defsymbol (&QnoBreak, "noBreak");
2706 defsymbol (&Qfraction, "fraction");
2707 defsymbol (&Qsuper, "super");
2708 defsymbol (&Qsub, "sub");
2709 defsymbol (&Qcircle, "circle");
2710 defsymbol (&Qsquare, "square");
2711 defsymbol (&Qwide, "wide");
2712 defsymbol (&Qnarrow, "narrow");
2713 defsymbol (&Qsmall, "small");
2714 defsymbol (&Qfont, "font");
2715 defsymbol (&Qucs, "ucs");
2716 defsymbol (&Qucs_bmp, "ucs-bmp");
2717 defsymbol (&Qlatin_viscii, "latin-viscii");
2718 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2719 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2720 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2721 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2722 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2723 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2724 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2725 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2726 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2727 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2728 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2729 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2730 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2731 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2732 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2733 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2734 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2735 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2736 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2737 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2738 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2739 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2740 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2741 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2742 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2743 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2744 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2746 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2747 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2749 defsymbol (&Qcomposite, "composite");
2753 vars_of_mule_charset (void)
2760 chlook = xnew (struct charset_lookup);
2761 dumpstruct (&chlook, &charset_lookup_description);
2763 /* Table of charsets indexed by leading byte. */
2764 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2765 chlook->charset_by_leading_byte[i] = Qnil;
2768 /* Table of charsets indexed by type/final-byte. */
2769 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2770 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2771 chlook->charset_by_attributes[i][j] = Qnil;
2773 /* Table of charsets indexed by type/final-byte/direction. */
2774 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2775 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2776 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2777 chlook->charset_by_attributes[i][j][k] = Qnil;
2781 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2783 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2784 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2788 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2789 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2790 Leading-code of private TYPE9N charset of column-width 1.
2792 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2796 Vutf_2000_version = build_string("0.14 (Kawachi-Katakami)");
2797 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2798 Version number of UTF-2000.
2801 staticpro (&Vcharacter_attribute_table);
2802 Vcharacter_attribute_table = make_char_code_table (Qnil);
2804 staticpro (&Vcharacter_composition_table);
2805 Vcharacter_composition_table = make_char_code_table (Qnil);
2807 staticpro (&Vcharacter_variant_table);
2808 Vcharacter_variant_table = make_char_code_table (Qnil);
2810 Vdefault_coded_charset_priority_list = Qnil;
2811 DEFVAR_LISP ("default-coded-charset-priority-list",
2812 &Vdefault_coded_charset_priority_list /*
2813 Default order of preferred coded-character-sets.
2819 complex_vars_of_mule_charset (void)
2821 staticpro (&Vcharset_hash_table);
2822 Vcharset_hash_table =
2823 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2825 /* Predefined character sets. We store them into variables for
2829 staticpro (&Vcharset_ucs);
2831 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2832 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2833 build_string ("UCS"),
2834 build_string ("UCS"),
2835 build_string ("ISO/IEC 10646"),
2837 Qnil, 0, 0xFFFFFFF, 0, 0);
2838 staticpro (&Vcharset_ucs_bmp);
2840 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2841 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2842 build_string ("BMP"),
2843 build_string ("BMP"),
2844 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2845 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2846 Qnil, 0, 0xFFFF, 0, 0);
2848 # define MIN_CHAR_THAI 0
2849 # define MAX_CHAR_THAI 0
2850 # define MIN_CHAR_GREEK 0
2851 # define MAX_CHAR_GREEK 0
2852 # define MIN_CHAR_HEBREW 0
2853 # define MAX_CHAR_HEBREW 0
2854 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2855 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2856 # define MIN_CHAR_CYRILLIC 0
2857 # define MAX_CHAR_CYRILLIC 0
2859 staticpro (&Vcharset_ascii);
2861 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2862 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2863 build_string ("ASCII"),
2864 build_string ("ASCII)"),
2865 build_string ("ASCII (ISO646 IRV)"),
2866 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2867 Qnil, 0, 0x7F, 0, 0);
2868 staticpro (&Vcharset_control_1);
2869 Vcharset_control_1 =
2870 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2871 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2872 build_string ("C1"),
2873 build_string ("Control characters"),
2874 build_string ("Control characters 128-191"),
2876 Qnil, 0x80, 0x9F, 0, 0);
2877 staticpro (&Vcharset_latin_iso8859_1);
2878 Vcharset_latin_iso8859_1 =
2879 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2880 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2881 build_string ("Latin-1"),
2882 build_string ("ISO8859-1 (Latin-1)"),
2883 build_string ("ISO8859-1 (Latin-1)"),
2884 build_string ("iso8859-1"),
2885 Qnil, 0xA0, 0xFF, 0, 32);
2886 staticpro (&Vcharset_latin_iso8859_2);
2887 Vcharset_latin_iso8859_2 =
2888 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2889 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2890 build_string ("Latin-2"),
2891 build_string ("ISO8859-2 (Latin-2)"),
2892 build_string ("ISO8859-2 (Latin-2)"),
2893 build_string ("iso8859-2"),
2895 staticpro (&Vcharset_latin_iso8859_3);
2896 Vcharset_latin_iso8859_3 =
2897 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2898 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2899 build_string ("Latin-3"),
2900 build_string ("ISO8859-3 (Latin-3)"),
2901 build_string ("ISO8859-3 (Latin-3)"),
2902 build_string ("iso8859-3"),
2904 staticpro (&Vcharset_latin_iso8859_4);
2905 Vcharset_latin_iso8859_4 =
2906 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2907 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2908 build_string ("Latin-4"),
2909 build_string ("ISO8859-4 (Latin-4)"),
2910 build_string ("ISO8859-4 (Latin-4)"),
2911 build_string ("iso8859-4"),
2913 staticpro (&Vcharset_thai_tis620);
2914 Vcharset_thai_tis620 =
2915 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2916 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2917 build_string ("TIS620"),
2918 build_string ("TIS620 (Thai)"),
2919 build_string ("TIS620.2529 (Thai)"),
2920 build_string ("tis620"),
2921 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2922 staticpro (&Vcharset_greek_iso8859_7);
2923 Vcharset_greek_iso8859_7 =
2924 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2925 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2926 build_string ("ISO8859-7"),
2927 build_string ("ISO8859-7 (Greek)"),
2928 build_string ("ISO8859-7 (Greek)"),
2929 build_string ("iso8859-7"),
2930 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2931 staticpro (&Vcharset_arabic_iso8859_6);
2932 Vcharset_arabic_iso8859_6 =
2933 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2934 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2935 build_string ("ISO8859-6"),
2936 build_string ("ISO8859-6 (Arabic)"),
2937 build_string ("ISO8859-6 (Arabic)"),
2938 build_string ("iso8859-6"),
2940 staticpro (&Vcharset_hebrew_iso8859_8);
2941 Vcharset_hebrew_iso8859_8 =
2942 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2943 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2944 build_string ("ISO8859-8"),
2945 build_string ("ISO8859-8 (Hebrew)"),
2946 build_string ("ISO8859-8 (Hebrew)"),
2947 build_string ("iso8859-8"),
2948 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2949 staticpro (&Vcharset_katakana_jisx0201);
2950 Vcharset_katakana_jisx0201 =
2951 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2952 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2953 build_string ("JISX0201 Kana"),
2954 build_string ("JISX0201.1976 (Japanese Kana)"),
2955 build_string ("JISX0201.1976 Japanese Kana"),
2956 build_string ("jisx0201\\.1976"),
2958 staticpro (&Vcharset_latin_jisx0201);
2959 Vcharset_latin_jisx0201 =
2960 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2961 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2962 build_string ("JISX0201 Roman"),
2963 build_string ("JISX0201.1976 (Japanese Roman)"),
2964 build_string ("JISX0201.1976 Japanese Roman"),
2965 build_string ("jisx0201\\.1976"),
2967 staticpro (&Vcharset_cyrillic_iso8859_5);
2968 Vcharset_cyrillic_iso8859_5 =
2969 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2970 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2971 build_string ("ISO8859-5"),
2972 build_string ("ISO8859-5 (Cyrillic)"),
2973 build_string ("ISO8859-5 (Cyrillic)"),
2974 build_string ("iso8859-5"),
2975 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2976 staticpro (&Vcharset_latin_iso8859_9);
2977 Vcharset_latin_iso8859_9 =
2978 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2979 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2980 build_string ("Latin-5"),
2981 build_string ("ISO8859-9 (Latin-5)"),
2982 build_string ("ISO8859-9 (Latin-5)"),
2983 build_string ("iso8859-9"),
2985 staticpro (&Vcharset_japanese_jisx0208_1978);
2986 Vcharset_japanese_jisx0208_1978 =
2987 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2988 Qjapanese_jisx0208_1978, 94, 2,
2989 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2990 build_string ("JIS X0208:1978"),
2991 build_string ("JIS X0208:1978 (Japanese)"),
2993 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2994 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2996 staticpro (&Vcharset_chinese_gb2312);
2997 Vcharset_chinese_gb2312 =
2998 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2999 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3000 build_string ("GB2312"),
3001 build_string ("GB2312)"),
3002 build_string ("GB2312 Chinese simplified"),
3003 build_string ("gb2312"),
3005 staticpro (&Vcharset_japanese_jisx0208);
3006 Vcharset_japanese_jisx0208 =
3007 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3008 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3009 build_string ("JISX0208"),
3010 build_string ("JIS X0208:1983 (Japanese)"),
3011 build_string ("JIS X0208:1983 Japanese Kanji"),
3012 build_string ("jisx0208\\.1983"),
3015 staticpro (&Vcharset_japanese_jisx0208_1990);
3016 Vcharset_japanese_jisx0208_1990 =
3017 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3018 Qjapanese_jisx0208_1990, 94, 2,
3019 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3020 build_string ("JISX0208-1990"),
3021 build_string ("JIS X0208:1990 (Japanese)"),
3022 build_string ("JIS X0208:1990 Japanese Kanji"),
3023 build_string ("jisx0208\\.1990"),
3025 MIN_CHAR_JIS_X0208_1990,
3026 MAX_CHAR_JIS_X0208_1990, 0, 33);
3028 staticpro (&Vcharset_korean_ksc5601);
3029 Vcharset_korean_ksc5601 =
3030 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3031 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3032 build_string ("KSC5601"),
3033 build_string ("KSC5601 (Korean"),
3034 build_string ("KSC5601 Korean Hangul and Hanja"),
3035 build_string ("ksc5601"),
3037 staticpro (&Vcharset_japanese_jisx0212);
3038 Vcharset_japanese_jisx0212 =
3039 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3040 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3041 build_string ("JISX0212"),
3042 build_string ("JISX0212 (Japanese)"),
3043 build_string ("JISX0212 Japanese Supplement"),
3044 build_string ("jisx0212"),
3047 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3048 staticpro (&Vcharset_chinese_cns11643_1);
3049 Vcharset_chinese_cns11643_1 =
3050 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3051 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3052 build_string ("CNS11643-1"),
3053 build_string ("CNS11643-1 (Chinese traditional)"),
3055 ("CNS 11643 Plane 1 Chinese traditional"),
3056 build_string (CHINESE_CNS_PLANE_RE("1")),
3058 staticpro (&Vcharset_chinese_cns11643_2);
3059 Vcharset_chinese_cns11643_2 =
3060 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3061 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3062 build_string ("CNS11643-2"),
3063 build_string ("CNS11643-2 (Chinese traditional)"),
3065 ("CNS 11643 Plane 2 Chinese traditional"),
3066 build_string (CHINESE_CNS_PLANE_RE("2")),
3069 staticpro (&Vcharset_latin_viscii_lower);
3070 Vcharset_latin_viscii_lower =
3071 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3072 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3073 build_string ("VISCII lower"),
3074 build_string ("VISCII lower (Vietnamese)"),
3075 build_string ("VISCII lower (Vietnamese)"),
3076 build_string ("MULEVISCII-LOWER"),
3078 staticpro (&Vcharset_latin_viscii_upper);
3079 Vcharset_latin_viscii_upper =
3080 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3081 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3082 build_string ("VISCII upper"),
3083 build_string ("VISCII upper (Vietnamese)"),
3084 build_string ("VISCII upper (Vietnamese)"),
3085 build_string ("MULEVISCII-UPPER"),
3087 staticpro (&Vcharset_latin_viscii);
3088 Vcharset_latin_viscii =
3089 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3090 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3091 build_string ("VISCII"),
3092 build_string ("VISCII 1.1 (Vietnamese)"),
3093 build_string ("VISCII 1.1 (Vietnamese)"),
3094 build_string ("VISCII1\\.1"),
3096 staticpro (&Vcharset_ideograph_daikanwa);
3097 Vcharset_ideograph_daikanwa =
3098 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3099 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3100 build_string ("Daikanwa"),
3101 build_string ("Morohashi's Daikanwa"),
3102 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3103 build_string ("Daikanwa"),
3104 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3105 staticpro (&Vcharset_mojikyo_pj_1);
3106 Vcharset_mojikyo_pj_1 =
3107 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3108 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3109 build_string ("Mojikyo-PJ-1"),
3110 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3112 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3113 build_string ("jisx0208\\.Mojikyo-1$"),
3115 staticpro (&Vcharset_mojikyo_pj_2);
3116 Vcharset_mojikyo_pj_2 =
3117 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3118 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3119 build_string ("Mojikyo-PJ-2"),
3120 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3122 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3123 build_string ("jisx0208\\.Mojikyo-2$"),
3125 staticpro (&Vcharset_mojikyo_pj_3);
3126 Vcharset_mojikyo_pj_3 =
3127 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3128 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3129 build_string ("Mojikyo-PJ-3"),
3130 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3132 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3133 build_string ("jisx0208\\.Mojikyo-3$"),
3135 staticpro (&Vcharset_mojikyo_pj_4);
3136 Vcharset_mojikyo_pj_4 =
3137 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3138 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3139 build_string ("Mojikyo-PJ-4"),
3140 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3142 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3143 build_string ("jisx0208\\.Mojikyo-4$"),
3145 staticpro (&Vcharset_mojikyo_pj_5);
3146 Vcharset_mojikyo_pj_5 =
3147 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3148 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3149 build_string ("Mojikyo-PJ-5"),
3150 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3152 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3153 build_string ("jisx0208\\.Mojikyo-5$"),
3155 staticpro (&Vcharset_mojikyo_pj_6);
3156 Vcharset_mojikyo_pj_6 =
3157 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3158 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3159 build_string ("Mojikyo-PJ-6"),
3160 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3162 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3163 build_string ("jisx0208\\.Mojikyo-6$"),
3165 staticpro (&Vcharset_mojikyo_pj_7);
3166 Vcharset_mojikyo_pj_7 =
3167 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3168 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3169 build_string ("Mojikyo-PJ-7"),
3170 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3172 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3173 build_string ("jisx0208\\.Mojikyo-7$"),
3175 staticpro (&Vcharset_mojikyo_pj_8);
3176 Vcharset_mojikyo_pj_8 =
3177 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3178 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3179 build_string ("Mojikyo-PJ-8"),
3180 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3182 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3183 build_string ("jisx0208\\.Mojikyo-8$"),
3185 staticpro (&Vcharset_mojikyo_pj_9);
3186 Vcharset_mojikyo_pj_9 =
3187 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3188 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3189 build_string ("Mojikyo-PJ-9"),
3190 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3192 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3193 build_string ("jisx0208\\.Mojikyo-9$"),
3195 staticpro (&Vcharset_mojikyo_pj_10);
3196 Vcharset_mojikyo_pj_10 =
3197 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3198 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3199 build_string ("Mojikyo-PJ-10"),
3200 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3202 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3203 build_string ("jisx0208\\.Mojikyo-10$"),
3205 staticpro (&Vcharset_mojikyo_pj_11);
3206 Vcharset_mojikyo_pj_11 =
3207 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3208 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3209 build_string ("Mojikyo-PJ-11"),
3210 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3212 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3213 build_string ("jisx0208\\.Mojikyo-11$"),
3215 staticpro (&Vcharset_mojikyo_pj_12);
3216 Vcharset_mojikyo_pj_12 =
3217 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3218 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3219 build_string ("Mojikyo-PJ-12"),
3220 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3222 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3223 build_string ("jisx0208\\.Mojikyo-12$"),
3225 staticpro (&Vcharset_mojikyo_pj_13);
3226 Vcharset_mojikyo_pj_13 =
3227 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3228 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3229 build_string ("Mojikyo-PJ-13"),
3230 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3232 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3233 build_string ("jisx0208\\.Mojikyo-13$"),
3235 staticpro (&Vcharset_mojikyo_pj_14);
3236 Vcharset_mojikyo_pj_14 =
3237 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3238 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3239 build_string ("Mojikyo-PJ-14"),
3240 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3242 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3243 build_string ("jisx0208\\.Mojikyo-14$"),
3245 staticpro (&Vcharset_mojikyo_pj_15);
3246 Vcharset_mojikyo_pj_15 =
3247 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3248 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3249 build_string ("Mojikyo-PJ-15"),
3250 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3252 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3253 build_string ("jisx0208\\.Mojikyo-15$"),
3255 staticpro (&Vcharset_mojikyo_pj_16);
3256 Vcharset_mojikyo_pj_16 =
3257 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3258 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3259 build_string ("Mojikyo-PJ-16"),
3260 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3262 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3263 build_string ("jisx0208\\.Mojikyo-16$"),
3265 staticpro (&Vcharset_mojikyo_pj_17);
3266 Vcharset_mojikyo_pj_17 =
3267 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3268 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3269 build_string ("Mojikyo-PJ-17"),
3270 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3272 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3273 build_string ("jisx0208\\.Mojikyo-17$"),
3275 staticpro (&Vcharset_mojikyo_pj_18);
3276 Vcharset_mojikyo_pj_18 =
3277 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3278 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3279 build_string ("Mojikyo-PJ-18"),
3280 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3282 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3283 build_string ("jisx0208\\.Mojikyo-18$"),
3285 staticpro (&Vcharset_mojikyo_pj_19);
3286 Vcharset_mojikyo_pj_19 =
3287 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3288 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("Mojikyo-PJ-19"),
3290 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3292 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3293 build_string ("jisx0208\\.Mojikyo-19$"),
3295 staticpro (&Vcharset_mojikyo_pj_20);
3296 Vcharset_mojikyo_pj_20 =
3297 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3298 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3299 build_string ("Mojikyo-PJ-20"),
3300 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3302 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3303 build_string ("jisx0208\\.Mojikyo-20$"),
3305 staticpro (&Vcharset_mojikyo_pj_21);
3306 Vcharset_mojikyo_pj_21 =
3307 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3308 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Mojikyo-PJ-21"),
3310 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3312 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3313 build_string ("jisx0208\\.Mojikyo-21$"),
3315 staticpro (&Vcharset_ethiopic_ucs);
3316 Vcharset_ethiopic_ucs =
3317 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3318 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3319 build_string ("Ethiopic (UCS)"),
3320 build_string ("Ethiopic (UCS)"),
3321 build_string ("Ethiopic of UCS"),
3322 build_string ("Ethiopic-Unicode"),
3323 Qnil, 0x1200, 0x137F, 0x1200, 0);
3325 staticpro (&Vcharset_chinese_big5_1);
3326 Vcharset_chinese_big5_1 =
3327 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3328 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3329 build_string ("Big5"),
3330 build_string ("Big5 (Level-1)"),
3332 ("Big5 Level-1 Chinese traditional"),
3333 build_string ("big5"),
3335 staticpro (&Vcharset_chinese_big5_2);
3336 Vcharset_chinese_big5_2 =
3337 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3338 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3339 build_string ("Big5"),
3340 build_string ("Big5 (Level-2)"),
3342 ("Big5 Level-2 Chinese traditional"),
3343 build_string ("big5"),
3346 #ifdef ENABLE_COMPOSITE_CHARS
3347 /* #### For simplicity, we put composite chars into a 96x96 charset.
3348 This is going to lead to problems because you can run out of
3349 room, esp. as we don't yet recycle numbers. */
3350 staticpro (&Vcharset_composite);
3351 Vcharset_composite =
3352 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3353 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3354 build_string ("Composite"),
3355 build_string ("Composite characters"),
3356 build_string ("Composite characters"),
3359 /* #### not dumped properly */
3360 composite_char_row_next = 32;
3361 composite_char_col_next = 32;
3363 Vcomposite_char_string2char_hash_table =
3364 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3365 Vcomposite_char_char2string_hash_table =
3366 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3367 staticpro (&Vcomposite_char_string2char_hash_table);
3368 staticpro (&Vcomposite_char_char2string_hash_table);
3369 #endif /* ENABLE_COMPOSITE_CHARS */